# Deprecated SPDX license identifiers mapped to their current replacements.
# Defined once at module level so the table is not rebuilt on every call.
_DEPRECATED_SPDX_IDENTIFIERS = {
    "AGPL-1.0": "AGPL-1.0-only",
    "AGPL-3.0": "AGPL-3.0-only",
    "GFDL-1.1": "GFDL-1.1-only",
    "GFDL-1.2": "GFDL-1.2-only",
    "GFDL-1.3": "GFDL-1.3-only",
    "GPL-1.0": "GPL-1.0-only",
    "GPL-1.0+": "GPL-1.0-or-later",
    "GPL-2.0": "GPL-2.0-only",
    "GPL-2.0+": "GPL-2.0-or-later",
    "GPL-3.0": "GPL-3.0-only",
    "GPL-3.0+": "GPL-3.0-or-later",
    "LGPL-2.0": "LGPL-2.0-only",
    "LGPL-2.0+": "LGPL-2.0-or-later",
    "LGPL-2.1": "LGPL-2.1-only",
    "LGPL-2.1+": "LGPL-2.1-or-later",
    "LGPL-3.0": "LGPL-3.0-only",
    "LGPL-3.0+": "LGPL-3.0-or-later",
}


def normalize_spdx_identifier(license_key):
    """
    Return the current SPDX identifier for the provided `license_key`.

    Deprecated identifiers of the GNU license families, such as "GPL-2.0" or
    "GPL-2.0+", are replaced by their explicit "-only" and "-or-later" forms.
    The lookup is an exact, case-sensitive match on a single identifier: full
    license expressions are not parsed here.
    Any other value, including an empty string or `None`, is returned
    unchanged.
    """
    return _DEPRECATED_SPDX_IDENTIFIERS.get(license_key, license_key)
from django.test import TestCase

from scanpipe.pipes.scancode import normalize_spdx_identifier


class TestLicenseImprovements(TestCase):
    """Check the values returned by `normalize_spdx_identifier`."""

    def test_normalize_gpl_2(self):
        """The "GPL-2.0" identifier is returned as "GPL-2.0-only"."""
        self.assertEqual(normalize_spdx_identifier("GPL-2.0"), "GPL-2.0-only")

    def test_normalize_gpl_3(self):
        """The "GPL-3.0" identifier is returned as "GPL-3.0-only"."""
        self.assertEqual(normalize_spdx_identifier("GPL-3.0"), "GPL-3.0-only")

    def test_normalize_lgpl_21(self):
        """The "LGPL-2.1" identifier is returned as "LGPL-2.1-only"."""
        self.assertEqual(normalize_spdx_identifier("LGPL-2.1"), "LGPL-2.1-only")

    def test_normalize_pass_through(self):
        """An identifier without a mapping, here "MIT", is returned as-is."""
        self.assertEqual(normalize_spdx_identifier("MIT"), "MIT")