From df2e604e18cd28311930e0137d5377b2abf28dfa Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Mon, 21 Apr 2025 21:23:36 +0900 Subject: [PATCH 1/3] Scancode result cleanup:SPDX-license-identifier Signed-off-by: Wonjae Park --- .../_parsing_scancode_file_item.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/fosslight_source/_parsing_scancode_file_item.py b/src/fosslight_source/_parsing_scancode_file_item.py index 250762b..674848f 100755 --- a/src/fosslight_source/_parsing_scancode_file_item.py +++ b/src/fosslight_source/_parsing_scancode_file_item.py @@ -21,11 +21,12 @@ os.path.sep for dir_name in _exclude_directory] _exclude_directory.append("/.") REMOVE_LICENSE = ["warranty-disclaimer"] -regex = re.compile(r'licenseref-(\S+)', re.IGNORECASE) +regex = re.compile(r'(?:licenseref-|SPDX-license-identifier-)([^",\s]+)', re.IGNORECASE) find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE) KEYWORD_SPDX_ID = r'SPDX-License-Identifier\s*[\S]+' KEYWORD_DOWNLOAD_LOC = r'DownloadLocation\s*[\S]+' KEYWORD_SCANCODE_UNKNOWN = "unknown-spdx" +KEYWORD_SCANCODE_PROPRIETARY_LICENSE = "proprietary-license" SPDX_REPLACE_WORDS = ["(", ")"] KEY_AND = r"(?<=\s)and(?=\s)" KEY_OR = r"(?<=\s)or(?=\s)" @@ -132,12 +133,12 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals license_value = spdx.lower() if license_value != "": - if key == KEYWORD_SCANCODE_UNKNOWN: + if key == KEYWORD_SCANCODE_UNKNOWN or key == KEYWORD_SCANCODE_PROPRIETARY_LICENSE: try: matched_txt = lic_item.get("matched_text", "").lower() matched = regex.search(matched_txt) if matched: - license_value = str(matched.group()) + license_value = str(matched.group(1)) except Exception: pass @@ -229,11 +230,14 @@ def parsing_scancode_32_later( licenses = file.get("license_detections", []) if not licenses: continue + print("file path:", file.get('path', '')) for lic in licenses: matched_lic_list = lic.get("matches", []) for 
matched_lic in matched_lic_list: found_lic_list = matched_lic.get("license_expression", "") + print("found_lic_list:", found_lic_list) matched_txt = matched_lic.get("matched_text", "") + print("matched_txt:", matched_txt) if found_lic_list: found_lic_list = found_lic_list.lower() for found_lic in split_spdx_expression(found_lic_list): @@ -241,11 +245,11 @@ def parsing_scancode_32_later( found_lic = found_lic.strip() if found_lic in REMOVE_LICENSE: continue - elif found_lic == KEYWORD_SCANCODE_UNKNOWN: + elif found_lic == KEYWORD_SCANCODE_UNKNOWN or found_lic == KEYWORD_SCANCODE_PROPRIETARY_LICENSE: try: matched = regex.search(matched_txt.lower()) if matched: - found_lic = str(matched.group()) + found_lic = str(matched.group(1)) except Exception: pass for word in replace_word: From 5ec2fb0f8474a02eba46d128a041670702ea1add Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Thu, 24 Apr 2025 14:25:14 +0900 Subject: [PATCH 2/3] Scancode result cleanup:unknown-license-reference Signed-off-by: Wonjae Park --- src/fosslight_source/_parsing_scancode_file_item.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/fosslight_source/_parsing_scancode_file_item.py b/src/fosslight_source/_parsing_scancode_file_item.py index 674848f..83e6563 100755 --- a/src/fosslight_source/_parsing_scancode_file_item.py +++ b/src/fosslight_source/_parsing_scancode_file_item.py @@ -27,11 +27,14 @@ KEYWORD_DOWNLOAD_LOC = r'DownloadLocation\s*[\S]+' KEYWORD_SCANCODE_UNKNOWN = "unknown-spdx" KEYWORD_SCANCODE_PROPRIETARY_LICENSE = "proprietary-license" +KEYWORD_UNKNOWN_LICENSE_REFERENCE = "unknown-license-reference" +KEYWORD_LGE_PROPRIETARY = "lge-proprietary" SPDX_REPLACE_WORDS = ["(", ")"] KEY_AND = r"(?<=\s)and(?=\s)" KEY_OR = r"(?<=\s)or(?=\s)" + def get_error_from_header(header_item: list) -> Tuple[bool, str]: has_error = False str_error = "" @@ -163,6 +166,9 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals result_item.is_license_text = 
matched_rule.get("is_license_text", False) if len(license_detected) > 0: + # unknown-license-reference를 제거하고 lge-proprietary만 남김 + if KEYWORD_UNKNOWN_LICENSE_REFERENCE.lower() in [x.lower() for x in license_detected] and KEYWORD_LGE_PROPRIETARY.lower() in [x.lower() for x in license_detected]: + license_detected.remove(KEYWORD_UNKNOWN_LICENSE_REFERENCE) result_item.licenses = license_detected if len(license_expression_list) > 0: @@ -262,6 +268,11 @@ def parsing_scancode_32_later( lic_info = MatchedLicense(found_lic, "", matched_txt, file_path) license_list[lic_matched_key] = lic_info license_detected.append(found_lic) + + # unknown-license-reference를 제거하고 lge-proprietary만 남김 + if KEYWORD_UNKNOWN_LICENSE_REFERENCE.lower() in [x.lower() for x in license_detected] and KEYWORD_LGE_PROPRIETARY.lower() in [x.lower() for x in license_detected]: + license_detected.remove(KEYWORD_UNKNOWN_LICENSE_REFERENCE) + result_item.licenses = license_detected if len(license_detected) > 1: license_expression_spdx = file.get("detected_license_expression_spdx", "") From 8a5a2a9df3b0577d00213c29043e41903698ad22 Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Thu, 24 Apr 2025 16:06:38 +0900 Subject: [PATCH 3/3] Remove unnecessary changes and fix flake8 Signed-off-by: Wonjae Park --- .../_parsing_scancode_file_item.py | 29 ++++++++++++------- tox.ini | 6 ++-- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/fosslight_source/_parsing_scancode_file_item.py b/src/fosslight_source/_parsing_scancode_file_item.py index 83e6563..1012f43 100755 --- a/src/fosslight_source/_parsing_scancode_file_item.py +++ b/src/fosslight_source/_parsing_scancode_file_item.py @@ -34,7 +34,6 @@ KEY_OR = r"(?<=\s)or(?=\s)" - def get_error_from_header(header_item: list) -> Tuple[bool, str]: has_error = False str_error = "" @@ -166,8 +165,12 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals result_item.is_license_text = matched_rule.get("is_license_text", False) if 
len(license_detected) > 0: - # unknown-license-reference를 제거하고 lge-proprietary만 남김 - if KEYWORD_UNKNOWN_LICENSE_REFERENCE.lower() in [x.lower() for x in license_detected] and KEYWORD_LGE_PROPRIETARY.lower() in [x.lower() for x in license_detected]: + # Remove unknown-license-reference and leave only lge-proprietary + license_lower = [x.lower() for x in license_detected] + if ( + KEYWORD_UNKNOWN_LICENSE_REFERENCE.lower() in license_lower + and KEYWORD_LGE_PROPRIETARY.lower() in license_lower + ): license_detected.remove(KEYWORD_UNKNOWN_LICENSE_REFERENCE) result_item.licenses = license_detected @@ -236,14 +239,11 @@ def parsing_scancode_32_later( licenses = file.get("license_detections", []) if not licenses: continue - print("file path:", file.get('path', '')) for lic in licenses: matched_lic_list = lic.get("matches", []) for matched_lic in matched_lic_list: found_lic_list = matched_lic.get("license_expression", "") - print("found_lic_list:", found_lic_list) matched_txt = matched_lic.get("matched_text", "") - print("matched_txt:", matched_txt) if found_lic_list: found_lic_list = found_lic_list.lower() for found_lic in split_spdx_expression(found_lic_list): @@ -251,7 +251,10 @@ def parsing_scancode_32_later( found_lic = found_lic.strip() if found_lic in REMOVE_LICENSE: continue - elif found_lic == KEYWORD_SCANCODE_UNKNOWN or found_lic == KEYWORD_SCANCODE_PROPRIETARY_LICENSE: + elif ( + found_lic == KEYWORD_SCANCODE_UNKNOWN + or found_lic == KEYWORD_SCANCODE_PROPRIETARY_LICENSE + ): try: matched = regex.search(matched_txt.lower()) if matched: @@ -268,11 +271,15 @@ def parsing_scancode_32_later( lic_info = MatchedLicense(found_lic, "", matched_txt, file_path) license_list[lic_matched_key] = lic_info license_detected.append(found_lic) - - # unknown-license-reference를 제거하고 lge-proprietary만 남김 - if KEYWORD_UNKNOWN_LICENSE_REFERENCE.lower() in [x.lower() for x in license_detected] and KEYWORD_LGE_PROPRIETARY.lower() in [x.lower() for x in license_detected]: + + # Remove 
unknown-license-reference and leave only lge-proprietary + license_lower = [x.lower() for x in license_detected] + if ( + KEYWORD_UNKNOWN_LICENSE_REFERENCE.lower() in license_lower + and KEYWORD_LGE_PROPRIETARY.lower() in license_lower + ): license_detected.remove(KEYWORD_UNKNOWN_LICENSE_REFERENCE) - + result_item.licenses = license_detected if len(license_detected) > 1: license_expression_spdx = file.get("detected_license_expression_spdx", "") diff --git a/tox.ini b/tox.ini index bf0dc5a..10659b5 100644 --- a/tox.ini +++ b/tox.ini @@ -41,6 +41,8 @@ commands = pytest -v --flake8 [testenv:flake8] +basepython = python3.11 deps = flake8 -commands = -pytest tests/test_tox.py::test_flake8 \ No newline at end of file +changedir = {toxinidir} +commands = + flake8 src/fosslight_source tests \ No newline at end of file