From 1b9b3cfc68199a473c16681d3515b6fbc1e63d9c Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Mon, 1 Jun 2026 23:51:31 +0530 Subject: [PATCH 01/21] Mark advisories as unfurled Signed-off-by: Tushar Goel --- CHANGELOG.rst | 5 +- vulnerabilities/api_v3.py | 227 ++++++++++++------ vulnerabilities/improvers/__init__.py | 14 +- ...isory_id_alter_advisoryv2_avid_and_more.py | 48 ++++ .../0134_alter_advisoryset_unique_together.py | 17 ++ .../0135_advisoryv2__all_impacts_unfurled.py | 21 ++ ...dpackage_vulnerabili_advisor_1e3414_idx.py | 19 ++ vulnerabilities/models.py | 129 +++++++--- .../v2_improvers/collect_ssvc_trees.py | 126 ++++++++-- .../v2_improvers/compute_package_risk.py | 13 +- .../group_advisories_for_packages.py | 34 +-- .../v2_improvers/mark_all_impacts_unfurled.py | 96 ++++++++ .../v2_improvers/unfurl_version_range.py | 13 +- vulnerabilities/pipes/advisory.py | 17 ++ vulnerabilities/pipes/export.py | 8 +- vulnerabilities/pipes/group_advisories.py | 158 ++++++++++-- vulnerabilities/pipes/risk_score.py | 133 ++++++++++ vulnerabilities/risk.py | 18 -- .../test_compute_advisory_todo_v2.py | 20 +- .../test_mark_all_impacts_unfurled.py | 171 +++++++++++++ vulnerabilities/tests/test_advisory_merge.py | 26 +- vulnerabilities/tests/test_api_v3.py | 6 +- vulnerabilities/tests/test_risk.py | 5 - vulnerabilities/utils.py | 78 +++--- vulnerabilities/views.py | 70 +++--- 25 files changed, 1116 insertions(+), 356 deletions(-) create mode 100644 vulnerabilities/migrations/0133_alter_advisoryv2_advisory_id_alter_advisoryv2_avid_and_more.py create mode 100644 vulnerabilities/migrations/0134_alter_advisoryset_unique_together.py create mode 100644 vulnerabilities/migrations/0135_advisoryv2__all_impacts_unfurled.py create mode 100644 vulnerabilities/migrations/0136_impactedpackage_vulnerabili_advisor_1e3414_idx.py create mode 100644 vulnerabilities/pipelines/v2_improvers/mark_all_impacts_unfurled.py create mode 100644 vulnerabilities/pipes/risk_score.py create mode 100644 
vulnerabilities/tests/pipelines/v2_improvers/test_mark_all_impacts_unfurled.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f701c0802..ea825421d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,7 +5,10 @@ next release --------------------- - WARNING: Vulnerablecode V1 API and UI has stopped supporting Ubuntu OVAL advisories, please shift to V3 API for new Ubuntu advisories. -- Add attribute ``pipeline_id`` to AdvisoryV2 to track the pipeline that created the advisory, also rename existing ``datasource_id`` and AVIDs. +- WARNING: We will deprecate the improver pipelines for calculating package version rank and grouping advisories for packages in the next release. This work is now done at advisory import time instead of in separate pipelines, which improves the performance and consistency of the data. +- Calculate package version rank, group advisories for packages and package risk score and advisory risk score during import of advisories. +- Add attribute ``pipeline_id`` to AdvisoryV2 to track the pipeline that created the advisory, also rename existing ``datasource_id`` and AVIDs. 
+ Version v38.6.0 --------------------- diff --git a/vulnerabilities/api_v3.py b/vulnerabilities/api_v3.py index 634476ddf..00b5d170c 100644 --- a/vulnerabilities/api_v3.py +++ b/vulnerabilities/api_v3.py @@ -8,12 +8,9 @@ # from collections import defaultdict -from typing import List from urllib.parse import urlencode -from django.db.models import Exists from django.db.models import Max -from django.db.models import OuterRef from django.db.models import Prefetch from django_filters import rest_framework as filters from drf_spectacular.utils import extend_schema @@ -23,6 +20,7 @@ from rest_framework.reverse import reverse from rest_framework.throttling import AnonRateThrottle +from vulnerabilities.models import SSVC from vulnerabilities.models import AdvisoryAlias from vulnerabilities.models import AdvisoryReference from vulnerabilities.models import AdvisorySet @@ -30,14 +28,11 @@ from vulnerabilities.models import AdvisorySeverity from vulnerabilities.models import AdvisoryV2 from vulnerabilities.models import AdvisoryWeakness -from vulnerabilities.models import Group from vulnerabilities.models import GroupedAdvisory from vulnerabilities.models import ImpactedPackageAffecting from vulnerabilities.models import PackageV2 from vulnerabilities.throttling import PermissionBasedUserRateThrottle from vulnerabilities.utils import TYPES_WITH_MULTIPLE_IMPORTERS -from vulnerabilities.utils import get_advisories_from_groups -from vulnerabilities.utils import merge_and_save_grouped_advisories class PackageQuerySerializer(serializers.Serializer): @@ -48,6 +43,7 @@ class PackageQuerySerializer(serializers.Serializer): ) details = serializers.BooleanField(default=False) ignore_qualifiers_subpath = serializers.BooleanField(default=False) + max_advisories = serializers.IntegerField(default=100, min_value=1, max_value=10000) def validate(self, data): if not data["purls"]: @@ -227,29 +223,54 @@ def get_affected_by_vulnerabilities(self, package): result = [] for adv in advisories: - 
fixed = impact_map.get(adv["avid"]) - adv.pop("avid", None) + fixed = impact_map.get(adv["advisory_uid"]) or [] + resource_url = None + + if request := self.context.get("request", None): + resource_url = adv.get("resource_url") or None + resource_url = request.build_absolute_uri(location=resource_url) result.append( { - **adv, + "advisory_id": adv["advisory_id"], + "advisory_uid": adv["advisory_uid"], + "aliases": adv["aliases"], + "summary": adv["summary"], + "weighted_severity": adv["weighted_severity"], + "exploitability": adv["exploitability"], + "risk_score": adv["risk_score"], + "ssvc_trees": adv["ssvc_trees"], "fixed_by_packages": fixed, + "resource_url": resource_url, } ) return result - advisories_qs = AdvisoryV2.objects.latest_affecting_advisories_for_purl(package.package_url) + if package.type not in TYPES_WITH_MULTIPLE_IMPORTERS: + advisories_qs = AdvisoryV2.objects.latest_affecting_advisories_for_purl( + package.package_url + ) - advisories = [] + advisories = [] - if package.type not in TYPES_WITH_MULTIPLE_IMPORTERS: advisories_ids = advisories_qs.only("id") advisories_ids = list(advisories_ids[:101]) - if len(advisories_ids) > 100: + if len(advisories_ids) > self.context.get("max_advisories", 100): return None + advisories_qs = advisories_qs.prefetch_related( + "aliases", + Prefetch( + "related_ssvcs", + queryset=SSVC.objects.select_related("source_advisory") + .only("id", "decision", "options", "vector", "source_advisory__url") + .distinct("source_advisory__url"), + to_attr="prefetched_ssvc_trees", + ), + ) + advisory_by_avid = {adv.avid: adv for adv in advisories_qs} avids = advisory_by_avid.keys() @@ -265,8 +286,14 @@ def get_affected_by_vulnerabilities(self, package): for advisory in advisories_qs: impact = impact_by_avid.get(advisory.avid) - if not impact: - continue + fixed_by_packages = [] + if impact: + fixed_by_packages = [pkg.purl for pkg in impact.fixed_by_packages.all()] + + resource_url = None + + if request := 
self.context.get("request", None): + resource_url = request.build_absolute_uri(location=advisory.get_absolute_url()) result.append( { @@ -274,73 +301,78 @@ def get_affected_by_vulnerabilities(self, package): "advisory_uid": advisory.avid, "aliases": [alias.alias for alias in advisory.aliases.all()], "summary": advisory.summary, - "severity": advisory.weighted_severity, + "weighted_severity": advisory.weighted_severity, "exploitability": advisory.exploitability, "risk_score": advisory.risk_score, - "fixed_by_packages": [pkg.purl for pkg in impact.fixed_by_packages.all()], + "fixed_by_packages": fixed_by_packages, + "resource_url": resource_url, + "ssvc_trees": [ + { + "vector": ssvc.vector, + "decision": ssvc.decision, + "options": ssvc.options, + "source_url": ssvc.source_advisory.url, + } + for ssvc in advisory.prefetched_ssvc_trees + ], } ) return result - if not advisories: - if package.type in TYPES_WITH_MULTIPLE_IMPORTERS: - advisories_qs = advisories_qs.prefetch_related( - "aliases", - "impacted_packages__affecting_packages", - "impacted_packages__fixed_by_packages", - ) - advisories: List[GroupedAdvisory] = merge_and_save_grouped_advisories( - package, advisories_qs, "affecting" - ) - return self.return_advisories_data(package, advisories_qs, advisories) - def get_fixing_vulnerabilities(self, package): advisories = self.context["fixing_advisory_map"].get(package.id, []) - if advisories: - return advisories + results = [] + resource_url = None + for advisory in advisories: + if request := self.context.get("request", None): + resource_url = request.build_absolute_uri(location=advisory["resource_url"]) + results.append( + { + "advisory_id": advisory["advisory_id"], + "resource_url": resource_url, + "advisory_uid": advisory["advisory_uid"], + } + ) + if results: + return results - advisories_qs = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl(package.package_url) - if not package.type in TYPES_WITH_MULTIPLE_IMPORTERS: + if package.type not in 
TYPES_WITH_MULTIPLE_IMPORTERS: + advisories_qs = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl(package.package_url) advisories_ids = advisories_qs.only("id") advisories_ids = list(advisories_ids[:101]) - if len(advisories_ids) > 100: + if len(advisories_ids) > self.context.get("max_advisories", 100): return None - results = [] - for advisory in advisories_qs: + resource_url = None + if request := self.context.get("request", None): + resource_url = request.build_absolute_uri(location=advisory.get_absolute_url()) results.append( { "advisory_id": advisory.advisory_id.split("/")[-1], "advisory_uid": advisory.avid, + "resource_url": resource_url, } ) - return results - - if package.type in TYPES_WITH_MULTIPLE_IMPORTERS: - advisories_qs = advisories_qs.prefetch_related( - "aliases", - "impacted_packages__affecting_packages", - "impacted_packages__fixed_by_packages", - ) - if not advisories_qs.exists(): - return [] - advisories: List[GroupedAdvisory] = merge_and_save_grouped_advisories( - package, advisories_qs, "fixing" - ) - return self.return_fixing_advisories_data(advisories) + return results def return_fixing_advisories_data(self, advisories): result = [] for advisory in advisories: assert isinstance(advisory, GroupedAdvisory) + resource_url = None + if request := self.context.get("request", None): + resource_url = request.build_absolute_uri( + location=advisory.advisory.get_absolute_url() + ) result.append( { "advisory_id": advisory.identifier, "advisory_uid": advisory.advisory.avid, + "resource_url": resource_url, } ) @@ -361,22 +393,28 @@ def return_advisories_data(self, package, advisories_qs, advisories): result = [] for advisory in advisories: assert isinstance(advisory, GroupedAdvisory) + resource_url = None + fixed_by_packages = [] + if request := self.context.get("request", None): + resource_url = request.build_absolute_uri( + location=advisory.advisory.get_absolute_url() + ) impact = impact_by_avid.get(advisory.advisory.avid) - if not impact: - 
continue + if impact: + fixed_by_packages = list(set([pkg.purl for pkg in impact.fixed_by_packages.all()])) result.append( { "advisory_id": advisory.identifier, "advisory_uid": advisory.advisory.avid, "aliases": [alias.alias for alias in advisory.aliases], + "summary": advisory.advisory.summary, "weighted_severity": advisory.weighted_severity, "exploitability": advisory.exploitability, "risk_score": advisory.risk_score, - "summary": advisory.advisory.summary, - "fixed_by_packages": list( - set([pkg.purl for pkg in impact.fixed_by_packages.all()]) - ), + "fixed_by_packages": fixed_by_packages, + "resource_url": resource_url, + "ssvc_trees": advisory.ssvc_trees, } ) @@ -405,16 +443,10 @@ def create(self, request, *args, **kwargs): purls = serializer.validated_data["purls"] details = serializer.validated_data["details"] ignore_qualifiers_subpath = serializer.validated_data["ignore_qualifiers_subpath"] + max_advisories = serializer.validated_data["max_advisories"] if not purls: - impacted = ImpactedPackageAffecting.objects.filter(package_id=OuterRef("id")) - - query = ( - PackageV2.objects.annotate(has_vuln=Exists(impacted)) - .filter(has_vuln=True) - .values_list("package_url", flat=True) - .order_by("package_url") - ) + query = PackageV2.objects.all_vulnerable_purls().order_by("package_url") page = self.paginate_queryset(query) return self.get_paginated_response(page) @@ -436,13 +468,13 @@ def create(self, request, *args, **kwargs): if not details: if ignore_qualifiers_subpath: query = ( - PackageV2.objects.filter(plain_package_url__in=plain_purls) + PackageV2.objects.filter_plain_purls(plain_purls) .values_list("plain_package_url", flat=True) .order_by("plain_package_url") ) else: query = ( - PackageV2.objects.filter(package_url__in=purls) + PackageV2.objects.filter_purls(purls) .order_by("package_url") .values_list("package_url", flat=True) ) @@ -451,11 +483,9 @@ def create(self, request, *args, **kwargs): return self.get_paginated_response(page) if 
ignore_qualifiers_subpath: - query = PackageV2.objects.filter(plain_package_url__in=plain_purls).order_by( - "plain_package_url" - ) + query = PackageV2.objects.filter_plain_purls(plain_purls).order_by("plain_package_url") else: - query = PackageV2.objects.filter(package_url__in=purls).order_by("package_url") + query = PackageV2.objects.filter_purls(purls).order_by("package_url") page = self.paginate_queryset(query) affected_advisory_map = get_affected_advisories_bulk(page) @@ -469,6 +499,7 @@ def create(self, request, *args, **kwargs): "advisory_map": affected_advisory_map, "impact_map": impact_map, "fixing_advisory_map": fixing_advisory_map, + "max_advisories": max_advisories, }, ) return self.get_paginated_response(serializer.data) @@ -583,7 +614,27 @@ def get_affected_advisories_bulk(packages): relation_type="affecting", ) .select_related("primary_advisory") - .prefetch_related(Prefetch("aliases", queryset=AdvisoryAlias.objects.only("alias"))) + .prefetch_related( + Prefetch("aliases", queryset=AdvisoryAlias.objects.only("alias")), + Prefetch( + "members", + queryset=AdvisorySetMember.objects.select_related("advisory").prefetch_related( + Prefetch( + "advisory__related_ssvcs", + queryset=SSVC.objects.select_related("source_advisory") + .only( + "id", + "options", + "decision", + "vector", + "source_advisory__url", + ) + .distinct("source_advisory__url"), + to_attr="prefetched_ssvc_trees", + ) + ), + ), + ) .annotate( max_severity=Max( "members__advisory__weighted_severity", @@ -627,6 +678,22 @@ def get_affected_advisories_bulk(packages): identifier = primary.advisory_id.split("/")[-1] aliases = [a for a in adv._aliases_cache if a != identifier] + all_ssvc = [] + + for member in adv.members.all(): + all_ssvc.extend(member.advisory.prefetched_ssvc_trees) + + ssvcs = [] + + for ssvc in all_ssvc: + ssvcs.append( + { + "vector": ssvc.vector, + "decision": ssvc.decision, + "options": ssvc.options, + "source_url": ssvc.source_advisory.url, + } + ) grouped.append( { @@ 
-637,6 +704,8 @@ def get_affected_advisories_bulk(packages): "exploitability": exploitability, "risk_score": risk_score, "summary": primary.summary, + "resource_url": primary.get_absolute_url(), + "ssvc_trees": ssvcs, } ) @@ -649,7 +718,11 @@ def get_impacts_bulk(packages): package_ids = [p.id for p in packages] impacts = ( - ImpactedPackageAffecting.objects.filter(package_id__in=package_ids) + ImpactedPackageAffecting.objects.filter( + package_id__in=package_ids, + impacted_package__advisory__is_latest=True, + impacted_package__advisory___all_impacts_unfurled=True, + ) .select_related("impacted_package__advisory") .prefetch_related( Prefetch( @@ -697,7 +770,7 @@ def get_fixing_advisories_bulk(packages): package_map = defaultdict(list) for adv in advisory_sets: - package_map[adv.package_id].append(adv.primary_advisory.advisory_id) + package_map[adv.package_id].append(adv.primary_advisory) result = {} @@ -705,9 +778,13 @@ def get_fixing_advisories_bulk(packages): groups = package_map.get(package.id, []) grouped = [] - for adv_id in groups: + for advisory in groups: grouped.append( - {"advisory_id": adv_id.split("/")[-1], "advisory_uid": adv_id.split("/")[-1]} + { + "advisory_id": advisory.advisory_id.split("/")[-1], + "resource_url": advisory.get_absolute_url(), + "advisory_uid": advisory.avid, + } ) result[package.id] = grouped diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index 5b47a7cf1..db5ee6af9 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -9,7 +9,6 @@ from vulnerabilities.improvers import valid_versions from vulnerabilities.improvers import vulnerability_status -from vulnerabilities.pipelines import add_cvss31_to_CVEs from vulnerabilities.pipelines import compute_package_risk from vulnerabilities.pipelines import compute_package_version_rank from vulnerabilities.pipelines import enhance_with_exploitdb @@ -17,14 +16,10 @@ from vulnerabilities.pipelines import 
enhance_with_metasploit from vulnerabilities.pipelines import flag_ghost_packages from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline -from vulnerabilities.pipelines import remove_duplicate_advisories from vulnerabilities.pipelines.v2_improvers import archive_urls from vulnerabilities.pipelines.v2_improvers import collect_ssvc_trees from vulnerabilities.pipelines.v2_improvers import compute_advisory_todo as compute_advisory_todo_v2 from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2 -from vulnerabilities.pipelines.v2_improvers import ( - computer_package_version_rank as compute_version_rank_v2, -) from vulnerabilities.pipelines.v2_improvers import enhance_with_exploitdb as exploitdb_v2 from vulnerabilities.pipelines.v2_improvers import enhance_with_github_poc from vulnerabilities.pipelines.v2_improvers import enhance_with_kev as enhance_with_kev_v2 @@ -32,7 +27,9 @@ enhance_with_metasploit as enhance_with_metasploit_v2, ) from vulnerabilities.pipelines.v2_improvers import flag_ghost_packages as flag_ghost_packages_v2 -from vulnerabilities.pipelines.v2_improvers import group_advisories_for_packages +from vulnerabilities.pipelines.v2_improvers import ( + mark_all_impacts_unfurled as mark_all_impacts_unfurled_v2, +) from vulnerabilities.pipelines.v2_improvers import reference_collect_commits from vulnerabilities.pipelines.v2_improvers import relate_severities from vulnerabilities.pipelines.v2_improvers import unfurl_version_range as unfurl_version_range_v2 @@ -62,22 +59,19 @@ enhance_with_exploitdb.ExploitDBImproverPipeline, compute_package_risk.ComputePackageRiskPipeline, compute_package_version_rank.ComputeVersionRankPipeline, - add_cvss31_to_CVEs.CVEAdvisoryMappingPipeline, - remove_duplicate_advisories.RemoveDuplicateAdvisoriesPipeline, populate_vulnerability_summary_pipeline.PopulateVulnerabilitySummariesPipeline, exploitdb_v2.ExploitDBImproverPipeline, 
enhance_with_kev_v2.VulnerabilityKevPipeline, flag_ghost_packages_v2.FlagGhostPackagePipeline, enhance_with_metasploit_v2.MetasploitImproverPipeline, compute_package_risk_v2.ComputePackageRiskPipeline, - compute_version_rank_v2.ComputeVersionRankPipeline, unfurl_version_range_v2.UnfurlVersionRangePipeline, collect_ssvc_trees.CollectSSVCPipeline, relate_severities.RelateSeveritiesPipeline, archive_urls.ArchiveImproverPipeline, - group_advisories_for_packages.GroupAdvisoriesForPackages, compute_advisory_todo_v2.ComputeToDo, reference_collect_commits.CollectReferencesFixCommitsPipeline, enhance_with_github_poc.GithubPocsImproverPipeline, + mark_all_impacts_unfurled_v2.MarkAllImpactsUnfurledPipeline, ] ) diff --git a/vulnerabilities/migrations/0133_alter_advisoryv2_advisory_id_alter_advisoryv2_avid_and_more.py b/vulnerabilities/migrations/0133_alter_advisoryv2_advisory_id_alter_advisoryv2_avid_and_more.py new file mode 100644 index 000000000..8f45487b2 --- /dev/null +++ b/vulnerabilities/migrations/0133_alter_advisoryv2_advisory_id_alter_advisoryv2_avid_and_more.py @@ -0,0 +1,48 @@ +# Generated by Django 5.2.11 on 2026-05-26 08:07 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0132_migrate_advisoryv2_datasource_ids"), + ] + + operations = [ + migrations.AlterField( + model_name="advisoryv2", + name="advisory_id", + field=models.CharField( + db_index=True, + help_text="An advisory is a unique vulnerability identifier in some database, such as PYSEC-2020-2233", + max_length=200, + ), + ), + migrations.AlterField( + model_name="advisoryv2", + name="avid", + field=models.CharField( + help_text="Unique ID for the datasource used for this advisory .e.g.: pysec_importer_v2/PYSEC-2020-2233", + max_length=250, + ), + ), + migrations.AlterField( + model_name="advisoryv2", + name="datasource_id", + field=models.CharField( + db_index=True, + help_text="Unique ID for the datasource used for this 
advisory .e.g.: nginx", + max_length=50, + ), + ), + migrations.AlterField( + model_name="advisoryv2", + name="pipeline_id", + field=models.CharField( + db_index=True, + help_text="Unique ID for the pipeline used for this advisory .e.g.: nginx_importer_v2", + max_length=50, + ), + ), + ] diff --git a/vulnerabilities/migrations/0134_alter_advisoryset_unique_together.py b/vulnerabilities/migrations/0134_alter_advisoryset_unique_together.py new file mode 100644 index 000000000..7b0c2a4be --- /dev/null +++ b/vulnerabilities/migrations/0134_alter_advisoryset_unique_together.py @@ -0,0 +1,17 @@ +# Generated by Django 5.2.11 on 2026-05-28 13:58 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0133_alter_advisoryv2_advisory_id_alter_advisoryv2_avid_and_more"), + ] + + operations = [ + migrations.AlterUniqueTogether( + name="advisoryset", + unique_together={("package", "relation_type", "primary_advisory")}, + ), + ] diff --git a/vulnerabilities/migrations/0135_advisoryv2__all_impacts_unfurled.py b/vulnerabilities/migrations/0135_advisoryv2__all_impacts_unfurled.py new file mode 100644 index 000000000..bcea170de --- /dev/null +++ b/vulnerabilities/migrations/0135_advisoryv2__all_impacts_unfurled.py @@ -0,0 +1,21 @@ +# Generated by Django 5.2.11 on 2026-06-01 10:56 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0134_alter_advisoryset_unique_together"), + ] + + operations = [ + migrations.AddField( + model_name="advisoryv2", + name="_all_impacts_unfurled", + field=models.BooleanField( + default=False, + help_text="Indicates whether all impacts for this advisory have been unfurled.", + ), + ), + ] diff --git a/vulnerabilities/migrations/0136_impactedpackage_vulnerabili_advisor_1e3414_idx.py b/vulnerabilities/migrations/0136_impactedpackage_vulnerabili_advisor_1e3414_idx.py new file mode 100644 index 000000000..033193c34 
--- /dev/null +++ b/vulnerabilities/migrations/0136_impactedpackage_vulnerabili_advisor_1e3414_idx.py @@ -0,0 +1,19 @@ +# Generated by Django 5.2.11 on 2026-06-01 11:25 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0135_advisoryv2__all_impacts_unfurled"), + ] + + operations = [ + migrations.AddIndex( + model_name="impactedpackage", + index=models.Index( + fields=["advisory", "last_range_unfurl_at"], name="vulnerabili_advisor_1e3414_idx" + ), + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index d05529638..c64ba757a 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -19,6 +19,7 @@ from itertools import groupby from operator import attrgetter from traceback import format_exc as traceback_format_exc +from typing import Dict from typing import List from typing import NamedTuple from typing import Optional @@ -2934,38 +2935,54 @@ def latest_for_avids(self, avids): return self.filter(avid__in=avids).latest_per_avid() def latest_affecting_advisories_for_purl(self, purl): - adv_ids = ImpactedPackageAffecting.objects.filter(package__package_url=purl).values_list( - "impacted_package__advisory_id", - flat=True, - ) + adv_ids = ImpactedPackage.objects.filter( + affecting_packages__package_url=purl, + advisory__is_latest=True, + advisory___all_impacts_unfurled=True, + ).values_list("advisory_id", flat=True) + return self.filter(id__in=Subquery(adv_ids)).latest_per_avid() def latest_affecting_advisories_for_purls(self, purls): - adv_ids = ImpactedPackageAffecting.objects.filter( - package__package_url__in=purls + adv_ids = ImpactedPackage.objects.filter( + affecting_packages__package_url__in=purls, + advisory__is_latest=True, + advisory___all_impacts_unfurled=True, ).values_list( - "impacted_package__advisory_id", + "advisory_id", flat=True, ) return self.filter(id__in=Subquery(adv_ids)).latest_per_avid() - def 
latest_affecting_advisories_for_packages(self, purls): - adv_ids = ImpactedPackageAffecting.objects.filter(package__in=purls).values_list( - "impacted_package__advisory_id", + def latest_affecting_advisories_for_packages(self, packages): + adv_ids = ImpactedPackage.objects.filter( + affecting_packages__in=packages, + advisory__is_latest=True, + advisory___all_impacts_unfurled=True, + ).values_list( + "advisory_id", flat=True, ) return self.filter(id__in=Subquery(adv_ids)).latest_per_avid() def latest_fixed_by_advisories_for_purl(self, purl): - adv_ids = ImpactedPackageFixedBy.objects.filter(package__package_url=purl).values_list( - "impacted_package__advisory_id", + adv_ids = ImpactedPackage.objects.filter( + fixed_by_packages__package_url=purl, + advisory__is_latest=True, + advisory___all_impacts_unfurled=True, + ).values_list( + "advisory_id", flat=True, ) return self.filter(id__in=Subquery(adv_ids)).latest_per_avid() def latest_fixed_by_advisories_for_purls(self, purls): - adv_ids = ImpactedPackageFixedBy.objects.filter(package__package_url__in=purls).values_list( - "impacted_package__advisory_id", + adv_ids = ImpactedPackage.objects.filter( + fixed_by_packages__package_url__in=purls, + advisory__is_latest=True, + advisory___all_impacts_unfurled=True, + ).values_list( + "advisory_id", flat=True, ) @@ -2973,14 +2990,22 @@ def latest_fixed_by_advisories_for_purls(self, purls): def latest_advisories_for_purls(self, purls): adv_ids = ( - ImpactedPackageAffecting.objects.filter(package__package_url__in=purls) + ImpactedPackage.objects.filter( + affecting_packages__package_url__in=purls, + advisory__is_latest=True, + advisory___all_impacts_unfurled=True, + ) .values_list( - "impacted_package__advisory_id", + "advisory_id", flat=True, ) .union( - ImpactedPackageFixedBy.objects.filter(package__package_url__in=purls).values_list( - "impacted_package__advisory_id", + ImpactedPackage.objects.filter( + fixed_by_packages__package_url__in=purls, + advisory__is_latest=True, + 
advisory___all_impacts_unfurled=True, + ).values_list( + "advisory_id", flat=True, ) ) @@ -2991,14 +3016,22 @@ def latest_advisories_for_purls(self, purls): def latest_advisories_for_purl(self, purl): adv_ids = ( - ImpactedPackageAffecting.objects.filter(package__package_url=purl) + ImpactedPackage.objects.filter( + affecting_packages__package_url=purl, + advisory__is_latest=True, + advisory___all_impacts_unfurled=True, + ) .values_list( - "impacted_package__advisory_id", + "advisory_id", flat=True, ) .union( - ImpactedPackageFixedBy.objects.filter(package__package_url=purl).values_list( - "impacted_package__advisory_id", + ImpactedPackage.objects.filter( + fixed_by_packages__package_url=purl, + advisory__is_latest=True, + advisory___all_impacts_unfurled=True, + ).values_list( + "advisory_id", flat=True, ) ) @@ -3034,6 +3067,9 @@ class AdvisorySet(models.Model): created_at = models.DateTimeField(auto_now_add=True) + class Meta: + unique_together = ("package", "relation_type", "primary_advisory") + class AdvisorySetMember(models.Model): @@ -3055,7 +3091,7 @@ class AdvisoryV2(models.Model): # This is similar to a type or a namespace datasource_id = models.CharField( - max_length=200, + max_length=50, blank=False, null=False, db_index=True, @@ -3063,7 +3099,7 @@ class AdvisoryV2(models.Model): ) pipeline_id = models.CharField( - max_length=200, + max_length=50, blank=False, null=False, db_index=True, @@ -3072,7 +3108,7 @@ class AdvisoryV2(models.Model): # This is similar to a name advisory_id = models.CharField( - max_length=500, + max_length=200, blank=False, null=False, unique=False, @@ -3082,7 +3118,7 @@ class AdvisoryV2(models.Model): ) avid = models.CharField( - max_length=500, + max_length=250, blank=False, null=False, help_text="Unique ID for the datasource used for this advisory ." @@ -3208,6 +3244,11 @@ class AdvisoryV2(models.Model): help_text="Risk expressed as a number ranging from 0 to 10. 
Risk is calculated from weighted severity and exploitability values. It is the maximum value of (the weighted severity multiplied by its exploitability) or 10. Risk = min(weighted severity * exploitability, 10)", ) + _all_impacts_unfurled = models.BooleanField( + default=False, + help_text="Indicates whether all impacts for this advisory have been unfurled.", + ) + objects = AdvisoryV2QuerySet.as_manager() class Meta: @@ -3372,6 +3413,11 @@ def to_affected_package_data(self): return AffectedPackageV2.from_dict(self.to_dict()) + class Meta: + indexes = [ + models.Index(fields=["advisory", "last_range_unfurl_at"]), + ] + class ToDoRelatedAdvisory(models.Model): todo = models.ForeignKey( @@ -3422,7 +3468,7 @@ def search(self, query: str = None): except ValueError: # otherwise use query as a plain string qs = qs.filter(package_url__icontains=query) - return qs.order_by("package_url").order_by("-version_rank") + return qs.order_by("type", "namespace", "name", "-version_rank") def with_vulnerability_counts(self): return self.annotate( @@ -3515,22 +3561,36 @@ def _vulnerable(self, vulnerable=True): """ return self.with_is_vulnerable().filter(is_vulnerable=vulnerable) - def vulnerable(self): - """ - Return only packages that are vulnerable. - """ - return self.filter(id__in=ImpactedPackageAffecting.objects.values("package_id").distinct()) - def with_is_vulnerable(self): """ Annotate Package with ``is_vulnerable`` boolean attribute. 
""" return self.annotate( is_vulnerable=Exists( - ImpactedPackage.objects.filter(affecting_packages__pk=OuterRef("pk")) + ImpactedPackage.objects.filter( + affecting_packages__pk=OuterRef("pk"), advisory__is_latest=True + ) ) ) + def all_vulnerable(self): + latest_impacts = ImpactedPackageAffecting.objects.filter( + package_id=OuterRef("pk"), + impacted_package__advisory__is_latest=True, + ) + + query = PackageV2.objects.filter(Exists(latest_impacts)) + return query + + def all_vulnerable_purls(self): + return self.all_vulnerable().values_list("package_url", flat=True) + + def filter_plain_purls(self, plain_purls=[]): + return PackageV2.objects.filter(plain_package_url__in=plain_purls) + + def filter_purls(self, purls=[]): + return PackageV2.objects.filter(package_url__in=purls) + def from_purl(self, purl: Union[PackageURL, str]): """ Return a new Package given a ``purl`` PackageURL object or PURL string. @@ -3875,6 +3935,7 @@ class GroupedAdvisory(NamedTuple): weighted_severity: Optional[float] exploitability: Optional[float] risk_score: Optional[float] + ssvc_trees: List[Dict] class AdvisoryPOC(models.Model): diff --git a/vulnerabilities/pipelines/v2_improvers/collect_ssvc_trees.py b/vulnerabilities/pipelines/v2_improvers/collect_ssvc_trees.py index d96e25cd7..4bd7ef273 100644 --- a/vulnerabilities/pipelines/v2_improvers/collect_ssvc_trees.py +++ b/vulnerabilities/pipelines/v2_improvers/collect_ssvc_trees.py @@ -8,6 +8,7 @@ # import logging +from collections import defaultdict from django.db.models import Prefetch from django.db.models import Q @@ -35,53 +36,126 @@ def steps(cls): return (cls.collect_ssvc_data,) def collect_ssvc_data(self): - vulnrichment_advisories = ( + advisories = list( AdvisoryV2.objects.latest_per_avid() .filter( severities__scoring_system=SCORING_SYSTEMS["ssvc"], ) - .distinct() .prefetch_related( Prefetch( "severities", queryset=AdvisorySeverity.objects.filter( scoring_system=SCORING_SYSTEMS["ssvc"] - ), - ) + ).only("id", 
"scoring_elements"), + ), + "aliases", ) + .only("id", "advisory_id") + .distinct() ) - self.log( - f"Found {vulnrichment_advisories.count()} advisories from Vulnrichment with SSVC severities." + self.log(f"Found {len(advisories)} advisories from Vulnrichment with SSVC severities.") + advisory_ids = {a.advisory_id for a in advisories} + + all_related = ( + AdvisoryV2.objects.filter( + Q(advisory_id__in=advisory_ids) | Q(aliases__alias__in=advisory_ids) + ) + .distinct() + .only("id", "advisory_id") + .prefetch_related("aliases") ) - for advisory in vulnrichment_advisories: + + advisory_map = defaultdict(set) + + for adv in all_related: + advisory_map[adv.advisory_id].add(adv) + for alias in adv.aliases.all(): + if alias.alias in advisory_ids: + advisory_map[alias.alias].add(adv) + + existing_ssvc = { + s.source_advisory_id: s + for s in SSVC.objects.filter(source_advisory_id__in=[a.id for a in advisories]) + } + + to_create = [] + to_update = [] + + for advisory in advisories: self.log(f"Processing advisory: {advisory.advisory_id}") + for severity in advisory.severities.all(): ssvc_vector = severity.scoring_elements - self.log(f"SSVC Vector found: {ssvc_vector}") + try: ssvc_tree, decision = convert_vector_to_tree_and_decision(ssvc_vector) - self.log( - f"Advisory: {advisory.advisory_id}, SSVC Tree: {ssvc_tree}, Decision: {decision}, vector: {ssvc_vector}" - ) - ssvc_obj, _ = SSVC.objects.get_or_create( - source_advisory=advisory, - defaults={ - "options": ssvc_tree, - "decision": decision, - "vector": ssvc_vector, - }, - ) - # All advisories that have advisory.advisory_id in their aliases or advisory_id same as advisory.advisory_id - related_advisories = AdvisoryV2.objects.filter( - Q(advisory_id=advisory.advisory_id) | Q(aliases__alias=advisory.advisory_id) - ).distinct() - related_advisories = related_advisories.exclude(id=advisory.id) - ssvc_obj.related_advisories.set(related_advisories) + + existing = existing_ssvc.get(advisory.id) + + if existing: + 
existing.options = ssvc_tree + existing.decision = decision + existing.vector = ssvc_vector + to_update.append(existing) + ssvc_obj = existing + else: + ssvc_obj = SSVC( + source_advisory=advisory, + options=ssvc_tree, + decision=decision, + vector=ssvc_vector, + ) + to_create.append(ssvc_obj) + except Exception as e: logger.error( - f"Failed to parse SSVC vector '{ssvc_vector}' for advisory '{advisory}': {e}" + f"Failed to parse SSVC vector '{ssvc_vector}' " + f"for advisory '{advisory}': {e}" + ) + + SSVC.objects.bulk_create(to_create, batch_size=1000) + + SSVC.objects.bulk_update( + to_update, + ["options", "decision", "vector"], + batch_size=1000, + ) + + # Refresh newly created IDs + created_ssvc = { + s.source_advisory_id: s + for s in SSVC.objects.filter(source_advisory_id__in=[a.id for a in advisories]) + } + + through_model = SSVC.related_advisories.through + + through_rows = [] + + for advisory in advisories: + ssvc_obj = created_ssvc.get(advisory.id) + + if not ssvc_obj: + continue + + related = advisory_map.get(advisory.advisory_id, set()) + + for related_adv in related: + if related_adv.id == advisory.id: + continue + + through_rows.append( + through_model( + ssvc_id=ssvc_obj.id, + advisoryv2_id=related_adv.id, ) + ) + + through_model.objects.bulk_create( + through_rows, + ignore_conflicts=True, + batch_size=5000, + ) REVERSE_POINTS = { diff --git a/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py b/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py index dacf7e6c8..3d3da7c94 100644 --- a/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py +++ b/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py @@ -16,6 +16,7 @@ from vulnerabilities.models import AdvisoryV2 from vulnerabilities.models import PackageV2 from vulnerabilities.pipelines import VulnerableCodePipeline +from vulnerabilities.pipes.risk_score import bulk_update from vulnerabilities.risk import compute_vulnerability_risk_factors @@ -177,15 +178,3 
@@ def compute_and_store_package_risk_score(self): logger=self.log, ) self.log(f"Successfully added risk score for {updated:,d} package") - - -def bulk_update(model, items, fields, logger): - item_count = 0 - if items: - try: - model.objects.bulk_update(objs=items, fields=fields) - item_count += len(items) - except Exception as e: - logger(f"Error updating {model.__name__}: {e}") - items.clear() - return item_count diff --git a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py index ea6fc9185..642b5e3d2 100644 --- a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py +++ b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py @@ -7,21 +7,17 @@ # See https://aboutcode.org for more information about nexB OSS projects. # -from typing import List - -from vulnerabilities.models import AdvisoryV2 -from vulnerabilities.models import Group from vulnerabilities.models import PackageV2 from vulnerabilities.pipelines import VulnerableCodePipeline -from vulnerabilities.pipes.group_advisories import delete_and_save_advisory_set +from vulnerabilities.pipes.group_advisories import group_advisory_for_package from vulnerabilities.utils import TYPES_WITH_MULTIPLE_IMPORTERS -from vulnerabilities.utils import merge_advisories class GroupAdvisoriesForPackages(VulnerableCodePipeline): """Group advisories for packages that have multiple importers""" pipeline_id = "group_advisories_for_packages" + run_once = True @classmethod def steps(cls): @@ -33,28 +29,4 @@ def group_advisories_for_packages(self): def group_advisoris_for_packages(logger=None): for package in PackageV2.objects.filter(type__in=TYPES_WITH_MULTIPLE_IMPORTERS).iterator(): - logger(f"Grouping advisories for package {package.purl}") - affecting_advisories = AdvisoryV2.objects.latest_affecting_advisories_for_purl( - purl=package.purl - ).prefetch_related( - "aliases", - 
"impacted_packages__affecting_packages", - "impacted_packages__fixed_by_packages", - ) - - fixed_by_advisories = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl( - purl=package.purl - ).prefetch_related( - "aliases", - "impacted_packages__affecting_packages", - "impacted_packages__fixed_by_packages", - ) - - try: - affected_groups: List[Group] = merge_advisories(affecting_advisories, package) - fixed_by_groups: List[Group] = merge_advisories(fixed_by_advisories, package) - delete_and_save_advisory_set(affected_groups, package, relation="affecting") - delete_and_save_advisory_set(fixed_by_groups, package, relation="fixing") - except Exception as e: - logger(f"Failed rebuilding advisory sets for package {package.purl}: {e!r}") - continue + group_advisory_for_package(package, logger=logger) diff --git a/vulnerabilities/pipelines/v2_improvers/mark_all_impacts_unfurled.py b/vulnerabilities/pipelines/v2_improvers/mark_all_impacts_unfurled.py new file mode 100644 index 000000000..466aeda84 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/mark_all_impacts_unfurled.py @@ -0,0 +1,96 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +from django.db import transaction +from django.db.models import Exists +from django.db.models import OuterRef + +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import ImpactedPackage +from vulnerabilities.models import ImpactedPackageAffecting +from vulnerabilities.models import ImpactedPackageFixedBy +from vulnerabilities.models import PackageV2 +from vulnerabilities.models import PipelineSchedule +from vulnerabilities.pipelines import VulnerableCodePipeline +from vulnerabilities.pipes.group_advisories import group_advisory_for_package +from vulnerabilities.pipes.risk_score import compute_package_risk_score_bulk +from vulnerabilities.utils import TYPES_WITH_MULTIPLE_IMPORTERS + + +class MarkAllImpactsUnfurledPipeline(VulnerableCodePipeline): + """Mark advisories as fully unfurled once every related impact has been unfurled.""" + + pipeline_id = "mark_all_impacts_unfurled_v2" + run_interval = 1 + run_priority = PipelineSchedule.ExecutionPriority.HIGH + + @classmethod + def steps(cls): + return (cls.mark_all_impacts_unfurled,) + + def mark_all_impacts_unfurled(self): + while True: + advisories = list(latest_advisories_with_all_impacts_unfurled()[:100]) + + if not advisories: + break + + complete_advisories_import(AdvisoryV2.objects.filter(id__in=[a.id for a in advisories])) + + +@transaction.atomic +def complete_advisories_import(advisories): + + advisory_ids = list(advisories.values_list("id", flat=True)) + + if not advisory_ids: + return + + AdvisoryV2.objects.filter(id__in=advisory_ids).update(_all_impacts_unfurled=True) + + affecting_package_ids = set( + ImpactedPackageAffecting.objects.filter( + impacted_package__advisory_id__in=advisory_ids + ).values_list( + "package_id", + flat=True, + ) + ) + + fixed_by_package_ids = set( + ImpactedPackageFixedBy.objects.filter( + impacted_package__advisory_id__in=advisory_ids + ).values_list( + "package_id", + flat=True, + ) + ) + + 
compute_package_risk_score_bulk(PackageV2.objects.filter(id__in=affecting_package_ids)) + + group_package_ids = affecting_package_ids | fixed_by_package_ids + + for package in PackageV2.objects.filter( + id__in=group_package_ids, type__in=TYPES_WITH_MULTIPLE_IMPORTERS + ).iterator(chunk_size=2000): + group_advisory_for_package(package) + + +def latest_advisories_with_all_impacts_unfurled(): + remaining_unfurled_impacts = ImpactedPackage.objects.filter( + advisory_id=OuterRef("pk"), + last_range_unfurl_at__isnull=True, + ) + + return ( + AdvisoryV2.objects.filter(_all_impacts_unfurled=False, is_latest=True) + .annotate(has_remaining_unfurled=Exists(remaining_unfurled_impacts)) + .filter(has_remaining_unfurled=False) + .order_by("id") + ) diff --git a/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py b/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py index 9d874c635..db68d7c1f 100644 --- a/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py +++ b/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py @@ -12,6 +12,7 @@ from traceback import format_exc as traceback_format_exc from aboutcode.pipeline import LoopProgress +from django.db import transaction from django.db.models import F from django.db.models import Q from django.utils import timezone @@ -54,8 +55,8 @@ def unfurl_version_range(self): cached_versions = {} update_unfurl_date = [] update_successful_unfurl_date = [] - update_batch_size = 5000 - chunk_size = 5000 + update_batch_size = 500 + chunk_size = 500 impacted_packages = impacted_package_qs(cutoff_day=self.reunfurl_after_days) impacted_packages_count = impacted_packages.count() @@ -159,8 +160,11 @@ def get_purl_versions(purl, cached_versions, logger): return cached_versions.get(purl) or [] +@transaction.atomic def bulk_create_with_m2m(purls, impact, relation, logger): - """Bulk create PackageV2 and also bulk populate M2M Impact and Package relationships.""" + """Bulk create PackageV2 and also bulk populate M2M 
Impact and Package relationships. + This function assumes same base purl is used for all versions in ``purls`` list. + """ if not purls: return 0 @@ -190,8 +194,9 @@ def impacted_package_qs(cutoff_day=2): ImpactedPackage.objects.filter( (Q(last_range_unfurl_at__isnull=True) | Q(last_range_unfurl_at__lte=cutoff)) & Q(affecting_vers__isnull=False) + & Q(advisory__is_latest=True) & ~Q(affecting_vers="") ) - .order_by(F("last_range_unfurl_at").asc(nulls_first=True)) + .order_by("advisory__id", F("last_range_unfurl_at").asc(nulls_first=True)) .only("pk", "affecting_vers", "advisory", "base_purl") ) diff --git a/vulnerabilities/pipes/advisory.py b/vulnerabilities/pipes/advisory.py index 732e2e0ab..4198349a6 100644 --- a/vulnerabilities/pipes/advisory.py +++ b/vulnerabilities/pipes/advisory.py @@ -47,6 +47,8 @@ from vulnerabilities.models import VulnerabilityRelatedReference from vulnerabilities.models import VulnerabilitySeverity from vulnerabilities.models import Weakness +from vulnerabilities.pipes.group_advisories import group_advisory_for_package +from vulnerabilities.pipes.risk_score import compute_advisory_risk_score from vulnerabilities.pipes.univers_utils import get_exact_purls_v2 @@ -362,6 +364,16 @@ def insert_advisory_v2( if values: getattr(advisory_obj, field_name).add(*values) + weighted_severity, exploitability, risk_score = compute_advisory_risk_score(advisory_obj) + advisory_obj.weighted_severity = ( + round(weighted_severity, 1) if weighted_severity is not None else None + ) + advisory_obj.exploitability = round(exploitability, 1) if exploitability is not None else None + advisory_obj.risk_score = round(risk_score, 1) if risk_score is not None else None + if not advisory.affected_packages: + advisory_obj._all_impacts_unfurled = True + advisory_obj.save() + for affected_pkg in advisory.affected_packages: impact = ImpactedPackage.objects.create( advisory=advisory_obj, @@ -390,6 +402,11 @@ def insert_advisory_v2( 
impact.affecting_packages.add(*affected_packages_v2) impact.fixed_by_packages.add(*fixed_packages_v2) + if affected_packages_v2: + affected_packages_v2[0].calculate_version_rank + elif fixed_packages_v2: + fixed_packages_v2[0].calculate_version_rank + introduced_commit_v2 = get_or_create_advisory_package_commit_patches( affected_pkg.introduced_by_commit_patches ) diff --git a/vulnerabilities/pipes/export.py b/vulnerabilities/pipes/export.py index 8b77d53cb..fcd3b327b 100644 --- a/vulnerabilities/pipes/export.py +++ b/vulnerabilities/pipes/export.py @@ -25,7 +25,9 @@ def package_prefetched_qs(checkpoint): .prefetch_related( Prefetch( "affected_in_impacts", - queryset=ImpactedPackage.objects.only("advisory_id").prefetch_related( + queryset=ImpactedPackage.objects.filter(advisory__is_latest=True) + .only("advisory_id") + .prefetch_related( Prefetch( "advisory", queryset=AdvisoryV2.objects.only("avid"), @@ -34,7 +36,9 @@ def package_prefetched_qs(checkpoint): ), Prefetch( "fixed_in_impacts", - queryset=ImpactedPackage.objects.only("advisory_id").prefetch_related( + queryset=ImpactedPackage.objects.filter(advisory__is_latest=True) + .only("advisory_id") + .prefetch_related( Prefetch( "advisory", queryset=AdvisoryV2.objects.only("avid"), diff --git a/vulnerabilities/pipes/group_advisories.py b/vulnerabilities/pipes/group_advisories.py index 983ac3386..ae4bc2171 100644 --- a/vulnerabilities/pipes/group_advisories.py +++ b/vulnerabilities/pipes/group_advisories.py @@ -7,46 +7,156 @@ # See https://aboutcode.org for more information about nexB OSS projects. 
# +from collections import defaultdict +from typing import List + from django.db import transaction +from vulnerabilities.models import AdvisorySet +from vulnerabilities.models import AdvisorySetMember +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import Group + @transaction.atomic def delete_and_save_advisory_set(groups, package, relation=None): - from vulnerabilities.models import AdvisorySet - from vulnerabilities.models import AdvisorySetMember - from vulnerabilities.models import Group + print(f"Grouping advisories for package: {package.purl}") - AdvisorySet.objects.filter(package=package, relation_type=relation).delete() + AdvisorySet.objects.filter( + package=package, + relation_type=relation, + ).delete() - membership_to_create = [] + advisory_sets = [] + primary_to_group = {} for group in groups: - - assert isinstance(group, Group) - advisory_set = AdvisorySet.objects.create( - package=package, - relation_type=relation, - primary_advisory=group.primary, + advisory_sets.append( + AdvisorySet( + package=package, + relation_type=relation, + primary_advisory_id=group.primary.id, + ) ) - advisory_set.aliases.add(*group.aliases) - advisory_set.save() + primary_to_group[group.primary.id] = group + + AdvisorySet.objects.bulk_create( + advisory_sets, + batch_size=5000, + ) + + created_sets = AdvisorySet.objects.filter( + package=package, + relation_type=relation, + ).only("id", "primary_advisory_id") + + advisory_set_map = {adv_set.primary_advisory_id: adv_set.id for adv_set in created_sets} + + alias_through_model = AdvisorySet.aliases.through + + alias_links = [] + memberships = [] - membership_to_create.append( + for primary_id, group in primary_to_group.items(): + advisory_set_id = advisory_set_map[primary_id] + + memberships.append( AdvisorySetMember( - advisory_set=advisory_set, - advisory=group.primary, + advisory_set_id=advisory_set_id, + advisory_id=group.primary.id, is_primary=True, ) ) - for adv in group.secondaries: - 
membership_to_create.append( - AdvisorySetMember( - advisory_set=advisory_set, - advisory=adv, - is_primary=False, - ) + memberships.extend( + AdvisorySetMember( + advisory_set_id=advisory_set_id, + advisory_id=adv.id, + is_primary=False, + ) + for adv in group.secondaries + ) + + alias_links.extend( + alias_through_model( + advisoryset_id=advisory_set_id, + advisoryalias_id=alias.id, ) + for alias in group.aliases + ) + + if alias_links: + alias_through_model.objects.bulk_create( + alias_links, + batch_size=10000, + ) + + if memberships: + AdvisorySetMember.objects.bulk_create( + memberships, + batch_size=10000, + ) + + print(f"Successfully saved advisory sets for package: {package.purl}") + + +def group_advisory_for_package(package, logger=None): + """ + Group advisories for a given package and save the advisory sets for the package. + """ + from vulnerabilities.utils import TYPES_WITH_MULTIPLE_IMPORTERS + + if package.type not in TYPES_WITH_MULTIPLE_IMPORTERS: + return + + affecting_advisories = AdvisoryV2.objects.latest_affecting_advisories_for_purl( + purl=package.purl + ).prefetch_related( + "aliases", + "impacted_packages__affecting_packages", + "impacted_packages__fixed_by_packages", + ) + + fixed_by_advisories = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl( + purl=package.purl + ).prefetch_related( + "aliases", + "impacted_packages__affecting_packages", + "impacted_packages__fixed_by_packages", + ) + + try: + affected_groups: List[Group] = merge_advisories(affecting_advisories, package) + fixed_by_groups: List[Group] = merge_advisories(fixed_by_advisories, package) + delete_and_save_advisory_set(affected_groups, package, relation="affecting") + delete_and_save_advisory_set(fixed_by_groups, package, relation="fixing") + logger(f"Successfully rebuilt advisory sets for package {package.purl}") + except Exception as e: + if logger: + logger(f"Failed rebuilding advisory sets for package {package.purl}: {e!r}") + return + + +def 
merge_advisories(advisories, package): + """ + Merge advisories based on their content hash and identifiers. + """ + from vulnerabilities.utils import compute_advisory_content_hash + from vulnerabilities.utils import get_merged_identifier_groups + + advisories = list(advisories) + + content_hash_map = defaultdict(list) + + for adv in advisories: + content_hash = compute_advisory_content_hash(adv, package) + content_hash_map[content_hash].append(adv) + + final_groups: List[Group] = [] + + for group in content_hash_map.values(): + groups = get_merged_identifier_groups(group) + final_groups.extend(groups) - AdvisorySetMember.objects.bulk_create(membership_to_create) + return final_groups diff --git a/vulnerabilities/pipes/risk_score.py b/vulnerabilities/pipes/risk_score.py new file mode 100644 index 000000000..8a4bf00cf --- /dev/null +++ b/vulnerabilities/pipes/risk_score.py @@ -0,0 +1,133 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +from decimal import ROUND_HALF_UP +from decimal import Decimal + +from django.db.models import Max + +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import PackageV2 + + +def quantize_1(value): + if value is None: + return None + + return Decimal(str(value)).quantize( + Decimal("0.1"), + rounding=ROUND_HALF_UP, + ) + + +def compute_package_risk_score(package, current_advisory_risk_score=None): + """Calculate the risk score for a single PackageV2 object.""" + max_risk = ( + AdvisoryV2.objects.latest_affecting_advisories_for_purl(package.package_url) + .aggregate(max_risk=Max("risk_score")) + .get("max_risk") + ) + # include current advisory risk score in the calculation if provided and is higher than the max risk score from the database + if current_advisory_risk_score is not None: + max_risk = max(max_risk or 0, current_advisory_risk_score) + if max_risk is None: + return None + return round(float(max_risk), 1) + + +def compute_package_risk_score_bulk(packages): + """Compute and save the risk score for every PackageV2 in the ``packages`` queryset.""" + purls = packages.values_list("package_url", flat=True) + advisories = AdvisoryV2.objects.latest_affecting_advisories_for_purls(purls).only( + "id", "risk_score" + ) + qs = ( + PackageV2.objects.filter( + id__in=packages.values_list("id", flat=True), + affected_in_impacts__advisory__risk_score__isnull=False, + affected_in_impacts__advisory__in=advisories, + ) + .distinct() + .annotate(computed_risk=Max("affected_in_impacts__advisory__risk_score")) + .only("id") + ) + + batch = [] + batch_size = 5000 + updated = 0 + + for pkg in qs.iterator(chunk_size=batch_size): + pkg.risk_score = round(float(pkg.computed_risk), 1) + batch.append(pkg) + + if len(batch) >= batch_size: + updated += bulk_update( + model=PackageV2, + items=batch, + fields=["risk_score"], + ) + batch.clear() + + updated += bulk_update( + model=PackageV2, + items=batch, + fields=["risk_score"], + ) + + +def compute_advisory_risk_score(advisory): + 
""" + Calculate the risk score for a single AdvisoryV2 object. + Returns a tuple of (weighted_severity, exploitability, risk_score). + """ + from vulnerabilities.risk import compute_vulnerability_risk_factors + + weighted_severity = None + exploitability = None + risk_score = None + + references = advisory.references.all() + exploits = advisory.exploits.all() + + severities = list(advisory.severities.all()) + + for rel in advisory.related_advisory_severities.all(): + severities.extend(rel.severities.all()) + + try: + calculated_weighted_severity, calculated_exploitability = ( + compute_vulnerability_risk_factors( + references=references, + severities=severities, + exploits=exploits, + ) + ) + + weighted_severity = calculated_weighted_severity + exploitability = calculated_exploitability + if exploitability and weighted_severity: + risk_score = min(float(exploitability * weighted_severity), 10.0) + risk_score = round(risk_score, 1) + except Exception as e: + risk_score = None + + return quantize_1(weighted_severity), quantize_1(exploitability), quantize_1(risk_score) + + +def bulk_update(model, items, fields, logger=None): + item_count = 0 + if items: + try: + model.objects.bulk_update(objs=items, fields=fields) + item_count += len(items) + except Exception as e: + if logger: + logger(f"Error updating {model.__name__}: {e}") + items.clear() + return item_count diff --git a/vulnerabilities/risk.py b/vulnerabilities/risk.py index dd7401d80..b7782ebbc 100644 --- a/vulnerabilities/risk.py +++ b/vulnerabilities/risk.py @@ -119,21 +119,3 @@ def compute_package_risk(package): return return round(max(result), 1) - - -def compute_package_risk_v2(package): - """ - Calculate the risk for a package by iterating over all vulnerabilities that affects this package - and determining the associated risk. 
- """ - - max_risk = ( - AdvisoryV2.objects.latest_affecting_advisories_for_purl(package.purl).aggregate( - max_risk=Max("risk_score") - ) - )["max_risk"] - - if max_risk is None: - return - - return round(float(max_risk), 1) diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_compute_advisory_todo_v2.py b/vulnerabilities/tests/pipelines/v2_improvers/test_compute_advisory_todo_v2.py index c07dcf9d9..3d95a0561 100644 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_compute_advisory_todo_v2.py +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_compute_advisory_todo_v2.py @@ -293,13 +293,13 @@ def test_todo_conflict_details_partial_curation(self): "package": { "type": "npm", "namespace": "", - "name": "package1", + "name": "package2", "version": "", "qualifiers": "", "subpath": "", }, "affected_version_range": "vers:npm/>=1.0.0|<=2.0.0", - "fixed_version_range": "vers:npm/2.0.1", + "fixed_version_range": None, "introduced_by_commit_patches": [], "fixed_by_commit_patches": [], }, @@ -312,36 +312,36 @@ def test_todo_conflict_details_partial_curation(self): "qualifiers": "", "subpath": "", }, - "affected_version_range": "vers:npm/>=1.0.0|<=2.0.0", + "affected_version_range": "vers:npm/>=3.0.0|<=3.9.0", "fixed_version_range": None, "introduced_by_commit_patches": [], "fixed_by_commit_patches": [], }, { "package": { - "type": "npm", + "type": "pypi", "namespace": "", - "name": "package2", + "name": "package1", "version": "", "qualifiers": "", "subpath": "", }, - "affected_version_range": "vers:npm/>=3.0.0|<=3.9.0", - "fixed_version_range": None, + "affected_version_range": "vers:pypi/>=1.0.0|<=2.0.0", + "fixed_version_range": "vers:pypi/2.0.1", "introduced_by_commit_patches": [], "fixed_by_commit_patches": [], }, { "package": { - "type": "pypi", + "type": "npm", "namespace": "", "name": "package1", "version": "", "qualifiers": "", "subpath": "", }, - "affected_version_range": "vers:pypi/>=1.0.0|<=2.0.0", - "fixed_version_range": "vers:pypi/2.0.1", + 
"affected_version_range": "vers:npm/>=1.0.0|<=2.0.0", + "fixed_version_range": "vers:npm/2.0.1", "introduced_by_commit_patches": [], "fixed_by_commit_patches": [], }, diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_mark_all_impacts_unfurled.py b/vulnerabilities/tests/pipelines/v2_improvers/test_mark_all_impacts_unfurled.py new file mode 100644 index 000000000..08b751e46 --- /dev/null +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_mark_all_impacts_unfurled.py @@ -0,0 +1,171 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import uuid + +import pytest +from django.utils import timezone + +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import ImpactedPackage +from vulnerabilities.pipelines.v2_improvers.mark_all_impacts_unfurled import ( + MarkAllImpactsUnfurledPipeline, +) +from vulnerabilities.pipelines.v2_improvers.mark_all_impacts_unfurled import ( + latest_advisories_with_all_impacts_unfurled, +) + + +def create_advisory( + *, + is_latest=True, + all_impacts_unfurled=False, +): + unique = uuid.uuid4().hex + + return AdvisoryV2.objects.create( + datasource_id="test", + pipeline_id="test_pipeline", + advisory_id=f"ADV-{unique}", + avid=f"test/{unique}", + unique_content_id=uuid.uuid4().hex, + url="https://example.com/advisory", + is_latest=is_latest, + _all_impacts_unfurled=all_impacts_unfurled, + ) + + +@pytest.mark.django_db +def test_returns_advisory_when_all_impacts_unfurled(): + advisory = create_advisory() + + ImpactedPackage.objects.create( + advisory=advisory, + base_purl="pkg:pypi/django", + last_range_unfurl_at=timezone.now(), + ) + + ImpactedPackage.objects.create( + 
advisory=advisory, + base_purl="pkg:pypi/flask", + last_range_unfurl_at=timezone.now(), + ) + + advisories = latest_advisories_with_all_impacts_unfurled() + + assert advisory in advisories + + +@pytest.mark.django_db +def test_does_not_return_advisory_when_one_impact_not_unfurled(): + advisory = create_advisory() + + ImpactedPackage.objects.create( + advisory=advisory, + base_purl="pkg:pypi/django", + last_range_unfurl_at=timezone.now(), + ) + + ImpactedPackage.objects.create( + advisory=advisory, + base_purl="pkg:pypi/flask", + last_range_unfurl_at=None, + ) + + advisories = latest_advisories_with_all_impacts_unfurled() + + assert advisory not in advisories + + +@pytest.mark.django_db +def test_does_not_return_non_latest_advisory(): + advisory = create_advisory(is_latest=False) + + ImpactedPackage.objects.create( + advisory=advisory, + base_purl="pkg:pypi/django", + last_range_unfurl_at=timezone.now(), + ) + + advisories = latest_advisories_with_all_impacts_unfurled() + + assert advisory not in advisories + + +@pytest.mark.django_db +def test_does_not_return_already_marked_advisory(): + advisory = create_advisory( + all_impacts_unfurled=True, + ) + + ImpactedPackage.objects.create( + advisory=advisory, + base_purl="pkg:pypi/django", + last_range_unfurl_at=timezone.now(), + ) + + advisories = latest_advisories_with_all_impacts_unfurled() + + assert advisory not in advisories + + +@pytest.mark.django_db +def test_pipeline_marks_matching_advisories(): + advisory = create_advisory() + + ImpactedPackage.objects.create( + advisory=advisory, + base_purl="pkg:pypi/django", + last_range_unfurl_at=timezone.now(), + ) + + pipeline = MarkAllImpactsUnfurledPipeline() + pipeline.mark_all_impacts_unfurled() + + advisory.refresh_from_db() + + assert advisory._all_impacts_unfurled is True + + +@pytest.mark.django_db +def test_pipeline_does_not_mark_partial_advisory(): + advisory = create_advisory() + + ImpactedPackage.objects.create( + advisory=advisory, + 
base_purl="pkg:pypi/django", + last_range_unfurl_at=timezone.now(), + ) + + ImpactedPackage.objects.create( + advisory=advisory, + base_purl="pkg:pypi/flask", + last_range_unfurl_at=None, + ) + + pipeline = MarkAllImpactsUnfurledPipeline() + pipeline.mark_all_impacts_unfurled() + + advisory.refresh_from_db() + + assert advisory._all_impacts_unfurled is False + + +@pytest.mark.django_db +def test_zero_impacts_advisory_is_returned(): + """ + Current behavior: + advisory with zero impacts is considered fully unfurled. + """ + + advisory = create_advisory() + + advisories = latest_advisories_with_all_impacts_unfurled() + + assert advisory in advisories diff --git a/vulnerabilities/tests/test_advisory_merge.py b/vulnerabilities/tests/test_advisory_merge.py index 71c214bbb..c4c738483 100644 --- a/vulnerabilities/tests/test_advisory_merge.py +++ b/vulnerabilities/tests/test_advisory_merge.py @@ -18,12 +18,11 @@ from vulnerabilities.models import Group from vulnerabilities.models import ImpactedPackage from vulnerabilities.models import PackageV2 +from vulnerabilities.pipes.group_advisories import delete_and_save_advisory_set +from vulnerabilities.pipes.group_advisories import merge_advisories from vulnerabilities.utils import compute_advisory_content_hash -from vulnerabilities.utils import delete_and_save_advisory_set from vulnerabilities.utils import get_advisories_from_groups from vulnerabilities.utils import get_merged_identifier_groups -from vulnerabilities.utils import merge_advisories -from vulnerabilities.utils import merge_and_save_grouped_advisories @pytest.mark.django_db @@ -162,27 +161,6 @@ def test_delete_and_save_advisory_set(self): assert any(m.is_primary for m in members) assert any(not m.is_primary for m in members) - def test_merge_and_save_integration(self): - package = PackageV2.objects.from_purl("pkg:pypi/sample@1.0.0") - - adv1 = self.create_advisory("A1", ["1.0"], ["2.0"]) - adv2 = self.create_advisory("A2", ["1.0"], ["2.0"]) - - alias = 
AdvisoryAlias.objects.create(alias="CVE-1") - - adv1.aliases.add(alias) - adv2.aliases.add(alias) - - result = merge_and_save_grouped_advisories( - package, - [adv1, adv2], - relation="test", - ) - - assert len(result) == 1 - assert AdvisorySet.objects.count() == 1 - assert AdvisorySetMember.objects.count() == 2 - def test_merge_advisories_separates_different_content(self): package = PackageV2.objects.from_purl("pkg:pypi/sample@1.0.0") diff --git a/vulnerabilities/tests/test_api_v3.py b/vulnerabilities/tests/test_api_v3.py index cd944a203..e9d275a6c 100644 --- a/vulnerabilities/tests/test_api_v3.py +++ b/vulnerabilities/tests/test_api_v3.py @@ -36,10 +36,12 @@ def setUp(self): datasource_id="ghsa", logger=self.logger.write, ) + self.advisory.save() self.package = PackageV2.objects.from_purl(purl="pkg:pypi/sample@1.0.0") self.impact = ImpactedPackage.objects.create( - advisory=self.advisory, base_purl="pkg:pypi/sample" + advisory=self.advisory, + base_purl="pkg:pypi/sample", ) self.impact.affecting_packages.add(self.package) @@ -67,7 +69,7 @@ def test_packages_post_without_details(self): def test_packages_post_with_details(self): url = reverse("package-v3-list") - with self.assertNumQueries(31): + with self.assertNumQueries(12): response = self.client.post( url, data={ diff --git a/vulnerabilities/tests/test_risk.py b/vulnerabilities/tests/test_risk.py index 420c8c402..b5c4d4997 100644 --- a/vulnerabilities/tests/test_risk.py +++ b/vulnerabilities/tests/test_risk.py @@ -24,7 +24,6 @@ @pytest.fixture -@pytest.mark.django_db def vulnerability(): vul = Vulnerability(vulnerability_id="VCID-Existing") vul.save() @@ -50,7 +49,6 @@ def vulnerability(): @pytest.fixture -@pytest.mark.django_db def exploit(): vul = Vulnerability(vulnerability_id="VCID-Exploit") vul.save() @@ -58,7 +56,6 @@ def exploit(): @pytest.fixture -@pytest.mark.django_db def vulnerability_with_exploit_ref(): vul = Vulnerability(vulnerability_id="VCID-Exploit-Ref") vul.save() @@ -74,7 +71,6 @@ def 
vulnerability_with_exploit_ref(): @pytest.fixture -@pytest.mark.django_db def high_epss_score(): vul = Vulnerability(vulnerability_id="VCID-HIGH-EPSS") vul.save() @@ -90,7 +86,6 @@ def high_epss_score(): @pytest.fixture -@pytest.mark.django_db def low_epss_score(): vul = Vulnerability(vulnerability_id="VCID-LOW-EPSS") vul.save() diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index 2e618a920..dc72ad4d2 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -35,6 +35,7 @@ import urllib3 from cwe2.database import Database from cwe2.database import InvalidCWEError +from django.db.models import Prefetch from packageurl import PackageURL from packageurl.contrib.django.utils import without_empty_values from univers.version_range import RANGE_CLASS_BY_SCHEMES @@ -43,7 +44,6 @@ from univers.version_range import VersionRange from aboutcode.hashid import build_vcid -from vulnerabilities.pipes.group_advisories import delete_and_save_advisory_set logger = logging.getLogger(__name__) @@ -868,29 +868,6 @@ def compute_patch_checksum(patch_text: str): return hashlib.sha512(patch_text.encode("utf-8")).hexdigest() -def merge_advisories(advisories, package): - """ - Merge advisories based on their content hash and identifiers. - """ - from vulnerabilities.models import Group - - advisories = list(advisories) - - content_hash_map = defaultdict(list) - - for adv in advisories: - content_hash = compute_advisory_content_hash(adv, package) - content_hash_map[content_hash].append(adv) - - final_groups: List[Group] = [] - - for group in content_hash_map.values(): - groups = get_merged_identifier_groups(group) - final_groups.extend(groups) - - return final_groups - - def compute_advisory_content_hash(adv, package): """Compute a content hash for an advisory based on its affected and fixed packages for a given package. 
This is used to determine if two advisories are the same based on their content.""" @@ -906,8 +883,12 @@ def compute_advisory_content_hash(adv, package): ) for impact in adv.impacted_packages.filter(base_purl=str(version_less_purl)): - affected.extend([pkg.package_url for pkg in impact.affecting_packages.all()]) - fixed.extend([pkg.package_url for pkg in impact.fixed_by_packages.all()]) + for pkg in impact.affecting_packages.all(): + if pkg.package_url: + affected.extend([pkg.package_url]) + for pkg in impact.fixed_by_packages.all(): + if pkg.package_url: + fixed.extend([pkg.package_url]) normalized_data = { "affected_packages": normalize_list(affected), @@ -979,10 +960,12 @@ def get_merged_identifier_groups(advisories): return final_groups -def get_advisories_from_groups(groups): +def get_advisories_from_groups(groups, include_ssvc_trees=False): """ Return a list of advisories from the merged groups of advisories. """ + from vulnerabilities.models import SSVC + from vulnerabilities.models import AdvisoryV2 from vulnerabilities.models import Group from vulnerabilities.models import GroupedAdvisory @@ -1016,6 +999,35 @@ def get_advisories_from_groups(groups): identifier = group.primary.advisory_id.split("/")[-1] filtered_aliases = [alias for alias in group.aliases if alias.alias != identifier] + ssvc_trees = [] + + if include_ssvc_trees: + + all_advs = [group.primary] + list(group.secondaries) + + advisories_qs = AdvisoryV2.objects.filter( + id__in=[adv.id for adv in all_advs] + ).prefetch_related( + Prefetch( + "related_ssvcs", + queryset=SSVC.objects.select_related("source_advisory") + .only("id", "vector", "decision", "options", "source_advisory__url") + .distinct(), + to_attr="ssvc_trees", + ) + ) + + ssvc_trees = [ + { + "vector": ssvc.vector, + "decision": ssvc.decision, + "options": ssvc.options, + "url": ssvc.source_advisory.url if ssvc.source_advisory else None, + } + for adv in advisories_qs + for ssvc in adv.ssvc_trees + ] + advisories.append( 
GroupedAdvisory( aliases=filtered_aliases, @@ -1024,23 +1036,13 @@ def get_advisories_from_groups(groups): weighted_severity=weighted_severity, exploitability=exploitability, risk_score=risk_score, + ssvc_trees=ssvc_trees or [], ) ) return advisories -def merge_and_save_grouped_advisories(package, advisories, relation): - """ - Merge advisories based on their content and identifiers and save the merged advisories to the database. - """ - groups = merge_advisories(advisories, package) - delete_and_save_advisory_set(groups, package, relation) - advisories = get_advisories_from_groups(groups) - - return advisories - - TYPES_WITH_MULTIPLE_IMPORTERS = [ "pypi", "maven", diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 04ac8a787..438a70d77 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -23,6 +23,7 @@ from django.db.models import Exists from django.db.models import OuterRef from django.db.models import Prefetch +from django.db.models import Q from django.http import HttpResponse from django.http.response import Http404 from django.shortcuts import get_object_or_404 @@ -49,6 +50,8 @@ from vulnerabilities.models import AdvisoryV2 from vulnerabilities.models import Group from vulnerabilities.models import GroupedAdvisory +from vulnerabilities.models import ImpactedPackageAffecting +from vulnerabilities.models import ImpactedPackageFixedBy from vulnerabilities.models import PipelineRun from vulnerabilities.models import PipelineSchedule from vulnerabilities.pipelines.v2_importers.epss_importer_v2 import EPSSImporterPipeline @@ -58,7 +61,6 @@ from vulnerabilities.throttling import AnonUserUIThrottle from vulnerabilities.utils import TYPES_WITH_MULTIPLE_IMPORTERS from vulnerabilities.utils import get_advisories_from_groups -from vulnerabilities.utils import merge_and_save_grouped_advisories from vulnerablecode import __version__ as VULNERABLECODE_VERSION from vulnerablecode.settings import env @@ -212,7 +214,25 @@ def 
get_queryset(self, query=None): on exact purl, partial purl or just name and namespace. """ query = query or self.request.GET.get("search") or "" - return self.model.objects.search(query).prefetch_related().with_is_vulnerable() + affecting_exists = ImpactedPackageAffecting.objects.filter( + package_id=OuterRef("pk"), + impacted_package__advisory___all_impacts_unfurled=True, + ) + + fixed_by_exists = ImpactedPackageFixedBy.objects.filter( + package_id=OuterRef("pk"), + impacted_package__advisory___all_impacts_unfurled=True, + ) + + return ( + self.model.objects.search(query) + .annotate( + has_affecting=Exists(affecting_exists), + has_fixed_by=Exists(fixed_by_exists), + ) + .filter(Q(has_affecting=True) | Q(has_fixed_by=True)) + .with_is_vulnerable() + ) class AffectedByAdvisoriesListView(VulnerableCodeListView): @@ -395,42 +415,6 @@ def get_context_data(self, **kwargs): return context - if package.type in TYPES_WITH_MULTIPLE_IMPORTERS: - affecting_advisories = AdvisoryV2.objects.latest_affecting_advisories_for_purl( - purl=package.purl - ) - - fixed_by_advisories = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl( - purl=package.purl - ) - fixed_pkg_details = get_fixed_package_details(package) - context["fixed_package_details"] = fixed_pkg_details - context["grouped"] = True - - affecting_advisories = affecting_advisories.prefetch_related( - "aliases", - "impacted_packages__affecting_packages", - "impacted_packages__fixed_by_packages", - ) - - affected_by_advisories: List[GroupedAdvisory] = merge_and_save_grouped_advisories( - package, affecting_advisories, "affecting" - ) - - fixed_by_advisories = fixed_by_advisories.prefetch_related( - "aliases", - "impacted_packages__affecting_packages", - "impacted_packages__fixed_by_packages", - ) - - fixing_advisories: List[GroupedAdvisory] = merge_and_save_grouped_advisories( - package, fixed_by_advisories, "fixing" - ) - - context["affected_by_advisories_v2"] = affected_by_advisories - context["fixing_advisories_v2"] 
= fixing_advisories - return context - def get_object(self, queryset=None): if queryset is None: queryset = self.get_queryset() @@ -463,7 +447,11 @@ def get_fixed_package_details(package): p.id: p for p in models.PackageV2.objects.filter(id__in=pkg_ids, is_ghost=False).annotate( is_vulnerable=Exists( - models.ImpactedPackage.objects.filter(affecting_packages=OuterRef("pk")) + models.ImpactedPackage.objects.filter( + affecting_packages=OuterRef("pk"), + advisory__is_latest=True, + advisory___all_impacts_unfurled=True, + ) ) ) } @@ -932,7 +920,9 @@ def get_queryset(self): .prefetch_related( Prefetch( "impacted_packages", - queryset=models.ImpactedPackage.objects.prefetch_related( + queryset=models.ImpactedPackage.objects.filter( + advisory__is_latest=True, advisory___all_impacts_unfurled=True + ).prefetch_related( Prefetch( "affecting_packages", queryset=models.PackageV2.objects.only( From 87b6eeb59f961e61a2ac078ea0c419201b319aa2 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 2 Jun 2026 00:40:11 +0530 Subject: [PATCH 02/21] Fix qs Signed-off-by: Tushar Goel --- vulnerabilities/api_v3.py | 5 +++-- vulnerabilities/models.py | 5 ++++- vulnerabilities/pipes/export.py | 8 ++++++-- vulnerabilities/views.py | 2 ++ 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/vulnerabilities/api_v3.py b/vulnerabilities/api_v3.py index 00b5d170c..bd5890406 100644 --- a/vulnerabilities/api_v3.py +++ b/vulnerabilities/api_v3.py @@ -337,9 +337,10 @@ def get_fixing_vulnerabilities(self, package): if results: return results - if package.type not in TYPES_WITH_MULTIPLE_IMPORTERS: - advisories_qs = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl(package.package_url) + advisories_qs = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl( + package.package_url + ) advisories_ids = advisories_qs.only("id") advisories_ids = list(advisories_ids[:101]) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index c64ba757a..e02571462 100644 --- 
a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -3568,7 +3568,9 @@ def with_is_vulnerable(self): return self.annotate( is_vulnerable=Exists( ImpactedPackage.objects.filter( - affecting_packages__pk=OuterRef("pk"), advisory__is_latest=True + affecting_packages__pk=OuterRef("pk"), + advisory__is_latest=True, + advisory___all_impacts_unfurled=True, ) ) ) @@ -3577,6 +3579,7 @@ def all_vulnerable(self): latest_impacts = ImpactedPackageAffecting.objects.filter( package_id=OuterRef("pk"), impacted_package__advisory__is_latest=True, + impacted_package__advisory___all_impacts_unfurled=True, ) query = PackageV2.objects.filter(Exists(latest_impacts)) diff --git a/vulnerabilities/pipes/export.py b/vulnerabilities/pipes/export.py index fcd3b327b..7f0feb8f6 100644 --- a/vulnerabilities/pipes/export.py +++ b/vulnerabilities/pipes/export.py @@ -25,7 +25,9 @@ def package_prefetched_qs(checkpoint): .prefetch_related( Prefetch( "affected_in_impacts", - queryset=ImpactedPackage.objects.filter(advisory__is_latest=True) + queryset=ImpactedPackage.objects.filter( + advisory__is_latest=True, advisory___all_impacts_unfurled=True + ) .only("advisory_id") .prefetch_related( Prefetch( @@ -36,7 +38,9 @@ def package_prefetched_qs(checkpoint): ), Prefetch( "fixed_in_impacts", - queryset=ImpactedPackage.objects.filter(advisory__is_latest=True) + queryset=ImpactedPackage.objects.filter( + advisory__is_latest=True, advisory___all_impacts_unfurled=True + ) .only("advisory_id") .prefetch_related( Prefetch( diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 438a70d77..4a00675fd 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -217,11 +217,13 @@ def get_queryset(self, query=None): affecting_exists = ImpactedPackageAffecting.objects.filter( package_id=OuterRef("pk"), impacted_package__advisory___all_impacts_unfurled=True, + impacted_package__advisory__is_latest=True, ) fixed_by_exists = ImpactedPackageFixedBy.objects.filter( 
package_id=OuterRef("pk"), impacted_package__advisory___all_impacts_unfurled=True, + impacted_package__advisory__is_latest=True, ) return ( From 6b58173b720a49e62bd090c7496847bea7771e19 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 2 Jun 2026 00:49:28 +0530 Subject: [PATCH 03/21] Fix migration order Signed-off-by: Tushar Goel --- ...isoryv2__all_impacts_unfurled_and_more.py} | 22 +++++++++++++++++-- .../0134_alter_advisoryset_unique_together.py | 17 -------------- .../0135_advisoryv2__all_impacts_unfurled.py | 21 ------------------ ...dpackage_vulnerabili_advisor_1e3414_idx.py | 19 ---------------- 4 files changed, 20 insertions(+), 59 deletions(-) rename vulnerabilities/migrations/{0133_alter_advisoryv2_advisory_id_alter_advisoryv2_avid_and_more.py => 0134_advisoryv2__all_impacts_unfurled_and_more.py} (63%) delete mode 100644 vulnerabilities/migrations/0134_alter_advisoryset_unique_together.py delete mode 100644 vulnerabilities/migrations/0135_advisoryv2__all_impacts_unfurled.py delete mode 100644 vulnerabilities/migrations/0136_impactedpackage_vulnerabili_advisor_1e3414_idx.py diff --git a/vulnerabilities/migrations/0133_alter_advisoryv2_advisory_id_alter_advisoryv2_avid_and_more.py b/vulnerabilities/migrations/0134_advisoryv2__all_impacts_unfurled_and_more.py similarity index 63% rename from vulnerabilities/migrations/0133_alter_advisoryv2_advisory_id_alter_advisoryv2_avid_and_more.py rename to vulnerabilities/migrations/0134_advisoryv2__all_impacts_unfurled_and_more.py index 8f45487b2..fd6f44668 100644 --- a/vulnerabilities/migrations/0133_alter_advisoryv2_advisory_id_alter_advisoryv2_avid_and_more.py +++ b/vulnerabilities/migrations/0134_advisoryv2__all_impacts_unfurled_and_more.py @@ -1,4 +1,4 @@ -# Generated by Django 5.2.11 on 2026-05-26 08:07 +# Generated by Django 5.2.11 on 2026-06-01 19:18 from django.db import migrations, models @@ -6,10 +6,18 @@ class Migration(migrations.Migration): dependencies = [ - ("vulnerabilities", 
"0132_migrate_advisoryv2_datasource_ids"), + ("vulnerabilities", "0133_alter_advisorytodov2_issue_detail"), ] operations = [ + migrations.AddField( + model_name="advisoryv2", + name="_all_impacts_unfurled", + field=models.BooleanField( + default=False, + help_text="Indicates whether all impacts for this advisory have been unfurled.", + ), + ), migrations.AlterField( model_name="advisoryv2", name="advisory_id", @@ -45,4 +53,14 @@ class Migration(migrations.Migration): max_length=50, ), ), + migrations.AlterUniqueTogether( + name="advisoryset", + unique_together={("package", "relation_type", "primary_advisory")}, + ), + migrations.AddIndex( + model_name="impactedpackage", + index=models.Index( + fields=["advisory", "last_range_unfurl_at"], name="vulnerabili_advisor_1e3414_idx" + ), + ), ] diff --git a/vulnerabilities/migrations/0134_alter_advisoryset_unique_together.py b/vulnerabilities/migrations/0134_alter_advisoryset_unique_together.py deleted file mode 100644 index 7b0c2a4be..000000000 --- a/vulnerabilities/migrations/0134_alter_advisoryset_unique_together.py +++ /dev/null @@ -1,17 +0,0 @@ -# Generated by Django 5.2.11 on 2026-05-28 13:58 - -from django.db import migrations - - -class Migration(migrations.Migration): - - dependencies = [ - ("vulnerabilities", "0133_alter_advisoryv2_advisory_id_alter_advisoryv2_avid_and_more"), - ] - - operations = [ - migrations.AlterUniqueTogether( - name="advisoryset", - unique_together={("package", "relation_type", "primary_advisory")}, - ), - ] diff --git a/vulnerabilities/migrations/0135_advisoryv2__all_impacts_unfurled.py b/vulnerabilities/migrations/0135_advisoryv2__all_impacts_unfurled.py deleted file mode 100644 index bcea170de..000000000 --- a/vulnerabilities/migrations/0135_advisoryv2__all_impacts_unfurled.py +++ /dev/null @@ -1,21 +0,0 @@ -# Generated by Django 5.2.11 on 2026-06-01 10:56 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("vulnerabilities", 
"0134_alter_advisoryset_unique_together"), - ] - - operations = [ - migrations.AddField( - model_name="advisoryv2", - name="_all_impacts_unfurled", - field=models.BooleanField( - default=False, - help_text="Indicates whether all impacts for this advisory have been unfurled.", - ), - ), - ] diff --git a/vulnerabilities/migrations/0136_impactedpackage_vulnerabili_advisor_1e3414_idx.py b/vulnerabilities/migrations/0136_impactedpackage_vulnerabili_advisor_1e3414_idx.py deleted file mode 100644 index 033193c34..000000000 --- a/vulnerabilities/migrations/0136_impactedpackage_vulnerabili_advisor_1e3414_idx.py +++ /dev/null @@ -1,19 +0,0 @@ -# Generated by Django 5.2.11 on 2026-06-01 11:25 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("vulnerabilities", "0135_advisoryv2__all_impacts_unfurled"), - ] - - operations = [ - migrations.AddIndex( - model_name="impactedpackage", - index=models.Index( - fields=["advisory", "last_range_unfurl_at"], name="vulnerabili_advisor_1e3414_idx" - ), - ), - ] From d54a3cb67d414c7e41a0765cfcdf2cdac67ace2b Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 2 Jun 2026 01:15:06 +0530 Subject: [PATCH 04/21] Only use latest impacts for checking pending unfurls Signed-off-by: Tushar Goel --- .../pipelines/v2_improvers/mark_all_impacts_unfurled.py | 1 + vulnerabilities/pipes/advisory.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnerabilities/pipelines/v2_improvers/mark_all_impacts_unfurled.py b/vulnerabilities/pipelines/v2_improvers/mark_all_impacts_unfurled.py index 466aeda84..c5ee215b4 100644 --- a/vulnerabilities/pipelines/v2_improvers/mark_all_impacts_unfurled.py +++ b/vulnerabilities/pipelines/v2_improvers/mark_all_impacts_unfurled.py @@ -86,6 +86,7 @@ def latest_advisories_with_all_impacts_unfurled(): remaining_unfurled_impacts = ImpactedPackage.objects.filter( advisory_id=OuterRef("pk"), last_range_unfurl_at__isnull=True, + advisory__is_latest=True, 
) return ( diff --git a/vulnerabilities/pipes/advisory.py b/vulnerabilities/pipes/advisory.py index 4198349a6..517175489 100644 --- a/vulnerabilities/pipes/advisory.py +++ b/vulnerabilities/pipes/advisory.py @@ -47,7 +47,6 @@ from vulnerabilities.models import VulnerabilityRelatedReference from vulnerabilities.models import VulnerabilitySeverity from vulnerabilities.models import Weakness -from vulnerabilities.pipes.group_advisories import group_advisory_for_package from vulnerabilities.pipes.risk_score import compute_advisory_risk_score from vulnerabilities.pipes.univers_utils import get_exact_purls_v2 From ef611b2bc464a75c3f8bea50707d7dd0a302d36e Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 2 Jun 2026 02:00:58 +0530 Subject: [PATCH 05/21] Make search fast Signed-off-by: Tushar Goel --- vulnerabilities/views.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 4a00675fd..144b19e46 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -210,30 +210,31 @@ def get_context_data(self, **kwargs): def get_queryset(self, query=None): """ Return a Package queryset for the ``query``. - Make a best effort approach to find matching packages either based - on exact purl, partial purl or just name and namespace. 
""" + query = query or self.request.GET.get("search") or "" - affecting_exists = ImpactedPackageAffecting.objects.filter( - package_id=OuterRef("pk"), + affecting_package_ids = ImpactedPackageAffecting.objects.filter( impacted_package__advisory___all_impacts_unfurled=True, impacted_package__advisory__is_latest=True, - ) + ).values("package_id") - fixed_by_exists = ImpactedPackageFixedBy.objects.filter( - package_id=OuterRef("pk"), + fixed_by_package_ids = ImpactedPackageFixedBy.objects.filter( impacted_package__advisory___all_impacts_unfurled=True, impacted_package__advisory__is_latest=True, - ) + ).values("package_id") return ( self.model.objects.search(query) + .filter(Q(id__in=affecting_package_ids) | Q(id__in=fixed_by_package_ids)) .annotate( - has_affecting=Exists(affecting_exists), - has_fixed_by=Exists(fixed_by_exists), + is_vulnerable=Exists( + ImpactedPackageAffecting.objects.filter( + package_id=OuterRef("pk"), + impacted_package__advisory___all_impacts_unfurled=True, + impacted_package__advisory__is_latest=True, + ) + ) ) - .filter(Q(has_affecting=True) | Q(has_fixed_by=True)) - .with_is_vulnerable() ) From 124f010cb7c9c811df656665b9f4e3b4edf887c0 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 2 Jun 2026 11:54:34 +0530 Subject: [PATCH 06/21] mark unfurl should be a step in unfurling version range pipeline Signed-off-by: Tushar Goel --- vulnerabilities/importers/__init__.py | 64 ++++++------ vulnerabilities/improvers/__init__.py | 50 +++++----- .../v2_improvers/mark_all_impacts_unfurled.py | 97 ------------------- .../v2_improvers/unfurl_version_range.py | 73 +++++++++++++- .../test_mark_all_impacts_unfurled.py | 10 +- 5 files changed, 130 insertions(+), 164 deletions(-) delete mode 100644 vulnerabilities/pipelines/v2_improvers/mark_all_impacts_unfurled.py diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index f0065dfe7..e5d28f6ff 100644 --- a/vulnerabilities/importers/__init__.py +++ 
b/vulnerabilities/importers/__init__.py @@ -120,39 +120,8 @@ ubuntu_osv_importer_v2.UbuntuOSVImporterPipeline, alpine_linux_importer_v2.AlpineLinuxImporterPipeline, linux_kernel_importer_v2.LinuxKernelPipeline, - github_importer.GitHubAPIImporterPipeline, - gitlab_importer.GitLabImporterPipeline, - github_osv.GithubOSVImporter, - pypa_importer.PyPaImporterPipeline, - npm_importer.NpmImporterPipeline, - nginx_importer.NginxImporterPipeline, - pysec_importer.PyPIImporterPipeline, - fireeye_importer_v2.FireeyeImporterPipeline, - apache_tomcat.ApacheTomcatImporter, - postgresql.PostgreSQLImporter, - debian.DebianImporter, - curl.CurlImporter, - epss.EPSSImporter, - vulnrichment.VulnrichImporter, - alpine_linux_importer.AlpineLinuxImporterPipeline, - ruby.RubyImporter, - apache_kafka.ApacheKafkaImporter, openssl_importer_v2.OpenSSLImporterPipeline, - redhat.RedhatImporter, - archlinux.ArchlinuxImporter, - debian_oval.DebianOvalImporter, - retiredotnet.RetireDotnetImporter, - apache_httpd.ApacheHTTPDImporter, - mozilla.MozillaImporter, - gentoo.GentooImporter, - istio.IstioImporter, - project_kb_msr2019.ProjectKBMSRImporter, - suse_scores.SUSESeverityScoreImporter, - elixir_security.ElixirSecurityImporter, - xen.XenImporter, - ubuntu_usn.UbuntuUSNImporter, - fireeye.FireyeImporter, - oss_fuzz.OSSFuzzImporter, + fireeye_importer_v2.FireeyeImporterPipeline, collect_fix_commits_v2.CollectLinuxFixCommitsPipeline, collect_fix_commits_v2.CollectBusyBoxFixCommitsPipeline, collect_fix_commits_v2.CollectNginxFixCommitsPipeline, @@ -190,6 +159,37 @@ collect_fix_commits_v2.CollectGitFixCommitsPipeline, collect_fix_commits_v2.CollectJenkinsFixCommitsPipeline, collect_fix_commits_v2.CollectGitlabFixCommitsPipeline, + # github_importer.GitHubAPIImporterPipeline, + # gitlab_importer.GitLabImporterPipeline, + # github_osv.GithubOSVImporter, + # pypa_importer.PyPaImporterPipeline, + # npm_importer.NpmImporterPipeline, + # nginx_importer.NginxImporterPipeline, + # 
pysec_importer.PyPIImporterPipeline, + # apache_tomcat.ApacheTomcatImporter, + # postgresql.PostgreSQLImporter, + # debian.DebianImporter, + # curl.CurlImporter, + # epss.EPSSImporter, + # vulnrichment.VulnrichImporter, + # alpine_linux_importer.AlpineLinuxImporterPipeline, + # apache_kafka.ApacheKafkaImporter, + # ruby.RubyImporter, + # redhat.RedhatImporter, + # archlinux.ArchlinuxImporter, + # debian_oval.DebianOvalImporter, + # retiredotnet.RetireDotnetImporter, + # apache_httpd.ApacheHTTPDImporter, + # mozilla.MozillaImporter, + # gentoo.GentooImporter, + # istio.IstioImporter, + # project_kb_msr2019.ProjectKBMSRImporter, + # suse_scores.SUSESeverityScoreImporter, + # elixir_security.ElixirSecurityImporter, + # xen.XenImporter, + # ubuntu_usn.UbuntuUSNImporter, + # fireeye.FireyeImporter, + # oss_fuzz.OSSFuzzImporter, ] ) diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index db5ee6af9..be3278734 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -27,9 +27,6 @@ enhance_with_metasploit as enhance_with_metasploit_v2, ) from vulnerabilities.pipelines.v2_improvers import flag_ghost_packages as flag_ghost_packages_v2 -from vulnerabilities.pipelines.v2_improvers import ( - mark_all_impacts_unfurled as mark_all_impacts_unfurled_v2, -) from vulnerabilities.pipelines.v2_improvers import reference_collect_commits from vulnerabilities.pipelines.v2_improvers import relate_severities from vulnerabilities.pipelines.v2_improvers import unfurl_version_range as unfurl_version_range_v2 @@ -37,29 +34,6 @@ IMPROVERS_REGISTRY = create_registry( [ - valid_versions.GitHubBasicImprover, - valid_versions.GitLabBasicImprover, - valid_versions.NginxBasicImprover, - valid_versions.ApacheHTTPDImprover, - valid_versions.DebianBasicImprover, - valid_versions.NpmImprover, - valid_versions.ElixirImprover, - valid_versions.ApacheTomcatImprover, - valid_versions.ApacheKafkaImprover, - 
valid_versions.IstioImprover, - valid_versions.DebianOvalImprover, - valid_versions.OSSFuzzImprover, - valid_versions.RubyImprover, - valid_versions.GithubOSVImprover, - vulnerability_status.VulnerabilityStatusImprover, - valid_versions.CurlImprover, - flag_ghost_packages.FlagGhostPackagePipeline, - enhance_with_kev.VulnerabilityKevPipeline, - enhance_with_metasploit.MetasploitImproverPipeline, - enhance_with_exploitdb.ExploitDBImproverPipeline, - compute_package_risk.ComputePackageRiskPipeline, - compute_package_version_rank.ComputeVersionRankPipeline, - populate_vulnerability_summary_pipeline.PopulateVulnerabilitySummariesPipeline, exploitdb_v2.ExploitDBImproverPipeline, enhance_with_kev_v2.VulnerabilityKevPipeline, flag_ghost_packages_v2.FlagGhostPackagePipeline, @@ -72,6 +46,28 @@ compute_advisory_todo_v2.ComputeToDo, reference_collect_commits.CollectReferencesFixCommitsPipeline, enhance_with_github_poc.GithubPocsImproverPipeline, - mark_all_impacts_unfurled_v2.MarkAllImpactsUnfurledPipeline, + # valid_versions.GitHubBasicImprover, + # valid_versions.GitLabBasicImprover, + # valid_versions.NginxBasicImprover, + # valid_versions.ApacheHTTPDImprover, + # valid_versions.DebianBasicImprover, + # valid_versions.NpmImprover, + # valid_versions.ElixirImprover, + # valid_versions.ApacheTomcatImprover, + # valid_versions.ApacheKafkaImprover, + # valid_versions.IstioImprover, + # valid_versions.DebianOvalImprover, + # valid_versions.OSSFuzzImprover, + # valid_versions.RubyImprover, + # valid_versions.GithubOSVImprover, + # vulnerability_status.VulnerabilityStatusImprover, + # valid_versions.CurlImprover, + # flag_ghost_packages.FlagGhostPackagePipeline, + # enhance_with_kev.VulnerabilityKevPipeline, + # enhance_with_metasploit.MetasploitImproverPipeline, + # enhance_with_exploitdb.ExploitDBImproverPipeline, + # compute_package_risk.ComputePackageRiskPipeline, + # compute_package_version_rank.ComputeVersionRankPipeline, + # 
populate_vulnerability_summary_pipeline.PopulateVulnerabilitySummariesPipeline, ] ) diff --git a/vulnerabilities/pipelines/v2_improvers/mark_all_impacts_unfurled.py b/vulnerabilities/pipelines/v2_improvers/mark_all_impacts_unfurled.py deleted file mode 100644 index c5ee215b4..000000000 --- a/vulnerabilities/pipelines/v2_improvers/mark_all_impacts_unfurled.py +++ /dev/null @@ -1,97 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# VulnerableCode is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/aboutcode-org/vulnerablecode for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# - -from django.db import transaction -from django.db.models import Exists -from django.db.models import OuterRef - -from vulnerabilities.models import AdvisoryV2 -from vulnerabilities.models import ImpactedPackage -from vulnerabilities.models import ImpactedPackageAffecting -from vulnerabilities.models import ImpactedPackageFixedBy -from vulnerabilities.models import PackageV2 -from vulnerabilities.models import PipelineSchedule -from vulnerabilities.pipelines import VulnerableCodePipeline -from vulnerabilities.pipes.group_advisories import group_advisory_for_package -from vulnerabilities.pipes.risk_score import compute_package_risk_score_bulk -from vulnerabilities.utils import TYPES_WITH_MULTIPLE_IMPORTERS - - -class MarkAllImpactsUnfurledPipeline(VulnerableCodePipeline): - """Mark advisories as fully unfurled once every related impact has been unfurled.""" - - pipeline_id = "mark_all_impacts_unfurled_v2" - run_interval = 1 - run_priority = PipelineSchedule.ExecutionPriority.HIGH - - @classmethod - def steps(cls): - return (cls.mark_all_impacts_unfurled,) - - def mark_all_impacts_unfurled(self): - while True: - advisories = list(latest_advisories_with_all_impacts_unfurled()[:100]) - - if not advisories: - 
break - - complete_advisories_import(AdvisoryV2.objects.filter(id__in=[a.id for a in advisories])) - - -@transaction.atomic -def complete_advisories_import(advisories): - - advisory_ids = list(advisories.values_list("id", flat=True)) - - if not advisory_ids: - return - - AdvisoryV2.objects.filter(id__in=advisory_ids).update(_all_impacts_unfurled=True) - - affecting_package_ids = set( - ImpactedPackageAffecting.objects.filter( - impacted_package__advisory_id__in=advisory_ids - ).values_list( - "package_id", - flat=True, - ) - ) - - fixed_by_package_ids = set( - ImpactedPackageFixedBy.objects.filter( - impacted_package__advisory_id__in=advisory_ids - ).values_list( - "package_id", - flat=True, - ) - ) - - compute_package_risk_score_bulk(PackageV2.objects.filter(id__in=affecting_package_ids)) - - group_package_ids = affecting_package_ids | fixed_by_package_ids - - for package in PackageV2.objects.filter( - id__in=group_package_ids, type__in=TYPES_WITH_MULTIPLE_IMPORTERS - ).iterator(chunk_size=2000): - group_advisory_for_package(package) - - -def latest_advisories_with_all_impacts_unfurled(): - remaining_unfurled_impacts = ImpactedPackage.objects.filter( - advisory_id=OuterRef("pk"), - last_range_unfurl_at__isnull=True, - advisory__is_latest=True, - ) - - return ( - AdvisoryV2.objects.filter(_all_impacts_unfurled=False, is_latest=True) - .annotate(has_remaining_unfurled=Exists(remaining_unfurled_impacts)) - .filter(has_remaining_unfurled=False) - .order_by("id") - ) diff --git a/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py b/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py index db68d7c1f..b051e1ec9 100644 --- a/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py +++ b/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py @@ -13,7 +13,9 @@ from aboutcode.pipeline import LoopProgress from django.db import transaction +from django.db.models import Exists from django.db.models import F +from django.db.models import 
OuterRef from django.db.models import Q from django.utils import timezone from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS as FETCHCODE_SUPPORTED_ECOSYSTEMS @@ -21,12 +23,17 @@ from univers.version_range import RANGE_CLASS_BY_SCHEMES from univers.version_range import VersionRange +from vulnerabilities.models import AdvisoryV2 from vulnerabilities.models import ImpactedPackage from vulnerabilities.models import ImpactedPackageAffecting +from vulnerabilities.models import ImpactedPackageFixedBy from vulnerabilities.models import PackageV2 from vulnerabilities.models import PipelineSchedule from vulnerabilities.pipelines import VulnerableCodePipeline from vulnerabilities.pipes.fetchcode_utils import get_versions +from vulnerabilities.pipes.group_advisories import group_advisory_for_package +from vulnerabilities.pipes.risk_score import compute_package_risk_score_bulk +from vulnerabilities.utils import TYPES_WITH_MULTIPLE_IMPORTERS from vulnerabilities.utils import update_purl_version @@ -39,7 +46,7 @@ class UnfurlVersionRangePipeline(VulnerableCodePipeline): pipeline_id = "unfurl_version_range_v2" - run_interval = 2 + run_interval = 1 run_priority = PipelineSchedule.ExecutionPriority.HIGH # Days elapsed before version range is re-unfurled @@ -47,7 +54,7 @@ class UnfurlVersionRangePipeline(VulnerableCodePipeline): @classmethod def steps(cls): - return (cls.unfurl_version_range,) + return (cls.unfurl_version_range, cls.mark_all_impacts_unfurled) def unfurl_version_range(self): processed_impacted_packages_count = 0 @@ -115,6 +122,15 @@ def unfurl_version_range(self): self.log(f"Successfully processed {processed_impacted_packages_count:,d} ImpactedPackage.") self.log(f"{processed_affected_packages_count:,d} new Impact-Package relation created.") + def mark_all_impacts_unfurled(self): + while True: + advisories = list(latest_advisories_with_all_impacts_unfurled()[:100]) + + if not advisories: + break + + 
complete_advisories_import(AdvisoryV2.objects.filter(id__in=[a.id for a in advisories])) + def get_affected_purls(versions, impact, logger): affecting_version_range = VersionRange.from_string(impact.affecting_vers) @@ -200,3 +216,56 @@ def impacted_package_qs(cutoff_day=2): .order_by("advisory__id", F("last_range_unfurl_at").asc(nulls_first=True)) .only("pk", "affecting_vers", "advisory", "base_purl") ) + + +@transaction.atomic +def complete_advisories_import(advisories): + + advisory_ids = list(advisories.values_list("id", flat=True)) + + if not advisory_ids: + return + + AdvisoryV2.objects.filter(id__in=advisory_ids).update(_all_impacts_unfurled=True) + + affecting_package_ids = set( + ImpactedPackageAffecting.objects.filter( + impacted_package__advisory_id__in=advisory_ids + ).values_list( + "package_id", + flat=True, + ) + ) + + fixed_by_package_ids = set( + ImpactedPackageFixedBy.objects.filter( + impacted_package__advisory_id__in=advisory_ids + ).values_list( + "package_id", + flat=True, + ) + ) + + compute_package_risk_score_bulk(PackageV2.objects.filter(id__in=affecting_package_ids)) + + group_package_ids = affecting_package_ids | fixed_by_package_ids + + for package in PackageV2.objects.filter( + id__in=group_package_ids, type__in=TYPES_WITH_MULTIPLE_IMPORTERS + ).iterator(chunk_size=2000): + group_advisory_for_package(package) + + +def latest_advisories_with_all_impacts_unfurled(): + remaining_unfurled_impacts = ImpactedPackage.objects.filter( + advisory_id=OuterRef("pk"), + last_range_unfurl_at__isnull=True, + advisory__is_latest=True, + ) + + return ( + AdvisoryV2.objects.filter(_all_impacts_unfurled=False, is_latest=True) + .annotate(has_remaining_unfurled=Exists(remaining_unfurled_impacts)) + .filter(has_remaining_unfurled=False) + .order_by("id") + ) diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_mark_all_impacts_unfurled.py b/vulnerabilities/tests/pipelines/v2_improvers/test_mark_all_impacts_unfurled.py index 08b751e46..d55fb10ec 
100644 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_mark_all_impacts_unfurled.py +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_mark_all_impacts_unfurled.py @@ -14,10 +14,8 @@ from vulnerabilities.models import AdvisoryV2 from vulnerabilities.models import ImpactedPackage -from vulnerabilities.pipelines.v2_improvers.mark_all_impacts_unfurled import ( - MarkAllImpactsUnfurledPipeline, -) -from vulnerabilities.pipelines.v2_improvers.mark_all_impacts_unfurled import ( +from vulnerabilities.pipelines.v2_improvers.unfurl_version_range import UnfurlVersionRangePipeline +from vulnerabilities.pipelines.v2_improvers.unfurl_version_range import ( latest_advisories_with_all_impacts_unfurled, ) @@ -125,7 +123,7 @@ def test_pipeline_marks_matching_advisories(): last_range_unfurl_at=timezone.now(), ) - pipeline = MarkAllImpactsUnfurledPipeline() + pipeline = UnfurlVersionRangePipeline() pipeline.mark_all_impacts_unfurled() advisory.refresh_from_db() @@ -149,7 +147,7 @@ def test_pipeline_does_not_mark_partial_advisory(): last_range_unfurl_at=None, ) - pipeline = MarkAllImpactsUnfurledPipeline() + pipeline = UnfurlVersionRangePipeline() pipeline.mark_all_impacts_unfurled() advisory.refresh_from_db() From 3644b8b0d08f660004ca86e1627ae959ca27f699 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 2 Jun 2026 16:14:37 +0530 Subject: [PATCH 07/21] Fix risk score pipeline Signed-off-by: Tushar Goel --- vulnerabilities/models.py | 20 ++++++++++++------- .../v2_improvers/compute_package_risk.py | 2 +- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index e02571462..3d8d50ea8 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2930,6 +2930,12 @@ def latest_for_avid(self, avid: str): def latest_per_avid(self): return self.filter(is_latest=True) + + def latest_for_avid_completely_imported_advisories(self, avid): + return self.get(avid=avid, is_latest=True, 
_all_impacts_unfurled=True) + + def latest_completely_imported_advisories_per_avid(self): + return self.filter(is_latest=True, _all_impacts_unfurled=True) def latest_for_avids(self, avids): return self.filter(avid__in=avids).latest_per_avid() @@ -2941,7 +2947,7 @@ def latest_affecting_advisories_for_purl(self, purl): advisory___all_impacts_unfurled=True, ).values_list("advisory_id", flat=True) - return self.filter(id__in=Subquery(adv_ids)).latest_per_avid() + return self.filter(id__in=Subquery(adv_ids)) def latest_affecting_advisories_for_purls(self, purls): adv_ids = ImpactedPackage.objects.filter( @@ -2952,7 +2958,7 @@ def latest_affecting_advisories_for_purls(self, purls): "advisory_id", flat=True, ) - return self.filter(id__in=Subquery(adv_ids)).latest_per_avid() + return self.filter(id__in=Subquery(adv_ids)) def latest_affecting_advisories_for_packages(self, packages): adv_ids = ImpactedPackage.objects.filter( @@ -2963,7 +2969,7 @@ def latest_affecting_advisories_for_packages(self, packages): "advisory_id", flat=True, ) - return self.filter(id__in=Subquery(adv_ids)).latest_per_avid() + return self.filter(id__in=Subquery(adv_ids)) def latest_fixed_by_advisories_for_purl(self, purl): adv_ids = ImpactedPackage.objects.filter( @@ -2974,7 +2980,7 @@ def latest_fixed_by_advisories_for_purl(self, purl): "advisory_id", flat=True, ) - return self.filter(id__in=Subquery(adv_ids)).latest_per_avid() + return self.filter(id__in=Subquery(adv_ids)) def latest_fixed_by_advisories_for_purls(self, purls): adv_ids = ImpactedPackage.objects.filter( @@ -2986,7 +2992,7 @@ def latest_fixed_by_advisories_for_purls(self, purls): flat=True, ) - return self.filter(id__in=Subquery(adv_ids)).latest_per_avid() + return self.filter(id__in=Subquery(adv_ids)) def latest_advisories_for_purls(self, purls): adv_ids = ( @@ -3012,7 +3018,7 @@ def latest_advisories_for_purls(self, purls): ) qs = self.filter(id__in=Subquery(adv_ids)) - return qs.latest_per_avid() + return qs def 
latest_advisories_for_purl(self, purl): adv_ids = ( @@ -3038,7 +3044,7 @@ def latest_advisories_for_purl(self, purl): ) qs = self.filter(id__in=Subquery(adv_ids)) - return qs.latest_per_avid() + return qs def todo_excluded(self): """Exclude advisory ineligible for ToDo computation.""" diff --git a/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py b/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py index 3d3da7c94..d97d637e1 100644 --- a/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py +++ b/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py @@ -132,7 +132,7 @@ def compute_and_store_vulnerability_risk_score(self): def compute_and_store_package_risk_score(self): - latest_advisories = AdvisoryV2.objects.latest_per_avid() + latest_advisories = AdvisoryV2.objects.latest_completely_imported_advisories_per_avid() qs = ( PackageV2.objects.filter( From 8923705344821ef6e1baa65fbf028ba3bfc7f56c Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 3 Jun 2026 12:40:43 +0530 Subject: [PATCH 08/21] Revert search code Signed-off-by: Tushar Goel --- vulnerabilities/models.py | 9 ++++++--- vulnerabilities/views.py | 23 +---------------------- 2 files changed, 7 insertions(+), 25 deletions(-) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 3d8d50ea8..cc2c5709b 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2930,15 +2930,18 @@ def latest_for_avid(self, avid: str): def latest_per_avid(self): return self.filter(is_latest=True) - + + def latest_for_avids(self, avids): + return self.filter(avid__in=avids).latest_per_avid() + def latest_for_avid_completely_imported_advisories(self, avid): return self.get(avid=avid, is_latest=True, _all_impacts_unfurled=True) def latest_completely_imported_advisories_per_avid(self): return self.filter(is_latest=True, _all_impacts_unfurled=True) - def latest_for_avids(self, avids): - return self.filter(avid__in=avids).latest_per_avid() + def 
latest_for_avids_completely_imported_advisories(self, avids): + return self.get(avid__in=avids, is_latest=True, _all_impacts_unfurled=True) def latest_affecting_advisories_for_purl(self, purl): adv_ids = ImpactedPackage.objects.filter( diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 144b19e46..a9cab5e52 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -213,29 +213,8 @@ def get_queryset(self, query=None): """ query = query or self.request.GET.get("search") or "" - affecting_package_ids = ImpactedPackageAffecting.objects.filter( - impacted_package__advisory___all_impacts_unfurled=True, - impacted_package__advisory__is_latest=True, - ).values("package_id") - fixed_by_package_ids = ImpactedPackageFixedBy.objects.filter( - impacted_package__advisory___all_impacts_unfurled=True, - impacted_package__advisory__is_latest=True, - ).values("package_id") - - return ( - self.model.objects.search(query) - .filter(Q(id__in=affecting_package_ids) | Q(id__in=fixed_by_package_ids)) - .annotate( - is_vulnerable=Exists( - ImpactedPackageAffecting.objects.filter( - package_id=OuterRef("pk"), - impacted_package__advisory___all_impacts_unfurled=True, - impacted_package__advisory__is_latest=True, - ) - ) - ) - ) + return self.model.objects.search(query).prefetch_related().with_is_vulnerable() class AffectedByAdvisoriesListView(VulnerableCodeListView): From 8ef20926dbe8aea9986d4be1297e5fa37af877a1 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 3 Jun 2026 13:32:08 +0530 Subject: [PATCH 09/21] Refine search Signed-off-by: Tushar Goel --- ...v2_advisory_latest_by_avid_idx_and_more.py | 23 +++++++++++++++++++ vulnerabilities/models.py | 6 +++++ vulnerabilities/views.py | 17 +++++++++++++- 3 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 vulnerabilities/migrations/0135_remove_advisoryv2_advisory_latest_by_avid_idx_and_more.py diff --git 
a/vulnerabilities/migrations/0135_remove_advisoryv2_advisory_latest_by_avid_idx_and_more.py b/vulnerabilities/migrations/0135_remove_advisoryv2_advisory_latest_by_avid_idx_and_more.py new file mode 100644 index 000000000..3d63339f5 --- /dev/null +++ b/vulnerabilities/migrations/0135_remove_advisoryv2_advisory_latest_by_avid_idx_and_more.py @@ -0,0 +1,23 @@ +# Generated by Django 5.2.11 on 2026-06-03 08:00 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0134_advisoryv2__all_impacts_unfurled_and_more"), + ] + + operations = [ + migrations.RemoveIndex( + model_name="advisoryv2", + name="advisory_latest_by_avid_idx", + ), + migrations.AddIndex( + model_name="advisoryv2", + index=models.Index( + fields=["_all_impacts_unfurled", "id"], name="advisory_unfurled_idx" + ), + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index cc2c5709b..e96b5c540 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -3274,6 +3274,12 @@ class Meta: name="advisory_latest_by_avid_idx", ) ] + indexes = [ + models.Index( + fields=["_all_impacts_unfurled", "id"], + name="advisory_unfurled_idx", + ), + ] def save(self, *args, **kwargs): self.full_clean() diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index a9cab5e52..680d08cc0 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -214,7 +214,22 @@ def get_queryset(self, query=None): query = query or self.request.GET.get("search") or "" - return self.model.objects.search(query).prefetch_related().with_is_vulnerable() + qs = self.model.objects.search(query) + + affecting_exists = ImpactedPackageAffecting.objects.filter( + packagev2_id=OuterRef("pk"), + impactedpackage__advisory___all_impacts_unfurled=True, + ) + + fixed_by_exists = ImpactedPackageFixedBy.objects.filter( + packagev2_id=OuterRef("pk"), + impactedpackage__advisory___all_impacts_unfurled=True, + ) + + return 
qs.objects.annotate( + is_vulnerable=Exists(affecting_exists), + is_fixing=Exists(fixed_by_exists), + ).filter(Q(is_vulnerable=True) | Q(is_fixing=True)) class AffectedByAdvisoriesListView(VulnerableCodeListView): From 2061d0d7126e2841481173bdad83e6e93b19fba5 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 3 Jun 2026 13:34:08 +0530 Subject: [PATCH 10/21] Fix views Signed-off-by: Tushar Goel --- vulnerabilities/views.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 680d08cc0..c45025dce 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -217,16 +217,16 @@ def get_queryset(self, query=None): qs = self.model.objects.search(query) affecting_exists = ImpactedPackageAffecting.objects.filter( - packagev2_id=OuterRef("pk"), - impactedpackage__advisory___all_impacts_unfurled=True, + package_id=OuterRef("pk"), + impacted_package__advisory___all_impacts_unfurled=True, ) fixed_by_exists = ImpactedPackageFixedBy.objects.filter( - packagev2_id=OuterRef("pk"), - impactedpackage__advisory___all_impacts_unfurled=True, + package_id=OuterRef("pk"), + impacted_package__advisory___all_impacts_unfurled=True, ) - return qs.objects.annotate( + return qs.annotate( is_vulnerable=Exists(affecting_exists), is_fixing=Exists(fixed_by_exists), ).filter(Q(is_vulnerable=True) | Q(is_fixing=True)) From a74589bc1beb0cf9e3be31f7c8388dfcf4831814 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 3 Jun 2026 13:43:05 +0530 Subject: [PATCH 11/21] Use latest advisories for impacts Signed-off-by: Tushar Goel --- vulnerabilities/views.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index c45025dce..385795f12 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -219,11 +219,13 @@ def get_queryset(self, query=None): affecting_exists = ImpactedPackageAffecting.objects.filter( package_id=OuterRef("pk"), 
impacted_package__advisory___all_impacts_unfurled=True, + impacted_package__advisory__is_latest=True, ) fixed_by_exists = ImpactedPackageFixedBy.objects.filter( package_id=OuterRef("pk"), impacted_package__advisory___all_impacts_unfurled=True, + impacted_package__advisory__is_latest=True, ) return qs.annotate( From 39b28fb048c94fd695d302161f6db6ce221cbf43 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 3 Jun 2026 21:14:45 +0530 Subject: [PATCH 12/21] Fix tests Signed-off-by: Tushar Goel --- ...impacts_unfurled_successfully_and_more.py} | 10 +- vulnerabilities/models.py | 9 +- .../v2_improvers/unfurl_version_range.py | 107 +++- vulnerabilities/pipes/advisory.py | 1 + .../test_federate_vulnerabilities.py | 11 +- .../test_compute_package_risk_v2.py | 1 + .../test_mark_all_impacts_unfurled.py | 549 ++++++++++++++---- vulnerabilities/tests/test_api_v3.py | 10 +- 8 files changed, 547 insertions(+), 151 deletions(-) rename vulnerabilities/migrations/{0135_remove_advisoryv2_advisory_latest_by_avid_idx_and_more.py => 0135_advisoryv2__all_impacts_unfurled_successfully_and_more.py} (59%) diff --git a/vulnerabilities/migrations/0135_remove_advisoryv2_advisory_latest_by_avid_idx_and_more.py b/vulnerabilities/migrations/0135_advisoryv2__all_impacts_unfurled_successfully_and_more.py similarity index 59% rename from vulnerabilities/migrations/0135_remove_advisoryv2_advisory_latest_by_avid_idx_and_more.py rename to vulnerabilities/migrations/0135_advisoryv2__all_impacts_unfurled_successfully_and_more.py index 3d63339f5..20717d277 100644 --- a/vulnerabilities/migrations/0135_remove_advisoryv2_advisory_latest_by_avid_idx_and_more.py +++ b/vulnerabilities/migrations/0135_advisoryv2__all_impacts_unfurled_successfully_and_more.py @@ -1,4 +1,4 @@ -# Generated by Django 5.2.11 on 2026-06-03 08:00 +# Generated by Django 5.2.11 on 2026-06-03 12:46 from django.db import migrations, models @@ -10,9 +10,13 @@ class Migration(migrations.Migration): ] operations = [ - 
migrations.RemoveIndex( + migrations.AddField( model_name="advisoryv2", - name="advisory_latest_by_avid_idx", + name="_all_impacts_unfurled_successfully", + field=models.BooleanField( + default=False, + help_text="Indicates whether all impacts for this advisory have been unfurled successfully.", + ), ), migrations.AddIndex( model_name="advisoryv2", diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index e96b5c540..ff12b5c92 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -3258,6 +3258,11 @@ class AdvisoryV2(models.Model): help_text="Indicates whether all impacts for this advisory have been unfurled.", ) + _all_impacts_unfurled_successfully = models.BooleanField( + default=False, + help_text="Indicates whether all impacts for this advisory have been unfurled successfully.", + ) + objects = AdvisoryV2QuerySet.as_manager() class Meta: @@ -3272,9 +3277,7 @@ class Meta: models.Index( fields=["avid", "-date_collected", "-id"], name="advisory_latest_by_avid_idx", - ) - ] - indexes = [ + ), models.Index( fields=["_all_impacts_unfurled", "id"], name="advisory_unfurled_idx", diff --git a/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py b/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py index b051e1ec9..9529be095 100644 --- a/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py +++ b/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py @@ -51,10 +51,15 @@ class UnfurlVersionRangePipeline(VulnerableCodePipeline): # Days elapsed before version range is re-unfurled reunfurl_after_days = 2 + impacted_packages = None @classmethod def steps(cls): - return (cls.unfurl_version_range, cls.mark_all_impacts_unfurled) + return ( + cls.unfurl_version_range, + cls.mark_all_impacts_unfurled_sucessfully, + cls.mark_all_impacts_unfurl_attempted, + ) def unfurl_version_range(self): processed_impacted_packages_count = 0 @@ -66,6 +71,7 @@ def unfurl_version_range(self): chunk_size = 500 impacted_packages = 
impacted_package_qs(cutoff_day=self.reunfurl_after_days) + self.impacted_packages = impacted_packages impacted_packages_count = impacted_packages.count() self.log(f"Unfurl affected vers range for {impacted_packages_count:,d} ImpactedPackage.") @@ -104,32 +110,67 @@ def unfurl_version_range(self): processed_impacted_packages_count += 1 if len(update_unfurl_date) > update_batch_size: + cur_time = timezone.now() ImpactedPackage.objects.filter(pk__in=update_unfurl_date).update( - last_range_unfurl_at=timezone.now() + last_range_unfurl_at=cur_time ) ImpactedPackage.objects.filter(pk__in=update_successful_unfurl_date).update( - last_successful_range_unfurl_at=timezone.now() + last_successful_range_unfurl_at=cur_time ) update_unfurl_date.clear() update_successful_unfurl_date.clear() + cur_time = timezone.now() + ImpactedPackage.objects.filter(pk__in=update_unfurl_date).update( - last_range_unfurl_at=timezone.now() + last_range_unfurl_at=cur_time ) ImpactedPackage.objects.filter(pk__in=update_successful_unfurl_date).update( - last_successful_range_unfurl_at=timezone.now() + last_successful_range_unfurl_at=cur_time ) self.log(f"Successfully processed {processed_impacted_packages_count:,d} ImpactedPackage.") self.log(f"{processed_affected_packages_count:,d} new Impact-Package relation created.") - def mark_all_impacts_unfurled(self): + def mark_all_impacts_unfurled_sucessfully(self): + impacted_packages = self.impacted_packages or impacted_package_qs( + cutoff_day=self.reunfurl_after_days + ) while True: - advisories = list(latest_advisories_with_all_impacts_unfurled()[:100]) + advisory_ids = list( + latest_advisories_with_all_impacts_unfurled_successfully( + impacted_packages=impacted_packages + )[:100] + ) - if not advisories: + if not advisory_ids: break - complete_advisories_import(AdvisoryV2.objects.filter(id__in=[a.id for a in advisories])) + complete_advisories_import(advisory_ids=advisory_ids, success=True) + + def mark_all_impacts_unfurl_attempted(self): + 
impacted_packages = self.impacted_packages or impacted_package_qs( + cutoff_day=self.reunfurl_after_days + ) + advisories_qs = latest_advisories_with_all_impacts_unfurled_attempted( + impacted_packages=impacted_packages + ) + + batch_size = 100 + batch = [] + + for advisory_id in advisories_qs.iterator(chunk_size=100): + batch.append(advisory_id) + + if len(batch) >= batch_size: + complete_advisories_import( + advisory_ids=list(batch), + ) + batch.clear() + + if batch: + complete_advisories_import( + advisory_ids=list(batch), + ) def get_affected_purls(versions, impact, logger): @@ -219,15 +260,17 @@ def impacted_package_qs(cutoff_day=2): @transaction.atomic -def complete_advisories_import(advisories): - - advisory_ids = list(advisories.values_list("id", flat=True)) - +def complete_advisories_import(advisory_ids, success=False): if not advisory_ids: return AdvisoryV2.objects.filter(id__in=advisory_ids).update(_all_impacts_unfurled=True) + if success: + AdvisoryV2.objects.filter(id__in=advisory_ids).update( + _all_impacts_unfurled_successfully=True + ) + affecting_package_ids = set( ImpactedPackageAffecting.objects.filter( impacted_package__advisory_id__in=advisory_ids @@ -256,16 +299,42 @@ def complete_advisories_import(advisories): group_advisory_for_package(package) -def latest_advisories_with_all_impacts_unfurled(): - remaining_unfurled_impacts = ImpactedPackage.objects.filter( +def latest_advisories_with_all_impacts_unfurled_successfully( + impacted_packages=None, +): + unsuccessful_impacts = impacted_packages.filter( + advisory_id=OuterRef("pk"), + advisory__is_latest=True, + ).filter(Q(last_range_unfurl_at__isnull=True) | Q(last_successful_range_unfurl_at__isnull=True)) + + return ( + AdvisoryV2.objects.filter( + _all_impacts_unfurled_successfully=False, + is_latest=True, + ) + .annotate(has_unsuccessful_impacts=Exists(unsuccessful_impacts)) + .filter(has_unsuccessful_impacts=False) + .order_by("id") + .values_list("id", flat=True) + ) + + +def 
latest_advisories_with_all_impacts_unfurled_attempted( + impacted_packages=None, +): + impacts_not_attempted = impacted_packages.filter( advisory_id=OuterRef("pk"), - last_range_unfurl_at__isnull=True, advisory__is_latest=True, + last_range_unfurl_at__isnull=True, ) return ( - AdvisoryV2.objects.filter(_all_impacts_unfurled=False, is_latest=True) - .annotate(has_remaining_unfurled=Exists(remaining_unfurled_impacts)) - .filter(has_remaining_unfurled=False) + AdvisoryV2.objects.filter( + _all_impacts_unfurled_successfully=False, + is_latest=True, + ) + .annotate(has_unattempted_impacts=Exists(impacts_not_attempted)) + .filter(has_unattempted_impacts=False) .order_by("id") + .values_list("id", flat=True) ) diff --git a/vulnerabilities/pipes/advisory.py b/vulnerabilities/pipes/advisory.py index 517175489..9c9c0eb25 100644 --- a/vulnerabilities/pipes/advisory.py +++ b/vulnerabilities/pipes/advisory.py @@ -371,6 +371,7 @@ def insert_advisory_v2( advisory_obj.risk_score = round(risk_score, 1) if risk_score is not None else None if not advisory.affected_packages: advisory_obj._all_impacts_unfurled = True + advisory_obj._all_impacts_unfurled_successfully = True advisory_obj.save() for affected_pkg in advisory.affected_packages: diff --git a/vulnerabilities/tests/pipelines/exporters/test_federate_vulnerabilities.py b/vulnerabilities/tests/pipelines/exporters/test_federate_vulnerabilities.py index 800163bfa..3f736cf9e 100644 --- a/vulnerabilities/tests/pipelines/exporters/test_federate_vulnerabilities.py +++ b/vulnerabilities/tests/pipelines/exporters/test_federate_vulnerabilities.py @@ -83,18 +83,25 @@ def setUp(self): date_published=datetime.now() - timedelta(days=10), url="https://example.com/advisory/2", ) - insert_advisory_v2( + a1 = insert_advisory_v2( advisory=advisory1, pipeline_id="test_pipeline_v2", logger=self.logger.write, datasource_id="test", ) - insert_advisory_v2( + a1._all_impacts_unfurled = True + a1._all_impacts_unfurled_successfully = True + a1.save() + a2 
= insert_advisory_v2( advisory=advisory2, pipeline_id="test_pipeline_v2", logger=self.logger.write, datasource_id="test", ) + a2._all_impacts_unfurled = True + a2._all_impacts_unfurled_successfully = True + a2.save() + @patch( "vulnerabilities.pipelines.exporters.federate_vulnerabilities.FederatePackageVulnerabilities.clone_federation_repository" diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_compute_package_risk_v2.py b/vulnerabilities/tests/pipelines/v2_improvers/test_compute_package_risk_v2.py index ff6d1fd61..a4454258d 100644 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_compute_package_risk_v2.py +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_compute_package_risk_v2.py @@ -36,6 +36,7 @@ def test_simple_risk_pipeline(): url="https://test.com", date_collected=datetime.now(), is_latest=True, + _all_impacts_unfurled=True ) adv.save() diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_mark_all_impacts_unfurled.py b/vulnerabilities/tests/pipelines/v2_improvers/test_mark_all_impacts_unfurled.py index d55fb10ec..62d05f0ed 100644 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_mark_all_impacts_unfurled.py +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_mark_all_impacts_unfurled.py @@ -8,6 +8,7 @@ # import uuid +from unittest import mock import pytest from django.utils import timezone @@ -15,155 +16,459 @@ from vulnerabilities.models import AdvisoryV2 from vulnerabilities.models import ImpactedPackage from vulnerabilities.pipelines.v2_improvers.unfurl_version_range import UnfurlVersionRangePipeline +from vulnerabilities.pipelines.v2_improvers.unfurl_version_range import complete_advisories_import from vulnerabilities.pipelines.v2_improvers.unfurl_version_range import ( - latest_advisories_with_all_impacts_unfurled, + latest_advisories_with_all_impacts_unfurled_attempted, +) +from vulnerabilities.pipelines.v2_improvers.unfurl_version_range import ( + latest_advisories_with_all_impacts_unfurled_successfully, ) 
- - -def create_advisory( - *, - is_latest=True, - all_impacts_unfurled=False, -): - unique = uuid.uuid4().hex - - return AdvisoryV2.objects.create( - datasource_id="test", - pipeline_id="test_pipeline", - advisory_id=f"ADV-{unique}", - avid=f"test/{unique}", - unique_content_id=uuid.uuid4().hex, - url="https://example.com/advisory", - is_latest=is_latest, - _all_impacts_unfurled=all_impacts_unfurled, - ) @pytest.mark.django_db -def test_returns_advisory_when_all_impacts_unfurled(): - advisory = create_advisory() - - ImpactedPackage.objects.create( - advisory=advisory, - base_purl="pkg:pypi/django", - last_range_unfurl_at=timezone.now(), - ) - - ImpactedPackage.objects.create( - advisory=advisory, - base_purl="pkg:pypi/flask", - last_range_unfurl_at=timezone.now(), +class TestMarkAllImpactsUnfurledSuccessfully: + @mock.patch( + "vulnerabilities.pipelines.v2_improvers.unfurl_version_range.complete_advisories_import", + wraps=complete_advisories_import, ) - - advisories = latest_advisories_with_all_impacts_unfurled() - - assert advisory in advisories + def test_marks_only_fully_successful_advisories( + self, + mock_complete_advisories_import, + ): + now = timezone.now() + + # Fully successful + advisory_a = AdvisoryV2.objects.create( + datasource_id="ghsa", + advisory_id="1", + pipeline_id="ghsa_importer_v2", + avid=f"ghsa/1", + unique_content_id=f"121", + url="https://example.com/advisory", + date_collected="2025-07-01T00:00:00Z", + precedence=1, + is_latest=True, + _all_impacts_unfurled_successfully=False, + ) + + ImpactedPackage.objects.create( + advisory=advisory_a, + last_range_unfurl_at=now, + last_successful_range_unfurl_at=now, + ) + + # Partial success + advisory_b = AdvisoryV2.objects.create( + datasource_id="ghsa", + advisory_id="2", + pipeline_id="ghsa_importer_v2", + avid=f"ghsa/2", + unique_content_id=f"122", + url="https://example.com/advisory", + date_collected="2025-07-01T00:00:00Z", + precedence=1, + is_latest=True, + 
_all_impacts_unfurled_successfully=False, + ) + + ImpactedPackage.objects.create( + advisory=advisory_b, + last_range_unfurl_at=now, + last_successful_range_unfurl_at=None, + ) + + pipeline = UnfurlVersionRangePipeline() + + pipeline.impacted_packages = ImpactedPackage.objects.all() + + pipeline.mark_all_impacts_unfurled_sucessfully() + + mock_complete_advisories_import.assert_called_once_with( + advisory_ids=[advisory_a.id], + success=True, + ) @pytest.mark.django_db -def test_does_not_return_advisory_when_one_impact_not_unfurled(): - advisory = create_advisory() - - ImpactedPackage.objects.create( - advisory=advisory, - base_purl="pkg:pypi/django", - last_range_unfurl_at=timezone.now(), - ) - - ImpactedPackage.objects.create( - advisory=advisory, - base_purl="pkg:pypi/flask", - last_range_unfurl_at=None, +class TestMarkAllImpactsUnfurlAttempted: + @mock.patch( + "vulnerabilities.pipelines.v2_improvers.unfurl_version_range.complete_advisories_import" ) - - advisories = latest_advisories_with_all_impacts_unfurled() - - assert advisory not in advisories + def test_marks_only_fully_attempted_advisories( + self, + mock_complete_advisories_import, + ): + now = timezone.now() + + # All attempted + advisory_a = AdvisoryV2.objects.create( + datasource_id="ghsa", + advisory_id="2", + pipeline_id="ghsa_importer_v2", + avid=f"ghsa/2", + unique_content_id=f"122", + url="https://example.com/advisory", + date_collected="2025-07-01T00:00:00Z", + precedence=1, + is_latest=True, + _all_impacts_unfurled_successfully=False, + ) + + ImpactedPackage.objects.create( + advisory=advisory_a, + last_range_unfurl_at=now, + last_successful_range_unfurl_at=None, + ) + + ImpactedPackage.objects.create( + advisory=advisory_a, + last_range_unfurl_at=now, + last_successful_range_unfurl_at=now, + ) + + # Not all attempted + advisory_b = AdvisoryV2.objects.create( + datasource_id="ghsa", + advisory_id="3", + pipeline_id="ghsa_importer_v2", + avid=f"ghsa/3", + unique_content_id=f"123", + 
url="https://example.com/advisory", + date_collected="2025-07-01T00:00:00Z", + precedence=1, + is_latest=True, + _all_impacts_unfurled_successfully=False, + ) + + ImpactedPackage.objects.create( + advisory=advisory_b, + last_range_unfurl_at=None, + last_successful_range_unfurl_at=None, + ) + + pipeline = UnfurlVersionRangePipeline() + + pipeline.impacted_packages = ImpactedPackage.objects.all() + + pipeline.mark_all_impacts_unfurl_attempted() + + mock_complete_advisories_import.assert_called_once_with( + advisory_ids=[advisory_a.id], + ) + + ids = latest_advisories_with_all_impacts_unfurled_attempted( + impacted_packages=ImpactedPackage.objects.all() + ) + + assert len(ids) == 1 @pytest.mark.django_db -def test_does_not_return_non_latest_advisory(): - advisory = create_advisory(is_latest=False) - - ImpactedPackage.objects.create( - advisory=advisory, - base_purl="pkg:pypi/django", - last_range_unfurl_at=timezone.now(), +class TestAttemptedBatching: + @mock.patch( + "vulnerabilities.pipelines.v2_improvers.unfurl_version_range.complete_advisories_import" ) + def test_attempted_advisories_are_chunked_in_batches_of_100( + self, + mock_complete_advisories_import, + ): + now = timezone.now() - advisories = latest_advisories_with_all_impacts_unfurled() - - assert advisory not in advisories + advisories = [] + for i in range(250): + adv = AdvisoryV2.objects.create( + datasource_id="ghsa", + advisory_id=str(i), + pipeline_id="ghsa_importer_v2", + avid=f"ghsa/{i}", + unique_content_id=f"12{i}", + url="https://example.com/advisory", + date_collected="2025-07-01T00:00:00Z", + precedence=1, + is_latest=True, + _all_impacts_unfurled_successfully=False, + ) -@pytest.mark.django_db -def test_does_not_return_already_marked_advisory(): - advisory = create_advisory( - all_impacts_unfurled=True, - ) + advisories.append(adv) - ImpactedPackage.objects.create( - advisory=advisory, - base_purl="pkg:pypi/django", - last_range_unfurl_at=timezone.now(), - ) + ImpactedPackage.objects.create( 
+ advisory=adv, + base_purl="pkg:pypi/django", + affecting_vers="<2.0", + last_range_unfurl_at=now, + last_successful_range_unfurl_at=None, + ) - advisories = latest_advisories_with_all_impacts_unfurled() + pipeline = UnfurlVersionRangePipeline() - assert advisory not in advisories + pipeline.impacted_packages = ImpactedPackage.objects.all() + pipeline.mark_all_impacts_unfurl_attempted() -@pytest.mark.django_db -def test_pipeline_marks_matching_advisories(): - advisory = create_advisory() + assert mock_complete_advisories_import.call_count == 3 - ImpactedPackage.objects.create( - advisory=advisory, - base_purl="pkg:pypi/django", - last_range_unfurl_at=timezone.now(), - ) + first_call_ids = mock_complete_advisories_import.call_args_list[0][1]["advisory_ids"] + second_call_ids = mock_complete_advisories_import.call_args_list[1][1]["advisory_ids"] + third_call_ids = mock_complete_advisories_import.call_args_list[2][1]["advisory_ids"] - pipeline = UnfurlVersionRangePipeline() - pipeline.mark_all_impacts_unfurled() - - advisory.refresh_from_db() - - assert advisory._all_impacts_unfurled is True + assert len(first_call_ids) == 100 + assert len(second_call_ids) == 100 + assert len(third_call_ids) == 50 @pytest.mark.django_db -def test_pipeline_does_not_mark_partial_advisory(): - advisory = create_advisory() - - ImpactedPackage.objects.create( - advisory=advisory, - base_purl="pkg:pypi/django", - last_range_unfurl_at=timezone.now(), - ) - - ImpactedPackage.objects.create( - advisory=advisory, - base_purl="pkg:pypi/flask", - last_range_unfurl_at=None, - ) - - pipeline = UnfurlVersionRangePipeline() - pipeline.mark_all_impacts_unfurled() - - advisory.refresh_from_db() - - assert advisory._all_impacts_unfurled is False +class TestLatestAdvisoriesWithAllImpactsUnfurledSuccessfully: + def test_returns_only_advisories_with_all_successful_impacts(self): + now = timezone.now() + + # Advisory A + # ALL impacts successful + # SHOULD be returned + advisory_a = 
AdvisoryV2.objects.create( + datasource_id="ghsa", + advisory_id="1", + pipeline_id="ghsa_importer_v2", + avid=f"ghsa/1", + unique_content_id="121", + url="https://example.com/advisory", + date_collected="2025-07-01T00:00:00Z", + precedence=1, + is_latest=True, + _all_impacts_unfurled_successfully=False, + ) + + ImpactedPackage.objects.create( + advisory=advisory_a, + affecting_vers=">2.0.0", + base_purl="pkg:pypi/django", + last_range_unfurl_at=now, + last_successful_range_unfurl_at=now, + ) + + # Advisory B + # Partial success + # SHOULD NOT be returned + advisory_b = AdvisoryV2.objects.create( + datasource_id="ghsa", + advisory_id="2", + pipeline_id="ghsa_importer_v2", + avid=f"ghsa/2", + unique_content_id="122", + url="https://example.com/advisory", + date_collected="2025-07-01T00:00:00Z", + precedence=1, + is_latest=True, + _all_impacts_unfurled_successfully=False, + ) + + ImpactedPackage.objects.create( + advisory=advisory_b, + affecting_vers=">2.0.0", + base_purl="pkg:pypi/django", + last_range_unfurl_at=now, + last_successful_range_unfurl_at=now, + ) + + ImpactedPackage.objects.create( + advisory=advisory_b, + affecting_vers=">2.0.0", + base_purl="pkg:pypi/djangob", + last_range_unfurl_at=now, + last_successful_range_unfurl_at=None, + ) + + # Advisory C + # No attempts + # SHOULD NOT be returned + advisory_c = AdvisoryV2.objects.create( + datasource_id="ghsa", + advisory_id="3", + pipeline_id="ghsa_importer_v2", + avid=f"ghsa/3", + unique_content_id="123", + url="https://example.com/advisory", + date_collected="2025-07-01T00:00:00Z", + precedence=1, + is_latest=True, + _all_impacts_unfurled_successfully=False, + ) + + ImpactedPackage.objects.create( + advisory=advisory_c, + affecting_vers=">2.0.0", + base_purl="pkg:pypi/djangob", + last_range_unfurl_at=None, + last_successful_range_unfurl_at=None, + ) + + # Advisory D + # All attempted but all failed + # SHOULD NOT be returned + advisory_d = AdvisoryV2.objects.create( + datasource_id="ghsa", + 
advisory_id="4", + pipeline_id="ghsa_importer_v2", + avid=f"ghsa/4", + unique_content_id="124", + url="https://example.com/advisory", + date_collected="2025-07-01T00:00:00Z", + precedence=1, + is_latest=True, + _all_impacts_unfurled_successfully=False, + ) + + ImpactedPackage.objects.create( + advisory=advisory_d, + affecting_vers=">2.0.0", + base_purl="pkg:pypi/djangob", + last_range_unfurl_at=now, + last_successful_range_unfurl_at=None, + ) + + ImpactedPackage.objects.create( + advisory=advisory_d, + affecting_vers=">2.0.0", + base_purl="pkg:pypi/djangoc", + last_range_unfurl_at=now, + last_successful_range_unfurl_at=None, + ) + + qs = latest_advisories_with_all_impacts_unfurled_successfully( + impacted_packages=ImpactedPackage.objects.all() + ) + + advisories_avids = list(AdvisoryV2.objects.filter(id__in=qs).values_list("avid", flat=True)) + + assert advisories_avids == ["ghsa/1"] @pytest.mark.django_db -def test_zero_impacts_advisory_is_returned(): - """ - Current behavior: - advisory with zero impacts is considered fully unfurled. 
- """ - - advisory = create_advisory() - - advisories = latest_advisories_with_all_impacts_unfurled() - - assert advisory in advisories +class TestLatestAdvisoriesWithAllImpactsUnfurledAttempted: + def test_returns_only_advisories_with_all_impacts_attempted(self): + now = timezone.now() + + # Advisory A + # All attempted successfully + # SHOULD be returned + advisory_a = AdvisoryV2.objects.create( + datasource_id="ghsa", + advisory_id="4", + pipeline_id="ghsa_importer_v2", + avid=f"ghsa/4", + unique_content_id="124", + url="https://example.com/advisory", + date_collected="2025-07-01T00:00:00Z", + precedence=1, + is_latest=True, + _all_impacts_unfurled_successfully=False, + ) + + ImpactedPackage.objects.create( + advisory=advisory_a, + last_range_unfurl_at=now, + last_successful_range_unfurl_at=now, + ) + + ImpactedPackage.objects.create( + advisory=advisory_a, + last_range_unfurl_at=now, + last_successful_range_unfurl_at=now, + ) + + # Advisory B + # Partial success + # BUT all attempted + # SHOULD be returned + advisory_b = AdvisoryV2.objects.create( + datasource_id="ghsa", + advisory_id="5", + pipeline_id="ghsa_importer_v2", + avid=f"ghsa/5", + unique_content_id="125", + url="https://example.com/advisory", + date_collected="2025-07-01T00:00:00Z", + precedence=1, + is_latest=True, + _all_impacts_unfurled_successfully=False, + ) + + ImpactedPackage.objects.create( + advisory=advisory_b, + last_range_unfurl_at=now, + last_successful_range_unfurl_at=now, + ) + + ImpactedPackage.objects.create( + advisory=advisory_b, + last_range_unfurl_at=now, + last_successful_range_unfurl_at=None, + ) + + # Advisory C + # One impact never attempted + # SHOULD NOT be returned + advisory_c = AdvisoryV2.objects.create( + datasource_id="ghsa", + advisory_id="6", + pipeline_id="ghsa_importer_v2", + avid=f"ghsa/6", + unique_content_id="126", + url="https://example.com/advisory", + date_collected="2025-07-01T00:00:00Z", + precedence=1, + is_latest=True, + 
_all_impacts_unfurled_successfully=False, + ) + + ImpactedPackage.objects.create( + advisory=advisory_c, + last_range_unfurl_at=now, + last_successful_range_unfurl_at=None, + ) + + ImpactedPackage.objects.create( + advisory=advisory_c, + last_range_unfurl_at=None, + last_successful_range_unfurl_at=None, + ) + + # Advisory D + # All attempted but failed + # SHOULD be returned + advisory_d = AdvisoryV2.objects.create( + datasource_id="ghsa", + advisory_id="7", + pipeline_id="ghsa_importer_v2", + avid=f"ghsa/7", + unique_content_id="127", + url="https://example.com/advisory", + date_collected="2025-07-01T00:00:00Z", + precedence=1, + is_latest=True, + _all_impacts_unfurled_successfully=False, + ) + + ImpactedPackage.objects.create( + advisory=advisory_d, + last_range_unfurl_at=now, + last_successful_range_unfurl_at=None, + ) + + ImpactedPackage.objects.create( + advisory=advisory_d, + last_range_unfurl_at=now, + last_successful_range_unfurl_at=None, + ) + + qs = latest_advisories_with_all_impacts_unfurled_attempted( + impacted_packages=ImpactedPackage.objects.all() + ) + + advisories_avids = list(AdvisoryV2.objects.filter(id__in=qs).values_list("avid", flat=True)) + + assert advisories_avids == [ + "ghsa/4", + "ghsa/5", + "ghsa/7", + ] diff --git a/vulnerabilities/tests/test_api_v3.py b/vulnerabilities/tests/test_api_v3.py index e9d275a6c..c07df13c7 100644 --- a/vulnerabilities/tests/test_api_v3.py +++ b/vulnerabilities/tests/test_api_v3.py @@ -184,7 +184,10 @@ def setUp(self): original_advisory_text="Sample advisory text", ) - insert_advisory_v2(advisory, "ghsa_importer", print, "ghsa", 100) + advisory_obj = insert_advisory_v2(advisory, "ghsa_importer", print, "ghsa", 100) + + advisory_obj._all_impacts_unfurled = True + advisory_obj.save() self.client = APIClient(enforce_csrf_checks=True) @@ -227,7 +230,10 @@ def setUp(self): original_advisory_text="Sample advisory text", ) - insert_advisory_v2(advisory, "ghsa_importer", print, "ghsa", 100) + advisory_obj = 
insert_advisory_v2(advisory, "ghsa_importer", print, "ghsa", 100) + + advisory_obj._all_impacts_unfurled = True + advisory_obj.save() self.client = APIClient(enforce_csrf_checks=True) From ede5c36f1cc8ecd2c04607bd1a99f97981e4505b Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 3 Jun 2026 21:21:44 +0530 Subject: [PATCH 13/21] Fix migration order Signed-off-by: Tushar Goel --- ...dvisoryv2__all_impacts_unfurled_successfully_and_more.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/vulnerabilities/migrations/0135_advisoryv2__all_impacts_unfurled_successfully_and_more.py b/vulnerabilities/migrations/0135_advisoryv2__all_impacts_unfurled_successfully_and_more.py index 20717d277..5e0fd2a5f 100644 --- a/vulnerabilities/migrations/0135_advisoryv2__all_impacts_unfurled_successfully_and_more.py +++ b/vulnerabilities/migrations/0135_advisoryv2__all_impacts_unfurled_successfully_and_more.py @@ -18,10 +18,4 @@ class Migration(migrations.Migration): help_text="Indicates whether all impacts for this advisory have been unfurled successfully.", ), ), - migrations.AddIndex( - model_name="advisoryv2", - index=models.Index( - fields=["_all_impacts_unfurled", "id"], name="advisory_unfurled_idx" - ), - ), ] From 5404bd111d5b75cbc7ea3b5b33246a03e7b1190c Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 3 Jun 2026 21:47:30 +0530 Subject: [PATCH 14/21] Fix migration order Signed-off-by: Tushar Goel --- ...dvisoryv2__all_impacts_unfurled_successfully_and_more.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/vulnerabilities/migrations/0135_advisoryv2__all_impacts_unfurled_successfully_and_more.py b/vulnerabilities/migrations/0135_advisoryv2__all_impacts_unfurled_successfully_and_more.py index 5e0fd2a5f..20717d277 100644 --- a/vulnerabilities/migrations/0135_advisoryv2__all_impacts_unfurled_successfully_and_more.py +++ b/vulnerabilities/migrations/0135_advisoryv2__all_impacts_unfurled_successfully_and_more.py @@ -18,4 +18,10 @@ class 
Migration(migrations.Migration): help_text="Indicates whether all impacts for this advisory have been unfurled successfully.", ), ), + migrations.AddIndex( + model_name="advisoryv2", + index=models.Index( + fields=["_all_impacts_unfurled", "id"], name="advisory_unfurled_idx" + ), + ), ] From be805ba9f88e84c8db5f8be9eba7b70ad469e965 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 3 Jun 2026 21:52:55 +0530 Subject: [PATCH 15/21] Change impacted package qs Signed-off-by: Tushar Goel --- .../pipelines/v2_improvers/unfurl_version_range.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py b/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py index 9529be095..6f35c234b 100644 --- a/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py +++ b/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py @@ -132,9 +132,7 @@ def unfurl_version_range(self): self.log(f"{processed_affected_packages_count:,d} new Impact-Package relation created.") def mark_all_impacts_unfurled_sucessfully(self): - impacted_packages = self.impacted_packages or impacted_package_qs( - cutoff_day=self.reunfurl_after_days - ) + impacted_packages = ImpactedPackage.objects.all() while True: advisory_ids = list( latest_advisories_with_all_impacts_unfurled_successfully( @@ -148,9 +146,7 @@ def mark_all_impacts_unfurled_sucessfully(self): complete_advisories_import(advisory_ids=advisory_ids, success=True) def mark_all_impacts_unfurl_attempted(self): - impacted_packages = self.impacted_packages or impacted_package_qs( - cutoff_day=self.reunfurl_after_days - ) + impacted_packages = ImpactedPackage.objects.all() advisories_qs = latest_advisories_with_all_impacts_unfurled_attempted( impacted_packages=impacted_packages ) From 8d6f17b0edaeaa4ffd50fa70f0f2e044da9a477c Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 4 Jun 2026 19:35:40 +0530 Subject: [PATCH 16/21] Mark empty or null vers as 
unfurled Signed-off-by: Tushar Goel --- .../pipelines/v2_improvers/unfurl_version_range.py | 4 ++++ .../pipelines/exporters/test_federate_vulnerabilities.py | 1 - .../pipelines/v2_improvers/test_compute_package_risk_v2.py | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py b/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py index 6f35c234b..27cf930fb 100644 --- a/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py +++ b/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py @@ -62,6 +62,10 @@ def steps(cls): ) def unfurl_version_range(self): + cur_time = timezone.now() + ImpactedPackage.objects.filter( + Q(affecting_vers__isnull=True) | Q(affecting_vers="") + ).update(last_range_unfurl_at=cur_time, last_successful_range_unfurl_at=cur_time) processed_impacted_packages_count = 0 processed_affected_packages_count = 0 cached_versions = {} diff --git a/vulnerabilities/tests/pipelines/exporters/test_federate_vulnerabilities.py b/vulnerabilities/tests/pipelines/exporters/test_federate_vulnerabilities.py index 3f736cf9e..65804a4e3 100644 --- a/vulnerabilities/tests/pipelines/exporters/test_federate_vulnerabilities.py +++ b/vulnerabilities/tests/pipelines/exporters/test_federate_vulnerabilities.py @@ -102,7 +102,6 @@ def setUp(self): a2._all_impacts_unfurled_successfully = True a2.save() - @patch( "vulnerabilities.pipelines.exporters.federate_vulnerabilities.FederatePackageVulnerabilities.clone_federation_repository" ) diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_compute_package_risk_v2.py b/vulnerabilities/tests/pipelines/v2_improvers/test_compute_package_risk_v2.py index a4454258d..fc63a8ad9 100644 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_compute_package_risk_v2.py +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_compute_package_risk_v2.py @@ -36,7 +36,7 @@ def test_simple_risk_pipeline(): url="https://test.com", 
date_collected=datetime.now(), is_latest=True, - _all_impacts_unfurled=True + _all_impacts_unfurled=True, ) adv.save() From 9fba5a8d30d1755b969b7a6ecdb8701fe083ee3e Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 4 Jun 2026 20:14:39 +0530 Subject: [PATCH 17/21] Fix tests Signed-off-by: Tushar Goel --- .../test_compute_advisory_todo_v2.py | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_compute_advisory_todo_v2.py b/vulnerabilities/tests/pipelines/v2_improvers/test_compute_advisory_todo_v2.py index 3d95a0561..48f4a5af0 100644 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_compute_advisory_todo_v2.py +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_compute_advisory_todo_v2.py @@ -8,6 +8,7 @@ # import json +from collections.abc import Mapping from datetime import datetime from django.test import TestCase @@ -381,7 +382,10 @@ def test_todo_conflict_details_partial_curation(self): result_partial_curation = issue_details["partial_curation_advisory"] self.assertEqual(1, AdvisoryToDoV2.objects.count()) self.assertEqual("CONFLICTING_FIXED_BY_PACKAGES", todo.issue_type) - self.assertDictEqual(expected_partial_curation_advisory, result_partial_curation) + self.assertEqual( + normalize(expected_partial_curation_advisory), + normalize(result_partial_curation), + ) def test_todo_conflict_details_partial_curation_unpaired_purl_and_conflicting_affected_and_fixed( self, @@ -513,7 +517,10 @@ def test_todo_conflict_details_partial_curation_unpaired_purl_and_conflicting_fi result_partial_curation = issue_details["partial_curation_advisory"] self.assertEqual(1, AdvisoryToDoV2.objects.count()) self.assertEqual("CONFLICTING_FIXED_BY_PACKAGES", todo.issue_type) - self.assertDictEqual(expected_partial_curation_advisory, result_partial_curation) + self.assertEqual( + normalize(expected_partial_curation_advisory), + normalize(result_partial_curation), + ) def 
test_todo_conflict_details_partial_curation_unpaired_purl_and_conflicting_affected(self): expected_partial_curation_advisory = { @@ -586,3 +593,18 @@ def test_todo_conflict_details_partial_curation_unpaired_purl_and_conflicting_af self.assertEqual(1, AdvisoryToDoV2.objects.count()) self.assertEqual("CONFLICTING_AFFECTED_PACKAGES", todo.issue_type) self.assertDictEqual(expected_partial_curation_advisory, result_partial_curation) + + +def normalize(obj): + if isinstance(obj, Mapping): + return {k: normalize(v) for k, v in sorted(obj.items())} + + if isinstance(obj, list): + normalized = [normalize(item) for item in obj] + + return sorted( + normalized, + key=lambda x: repr(x), + ) + + return obj From a0299d2d6405255abfed4770eb75ece82a30fa4d Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 4 Jun 2026 20:16:06 +0530 Subject: [PATCH 18/21] Allow V1 pipelines Signed-off-by: Tushar Goel --- vulnerabilities/importers/__init__.py | 62 +++++++++++++-------------- vulnerabilities/improvers/__init__.py | 46 ++++++++++---------- 2 files changed, 54 insertions(+), 54 deletions(-) diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index e5d28f6ff..5ff64a1ff 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -159,37 +159,37 @@ collect_fix_commits_v2.CollectGitFixCommitsPipeline, collect_fix_commits_v2.CollectJenkinsFixCommitsPipeline, collect_fix_commits_v2.CollectGitlabFixCommitsPipeline, - # github_importer.GitHubAPIImporterPipeline, - # gitlab_importer.GitLabImporterPipeline, - # github_osv.GithubOSVImporter, - # pypa_importer.PyPaImporterPipeline, - # npm_importer.NpmImporterPipeline, - # nginx_importer.NginxImporterPipeline, - # pysec_importer.PyPIImporterPipeline, - # apache_tomcat.ApacheTomcatImporter, - # postgresql.PostgreSQLImporter, - # debian.DebianImporter, - # curl.CurlImporter, - # epss.EPSSImporter, - # vulnrichment.VulnrichImporter, - # 
alpine_linux_importer.AlpineLinuxImporterPipeline, - # apache_kafka.ApacheKafkaImporter, - # ruby.RubyImporter, - # redhat.RedhatImporter, - # archlinux.ArchlinuxImporter, - # debian_oval.DebianOvalImporter, - # retiredotnet.RetireDotnetImporter, - # apache_httpd.ApacheHTTPDImporter, - # mozilla.MozillaImporter, - # gentoo.GentooImporter, - # istio.IstioImporter, - # project_kb_msr2019.ProjectKBMSRImporter, - # suse_scores.SUSESeverityScoreImporter, - # elixir_security.ElixirSecurityImporter, - # xen.XenImporter, - # ubuntu_usn.UbuntuUSNImporter, - # fireeye.FireyeImporter, - # oss_fuzz.OSSFuzzImporter, + github_importer.GitHubAPIImporterPipeline, + gitlab_importer.GitLabImporterPipeline, + github_osv.GithubOSVImporter, + pypa_importer.PyPaImporterPipeline, + npm_importer.NpmImporterPipeline, + nginx_importer.NginxImporterPipeline, + pysec_importer.PyPIImporterPipeline, + apache_tomcat.ApacheTomcatImporter, + postgresql.PostgreSQLImporter, + debian.DebianImporter, + curl.CurlImporter, + epss.EPSSImporter, + vulnrichment.VulnrichImporter, + alpine_linux_importer.AlpineLinuxImporterPipeline, + apache_kafka.ApacheKafkaImporter, + ruby.RubyImporter, + redhat.RedhatImporter, + archlinux.ArchlinuxImporter, + debian_oval.DebianOvalImporter, + retiredotnet.RetireDotnetImporter, + apache_httpd.ApacheHTTPDImporter, + mozilla.MozillaImporter, + gentoo.GentooImporter, + istio.IstioImporter, + project_kb_msr2019.ProjectKBMSRImporter, + suse_scores.SUSESeverityScoreImporter, + elixir_security.ElixirSecurityImporter, + xen.XenImporter, + ubuntu_usn.UbuntuUSNImporter, + fireeye.FireyeImporter, + oss_fuzz.OSSFuzzImporter, ] ) diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index be3278734..b4644e845 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -46,28 +46,28 @@ compute_advisory_todo_v2.ComputeToDo, reference_collect_commits.CollectReferencesFixCommitsPipeline, 
enhance_with_github_poc.GithubPocsImproverPipeline, - # valid_versions.GitHubBasicImprover, - # valid_versions.GitLabBasicImprover, - # valid_versions.NginxBasicImprover, - # valid_versions.ApacheHTTPDImprover, - # valid_versions.DebianBasicImprover, - # valid_versions.NpmImprover, - # valid_versions.ElixirImprover, - # valid_versions.ApacheTomcatImprover, - # valid_versions.ApacheKafkaImprover, - # valid_versions.IstioImprover, - # valid_versions.DebianOvalImprover, - # valid_versions.OSSFuzzImprover, - # valid_versions.RubyImprover, - # valid_versions.GithubOSVImprover, - # vulnerability_status.VulnerabilityStatusImprover, - # valid_versions.CurlImprover, - # flag_ghost_packages.FlagGhostPackagePipeline, - # enhance_with_kev.VulnerabilityKevPipeline, - # enhance_with_metasploit.MetasploitImproverPipeline, - # enhance_with_exploitdb.ExploitDBImproverPipeline, - # compute_package_risk.ComputePackageRiskPipeline, - # compute_package_version_rank.ComputeVersionRankPipeline, - # populate_vulnerability_summary_pipeline.PopulateVulnerabilitySummariesPipeline, + valid_versions.GitHubBasicImprover, + valid_versions.GitLabBasicImprover, + valid_versions.NginxBasicImprover, + valid_versions.ApacheHTTPDImprover, + valid_versions.DebianBasicImprover, + valid_versions.NpmImprover, + valid_versions.ElixirImprover, + valid_versions.ApacheTomcatImprover, + valid_versions.ApacheKafkaImprover, + valid_versions.IstioImprover, + valid_versions.DebianOvalImprover, + valid_versions.OSSFuzzImprover, + valid_versions.RubyImprover, + valid_versions.GithubOSVImprover, + vulnerability_status.VulnerabilityStatusImprover, + valid_versions.CurlImprover, + flag_ghost_packages.FlagGhostPackagePipeline, + enhance_with_kev.VulnerabilityKevPipeline, + enhance_with_metasploit.MetasploitImproverPipeline, + enhance_with_exploitdb.ExploitDBImproverPipeline, + compute_package_risk.ComputePackageRiskPipeline, + compute_package_version_rank.ComputeVersionRankPipeline, + 
populate_vulnerability_summary_pipeline.PopulateVulnerabilitySummariesPipeline, ] ) From 532efd17af0ffea1d55fa2ddbf0d9a14859bfaac Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 4 Jun 2026 20:20:46 +0530 Subject: [PATCH 19/21] Compute ToDos for unfurled advisories only Signed-off-by: Tushar Goel --- vulnerabilities/pipelines/v2_improvers/compute_advisory_todo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vulnerabilities/pipelines/v2_improvers/compute_advisory_todo.py b/vulnerabilities/pipelines/v2_improvers/compute_advisory_todo.py index b1a9b4469..1e08bd9c1 100644 --- a/vulnerabilities/pipelines/v2_improvers/compute_advisory_todo.py +++ b/vulnerabilities/pipelines/v2_improvers/compute_advisory_todo.py @@ -49,6 +49,7 @@ def compute_individual_advisory_todo(self): advisories = ( AdvisoryV2.objects.todo_excluded() .latest_per_avid() + .filter(_all_impacts_unfurled=True) .exclude(advisory_todos__issue_type="MISSING_SUMMARY") .exclude(advisory_todos__issue_type="MISSING_AFFECTED_PACKAGE") .exclude(advisory_todos__issue_type="MISSING_FIXED_BY_PACKAGE") From 7eae034e49bfbffed7b35f6abdfc6ae330c7fab3 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 4 Jun 2026 20:23:25 +0530 Subject: [PATCH 20/21] Fix advisory todo tests Signed-off-by: Tushar Goel --- .../v2_improvers/test_compute_advisory_todo_v2.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_compute_advisory_todo_v2.py b/vulnerabilities/tests/pipelines/v2_improvers/test_compute_advisory_todo_v2.py index 48f4a5af0..f9bb48c44 100644 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_compute_advisory_todo_v2.py +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_compute_advisory_todo_v2.py @@ -190,6 +190,7 @@ def test_advisory_todo_missing_summary(self): ) adv = AdvisoryV2.objects.first() adv.summary = "" + adv._all_impacts_unfurled = True adv.save() pipeline = ComputeToDo() pipeline.execute() @@ -200,13 +201,15 @@ def 
test_advisory_todo_missing_summary(self): self.assertEqual(1, todo.advisories.count()) def test_advisory_todo_missing_fixed(self): - insert_advisory_v2( + adv = insert_advisory_v2( advisory=self.advisory_data2, pipeline_id="test_pipeline1", logger=self.log.write, datasource_id="test1", ) pipeline = ComputeToDo() + adv._all_impacts_unfurled = True + adv.save() pipeline.execute() todo = AdvisoryToDoV2.objects.first() @@ -215,12 +218,14 @@ def test_advisory_todo_missing_fixed(self): self.assertEqual(1, todo.advisories.count()) def test_advisory_todo_missing_affected(self): - insert_advisory_v2( + adv = insert_advisory_v2( advisory=self.advisory_data3, logger=self.log.write, datasource_id="test1", pipeline_id="test_pipeline1", ) + adv._all_impacts_unfurled = True + adv.save() pipeline = ComputeToDo() pipeline.execute() From f761a7876ad022354a186ea5cea5516f28c5027b Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 4 Jun 2026 21:39:20 +0530 Subject: [PATCH 21/21] Add separate pipeline for marking unfurls Signed-off-by: Tushar Goel --- vulnerabilities/api_v3.py | 1 + vulnerabilities/improvers/__init__.py | 2 + .../v2_improvers/mark_unfurl_version_range.py | 160 ++++++++++++++++++ .../v2_improvers/unfurl_version_range.py | 126 +------------- .../test_mark_all_impacts_unfurled.py | 31 ++-- 5 files changed, 178 insertions(+), 142 deletions(-) create mode 100644 vulnerabilities/pipelines/v2_improvers/mark_unfurl_version_range.py diff --git a/vulnerabilities/api_v3.py b/vulnerabilities/api_v3.py index bd5890406..7c789716a 100644 --- a/vulnerabilities/api_v3.py +++ b/vulnerabilities/api_v3.py @@ -319,6 +319,7 @@ def get_affected_by_vulnerabilities(self, package): ) return result + return [] def get_fixing_vulnerabilities(self, package): advisories = self.context["fixing_advisory_map"].get(package.id, []) diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index b4644e845..8745d8d35 100644 --- a/vulnerabilities/improvers/__init__.py +++ 
b/vulnerabilities/improvers/__init__.py @@ -27,6 +27,7 @@ enhance_with_metasploit as enhance_with_metasploit_v2, ) from vulnerabilities.pipelines.v2_improvers import flag_ghost_packages as flag_ghost_packages_v2 +from vulnerabilities.pipelines.v2_improvers import mark_unfurl_version_range from vulnerabilities.pipelines.v2_improvers import reference_collect_commits from vulnerabilities.pipelines.v2_improvers import relate_severities from vulnerabilities.pipelines.v2_improvers import unfurl_version_range as unfurl_version_range_v2 @@ -46,6 +47,7 @@ compute_advisory_todo_v2.ComputeToDo, reference_collect_commits.CollectReferencesFixCommitsPipeline, enhance_with_github_poc.GithubPocsImproverPipeline, + mark_unfurl_version_range.MarkUnfurlVersionRangePipeline, valid_versions.GitHubBasicImprover, valid_versions.GitLabBasicImprover, valid_versions.NginxBasicImprover, diff --git a/vulnerabilities/pipelines/v2_improvers/mark_unfurl_version_range.py b/vulnerabilities/pipelines/v2_improvers/mark_unfurl_version_range.py new file mode 100644 index 000000000..eedd8d9f7 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/mark_unfurl_version_range.py @@ -0,0 +1,160 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +from django.db import transaction +from django.db.models import Exists +from django.db.models import OuterRef +from django.db.models import Q + +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import ImpactedPackage +from vulnerabilities.models import ImpactedPackageAffecting +from vulnerabilities.models import ImpactedPackageFixedBy +from vulnerabilities.models import PackageV2 +from vulnerabilities.models import PipelineSchedule +from vulnerabilities.pipelines import VulnerableCodePipeline +from vulnerabilities.pipes.group_advisories import group_advisory_for_package +from vulnerabilities.pipes.risk_score import compute_package_risk_score_bulk +from vulnerabilities.utils import TYPES_WITH_MULTIPLE_IMPORTERS + + +class MarkUnfurlVersionRangePipeline(VulnerableCodePipeline): + """ + Mark advisories as unfurled whose all impacts have been unfurled + """ + + pipeline_id = "mark_unfurl_version_range_v2" + + run_interval = 1 + run_priority = PipelineSchedule.ExecutionPriority.HIGH + + @classmethod + def steps(cls): + return ( + cls.mark_all_impacts_unfurled_sucessfully, + cls.mark_all_impacts_unfurl_attempted, + ) + + def mark_all_impacts_unfurled_sucessfully(self): + impacted_packages = ImpactedPackage.objects.all() + while True: + advisory_ids = list( + latest_advisories_with_all_impacts_unfurled_successfully( + impacted_packages=impacted_packages + )[:100] + ) + + if not advisory_ids: + break + + complete_advisories_import(advisory_ids=advisory_ids, success=True) + + def mark_all_impacts_unfurl_attempted(self): + impacted_packages = ImpactedPackage.objects.all() + advisories_qs = latest_advisories_with_all_impacts_unfurled_attempted( + impacted_packages=impacted_packages + ) + + batch_size = 100 + batch = [] + + for advisory_id in advisories_qs.iterator(chunk_size=100): + batch.append(advisory_id) + + if len(batch) >= batch_size: + complete_advisories_import( + advisory_ids=list(batch), + ) + batch.clear() + + if batch: + 
complete_advisories_import( + advisory_ids=list(batch), + ) + + +def latest_advisories_with_all_impacts_unfurled_successfully( + impacted_packages=None, +): + unsuccessful_impacts = impacted_packages.filter( + advisory_id=OuterRef("pk"), + advisory__is_latest=True, + ).filter(Q(last_range_unfurl_at__isnull=True) | Q(last_successful_range_unfurl_at__isnull=True)) + + return ( + AdvisoryV2.objects.filter( + _all_impacts_unfurled_successfully=False, + is_latest=True, + ) + .annotate(has_unsuccessful_impacts=Exists(unsuccessful_impacts)) + .filter(has_unsuccessful_impacts=False) + .order_by("id") + .values_list("id", flat=True) + ) + + +def latest_advisories_with_all_impacts_unfurled_attempted( + impacted_packages=None, +): + impacts_not_attempted = impacted_packages.filter( + advisory_id=OuterRef("pk"), + advisory__is_latest=True, + last_range_unfurl_at__isnull=True, + ) + + return ( + AdvisoryV2.objects.filter( + _all_impacts_unfurled_successfully=False, + is_latest=True, + ) + .annotate(has_unattempted_impacts=Exists(impacts_not_attempted)) + .filter(has_unattempted_impacts=False) + .order_by("_all_impacts_unfurled", "id") + .values_list("id", flat=True) + ) + + +@transaction.atomic +def complete_advisories_import(advisory_ids, success=False): + if not advisory_ids: + return + + AdvisoryV2.objects.filter(id__in=advisory_ids).update(_all_impacts_unfurled=True) + + if success: + AdvisoryV2.objects.filter(id__in=advisory_ids).update( + _all_impacts_unfurled_successfully=True + ) + + affecting_package_ids = set( + ImpactedPackageAffecting.objects.filter( + impacted_package__advisory_id__in=advisory_ids + ).values_list( + "package_id", + flat=True, + ) + ) + + fixed_by_package_ids = set( + ImpactedPackageFixedBy.objects.filter( + impacted_package__advisory_id__in=advisory_ids + ).values_list( + "package_id", + flat=True, + ) + ) + + compute_package_risk_score_bulk(PackageV2.objects.filter(id__in=affecting_package_ids)) + + group_package_ids = affecting_package_ids | 
fixed_by_package_ids + + for package in PackageV2.objects.filter( + id__in=group_package_ids, type__in=TYPES_WITH_MULTIPLE_IMPORTERS + ).iterator(chunk_size=2000): + group_advisory_for_package(package) diff --git a/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py b/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py index 27cf930fb..b099b26bf 100644 --- a/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py +++ b/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py @@ -13,9 +13,7 @@ from aboutcode.pipeline import LoopProgress from django.db import transaction -from django.db.models import Exists from django.db.models import F -from django.db.models import OuterRef from django.db.models import Q from django.utils import timezone from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS as FETCHCODE_SUPPORTED_ECOSYSTEMS @@ -55,11 +53,7 @@ class UnfurlVersionRangePipeline(VulnerableCodePipeline): @classmethod def steps(cls): - return ( - cls.unfurl_version_range, - cls.mark_all_impacts_unfurled_sucessfully, - cls.mark_all_impacts_unfurl_attempted, - ) + return (cls.unfurl_version_range,) def unfurl_version_range(self): cur_time = timezone.now() @@ -135,43 +129,6 @@ def unfurl_version_range(self): self.log(f"Successfully processed {processed_impacted_packages_count:,d} ImpactedPackage.") self.log(f"{processed_affected_packages_count:,d} new Impact-Package relation created.") - def mark_all_impacts_unfurled_sucessfully(self): - impacted_packages = ImpactedPackage.objects.all() - while True: - advisory_ids = list( - latest_advisories_with_all_impacts_unfurled_successfully( - impacted_packages=impacted_packages - )[:100] - ) - - if not advisory_ids: - break - - complete_advisories_import(advisory_ids=advisory_ids, success=True) - - def mark_all_impacts_unfurl_attempted(self): - impacted_packages = ImpactedPackage.objects.all() - advisories_qs = latest_advisories_with_all_impacts_unfurled_attempted( - 
impacted_packages=impacted_packages - ) - - batch_size = 100 - batch = [] - - for advisory_id in advisories_qs.iterator(chunk_size=100): - batch.append(advisory_id) - - if len(batch) >= batch_size: - complete_advisories_import( - advisory_ids=list(batch), - ) - batch.clear() - - if batch: - complete_advisories_import( - advisory_ids=list(batch), - ) - def get_affected_purls(versions, impact, logger): affecting_version_range = VersionRange.from_string(impact.affecting_vers) @@ -257,84 +214,3 @@ def impacted_package_qs(cutoff_day=2): .order_by("advisory__id", F("last_range_unfurl_at").asc(nulls_first=True)) .only("pk", "affecting_vers", "advisory", "base_purl") ) - - -@transaction.atomic -def complete_advisories_import(advisory_ids, success=False): - if not advisory_ids: - return - - AdvisoryV2.objects.filter(id__in=advisory_ids).update(_all_impacts_unfurled=True) - - if success: - AdvisoryV2.objects.filter(id__in=advisory_ids).update( - _all_impacts_unfurled_successfully=True - ) - - affecting_package_ids = set( - ImpactedPackageAffecting.objects.filter( - impacted_package__advisory_id__in=advisory_ids - ).values_list( - "package_id", - flat=True, - ) - ) - - fixed_by_package_ids = set( - ImpactedPackageFixedBy.objects.filter( - impacted_package__advisory_id__in=advisory_ids - ).values_list( - "package_id", - flat=True, - ) - ) - - compute_package_risk_score_bulk(PackageV2.objects.filter(id__in=affecting_package_ids)) - - group_package_ids = affecting_package_ids | fixed_by_package_ids - - for package in PackageV2.objects.filter( - id__in=group_package_ids, type__in=TYPES_WITH_MULTIPLE_IMPORTERS - ).iterator(chunk_size=2000): - group_advisory_for_package(package) - - -def latest_advisories_with_all_impacts_unfurled_successfully( - impacted_packages=None, -): - unsuccessful_impacts = impacted_packages.filter( - advisory_id=OuterRef("pk"), - advisory__is_latest=True, - ).filter(Q(last_range_unfurl_at__isnull=True) | Q(last_successful_range_unfurl_at__isnull=True)) - - 
return ( - AdvisoryV2.objects.filter( - _all_impacts_unfurled_successfully=False, - is_latest=True, - ) - .annotate(has_unsuccessful_impacts=Exists(unsuccessful_impacts)) - .filter(has_unsuccessful_impacts=False) - .order_by("id") - .values_list("id", flat=True) - ) - - -def latest_advisories_with_all_impacts_unfurled_attempted( - impacted_packages=None, -): - impacts_not_attempted = impacted_packages.filter( - advisory_id=OuterRef("pk"), - advisory__is_latest=True, - last_range_unfurl_at__isnull=True, - ) - - return ( - AdvisoryV2.objects.filter( - _all_impacts_unfurled_successfully=False, - is_latest=True, - ) - .annotate(has_unattempted_impacts=Exists(impacts_not_attempted)) - .filter(has_unattempted_impacts=False) - .order_by("id") - .values_list("id", flat=True) - ) diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_mark_all_impacts_unfurled.py b/vulnerabilities/tests/pipelines/v2_improvers/test_mark_all_impacts_unfurled.py index 62d05f0ed..fa740ae6c 100644 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_mark_all_impacts_unfurled.py +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_mark_all_impacts_unfurled.py @@ -7,7 +7,6 @@ # See https://aboutcode.org for more information about nexB OSS projects. 
# -import uuid from unittest import mock import pytest @@ -15,12 +14,16 @@ from vulnerabilities.models import AdvisoryV2 from vulnerabilities.models import ImpactedPackage -from vulnerabilities.pipelines.v2_improvers.unfurl_version_range import UnfurlVersionRangePipeline -from vulnerabilities.pipelines.v2_improvers.unfurl_version_range import complete_advisories_import -from vulnerabilities.pipelines.v2_improvers.unfurl_version_range import ( +from vulnerabilities.pipelines.v2_improvers.mark_unfurl_version_range import ( + MarkUnfurlVersionRangePipeline, +) +from vulnerabilities.pipelines.v2_improvers.mark_unfurl_version_range import ( + complete_advisories_import, +) +from vulnerabilities.pipelines.v2_improvers.mark_unfurl_version_range import ( latest_advisories_with_all_impacts_unfurled_attempted, ) -from vulnerabilities.pipelines.v2_improvers.unfurl_version_range import ( +from vulnerabilities.pipelines.v2_improvers.mark_unfurl_version_range import ( latest_advisories_with_all_impacts_unfurled_successfully, ) @@ -28,7 +31,7 @@ @pytest.mark.django_db class TestMarkAllImpactsUnfurledSuccessfully: @mock.patch( - "vulnerabilities.pipelines.v2_improvers.unfurl_version_range.complete_advisories_import", + "vulnerabilities.pipelines.v2_improvers.mark_unfurl_version_range.complete_advisories_import", wraps=complete_advisories_import, ) def test_marks_only_fully_successful_advisories( @@ -77,9 +80,7 @@ def test_marks_only_fully_successful_advisories( last_successful_range_unfurl_at=None, ) - pipeline = UnfurlVersionRangePipeline() - - pipeline.impacted_packages = ImpactedPackage.objects.all() + pipeline = MarkUnfurlVersionRangePipeline() pipeline.mark_all_impacts_unfurled_sucessfully() @@ -92,7 +93,7 @@ def test_marks_only_fully_successful_advisories( @pytest.mark.django_db class TestMarkAllImpactsUnfurlAttempted: @mock.patch( - "vulnerabilities.pipelines.v2_improvers.unfurl_version_range.complete_advisories_import" + 
"vulnerabilities.pipelines.v2_improvers.mark_unfurl_version_range.complete_advisories_import" ) def test_marks_only_fully_attempted_advisories( self, @@ -146,9 +147,7 @@ def test_marks_only_fully_attempted_advisories( last_successful_range_unfurl_at=None, ) - pipeline = UnfurlVersionRangePipeline() - - pipeline.impacted_packages = ImpactedPackage.objects.all() + pipeline = MarkUnfurlVersionRangePipeline() pipeline.mark_all_impacts_unfurl_attempted() @@ -166,7 +165,7 @@ def test_marks_only_fully_attempted_advisories( @pytest.mark.django_db class TestAttemptedBatching: @mock.patch( - "vulnerabilities.pipelines.v2_improvers.unfurl_version_range.complete_advisories_import" + "vulnerabilities.pipelines.v2_improvers.mark_unfurl_version_range.complete_advisories_import" ) def test_attempted_advisories_are_chunked_in_batches_of_100( self, @@ -200,9 +199,7 @@ def test_attempted_advisories_are_chunked_in_batches_of_100( last_successful_range_unfurl_at=None, ) - pipeline = UnfurlVersionRangePipeline() - - pipeline.impacted_packages = ImpactedPackage.objects.all() + pipeline = MarkUnfurlVersionRangePipeline() pipeline.mark_all_impacts_unfurl_attempted()