From 659968bd3d778822d4c45236828a2748e37ed5b1 Mon Sep 17 00:00:00 2001 From: "murat.oguz" Date: Thu, 8 Jan 2026 13:45:58 +0300 Subject: [PATCH] Refactor repository scan dependency handling to group libraries by source file path and include ecosystem information --- backend/app/models/repository_scan.py | 2 ++ backend/app/views/library_view.py | 34 ++++++++++++++++----------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/backend/app/models/repository_scan.py b/backend/app/models/repository_scan.py index 9b0fffe..976ddda 100644 --- a/backend/app/models/repository_scan.py +++ b/backend/app/models/repository_scan.py @@ -8,6 +8,7 @@ class RepoLibrary(BaseModel): library_name: str = Field(..., description='Discovered library name') library_version: str = Field(..., description='Discovered library version') + ecosystem: str = Field(..., description='Ecosystem of the library (e.g., npm, pip, maven, etc.)') model_config = ConfigDict(populate_by_name=True) @@ -16,6 +17,7 @@ class RepoDependency(BaseModel): libraries: List[RepoLibrary] = Field(default_factory=list, description='Libraries found in this file') + class RepositoryScanBase(BaseModel): repository_url: str = Field(..., description='Full repository URL (e.g., GitHub repo URL)') repository_platform: str = Field(..., description='Platform of the repository (github/gitlab/bitbucket/etc)') diff --git a/backend/app/views/library_view.py b/backend/app/views/library_view.py index 541abd6..ef9c42f 100644 --- a/backend/app/views/library_view.py +++ b/backend/app/views/library_view.py @@ -409,24 +409,30 @@ async def handle_repo_scan_highest_risk(payload: dict): # Persist summarized scan to repository_scans collection platform, repo_name = _infer_repo_meta(repo_url) try: + # Group enriched dependencies by their source file path (library_path) + grouped: Dict[str, List[Dict[str, Any]]] = {} + for d in dependencies: + # prefer explicit file field, fall back to first source if present, else 'unknown' + path = d.get('file') or 
(d.get('sources')[0] if d.get('sources') else None) or 'unknown' + if not d.get('name'): + continue + lib_entry = { + "library_name": d.get('name'), + "library_version": normalize_version(d.get('version')) or d.get('version') or "unknown", + "ecosystem": d.get('ecosystem') or "unknown" + } + grouped.setdefault(path, []).append(lib_entry) + + deps_payload = [ + {"library_path": path, "libraries": libs} + for path, libs in grouped.items() + ] + payload = RepositoryScanCreate( repository_url=repo_url, repository_platform=platform, repository_name=repo_name, - dependencies=[ - { - "library_path": file.get("path"), - "libraries": [ - { - "library_name": dep.get("name"), - "library_version": normalize_version(dep.get("version")) or dep.get("version") or "unknown" - } - for dep in (file.get("report", {}).get("dependencies") or []) - if dep.get("name") - ], - } - for file in analyzed_files - ], + dependencies=deps_payload, ) await create_repository_scan(payload) except Exception as exc: