Skip to content

Commit 65d19e2

Browse files
committed
fix: persist include_paths to DB and pass to dependency analyzer (OPE-162)
Root cause of the 1000-node hairball: the user indexes only packages/sql and packages/vitest (26 files), but the dependency graph scans the entire Effect-TS clone (1,767 files).

Fix:
1. Save include_paths to the repositories table during indexing.
2. All 3 analysis routes now pass repo.get('include_paths') to build_dependency_graph.
3. Added a force=true query param to the dependencies endpoint to bypass a stale cache that was built without include_paths filtering.

After re-indexing, Effect-TS subset repos will show 26 nodes instead of 1000.

Closes OPE-162
1 parent 20604de commit 65d19e2

2 files changed

Lines changed: 16 additions & 9 deletions

File tree

backend/routes/analysis.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,21 @@ class ImpactRequest(BaseModel):
2121
@router.get("/{repo_id}/dependencies")
2222
async def get_dependency_graph(
2323
repo_id: str,
24+
force: bool = False,
2425
auth: AuthContext = Depends(require_auth)
2526
):
26-
"""Get dependency graph for repository."""
27+
"""Get dependency graph for repository. Use force=true to rebuild from scratch."""
2728
try:
2829
repo = get_repo_or_404(repo_id, auth.user_id)
2930

30-
cached_graph = dependency_analyzer.load_from_cache(repo_id)
31-
if cached_graph:
32-
logger.debug("Using cached dependency graph", repo_id=repo_id)
33-
return {**cached_graph, "cached": True}
31+
if not force:
32+
cached_graph = dependency_analyzer.load_from_cache(repo_id)
33+
if cached_graph:
34+
logger.debug("Using cached dependency graph", repo_id=repo_id)
35+
return {**cached_graph, "cached": True}
3436

35-
logger.info("Building fresh dependency graph", repo_id=repo_id)
36-
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
37+
logger.info("Building fresh dependency graph", repo_id=repo_id, include_paths=repo.get("include_paths"))
38+
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"], include_paths=repo.get("include_paths"))
3739
dependency_analyzer.save_to_cache(repo_id, graph_data)
3840

3941
return {**graph_data, "cached": False}
@@ -64,7 +66,7 @@ async def analyze_impact(
6466
graph_data = dependency_analyzer.load_from_cache(repo_id)
6567
if not graph_data:
6668
logger.info("Building dependency graph for impact analysis", repo_id=repo_id)
67-
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
69+
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"], include_paths=repo.get("include_paths"))
6870
dependency_analyzer.save_to_cache(repo_id, graph_data)
6971

7072
impact = dependency_analyzer.get_file_impact(
@@ -94,7 +96,7 @@ async def get_repository_insights(
9496
graph_data = dependency_analyzer.load_from_cache(repo_id)
9597
if not graph_data:
9698
logger.info("Building dependency graph for insights", repo_id=repo_id)
97-
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
99+
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"], include_paths=repo.get("include_paths"))
98100
dependency_analyzer.save_to_cache(repo_id, graph_data)
99101

100102
return {

backend/routes/repos.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,11 @@ async def _run_async_indexing(
602602

603603
repo_manager.update_status(repo_id, "indexing")
604604

605+
# Persist include_paths so dependency analyzer and other tools use the subset
606+
if include_paths:
607+
from services.supabase_service import get_supabase_service
608+
get_supabase_service().update_repository(repo_id, {"include_paths": include_paths})
609+
605610
# Publish initial progress to confirm connection
606611
if publisher:
607612
publisher.publish_progress(repo_id, 0, 1, 0, "Starting...")

0 commit comments

Comments
 (0)