Skip to content

Commit cdc7c47

Browse files
authored
Merge pull request #283 from DevanshuNEU/fix/persist-include-paths
fix: persist include_paths -- subset repos show 26 nodes not 1000 (OPE-162)
2 parents 20604de + 9c9f20f commit cdc7c47

4 files changed

Lines changed: 80 additions & 9 deletions

File tree

backend/routes/analysis.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,21 @@ class ImpactRequest(BaseModel):
2121
@router.get("/{repo_id}/dependencies")
2222
async def get_dependency_graph(
2323
repo_id: str,
24+
force: bool = False,
2425
auth: AuthContext = Depends(require_auth)
2526
):
26-
"""Get dependency graph for repository."""
27+
"""Get dependency graph for repository. Use force=true to rebuild from scratch."""
2728
try:
2829
repo = get_repo_or_404(repo_id, auth.user_id)
2930

30-
cached_graph = dependency_analyzer.load_from_cache(repo_id)
31-
if cached_graph:
32-
logger.debug("Using cached dependency graph", repo_id=repo_id)
33-
return {**cached_graph, "cached": True}
31+
if not force:
32+
cached_graph = dependency_analyzer.load_from_cache(repo_id)
33+
if cached_graph:
34+
logger.debug("Using cached dependency graph", repo_id=repo_id)
35+
return {**cached_graph, "cached": True}
3436

35-
logger.info("Building fresh dependency graph", repo_id=repo_id)
36-
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
37+
logger.info("Building fresh dependency graph", repo_id=repo_id, include_paths=repo.get("include_paths"))
38+
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"], include_paths=repo.get("include_paths"))
3739
dependency_analyzer.save_to_cache(repo_id, graph_data)
3840

3941
return {**graph_data, "cached": False}
@@ -64,7 +66,7 @@ async def analyze_impact(
6466
graph_data = dependency_analyzer.load_from_cache(repo_id)
6567
if not graph_data:
6668
logger.info("Building dependency graph for impact analysis", repo_id=repo_id)
67-
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
69+
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"], include_paths=repo.get("include_paths"))
6870
dependency_analyzer.save_to_cache(repo_id, graph_data)
6971

7072
impact = dependency_analyzer.get_file_impact(
@@ -94,7 +96,7 @@ async def get_repository_insights(
9496
graph_data = dependency_analyzer.load_from_cache(repo_id)
9597
if not graph_data:
9698
logger.info("Building dependency graph for insights", repo_id=repo_id)
97-
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
99+
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"], include_paths=repo.get("include_paths"))
98100
dependency_analyzer.save_to_cache(repo_id, graph_data)
99101

100102
return {

backend/routes/repos.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,14 @@ async def _run_async_indexing(
602602

603603
repo_manager.update_status(repo_id, "indexing")
604604

605+
# Persist include_paths (or clear it if re-indexing full repo)
606+
from services.supabase_service import get_supabase_service
607+
db = get_supabase_service()
608+
db.update_repository(repo_id, {"include_paths": include_paths})
609+
610+
# Clear the stale dependency cache so the next graph build uses the new include_paths
611+
db.clear_file_dependencies(repo_id)
612+
605613
# Publish initial progress to confirm connection
606614
if publisher:
607615
publisher.publish_progress(repo_id, 0, 1, 0, "Starting...")

backend/services/dependency_analyzer.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,18 @@ def build_dependency_graph(self, repo_path: str, include_paths: List[str] = None
142142
"""Build dependency graph. If include_paths set, only analyze those dirs."""
143143
repo_path = Path(repo_path)
144144

145+
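# Sanitize include_paths loaded from the DB (the jsonb value may be corrupt): normalize separators, then drop non-string entries, empty entries, and any path containing a '..' segment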
146+
if include_paths:
147+
cleaned = []
148+
for p in include_paths:
149+
if not isinstance(p, str):
150+
continue
151+
p = p.replace('\\', '/').strip().strip('/')
152+
if not p or '..' in p.split('/'):
153+
continue
154+
cleaned.append(p)
155+
include_paths = cleaned or None
156+
145157
# Discover code files
146158
code_files = []
147159
extensions = {'.py', '.js', '.jsx', '.ts', '.tsx'}

backend/tests/test_dependency_analyzer.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,55 @@ def test_include_paths_multiple_dirs(self, analyzer, ts_repo):
280280
assert any('packages/schema' in f for f in file_paths)
281281
assert not any('backend' in f for f in file_paths)
282282

283+
def test_include_paths_with_corrupt_data(self, analyzer, ts_repo):
284+
"""Corrupt jsonb from DB should not crash -- non-strings are filtered out"""
285+
graph = analyzer.build_dependency_graph(
286+
str(ts_repo),
287+
include_paths=[123, None, '', 'packages/effect', True]
288+
)
289+
file_paths = set(graph['dependencies'].keys())
290+
# Should only include effect files, corrupt entries filtered
291+
assert all('packages/effect' in f for f in file_paths)
292+
assert len(file_paths) > 0
293+
294+
def test_include_paths_all_corrupt_scans_everything(self, analyzer, ts_repo):
295+
"""If all include_paths entries are invalid, fall back to full scan"""
296+
graph = analyzer.build_dependency_graph(
297+
str(ts_repo),
298+
include_paths=[123, None, '', False]
299+
)
300+
file_paths = set(graph['dependencies'].keys())
301+
# Should fall back to scanning everything
302+
assert any('backend' in f for f in file_paths)
303+
assert any('packages/effect' in f for f in file_paths)
304+
305+
def test_include_paths_empty_list_scans_everything(self, analyzer, ts_repo):
306+
"""Empty list should be treated same as None"""
307+
graph = analyzer.build_dependency_graph(str(ts_repo), include_paths=[])
308+
file_paths = set(graph['dependencies'].keys())
309+
assert any('backend' in f for f in file_paths)
310+
311+
def test_include_paths_traversal_rejected(self, analyzer, ts_repo):
312+
"""Path traversal attempts should be stripped, not crash"""
313+
graph = analyzer.build_dependency_graph(
314+
str(ts_repo),
315+
include_paths=['../etc/passwd', 'packages/effect', '../../secrets']
316+
)
317+
file_paths = set(graph['dependencies'].keys())
318+
# Traversal entries filtered, only packages/effect remains
319+
assert all('packages/effect' in f for f in file_paths)
320+
assert len(file_paths) > 0
321+
322+
def test_include_paths_backslash_normalized(self, analyzer, ts_repo):
323+
"""Windows-style backslashes should be normalized"""
324+
graph = analyzer.build_dependency_graph(
325+
str(ts_repo),
326+
include_paths=['packages\\effect']
327+
)
328+
file_paths = set(graph['dependencies'].keys())
329+
assert all('packages/effect' in f for f in file_paths)
330+
assert len(file_paths) > 0
331+
283332

284333
class TestGraphMetrics:
285334
"""Verify graph statistics are correct"""

0 commit comments

Comments
 (0)