diff --git a/backend/routes/analysis.py b/backend/routes/analysis.py index 4ce126c..bc59ea0 100644 --- a/backend/routes/analysis.py +++ b/backend/routes/analysis.py @@ -21,19 +21,21 @@ class ImpactRequest(BaseModel): @router.get("/{repo_id}/dependencies") async def get_dependency_graph( repo_id: str, + force: bool = False, auth: AuthContext = Depends(require_auth) ): - """Get dependency graph for repository.""" + """Get dependency graph for repository. Use force=true to rebuild from scratch.""" try: repo = get_repo_or_404(repo_id, auth.user_id) - cached_graph = dependency_analyzer.load_from_cache(repo_id) - if cached_graph: - logger.debug("Using cached dependency graph", repo_id=repo_id) - return {**cached_graph, "cached": True} + if not force: + cached_graph = dependency_analyzer.load_from_cache(repo_id) + if cached_graph: + logger.debug("Using cached dependency graph", repo_id=repo_id) + return {**cached_graph, "cached": True} - logger.info("Building fresh dependency graph", repo_id=repo_id) - graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"]) + logger.info("Building fresh dependency graph", repo_id=repo_id, include_paths=repo.get("include_paths")) + graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"], include_paths=repo.get("include_paths")) dependency_analyzer.save_to_cache(repo_id, graph_data) return {**graph_data, "cached": False} @@ -64,7 +66,7 @@ async def analyze_impact( graph_data = dependency_analyzer.load_from_cache(repo_id) if not graph_data: logger.info("Building dependency graph for impact analysis", repo_id=repo_id) - graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"]) + graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"], include_paths=repo.get("include_paths")) dependency_analyzer.save_to_cache(repo_id, graph_data) impact = dependency_analyzer.get_file_impact( @@ -94,7 +96,7 @@ async def get_repository_insights( graph_data = dependency_analyzer.load_from_cache(repo_id) if not graph_data: logger.info("Building dependency graph for insights", repo_id=repo_id) - graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"]) + graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"], include_paths=repo.get("include_paths")) dependency_analyzer.save_to_cache(repo_id, graph_data) return { diff --git a/backend/routes/repos.py b/backend/routes/repos.py index a630aaf..46421ae 100644 --- a/backend/routes/repos.py +++ b/backend/routes/repos.py @@ -602,6 +602,14 @@ async def _run_async_indexing( repo_manager.update_status(repo_id, "indexing") + # Persist include_paths (or clear it if re-indexing full repo) + from services.supabase_service import get_supabase_service + db = get_supabase_service() + db.update_repository(repo_id, {"include_paths": include_paths}) + + # Clear stale dependency cache so next graph build uses new include_paths + db.clear_file_dependencies(repo_id) + # Publish initial progress to confirm connection if publisher: publisher.publish_progress(repo_id, 0, 1, 0, "Starting...") diff --git a/backend/services/dependency_analyzer.py b/backend/services/dependency_analyzer.py index 927efc6..d28062d 100644 --- a/backend/services/dependency_analyzer.py +++ b/backend/services/dependency_analyzer.py @@ -142,6 +142,18 @@ def build_dependency_graph(self, repo_path: str, include_paths: List[str] = None """Build dependency graph. If include_paths set, only analyze those dirs.""" repo_path = Path(repo_path) + # Sanitize include_paths from DB (could be corrupt jsonb) + if include_paths: + cleaned = [] + for p in include_paths: + if not isinstance(p, str): + continue + p = p.replace('\\', '/').strip().strip('/') + if not p or '..' in p.split('/'): + continue + cleaned.append(p) + include_paths = cleaned or None + # Discover code files code_files = [] extensions = {'.py', '.js', '.jsx', '.ts', '.tsx'} diff --git a/backend/tests/test_dependency_analyzer.py b/backend/tests/test_dependency_analyzer.py index 317dfe1..dcf7518 100644 --- a/backend/tests/test_dependency_analyzer.py +++ b/backend/tests/test_dependency_analyzer.py @@ -280,6 +280,55 @@ def test_include_paths_multiple_dirs(self, analyzer, ts_repo): assert any('packages/schema' in f for f in file_paths) assert not any('backend' in f for f in file_paths) + def test_include_paths_with_corrupt_data(self, analyzer, ts_repo): + """Corrupt jsonb from DB should not crash -- non-strings are filtered out""" + graph = analyzer.build_dependency_graph( + str(ts_repo), + include_paths=[123, None, '', 'packages/effect', True] + ) + file_paths = set(graph['dependencies'].keys()) + # Should only include effect files, corrupt entries filtered + assert all('packages/effect' in f for f in file_paths) + assert len(file_paths) > 0 + + def test_include_paths_all_corrupt_scans_everything(self, analyzer, ts_repo): + """If all include_paths entries are invalid, fall back to full scan""" + graph = analyzer.build_dependency_graph( + str(ts_repo), + include_paths=[123, None, '', False] + ) + file_paths = set(graph['dependencies'].keys()) + # Should fall back to scanning everything + assert any('backend' in f for f in file_paths) + assert any('packages/effect' in f for f in file_paths) + + def test_include_paths_empty_list_scans_everything(self, analyzer, ts_repo): + """Empty list should be treated same as None""" + graph = analyzer.build_dependency_graph(str(ts_repo), include_paths=[]) + file_paths = set(graph['dependencies'].keys()) + assert any('backend' in f for f in file_paths) + + def test_include_paths_traversal_rejected(self, analyzer, ts_repo): + """Path traversal attempts should be stripped, not crash""" + graph = analyzer.build_dependency_graph( + str(ts_repo), + include_paths=['../etc/passwd', 'packages/effect', '../../secrets'] + ) + file_paths = set(graph['dependencies'].keys()) + # Traversal entries filtered, only packages/effect remains + assert all('packages/effect' in f for f in file_paths) + assert len(file_paths) > 0 + + def test_include_paths_backslash_normalized(self, analyzer, ts_repo): + """Windows-style backslashes should be normalized""" + graph = analyzer.build_dependency_graph( + str(ts_repo), + include_paths=['packages\\effect'] + ) + file_paths = set(graph['dependencies'].keys()) + assert all('packages/effect' in f for f in file_paths) + assert len(file_paths) > 0 + class TestGraphMetrics: """Verify graph statistics are correct"""