Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions backend/routes/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,21 @@ class ImpactRequest(BaseModel):
@router.get("/{repo_id}/dependencies")
async def get_dependency_graph(
repo_id: str,
force: bool = False,
auth: AuthContext = Depends(require_auth)
):
"""Get dependency graph for repository."""
"""Get dependency graph for repository. Use force=true to rebuild from scratch."""
try:
repo = get_repo_or_404(repo_id, auth.user_id)

cached_graph = dependency_analyzer.load_from_cache(repo_id)
if cached_graph:
logger.debug("Using cached dependency graph", repo_id=repo_id)
return {**cached_graph, "cached": True}
if not force:
cached_graph = dependency_analyzer.load_from_cache(repo_id)
if cached_graph:
logger.debug("Using cached dependency graph", repo_id=repo_id)
return {**cached_graph, "cached": True}

logger.info("Building fresh dependency graph", repo_id=repo_id)
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
logger.info("Building fresh dependency graph", repo_id=repo_id, include_paths=repo.get("include_paths"))
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"], include_paths=repo.get("include_paths"))
dependency_analyzer.save_to_cache(repo_id, graph_data)
Comment thread
DevanshuNEU marked this conversation as resolved.

return {**graph_data, "cached": False}
Expand Down Expand Up @@ -64,7 +66,7 @@ async def analyze_impact(
graph_data = dependency_analyzer.load_from_cache(repo_id)
if not graph_data:
logger.info("Building dependency graph for impact analysis", repo_id=repo_id)
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"], include_paths=repo.get("include_paths"))
dependency_analyzer.save_to_cache(repo_id, graph_data)

impact = dependency_analyzer.get_file_impact(
Expand Down Expand Up @@ -94,7 +96,7 @@ async def get_repository_insights(
graph_data = dependency_analyzer.load_from_cache(repo_id)
if not graph_data:
logger.info("Building dependency graph for insights", repo_id=repo_id)
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"], include_paths=repo.get("include_paths"))
dependency_analyzer.save_to_cache(repo_id, graph_data)

return {
Expand Down
8 changes: 8 additions & 0 deletions backend/routes/repos.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,14 @@ async def _run_async_indexing(

repo_manager.update_status(repo_id, "indexing")

# Persist include_paths (or clear it if re-indexing full repo)
from services.supabase_service import get_supabase_service
db = get_supabase_service()
db.update_repository(repo_id, {"include_paths": include_paths})

# Clear stale dependency cache so next graph build uses new include_paths
db.clear_file_dependencies(repo_id)

# Publish initial progress to confirm connection
if publisher:
publisher.publish_progress(repo_id, 0, 1, 0, "Starting...")
Expand Down
12 changes: 12 additions & 0 deletions backend/services/dependency_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,18 @@ def build_dependency_graph(self, repo_path: str, include_paths: List[str] = None
"""Build dependency graph. If include_paths set, only analyze those dirs."""
repo_path = Path(repo_path)

# Sanitize include_paths from DB (could be corrupt jsonb)
if include_paths:
cleaned = []
for p in include_paths:
if not isinstance(p, str):
continue
p = p.replace('\\', '/').strip().strip('/')
if not p or '..' in p.split('/'):
continue
cleaned.append(p)
include_paths = cleaned or None

# Discover code files
code_files = []
extensions = {'.py', '.js', '.jsx', '.ts', '.tsx'}
Expand Down
49 changes: 49 additions & 0 deletions backend/tests/test_dependency_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,55 @@ def test_include_paths_multiple_dirs(self, analyzer, ts_repo):
assert any('packages/schema' in f for f in file_paths)
assert not any('backend' in f for f in file_paths)

def test_include_paths_with_corrupt_data(self, analyzer, ts_repo):
    """Non-string and empty include_paths entries are dropped instead of crashing."""
    mixed_entries = [123, None, '', 'packages/effect', True]
    graph = analyzer.build_dependency_graph(
        str(ts_repo), include_paths=mixed_entries
    )
    analyzed = set(graph['dependencies'].keys())
    # Only the single valid entry may drive the scan.
    for path in analyzed:
        assert 'packages/effect' in path
    # Guard against the loop above passing vacuously on an empty result.
    assert len(analyzed) > 0

def test_include_paths_all_corrupt_scans_everything(self, analyzer, ts_repo):
    """When no include_paths entry is usable, the analyzer scans the whole repo."""
    unusable_entries = [123, None, '', False]
    graph = analyzer.build_dependency_graph(
        str(ts_repo), include_paths=unusable_entries
    )
    analyzed = set(graph['dependencies'].keys())
    # Files from both directories show that no path filter was applied.
    for expected_dir in ('backend', 'packages/effect'):
        assert any(expected_dir in path for path in analyzed)

def test_include_paths_empty_list_scans_everything(self, analyzer, ts_repo):
    """An empty include_paths list is handled like None: nothing is filtered out."""
    no_filter = []
    graph = analyzer.build_dependency_graph(str(ts_repo), include_paths=no_filter)
    analyzed = set(graph['dependencies'].keys())
    assert any('backend' in path for path in analyzed)

Comment thread
DevanshuNEU marked this conversation as resolved.
def test_include_paths_traversal_rejected(self, analyzer, ts_repo):
    """Entries containing a '..' segment are dropped rather than causing a crash."""
    requested = ['../etc/passwd', 'packages/effect', '../../secrets']
    graph = analyzer.build_dependency_graph(str(ts_repo), include_paths=requested)
    analyzed = set(graph['dependencies'].keys())
    # With the traversal entries discarded, only packages/effect is left to scan.
    for path in analyzed:
        assert 'packages/effect' in path
    # Guard against the loop above passing vacuously on an empty result.
    assert len(analyzed) > 0

def test_include_paths_backslash_normalized(self, analyzer, ts_repo):
    """A backslash-separated include path matches the same files as its '/' form."""
    windows_style = ['packages\\effect']
    graph = analyzer.build_dependency_graph(
        str(ts_repo), include_paths=windows_style
    )
    analyzed = set(graph['dependencies'].keys())
    for path in analyzed:
        assert 'packages/effect' in path
    # Guard against the loop above passing vacuously on an empty result.
    assert len(analyzed) > 0


class TestGraphMetrics:
"""Verify graph statistics are correct"""
Expand Down