Skip to content

Commit 04ac019

Browse files
committed
fix: 3 edge cases -- clear stale include_paths, invalidate dep cache, sanitize corrupt DB data
Adversarial review found 3 bugs:

1. CRITICAL: Re-indexing the full repo didn't clear old include_paths. The DB update only ran when include_paths was truthy, so re-indexing with include_paths=None skipped the update and left the previously saved value in place. Now include_paths is ALWAYS written (None clears it).
2. HIGH: Stale dependency cache after re-indexing with different include_paths. The dependency cache persisted across re-indexes. Now clear_file_dependencies runs at the start of every re-index so the graph is rebuilt with the new include_paths.
3. MEDIUM: Corrupt jsonb from the DB (e.g. [123, null]) crashed Path().parts. build_dependency_graph now filters out non-string and blank entries and falls back to a full scan if all entries are invalid.

3 new tests:
- test_include_paths_with_corrupt_data: mixed valid/invalid entries
- test_include_paths_all_corrupt_scans_everything: all invalid -> full scan
- test_include_paths_empty_list_scans_everything: [] treated the same as None
1 parent 65d19e2 commit 04ac019

3 files changed

Lines changed: 41 additions & 4 deletions

File tree

backend/routes/repos.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -602,10 +602,13 @@ async def _run_async_indexing(
602602

603603
repo_manager.update_status(repo_id, "indexing")
604604

605-
# Persist include_paths so dependency analyzer and other tools use the subset
606-
if include_paths:
607-
from services.supabase_service import get_supabase_service
608-
get_supabase_service().update_repository(repo_id, {"include_paths": include_paths})
605+
# Persist include_paths (or clear it if re-indexing full repo)
606+
from services.supabase_service import get_supabase_service
607+
db = get_supabase_service()
608+
db.update_repository(repo_id, {"include_paths": include_paths})
609+
610+
# Clear stale dependency cache so next graph build uses new include_paths
611+
db.clear_file_dependencies(repo_id)
609612

610613
# Publish initial progress to confirm connection
611614
if publisher:

backend/services/dependency_analyzer.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,12 @@ def build_dependency_graph(self, repo_path: str, include_paths: List[str] = None
142142
"""Build dependency graph. If include_paths set, only analyze those dirs."""
143143
repo_path = Path(repo_path)
144144

145+
# Sanitize include_paths from DB (could be corrupt jsonb)
146+
if include_paths:
147+
include_paths = [p for p in include_paths if isinstance(p, str) and p.strip()]
148+
if not include_paths:
149+
include_paths = None
150+
145151
# Discover code files
146152
code_files = []
147153
extensions = {'.py', '.js', '.jsx', '.ts', '.tsx'}

backend/tests/test_dependency_analyzer.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,34 @@ def test_include_paths_multiple_dirs(self, analyzer, ts_repo):
280280
assert any('packages/schema' in f for f in file_paths)
281281
assert not any('backend' in f for f in file_paths)
282282

283+
def test_include_paths_with_corrupt_data(self, analyzer, ts_repo):
284+
"""Corrupt jsonb from DB should not crash -- non-strings are filtered out"""
285+
graph = analyzer.build_dependency_graph(
286+
str(ts_repo),
287+
include_paths=[123, None, '', 'packages/effect', True]
288+
)
289+
file_paths = set(graph['dependencies'].keys())
290+
# Should only include effect files, corrupt entries filtered
291+
assert all('packages/effect' in f for f in file_paths)
292+
assert len(file_paths) > 0
293+
294+
def test_include_paths_all_corrupt_scans_everything(self, analyzer, ts_repo):
295+
"""If all include_paths entries are invalid, fall back to full scan"""
296+
graph = analyzer.build_dependency_graph(
297+
str(ts_repo),
298+
include_paths=[123, None, '', False]
299+
)
300+
file_paths = set(graph['dependencies'].keys())
301+
# Should fall back to scanning everything
302+
assert any('backend' in f for f in file_paths)
303+
assert any('packages/effect' in f for f in file_paths)
304+
305+
def test_include_paths_empty_list_scans_everything(self, analyzer, ts_repo):
306+
"""Empty list should be treated same as None"""
307+
graph = analyzer.build_dependency_graph(str(ts_repo), include_paths=[])
308+
file_paths = set(graph['dependencies'].keys())
309+
assert any('backend' in f for f in file_paths)
310+
283311

284312
class TestGraphMetrics:
285313
"""Verify graph statistics are correct"""

0 commit comments

Comments
 (0)