diff --git a/backend/main.py b/backend/main.py index 04ccecf..08d789f 100644 --- a/backend/main.py +++ b/backend/main.py @@ -44,6 +44,10 @@ @asynccontextmanager async def lifespan(app: FastAPI): validate_environment() + # Any repo left 'indexing' at boot is orphaned (indexing runs in-process; a restart + # kills it). Reset so the user can retry instead of seeing an eternal spinner. (#311) + from services.supabase_service import get_supabase_service + get_supabase_service().reset_stuck_indexing_jobs() await load_demo_repos() yield # Shutdown (cleanup if needed) diff --git a/backend/migrations/003_add_indexing_started_at.sql b/backend/migrations/003_add_indexing_started_at.sql new file mode 100644 index 0000000..eeb1b33 --- /dev/null +++ b/backend/migrations/003_add_indexing_started_at.sql @@ -0,0 +1,11 @@ +-- Durable repo-state v0.1 (issue #311) +-- Records when a repo entered the 'indexing' state so the stuck-job reaper can tell a +-- live job from an orphaned one (process died mid-index, leaving status='indexing' forever). + +ALTER TABLE repositories + ADD COLUMN IF NOT EXISTS indexing_started_at TIMESTAMPTZ; + +-- Partial index: the reaper and the steal-on-retry path only ever scan rows currently indexing. +CREATE INDEX IF NOT EXISTS idx_repositories_indexing_started_at + ON repositories(indexing_started_at) + WHERE status = 'indexing'; diff --git a/backend/routes/analysis.py b/backend/routes/analysis.py index 2eb4daa..b7c5ab9 100644 --- a/backend/routes/analysis.py +++ b/backend/routes/analysis.py @@ -4,7 +4,7 @@ from dependencies import ( dependency_analyzer, style_analyzer, dna_extractor, - get_repo_or_404 + get_repo_or_404, repo_manager ) from services.input_validator import InputValidator from middleware.auth import require_auth, AuthContext @@ -35,6 +35,7 @@ async def get_dependency_graph( return {**cached_graph, "cached": True} logger.info("Building fresh dependency graph", repo_id=repo_id, include_paths=repo.get("include_paths")) + await repo_manager.ensure_clone(repo) graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"], include_paths=repo.get("include_paths")) dependency_analyzer.save_to_cache(repo_id, graph_data) @@ -57,12 +58,15 @@ async def analyze_impact( try: repo = get_repo_or_404(repo_id, auth.user_id) + # Validate user input BEFORE any expensive/external work (re-clone). Reject traversal first. valid_path, path_error = InputValidator.validate_file_path( request.file_path, repo["local_path"] ) if not valid_path: raise HTTPException(status_code=400, detail=f"Invalid file path: {path_error}") + await repo_manager.ensure_clone(repo) + graph_data = dependency_analyzer.load_from_cache(repo_id) if not graph_data: logger.info("Building dependency graph for impact analysis", repo_id=repo_id) @@ -96,6 +100,7 @@ async def get_repository_insights( graph_data = dependency_analyzer.load_from_cache(repo_id) if not graph_data: logger.info("Building dependency graph for insights", repo_id=repo_id) + await repo_manager.ensure_clone(repo) graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"], include_paths=repo.get("include_paths")) dependency_analyzer.save_to_cache(repo_id, graph_data) @@ -134,6 +139,7 @@ async def get_style_analysis( return {**cached_style, "cached": True} logger.info("Analyzing code style", repo_id=repo_id) + await repo_manager.ensure_clone(repo) style_data = style_analyzer.analyze_repository_style(repo["local_path"], include_paths=repo.get("include_paths")) style_analyzer.save_to_cache(repo_id, style_data) @@ -178,6 +184,7 @@ async def get_codebase_dna( logger.info("Extracting codebase DNA", repo_id=repo_id) metrics.increment("dna_extractions") + await repo_manager.ensure_clone(repo) dna = dna_extractor.extract_dna(repo["local_path"], repo_id, include_paths=repo.get("include_paths")) dna_extractor.save_to_cache(repo_id, dna) diff --git a/backend/routes/repos.py b/backend/routes/repos.py index 46421ae..55bb511 100644 --- a/backend/routes/repos.py +++ b/backend/routes/repos.py @@ -470,11 +470,10 @@ async def get_repo_directories( directories to index instead of the entire repo. """ repo = get_repo_or_404(repo_id, auth.user_id) + # Re-clone if the container was redeployed and wiped the working tree (#311). + await repo_manager.ensure_clone(repo) local_path = Path(repo["local_path"]) - if not local_path.exists(): - raise HTTPException(status_code=404, detail="Repo not cloned yet") - dirs = await asyncio.to_thread(_scan_directories, local_path) return { @@ -501,7 +500,9 @@ async def index_repository( try: repo = get_repo_or_404(repo_id, user_id) - + # Re-clone if the container was redeployed and wiped the working tree (#311). + await repo_manager.ensure_clone(repo) + # Re-check size limits before indexing (in case tier changed or repo updated) analysis = repo_validator.analyze_repo(repo["local_path"]) @@ -765,10 +766,13 @@ async def index_repository_async( try: repo = get_repo_or_404(repo_id, user_id) - + # Re-clone before the size-check (which reads the working tree) so a redeployed + # container rehydrates here, before the 202, rather than failing the gate (#311). + await repo_manager.ensure_clone(repo) + # Re-check size limits analysis = repo_validator.analyze_repo(repo["local_path"]) - + if not analysis.success: raise HTTPException( status_code=500, @@ -867,7 +871,15 @@ async def websocket_index(websocket: WebSocket, repo_id: str): if not repo: await websocket.close(code=4004, reason="Repository not found") return - + + # Re-clone if the container was redeployed and wiped the working tree (#311). + try: + await repo_manager.ensure_clone(repo) + except Exception as e: + logger.error("Re-clone failed for websocket indexing", repo_id=repo_id, error=str(e)) + await websocket.close(code=4005, reason="Repository clone unavailable") + return + # Check size limits before WebSocket indexing analysis = repo_validator.analyze_repo(repo["local_path"]) diff --git a/backend/services/repo_manager.py b/backend/services/repo_manager.py index e6e0b38..d209221 100644 --- a/backend/services/repo_manager.py +++ b/backend/services/repo_manager.py @@ -2,14 +2,42 @@ Repository Manager (Supabase Edition) Handles repository CRUD operations with PostgreSQL via Supabase """ +import asyncio +import os +import shutil import uuid -from typing import List, Optional +from typing import Dict, List, Optional import git from pathlib import Path +from fastapi import HTTPException from services.supabase_service import get_supabase_service from services.observability import logger, metrics +class RepoCloneError(HTTPException): + """Repo working tree is missing and could not be restored from its git remote. + + Subclasses HTTPException so it surfaces as an actionable 503 (handlers already re-raise + HTTPException) instead of an opaque 500. UX matters here: a redeploy is invisible to the + user, so the message has to tell them what to actually do. (#311) + """ + + def __init__(self, repo_id: str, reason: str = ""): + super().__init__( + status_code=503, + detail={ + "error": "REPO_UNAVAILABLE", + "repo_id": repo_id, + "message": ( + "Repository source files are temporarily unavailable and could not be " + "restored from the git remote. Private repositories are not yet supported " + "for re-sync; for public repos, please retry shortly." + ), + }, + ) + self.reason = reason + + class RepositoryManager: """Manage repositories with Supabase persistence""" @@ -17,7 +45,11 @@ def __init__(self): self.repos_dir = Path("./repos") self.repos_dir.mkdir(exist_ok=True) self.db = get_supabase_service() - + + # Per-repo locks so two concurrent ops on the same missing clone don't both clone. + # Single uvicorn worker means an in-process lock is sufficient here. + self._clone_locks: Dict[str, asyncio.Lock] = {} + # Discover and sync existing repositories on startup self._sync_existing_repos() @@ -126,10 +158,69 @@ def add_repo(self, name: str, git_url: str, branch: str = "main", user_id: Optio except Exception as e: # Cleanup on failure if local_path.exists(): - import shutil shutil.rmtree(local_path) raise Exception(f"Failed to clone repository: {str(e)}") - + + async def ensure_clone(self, repo: dict) -> str: + """Guarantee the working tree exists on disk, lazily re-cloning from git_url if needed. + + Railway redeploys wipe ./repos (ephemeral disk) but Pinecone/Supabase survive, so + local_path is a cache hint, not source of truth -- the git remote is. On a warm hit + this is a sub-millisecond stat with no behavior change; on a miss it re-clones. + Returns the canonical local path and refreshes repo['local_path'] in place. + """ + repo_id = repo["id"] + canonical = self.repos_dir / repo_id + + # Warm path: clone present. No re-clone, no event-loop work. + if (canonical / ".git").exists(): + repo["local_path"] = str(canonical) + return str(canonical) + + git_url = repo.get("git_url") + if not git_url or git_url == "unknown": + raise RepoCloneError(repo_id, "no git_url on record") + branch = repo.get("branch") or "main" + + lock = self._clone_locks.setdefault(repo_id, asyncio.Lock()) + async with lock: + # Another coroutine may have cloned while we waited for the lock. + if not (canonical / ".git").exists(): + try: + await asyncio.to_thread(self._clone_into_place, repo_id, git_url, branch, canonical) + except Exception as e: + # Private repo (no creds on a fresh container), network failure, deleted + # remote: surface as an actionable 503, not an opaque 500. + logger.error("Re-clone failed", repo_id=repo_id, git_url=git_url, error=str(e)) + raise RepoCloneError(repo_id, str(e)) from e + logger.info("Re-cloned repo on demand (cache miss)", repo_id=repo_id, git_url=git_url) + metrics.increment("repos_recloned") + + repo["local_path"] = str(canonical) + return str(canonical) + + def _clone_into_place(self, repo_id: str, git_url: str, branch: str, canonical: Path) -> None: + """Clone into a temp dir then atomically rename into the canonical path. + + The rename is the correctness guarantee: a crashed or concurrent clone never leaves a + half-populated canonical dir for a reader to trip over. Runs in a worker thread (git is + blocking I/O); never call directly on the event loop. + """ + tmp = self.repos_dir / f".{repo_id}.tmp.{uuid.uuid4().hex}" + try: + git.Repo.clone_from(git_url, tmp, branch=branch, depth=1) + # Clear any leftover partial dir before the atomic swap. Do NOT ignore errors here: + # a failed removal must surface (the outer except re-raises it, and ensure_clone wraps + # it into a logged RepoCloneError) rather than letting us rename onto a dir we could + # not clean, which would fail later with a more confusing error. + if canonical.exists(): + shutil.rmtree(canonical) + os.rename(tmp, canonical) # atomic on the same filesystem + except Exception: + if tmp.exists(): + shutil.rmtree(tmp, ignore_errors=True) + raise + def update_status(self, repo_id: str, status: str): """Update repository status""" self.db.update_repository_status(repo_id, status) @@ -158,8 +249,6 @@ def update_last_commit(self, repo_id: str, commit_sha: str, function_count: int def delete_repo(self, repo_id: str) -> bool: """Delete repository and clean up local files""" - import shutil - repo = self.get_repo(repo_id) if not repo: return False diff --git a/backend/services/supabase_service.py b/backend/services/supabase_service.py index 1ed3ea9..5a0a067 100644 --- a/backend/services/supabase_service.py +++ b/backend/services/supabase_service.py @@ -4,12 +4,16 @@ """ import os from typing import Dict, List, Optional -from datetime import datetime +from datetime import datetime, timedelta from supabase import create_client, Client, ClientOptions import uuid from services.observability import logger +# A repo stuck in 'indexing' longer than this is treated as orphaned (its indexer +# process died) and may be re-claimed on retry. The startup sweep is unconditional. +STUCK_INDEXING_THRESHOLD_MINUTES = 30 + class SupabaseService: """Service for Supabase database operations""" @@ -87,23 +91,78 @@ def update_repository(self, repo_id: str, updates: Dict) -> Optional[Dict]: return result.data[0] if result.data else None def update_repository_status(self, repo_id: str, status: str) -> None: - """Update repository status""" - self.client.table("repositories").update({"status": status}).eq("id", repo_id).execute() - + """Update repository status. Transitioning to 'indexing' stamps indexing_started_at + so the stuck-job reaper has a clock to measure against, regardless of which code path + started the job.""" + updates: Dict = {"status": status} + if status == "indexing": + updates["indexing_started_at"] = datetime.utcnow().isoformat() + try: + self.client.table("repositories").update(updates).eq("id", repo_id).execute() + except Exception as e: + # Only swallow the specific case where migration 003 (indexing_started_at) has not been + # applied yet -- detected by the column name appearing in the DB error. Any other error + # (network, constraint, etc.) must re-raise so a real failure isn't masked by the retry. + if "indexing_started_at" not in updates or "indexing_started_at" not in str(e): + raise + logger.warning( + "indexing_started_at column missing; retrying status update without it (apply migration 003)", + repo_id=repo_id, error=str(e), + ) + self.client.table("repositories").update({"status": status}).eq("id", repo_id).execute() + def try_set_indexing_status(self, repo_id: str) -> bool: """ - Atomically set status to 'indexing' only if not already indexing. - - Returns True if status was set, False if repo was already indexing. - This prevents TOCTOU race conditions where two requests could both - see status != 'indexing' and both start indexing. + Atomically set status to 'indexing' only if not already actively indexing. + + Returns True if status was set, False if a fresh indexing job already owns the repo. + This prevents TOCTOU race conditions where two requests both see status != 'indexing' + and both start indexing. A row stuck in 'indexing' past the threshold (its process died) + -- or with no start stamp at all (legacy/pre-migration) -- is treated as orphaned and + re-claimed, so a crashed job never permanently blocks retry. """ - result = self.client.table("repositories").update( - {"status": "indexing"} - ).eq("id", repo_id).neq("status", "indexing").execute() - - # If result.data is empty, no rows matched (already indexing) + # cutoff is the staleness threshold for stealing an orphaned 'indexing' lock, derived from + # STUCK_INDEXING_THRESHOLD_MINUTES and a SERVER-CONTROLLED datetime.utcnow(). It is never + # user input, so interpolating it into the PostgREST .or_ filter string is safe. If any + # user-supplied value is ever introduced into this filter, parameterize it via the SDK + # (do not f-string it) to avoid filter-injection. + cutoff = (datetime.utcnow() - timedelta(minutes=STUCK_INDEXING_THRESHOLD_MINUTES)).isoformat() + try: + result = self.client.table("repositories").update( + {"status": "indexing", "indexing_started_at": datetime.utcnow().isoformat()} + ).eq("id", repo_id).or_( + f"status.neq.indexing,indexing_started_at.is.null,indexing_started_at.lt.{cutoff}" + ).execute() + except Exception as e: + # Only fall back when migration 003 (indexing_started_at) is missing -- detected by the + # column name in the DB error. Re-raise anything else so a real failure isn't masked. + if "indexing_started_at" not in str(e): + raise + logger.warning( + "indexing_started_at column missing; falling back to basic CAS (apply migration 003)", + repo_id=repo_id, error=str(e), + ) + result = self.client.table("repositories").update( + {"status": "indexing"} + ).eq("id", repo_id).neq("status", "indexing").execute() + + # If result.data is empty, no rows matched (a fresh indexing job owns it) return bool(result.data) + + def reset_stuck_indexing_jobs(self) -> int: + """Reset every repo left in 'indexing' to 'error' so the user can retry. + + Called once on startup: indexing runs in-process (BackgroundTasks / WebSocket), so a + restart kills any in-flight job and every 'indexing' row at boot is by definition + orphaned. Returns the number of rows reset. + """ + result = self.client.table("repositories").update( + {"status": "error"} + ).eq("status", "indexing").execute() + count = len(result.data) if result.data else 0 + if count: + logger.warning("Reset orphaned indexing jobs on startup", count=count) + return count def update_file_count(self, repo_id: str, count: int) -> None: """Update repository file count""" diff --git a/backend/tests/test_durable_repo_state.py b/backend/tests/test_durable_repo_state.py new file mode 100644 index 0000000..5fb7521 --- /dev/null +++ b/backend/tests/test_durable_repo_state.py @@ -0,0 +1,216 @@ +"""Durable repo-state v0.1 (#311): lazy re-clone + stuck-job recovery. + +Covers ensure_clone (warm no-op / cold re-clone / concurrency / no git_url) and the +stuck-indexing recovery paths (timestamp stamping, steal-on-retry filter, startup sweep). +""" +import asyncio +import time +from pathlib import Path +from unittest.mock import MagicMock, patch, AsyncMock + +import pytest + +from services.repo_manager import RepositoryManager, RepoCloneError + + +def _make_manager(tmp_path) -> RepositoryManager: + """RepositoryManager with disk scan + Supabase stubbed, repos_dir pointed at a tmp dir.""" + with patch.object(RepositoryManager, "_sync_existing_repos"), \ + patch("services.repo_manager.get_supabase_service") as mock_get_db: + mock_get_db.return_value = MagicMock() + mgr = RepositoryManager() + mgr.repos_dir = Path(tmp_path) + return mgr + + +class TestEnsureClone: + async def test_warm_path_does_not_clone(self, tmp_path): + """Clone already present -> stat only, no re-clone, no behavior change (AC4).""" + mgr = _make_manager(tmp_path) + repo_id = "repo-warm" + (mgr.repos_dir / repo_id / ".git").mkdir(parents=True) + repo = {"id": repo_id, "git_url": "https://example.com/x.git", "branch": "main"} + + with patch.object(mgr, "_clone_into_place") as clone: + path = await mgr.ensure_clone(repo) + + clone.assert_not_called() + assert path == str(mgr.repos_dir / repo_id) + assert repo["local_path"] == str(mgr.repos_dir / repo_id) + + async def test_cold_path_reclones(self, tmp_path): + """Clone missing (redeploy) -> re-clone from git_url@branch (AC1).""" + mgr = _make_manager(tmp_path) + repo_id = "repo-cold" + repo = {"id": repo_id, "git_url": "https://example.com/x.git", "branch": "dev"} + + def fake_clone(rid, url, branch, canonical): + (canonical / ".git").mkdir(parents=True) + + with patch.object(mgr, "_clone_into_place", side_effect=fake_clone) as clone: + path = await mgr.ensure_clone(repo) + + clone.assert_called_once() + rid, url, branch, _canonical = clone.call_args.args + assert (rid, url, branch) == (repo_id, "https://example.com/x.git", "dev") + assert (Path(path) / ".git").exists() + assert repo["local_path"] == str(mgr.repos_dir / repo_id) + + async def test_missing_git_url_raises_actionable_503(self, tmp_path): + """No usable git_url -> actionable 503, not an opaque 500 or silent 404.""" + mgr = _make_manager(tmp_path) + repo = {"id": "repo-nourl", "git_url": "unknown", "branch": "main"} + with pytest.raises(RepoCloneError) as exc: + await mgr.ensure_clone(repo) + assert exc.value.status_code == 503 + assert exc.value.detail["error"] == "REPO_UNAVAILABLE" + + async def test_clone_failure_raises_actionable_503(self, tmp_path): + """A failed clone (private repo / network) surfaces as 503, not a raw 500.""" + mgr = _make_manager(tmp_path) + repo = {"id": "repo-fail", "git_url": "https://example.com/private.git", "branch": "main"} + + def boom(rid, url, branch, canonical): + raise RuntimeError("authentication required") + + with patch.object(mgr, "_clone_into_place", side_effect=boom): + with pytest.raises(RepoCloneError) as exc: + await mgr.ensure_clone(repo) + assert exc.value.status_code == 503 + + async def test_concurrent_callers_clone_once(self, tmp_path): + """Two simultaneous ops on the same missing repo clone exactly once (AC2).""" + mgr = _make_manager(tmp_path) + repo_id = "repo-race" + calls = {"n": 0} + + def fake_clone(rid, url, branch, canonical): + calls["n"] += 1 + time.sleep(0.05) # widen the window so the second caller is forced to wait on the lock + (canonical / ".git").mkdir(parents=True) + + repo_a = {"id": repo_id, "git_url": "https://example.com/x.git", "branch": "main"} + repo_b = {"id": repo_id, "git_url": "https://example.com/x.git", "branch": "main"} + + with patch.object(mgr, "_clone_into_place", side_effect=fake_clone): + await asyncio.gather(mgr.ensure_clone(repo_a), mgr.ensure_clone(repo_b)) + + assert calls["n"] == 1 + + +class TestStuckJobRecovery: + def _service(self): + from services.supabase_service import SupabaseService + svc = SupabaseService() + svc.client = MagicMock() + return svc + + def test_update_status_indexing_stamps_started_at(self): + """Transition to 'indexing' records when it started, for the reaper's clock.""" + svc = self._service() + svc.update_repository_status("r1", "indexing") + updates = svc.client.table.return_value.update.call_args.args[0] + assert updates["status"] == "indexing" + assert "indexing_started_at" in updates + + def test_update_status_non_indexing_does_not_stamp(self): + svc = self._service() + svc.update_repository_status("r1", "indexed") + updates = svc.client.table.return_value.update.call_args.args[0] + assert updates == {"status": "indexed"} + + def test_try_set_indexing_steal_filter(self): + """Steal condition covers fresh-claim, stale, and legacy NULL-timestamp rows (AC5).""" + svc = self._service() + exec_result = MagicMock() + exec_result.data = [{"id": "r1"}] + svc.client.table.return_value.update.return_value.eq.return_value.or_.return_value.execute.return_value = exec_result + + assert svc.try_set_indexing_status("r1") is True + or_arg = svc.client.table.return_value.update.return_value.eq.return_value.or_.call_args.args[0] + assert "status.neq.indexing" in or_arg + assert "indexing_started_at.is.null" in or_arg + assert "indexing_started_at.lt." in or_arg + + def test_try_set_indexing_false_when_fresh_job_owns(self): + """A live, recently-started job is not stealable -> returns False.""" + svc = self._service() + exec_result = MagicMock() + exec_result.data = [] + svc.client.table.return_value.update.return_value.eq.return_value.or_.return_value.execute.return_value = exec_result + assert svc.try_set_indexing_status("r1") is False + + def test_reset_stuck_jobs_resets_indexing_to_error(self): + """Startup sweep flips every 'indexing' row to 'error' and reports the count (AC6).""" + svc = self._service() + exec_result = MagicMock() + exec_result.data = [{"id": "a"}, {"id": "b"}] + svc.client.table.return_value.update.return_value.eq.return_value.execute.return_value = exec_result + + count = svc.reset_stuck_indexing_jobs() + assert count == 2 + assert svc.client.table.return_value.update.call_args.args[0] == {"status": "error"} + assert svc.client.table.return_value.update.return_value.eq.call_args.args == ("status", "indexing") + + def test_update_status_falls_back_when_column_missing(self): + """If migration 003 isn't applied, the status update degrades to no-column instead of 500.""" + svc = self._service() + chain = svc.client.table.return_value.update.return_value.eq.return_value + chain.execute.side_effect = [Exception("PGRST204 column indexing_started_at not found"), MagicMock()] + + svc.update_repository_status("r1", "indexing") + + calls = svc.client.table.return_value.update.call_args_list + assert len(calls) == 2 + assert "indexing_started_at" in calls[0].args[0] # first attempt includes the new column + assert calls[1].args[0] == {"status": "indexing"} # retry drops it + + def test_try_set_indexing_falls_back_to_basic_cas_when_column_missing(self): + """Steal path errors without the column -> fall back to the original atomic CAS.""" + svc = self._service() + eq_chain = svc.client.table.return_value.update.return_value.eq.return_value + eq_chain.or_.return_value.execute.side_effect = Exception( + "PGRST204 Could not find the 'indexing_started_at' column in schema cache" + ) + fallback = MagicMock() + fallback.data = [{"id": "r1"}] + eq_chain.neq.return_value.execute.return_value = fallback + + assert svc.try_set_indexing_status("r1") is True + eq_chain.neq.assert_called_once_with("status", "indexing") + + def test_update_status_reraises_unrelated_error(self): + """A non-migration error must NOT be masked by the missing-column fallback.""" + svc = self._service() + chain = svc.client.table.return_value.update.return_value.eq.return_value + chain.execute.side_effect = Exception("connection reset by peer") + with pytest.raises(Exception, match="connection reset"): + svc.update_repository_status("r1", "indexing") + + def test_try_set_indexing_reraises_unrelated_error(self): + """A non-migration error must NOT trigger the basic-CAS fallback.""" + svc = self._service() + eq_chain = svc.client.table.return_value.update.return_value.eq.return_value + eq_chain.or_.return_value.execute.side_effect = Exception("connection reset by peer") + with pytest.raises(Exception, match="connection reset"): + svc.try_set_indexing_status("r1") + + +class TestRouteWiring: + """Integration: a route actually invokes ensure_clone (guards against the guard being removed).""" + + def test_dependency_route_triggers_ensure_clone(self, client): + repo = { + "id": "r-int", "git_url": "https://example.com/x.git", "branch": "main", + "local_path": "./repos/r-int", "include_paths": None, + "name": "x", "status": "indexed", "file_count": 0, + } + with patch("routes.analysis.get_repo_or_404", return_value=repo), \ + patch("routes.analysis.repo_manager.ensure_clone", new=AsyncMock(return_value="./repos/r-int")) as ensure, \ + patch("routes.analysis.dependency_analyzer.load_from_cache", return_value=None), \ + patch("routes.analysis.dependency_analyzer.build_dependency_graph", return_value={"dependencies": {}, "metrics": {}}), \ + patch("routes.analysis.dependency_analyzer.save_to_cache"): + resp = client.get("/api/v1/repos/r-int/dependencies?force=true") + + assert resp.status_code == 200 + ensure.assert_awaited_once()