|
2 | 2 | Repository Manager (Supabase Edition) |
3 | 3 | Handles repository CRUD operations with PostgreSQL via Supabase |
4 | 4 | """ |
| 5 | +import asyncio |
| 6 | +import os |
| 7 | +import shutil |
5 | 8 | import uuid |
6 | | -from typing import List, Optional |
| 9 | +from typing import Dict, List, Optional |
7 | 10 | import git |
8 | 11 | from pathlib import Path |
| 12 | +from fastapi import HTTPException |
9 | 13 | from services.supabase_service import get_supabase_service |
10 | 14 | from services.observability import logger, metrics |
11 | 15 |
|
12 | 16 |
|
class RepoCloneError(HTTPException):
    """Raised when a repo's working tree is missing and re-cloning from its remote failed.

    This is an HTTPException subclass carrying status 503 and a structured ``detail``
    payload, so it is meant to reach the client as an actionable "unavailable" response
    rather than an opaque 500. The user-facing message spells out what the user can do,
    because the redeploy that usually triggers this is invisible to them. (#311)

    Args:
        repo_id: Identifier of the affected repository; echoed in the response detail.
        reason: Internal description of the failure. It is stored on the exception as
            ``self.reason`` and is not included in the response detail.
    """

    def __init__(self, repo_id: str, reason: str = ""):
        # Fixed, user-facing explanation; the internal reason is deliberately kept separate.
        user_message = (
            "Repository source files are temporarily unavailable and could not be "
            "restored from the git remote. Private repositories are not yet supported "
            "for re-sync; for public repos, please retry shortly."
        )
        payload = {
            "error": "REPO_UNAVAILABLE",
            "repo_id": repo_id,
            "message": user_message,
        }
        super().__init__(status_code=503, detail=payload)
        self.reason = reason
| 39 | + |
| 40 | + |
13 | 41 | class RepositoryManager: |
14 | 42 | """Manage repositories with Supabase persistence""" |
15 | 43 |
|
def __init__(self):
    """Set up the local checkout directory, the database handle, and clone locks.

    Finishes by calling ``_sync_existing_repos()`` to discover and sync repositories
    that already exist.
    """
    # Root directory for local checkouts; created if it does not exist yet.
    checkout_root = Path("./repos")
    checkout_root.mkdir(exist_ok=True)
    self.repos_dir = checkout_root

    self.db = get_supabase_service()

    # One lock per repo id, so two concurrent operations on the same missing clone do
    # not both clone. The original note states a single uvicorn worker is used, which
    # is why an in-process lock is considered sufficient.
    self._clone_locks: Dict[str, asyncio.Lock] = {}

    # Discover and sync existing repositories on startup.
    self._sync_existing_repos()
23 | 55 |
|
@@ -126,10 +158,69 @@ def add_repo(self, name: str, git_url: str, branch: str = "main", user_id: Optio |
126 | 158 | except Exception as e: |
127 | 159 | # Cleanup on failure |
128 | 160 | if local_path.exists(): |
129 | | - import shutil |
130 | 161 | shutil.rmtree(local_path) |
131 | 162 | raise Exception(f"Failed to clone repository: {str(e)}") |
132 | | - |
| 163 | + |
async def ensure_clone(self, repo: dict) -> str:
    """Return the on-disk path of ``repo``'s working tree, re-cloning it when absent.

    The checkout under ``self.repos_dir / repo["id"]`` is treated as a cache: per the
    original design note, redeploys wipe the ephemeral ``./repos`` directory while the
    persisted repository records survive, so the git remote is the source of truth.
    When ``<checkout>/.git`` exists nothing is cloned; otherwise the repository is
    cloned again from ``repo["git_url"]`` in a worker thread.

    Args:
        repo: Repository record. Must contain ``"id"``; ``"git_url"`` and ``"branch"``
            are read on a cache miss (branch falls back to ``"main"``). Its
            ``"local_path"`` entry is overwritten in place with the canonical path.

    Returns:
        The canonical local path as a string.

    Raises:
        RepoCloneError: If the checkout is missing and there is no usable ``git_url``
            (empty or ``"unknown"``), or if the re-clone fails for any reason.
        KeyError: If ``repo`` has no ``"id"`` key.
    """
    repo_id = repo["id"]
    checkout = self.repos_dir / repo_id
    checkout_str = str(checkout)
    git_marker = checkout / ".git"

    # Cold path only: everything below is skipped when the clone is already present.
    if not git_marker.exists():
        remote_url = repo.get("git_url")
        if not remote_url or remote_url == "unknown":
            raise RepoCloneError(repo_id, "no git_url on record")
        ref = repo.get("branch") or "main"

        async with self._clone_locks.setdefault(repo_id, asyncio.Lock()):
            # Re-check under the lock: another coroutine may have finished the clone
            # while this one was waiting.
            if not git_marker.exists():
                try:
                    await asyncio.to_thread(
                        self._clone_into_place, repo_id, remote_url, ref, checkout
                    )
                except Exception as exc:
                    # Private repo without credentials, network failure, deleted
                    # remote, ...: report as an actionable 503 instead of a bare 500.
                    logger.error(
                        "Re-clone failed", repo_id=repo_id, git_url=remote_url, error=str(exc)
                    )
                    raise RepoCloneError(repo_id, str(exc)) from exc
                else:
                    logger.info(
                        "Re-cloned repo on demand (cache miss)",
                        repo_id=repo_id,
                        git_url=remote_url,
                    )
                    metrics.increment("repos_recloned")

    repo["local_path"] = checkout_str
    return checkout_str
| 201 | + |
def _clone_into_place(self, repo_id: str, git_url: str, branch: str, canonical: Path) -> None:
    """Shallow-clone ``git_url`` into a staging dir, then rename it onto ``canonical``.

    The clone (``depth=1`` of ``branch``) is written to a uniquely named hidden
    directory inside ``self.repos_dir`` and only moved to ``canonical`` once it has
    completed, so a crashed or concurrent clone does not leave a half-populated
    canonical directory behind. This performs blocking git and filesystem work; it is
    intended to run in a worker thread and should not be called directly on the event
    loop.

    Args:
        repo_id: Repository id, used to name the staging directory.
        git_url: Remote to clone from.
        branch: Branch to check out.
        canonical: Final location of the working tree.

    Raises:
        Exception: Any error from the clone, the removal of a leftover ``canonical``
            directory, or the rename is re-raised after the staging directory has
            been cleaned up on a best-effort basis.
    """
    staging_name = f".{repo_id}.tmp.{uuid.uuid4().hex}"
    staging = self.repos_dir / staging_name

    def _discard_staging() -> None:
        # Best-effort cleanup; errors here must not mask the original failure.
        if staging.exists():
            shutil.rmtree(staging, ignore_errors=True)

    try:
        git.Repo.clone_from(git_url, staging, branch=branch, depth=1)
        # Remove any leftover partial directory before the swap. Errors are NOT ignored
        # here: a failed removal has to surface to the caller rather than letting the
        # rename hit a directory that could not be cleaned, which would fail later with
        # a more confusing error.
        if canonical.exists():
            shutil.rmtree(canonical)
        os.rename(staging, canonical)  # atomic on the same filesystem
    except Exception:
        _discard_staging()
        raise
| 223 | + |
133 | 224 | def update_status(self, repo_id: str, status: str): |
134 | 225 | """Update repository status""" |
135 | 226 | self.db.update_repository_status(repo_id, status) |
@@ -158,8 +249,6 @@ def update_last_commit(self, repo_id: str, commit_sha: str, function_count: int |
158 | 249 |
|
159 | 250 | def delete_repo(self, repo_id: str) -> bool: |
160 | 251 | """Delete repository and clean up local files""" |
161 | | - import shutil |
162 | | - |
163 | 252 | repo = self.get_repo(repo_id) |
164 | 253 | if not repo: |
165 | 254 | return False |
|
0 commit comments