Skip to content

Commit 8da4376

Browse files
committed
fix: reuse httpx client + add 5-min cache to /repos/analyze
1. _fetch_directory_tree now accepts an optional client param, so analyze_repository opens one AsyncClient for both the metadata and tree API calls instead of two.
2. Results are cached for 5 min (same TTL as validate-repo), keyed by owner/repo, which avoids redundant GitHub API calls on page refresh or retry.
3. The cache uses the existing Redis cache from dependencies (same as validate-repo).

24 tests pass.
1 parent 82ef4ef commit 8da4376

1 file changed

Lines changed: 54 additions & 23 deletions

File tree

backend/routes/repos.py

Lines changed: 54 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -171,29 +171,40 @@ def _github_headers() -> dict:
171171

172172
async def _fetch_directory_tree(
173173
owner: str, repo: str, branch: str,
174+
client: Optional[httpx.AsyncClient] = None,
174175
) -> dict:
175176
"""Fetch directory structure from GitHub Tree API.
176177
177178
Returns a dict with directories (name, path, file_count) grouped
178179
at the most useful level -- top-level for flat repos, package-level
179180
for monorepos with a packages/ directory.
181+
182+
Args:
183+
client: Reuse an existing httpx client to avoid opening a second
184+
connection. If None, creates and closes its own.
180185
"""
181186
from services.repo_validator import RepoValidator
182187

183188
url = f"{_GITHUB_API_BASE}/repos/{owner}/{repo}/git/trees/{branch}?recursive=1"
184189

185-
async with httpx.AsyncClient(timeout=15.0) as client:
186-
response = await client.get(url, headers=_github_headers())
190+
async def _get(c: httpx.AsyncClient) -> httpx.Response:
191+
return await c.get(url, headers=_github_headers())
187192

188-
if response.status_code == 404:
189-
raise HTTPException(status_code=404, detail="Repository or branch not found")
190-
if response.status_code == 403:
191-
raise HTTPException(status_code=429, detail="GitHub API rate limit exceeded")
192-
if response.status_code != 200:
193-
raise HTTPException(status_code=502, detail=f"GitHub API error: {response.status_code}")
193+
if client:
194+
response = await _get(client)
195+
else:
196+
async with httpx.AsyncClient(timeout=15.0) as c:
197+
response = await _get(c)
198+
199+
if response.status_code == 404:
200+
raise HTTPException(status_code=404, detail="Repository or branch not found")
201+
if response.status_code == 403:
202+
raise HTTPException(status_code=429, detail="GitHub API rate limit exceeded")
203+
if response.status_code != 200:
204+
raise HTTPException(status_code=502, detail=f"GitHub API error: {response.status_code}")
194205

195-
data = response.json()
196-
truncated = data.get("truncated", False)
206+
data = response.json()
207+
truncated = data.get("truncated", False)
197208

198209
code_extensions = RepoValidator.CODE_EXTENSIONS
199210
skip_dirs = RepoValidator.SKIP_DIRS
@@ -284,12 +295,17 @@ def validate_url(cls, v: str) -> str:
284295
return v
285296

286297

298+
_ANALYZE_CACHE_TTL = 300 # 5 minutes, same as validate-repo
299+
300+
287301
@router.post("/analyze")
288302
async def analyze_repository(request: AnalyzeRepoRequest) -> dict:
289303
"""Analyze a GitHub repo's directory structure WITHOUT cloning.
290304
291305
Returns directory tree with file counts so the user can select
292306
which directories to index (monorepo subset selection).
307+
308+
Results are cached for 5 minutes to avoid redundant GitHub API calls.
293309
"""
294310
match = _GITHUB_URL_RE.match(request.github_url)
295311
if not match:
@@ -301,25 +317,34 @@ async def analyze_repository(request: AnalyzeRepoRequest) -> dict:
301317
owner = match.group("owner")
302318
repo_name = match.group("repo").removesuffix(".git")
303319

304-
# Fetch repo metadata for default branch and size
305-
async with httpx.AsyncClient(timeout=10.0) as client:
320+
# Check cache first (same pattern as validate-repo)
321+
from dependencies import cache
322+
cache_key = f"analyze:{owner}/{repo_name}"
323+
cached = cache.get(cache_key) if cache else None
324+
if cached:
325+
logger.info("Returning cached analysis", owner=owner, repo=repo_name)
326+
return cached
327+
328+
# Single httpx client for both GitHub API calls
329+
async with httpx.AsyncClient(timeout=15.0) as client:
330+
# 1. Fetch repo metadata for default branch and size
306331
meta_resp = await client.get(
307332
f"{_GITHUB_API_BASE}/repos/{owner}/{repo_name}",
308333
headers=_github_headers(),
309334
)
310335

311-
if meta_resp.status_code == 404:
312-
raise HTTPException(status_code=404, detail="Repository not found")
313-
if meta_resp.status_code == 403:
314-
raise HTTPException(status_code=429, detail="GitHub API rate limit exceeded")
315-
if meta_resp.status_code != 200:
316-
raise HTTPException(status_code=502, detail="Failed to fetch repository metadata")
336+
if meta_resp.status_code == 404:
337+
raise HTTPException(status_code=404, detail="Repository not found")
338+
if meta_resp.status_code == 403:
339+
raise HTTPException(status_code=429, detail="GitHub API rate limit exceeded")
340+
if meta_resp.status_code != 200:
341+
raise HTTPException(status_code=502, detail="Failed to fetch repository metadata")
317342

318-
metadata = meta_resp.json()
319-
default_branch = metadata.get("default_branch", "main")
343+
metadata = meta_resp.json()
344+
default_branch = metadata.get("default_branch", "main")
320345

321-
# Fetch directory tree
322-
tree_data = await _fetch_directory_tree(owner, repo_name, default_branch)
346+
# 2. Fetch directory tree (reuse same client)
347+
tree_data = await _fetch_directory_tree(owner, repo_name, default_branch, client=client)
323348

324349
logger.info(
325350
"Analyzed repo structure",
@@ -329,7 +354,7 @@ async def analyze_repository(request: AnalyzeRepoRequest) -> dict:
329354
suggestion=tree_data.get("suggestion"),
330355
)
331356

332-
return {
357+
result = {
333358
"owner": owner,
334359
"repo": repo_name,
335360
"default_branch": default_branch,
@@ -339,6 +364,12 @@ async def analyze_repository(request: AnalyzeRepoRequest) -> dict:
339364
**tree_data,
340365
}
341366

367+
# Cache for 5 minutes
368+
if cache:
369+
cache.set(cache_key, result, ttl=_ANALYZE_CACHE_TTL)
370+
371+
return result
372+
342373

343374
@router.delete("/{repo_id}")
344375
async def delete_repository(

0 commit comments

Comments
 (0)