@@ -171,29 +171,40 @@ def _github_headers() -> dict:
171171
172172async def _fetch_directory_tree (
173173 owner : str , repo : str , branch : str ,
174+ client : Optional [httpx .AsyncClient ] = None ,
174175) -> dict :
175176 """Fetch directory structure from GitHub Tree API.
176177
177178 Returns a dict with directories (name, path, file_count) grouped
178179 at the most useful level -- top-level for flat repos, package-level
179180 for monorepos with a packages/ directory.
181+
182+ Args:
183+ client: Reuse an existing httpx client to avoid opening a second
184+ connection. If None, creates and closes its own.
180185 """
181186 from services .repo_validator import RepoValidator
182187
183188 url = f"{ _GITHUB_API_BASE } /repos/{ owner } /{ repo } /git/trees/{ branch } ?recursive=1"
184189
185- async with httpx .AsyncClient ( timeout = 15.0 ) as client :
186- response = await client .get (url , headers = _github_headers ())
190+ async def _get ( c : httpx .AsyncClient ) -> httpx . Response :
191+ return await c .get (url , headers = _github_headers ())
187192
188- if response .status_code == 404 :
189- raise HTTPException (status_code = 404 , detail = "Repository or branch not found" )
190- if response .status_code == 403 :
191- raise HTTPException (status_code = 429 , detail = "GitHub API rate limit exceeded" )
192- if response .status_code != 200 :
193- raise HTTPException (status_code = 502 , detail = f"GitHub API error: { response .status_code } " )
193+ if client :
194+ response = await _get (client )
195+ else :
196+ async with httpx .AsyncClient (timeout = 15.0 ) as c :
197+ response = await _get (c )
198+
199+ if response .status_code == 404 :
200+ raise HTTPException (status_code = 404 , detail = "Repository or branch not found" )
201+ if response .status_code == 403 :
202+ raise HTTPException (status_code = 429 , detail = "GitHub API rate limit exceeded" )
203+ if response .status_code != 200 :
204+ raise HTTPException (status_code = 502 , detail = f"GitHub API error: { response .status_code } " )
194205
195- data = response .json ()
196- truncated = data .get ("truncated" , False )
206+ data = response .json ()
207+ truncated = data .get ("truncated" , False )
197208
198209 code_extensions = RepoValidator .CODE_EXTENSIONS
199210 skip_dirs = RepoValidator .SKIP_DIRS
@@ -284,12 +295,17 @@ def validate_url(cls, v: str) -> str:
284295 return v
285296
286297
298+ _ANALYZE_CACHE_TTL = 300 # TTL in seconds (5 minutes) for cached /analyze results, same as validate-repo
299+
300+
287301@router .post ("/analyze" )
288302async def analyze_repository (request : AnalyzeRepoRequest ) -> dict :
289303 """Analyze a GitHub repo's directory structure WITHOUT cloning.
290304
291305 Returns directory tree with file counts so the user can select
292306 which directories to index (monorepo subset selection).
307+
308+ Results are cached for 5 minutes to avoid redundant GitHub API calls.
293309 """
294310 match = _GITHUB_URL_RE .match (request .github_url )
295311 if not match :
@@ -301,25 +317,34 @@ async def analyze_repository(request: AnalyzeRepoRequest) -> dict:
301317 owner = match .group ("owner" )
302318 repo_name = match .group ("repo" ).removesuffix (".git" )
303319
304- # Fetch repo metadata for default branch and size
305- async with httpx .AsyncClient (timeout = 10.0 ) as client :
320+ # Check cache first (same pattern as validate-repo)
321+ from dependencies import cache
322+ cache_key = f"analyze:{ owner } /{ repo_name } "
323+ cached = cache .get (cache_key ) if cache else None
324+ if cached :
325+ logger .info ("Returning cached analysis" , owner = owner , repo = repo_name )
326+ return cached
327+
328+ # Single httpx client for both GitHub API calls
329+ async with httpx .AsyncClient (timeout = 15.0 ) as client :
330+ # 1. Fetch repo metadata for default branch and size
306331 meta_resp = await client .get (
307332 f"{ _GITHUB_API_BASE } /repos/{ owner } /{ repo_name } " ,
308333 headers = _github_headers (),
309334 )
310335
311- if meta_resp .status_code == 404 :
312- raise HTTPException (status_code = 404 , detail = "Repository not found" )
313- if meta_resp .status_code == 403 :
314- raise HTTPException (status_code = 429 , detail = "GitHub API rate limit exceeded" )
315- if meta_resp .status_code != 200 :
316- raise HTTPException (status_code = 502 , detail = "Failed to fetch repository metadata" )
336+ if meta_resp .status_code == 404 :
337+ raise HTTPException (status_code = 404 , detail = "Repository not found" )
338+ if meta_resp .status_code == 403 :
339+ raise HTTPException (status_code = 429 , detail = "GitHub API rate limit exceeded" )
340+ if meta_resp .status_code != 200 :
341+ raise HTTPException (status_code = 502 , detail = "Failed to fetch repository metadata" )
317342
318- metadata = meta_resp .json ()
319- default_branch = metadata .get ("default_branch" , "main" )
343+ metadata = meta_resp .json ()
344+ default_branch = metadata .get ("default_branch" , "main" )
320345
321- # Fetch directory tree
322- tree_data = await _fetch_directory_tree (owner , repo_name , default_branch )
346+ # 2. Fetch directory tree (reuse same client)
347+ tree_data = await _fetch_directory_tree (owner , repo_name , default_branch , client = client )
323348
324349 logger .info (
325350 "Analyzed repo structure" ,
@@ -329,7 +354,7 @@ async def analyze_repository(request: AnalyzeRepoRequest) -> dict:
329354 suggestion = tree_data .get ("suggestion" ),
330355 )
331356
332- return {
357+ result = {
333358 "owner" : owner ,
334359 "repo" : repo_name ,
335360 "default_branch" : default_branch ,
@@ -339,6 +364,12 @@ async def analyze_repository(request: AnalyzeRepoRequest) -> dict:
339364 ** tree_data ,
340365 }
341366
367+ # Cache for 5 minutes
368+ if cache :
369+ cache .set (cache_key , result , ttl = _ANALYZE_CACHE_TTL )
370+
371+ return result
372+
342373
343374@router .delete ("/{repo_id}" )
344375async def delete_repository (
0 commit comments