33from pydantic import BaseModel , field_validator
44from typing import List , Optional
55from pathlib import Path
6+ from urllib .parse import quote
67import hashlib
78import os
89import re
@@ -185,16 +186,24 @@ async def _fetch_directory_tree(
185186 """
186187 from services .repo_validator import RepoValidator
187188
188- url = f"{ _GITHUB_API_BASE } /repos/{ owner } /{ repo } /git/trees/{ branch } ?recursive=1"
189+ # Encode branch for URL safety -- "feature/foo" -> "feature%2Ffoo"
190+ encoded_branch = quote (branch , safe = "" )
191+ url = f"{ _GITHUB_API_BASE } /repos/{ owner } /{ repo } /git/trees/{ encoded_branch } ?recursive=1"
189192
190193 async def _get (c : httpx .AsyncClient ) -> httpx .Response :
191194 return await c .get (url , headers = _github_headers ())
192195
193- if client :
194- response = await _get (client )
195- else :
196- async with httpx .AsyncClient (timeout = 15.0 ) as c :
197- response = await _get (c )
196+ try :
197+ if client :
198+ response = await _get (client )
199+ else :
200+ async with httpx .AsyncClient (timeout = 15.0 ) as c :
201+ response = await _get (c )
202+ except httpx .TimeoutException :
203+ raise HTTPException (status_code = 504 , detail = "GitHub API request timed out" )
204+ except httpx .RequestError as e :
205+ logger .error ("GitHub tree API network error" , error = str (e ))
206+ raise HTTPException (status_code = 502 , detail = "Failed to connect to GitHub API" )
198207
199208 if response .status_code == 404 :
200209 raise HTTPException (status_code = 404 , detail = "Repository or branch not found" )
@@ -203,7 +212,10 @@ async def _get(c: httpx.AsyncClient) -> httpx.Response:
203212 if response .status_code != 200 :
204213 raise HTTPException (status_code = 502 , detail = f"GitHub API error: { response .status_code } " )
205214
206- data = response .json ()
215+ try :
216+ data = response .json ()
217+ except ValueError :
218+ raise HTTPException (status_code = 502 , detail = "Invalid response from GitHub API" )
207219 truncated = data .get ("truncated" , False )
208220
209221 code_extensions = RepoValidator .CODE_EXTENSIONS
@@ -290,8 +302,10 @@ def validate_url(cls, v: str) -> str:
290302 v = v .strip ().rstrip ("/" )
291303 if not v :
292304 raise ValueError ("GitHub URL is required" )
293- if "github.com" not in v .lower ():
294- raise ValueError ("Only GitHub URLs are supported" )
305+ if not _GITHUB_URL_RE .match (v ):
306+ raise ValueError (
307+ "Invalid GitHub URL. Expected: https://github.com/owner/repo"
308+ )
295309 return v
296310
297311
@@ -326,25 +340,36 @@ async def analyze_repository(request: AnalyzeRepoRequest) -> dict:
326340 return cached
327341
328342 # Single httpx client for both GitHub API calls
329- async with httpx .AsyncClient (timeout = 15.0 ) as client :
330- # 1. Fetch repo metadata for default branch and size
331- meta_resp = await client .get (
332- f"{ _GITHUB_API_BASE } /repos/{ owner } /{ repo_name } " ,
333- headers = _github_headers (),
334- )
343+ try :
344+ async with httpx .AsyncClient (timeout = 15.0 ) as client :
345+ # 1. Fetch repo metadata for default branch and size
346+ meta_resp = await client .get (
347+ f"{ _GITHUB_API_BASE } /repos/{ owner } /{ repo_name } " ,
348+ headers = _github_headers (),
349+ )
335350
336- if meta_resp .status_code == 404 :
337- raise HTTPException (status_code = 404 , detail = "Repository not found" )
338- if meta_resp .status_code == 403 :
339- raise HTTPException (status_code = 429 , detail = "GitHub API rate limit exceeded" )
340- if meta_resp .status_code != 200 :
341- raise HTTPException (status_code = 502 , detail = "Failed to fetch repository metadata" )
351+ if meta_resp .status_code == 404 :
352+ raise HTTPException (status_code = 404 , detail = "Repository not found" )
353+ if meta_resp .status_code == 403 :
354+ raise HTTPException (status_code = 429 , detail = "GitHub API rate limit exceeded" )
355+ if meta_resp .status_code != 200 :
356+ raise HTTPException (status_code = 502 , detail = "Failed to fetch repository metadata" )
342357
343- metadata = meta_resp .json ()
344- default_branch = metadata .get ("default_branch" , "main" )
358+ try :
359+ metadata = meta_resp .json ()
360+ except ValueError :
361+ raise HTTPException (status_code = 502 , detail = "Invalid response from GitHub API" )
362+ default_branch = metadata .get ("default_branch" , "main" )
345363
346- # 2. Fetch directory tree (reuse same client)
347- tree_data = await _fetch_directory_tree (owner , repo_name , default_branch , client = client )
364+ # 2. Fetch directory tree (reuse same client)
365+ tree_data = await _fetch_directory_tree (owner , repo_name , default_branch , client = client )
366+ except HTTPException :
367+ raise
368+ except httpx .TimeoutException :
369+ raise HTTPException (status_code = 504 , detail = "GitHub API request timed out" )
370+ except httpx .RequestError as e :
371+ logger .error ("GitHub API network error" , error = str (e ))
372+ raise HTTPException (status_code = 502 , detail = "Failed to connect to GitHub API" )
348373
349374 logger .info (
350375 "Analyzed repo structure" ,
0 commit comments