Skip to content

Commit 41f3428

Browse files
committed
refactor: split playground.py (1306 lines) into 6 focused modules
playground.py was 1306 lines -- 6.5x our 200-line guideline. Split into a package with focused modules:

- playground/__init__.py (29 lines) -- combines routers, re-exports
- playground/helpers.py (80 lines) -- constants, DEMO_REPO_IDS, session utils
- playground/search.py (218 lines) -- POST /search, GET /repos, GET /stats
- playground/session.py (68 lines) -- GET /limits, GET /session
- playground/validation.py (185 lines) -- POST /validate-repo, GitHub API helpers
- playground/indexing.py (247 lines) -- POST /index, GET /index/{job_id}

Total: 827 lines across 6 files (was 1306 in 1 file). Net: 479 lines removed during cleanup.

main.py is unchanged -- it imports router and load_demo_repos from routes.playground, which __init__.py re-exports.

Test updates:
- test_validate_repo.py: updated imports + patch targets
- test_anonymous_indexing.py: updated 40+ patch decorators to point to the correct new modules (indexing.* for indexing tests, search.* for search tests)

289 tests pass. Partial OPE-78 (repos.py split to follow).
1 parent 68ac966 commit 41f3428

9 files changed

Lines changed: 902 additions & 1384 deletions

File tree

backend/routes/playground.py

Lines changed: 0 additions & 1303 deletions
This file was deleted.
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
"""
2+
Playground routes package.
3+
4+
Split from a 1306-line monolith into focused modules:
5+
search.py -- search endpoint, repo resolution
6+
session.py -- session info, rate limits
7+
validation.py -- GitHub URL validation, metadata
8+
indexing.py -- anonymous indexing start + status
9+
helpers.py -- shared constants and utilities
10+
"""
11+
from fastapi import APIRouter
12+
13+
from routes.playground.helpers import load_demo_repos
14+
from routes.playground.search import router as search_router
15+
from routes.playground.session import router as session_router
16+
from routes.playground.validation import router as validation_router
17+
from routes.playground.indexing import router as indexing_router
18+
19+
# Aggregate router consumed by main.py through
# `from routes.playground import router, load_demo_repos`.
router = APIRouter(prefix="/playground", tags=["Playground"])

# Sub-routers are mounted in a fixed order: session, search, validation,
# indexing.
for _sub_router in (
    session_router,
    search_router,
    validation_router,
    indexing_router,
):
    router.include_router(_sub_router)
del _sub_router  # keep the package namespace limited to the intended names

# DEMO_REPO_IDS is re-exported because tests reference it from this package.
from routes.playground.helpers import DEMO_REPO_IDS

__all__ = ["router", "load_demo_repos", "DEMO_REPO_IDS"]
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
"""
2+
Shared helpers and constants for playground routes.
3+
4+
All playground sub-modules import from here to avoid circular deps.
5+
"""
6+
import os
7+
import re
8+
from typing import Optional
9+
from fastapi import Request, Response
10+
11+
from dependencies import repo_manager, redis_client
12+
from services.observability import logger
13+
from services.playground_limiter import PlaygroundLimiter, get_playground_limiter
14+
15+
# Demo repo lookup table: starts empty and is populated on startup by
# load_demo_repos().
DEMO_REPO_IDS = {}

# Session cookie settings
SESSION_COOKIE_NAME = "pg_session"
SESSION_COOKIE_MAX_AGE = 24 * 60 * 60  # 24 hours, in seconds
IS_PRODUCTION = os.environ.get("ENVIRONMENT", "development").lower() == "production"

# GitHub validation settings
GITHUB_URL_PATTERN = re.compile(r"^https?://github\.com/(?P<owner>[a-zA-Z0-9_.-]+)/(?P<repo>[a-zA-Z0-9_.-]+)/?$")
ANONYMOUS_FILE_LIMIT = 200
GITHUB_API_BASE = "https://api.github.com"
GITHUB_API_TIMEOUT = 10.0
VALIDATION_CACHE_TTL = 5 * 60  # 5 minutes, in seconds
31+
32+
33+
async def load_demo_repos() -> None:
    """Load pre-indexed demo repos into DEMO_REPO_IDS.

    Called from main.py on startup. Every repo returned by
    ``repo_manager.list_repos()`` whose lower-cased name contains one of the
    demo keywords is stored in ``DEMO_REPO_IDS`` under that keyword, mapped to
    the repo's ``id``. If several repos match the same keyword, the last one
    listed wins.

    This is best-effort: any exception is logged as a warning and swallowed,
    so a failure here does not propagate to the caller.
    """
    # Keywords are tested in this order and the first match wins, so a name
    # containing both "flask" and "react" is registered as "flask".
    demo_keywords = ("flask", "fastapi", "express", "react")
    try:
        for repo in repo_manager.list_repos():
            # `name` can be present but None; `.get("name", "")` would return
            # None in that case and the resulting AttributeError would abort
            # loading of every remaining repo.
            name_lower = (repo.get("name") or "").lower()
            matched = next((kw for kw in demo_keywords if kw in name_lower), None)
            if matched is not None:
                DEMO_REPO_IDS[matched] = repo["id"]
        logger.info("Loaded demo repos", repos=list(DEMO_REPO_IDS.keys()))
    except Exception as e:
        logger.warning("Could not load demo repos", error=str(e))
50+
51+
52+
def get_client_ip(req: Request) -> str:
    """Extract the client IP from a request.

    The first entry of the ``x-forwarded-for`` header is preferred when it is
    present and non-empty. Otherwise the socket peer address
    (``req.client.host``) is used, or the literal ``"unknown"`` when the
    request carries no client information.

    Args:
        req: Incoming request; only ``client`` and ``headers`` are read.

    Returns:
        The chosen IP string; never empty.
    """
    peer_ip = req.client.host if req.client else "unknown"

    forwarded = req.headers.get("x-forwarded-for")
    if not forwarded:
        return peer_ip

    # NOTE(review): this header is client-supplied unless a trusted proxy
    # overwrites it -- confirm the deployment always sits behind such a proxy.
    first_hop = forwarded.split(",")[0].strip()

    # A header such as ", 1.2.3.4" has an empty first entry; fall back to the
    # peer address instead of returning "" as the client IP.
    return first_hop or peer_ip
59+
60+
61+
def get_session_token(req: Request) -> Optional[str]:
    """Look up the playground session token in the request cookies.

    Returns whatever ``req.cookies.get`` yields for SESSION_COOKIE_NAME.
    """
    cookies = req.cookies
    return cookies.get(SESSION_COOKIE_NAME)
64+
65+
66+
def set_session_cookie(response: Response, token: str) -> None:
    """Store the session token on the response as an httpOnly cookie.

    The cookie uses SESSION_COOKIE_NAME and SESSION_COOKIE_MAX_AGE, is marked
    SameSite=lax, and is flagged secure only when IS_PRODUCTION is true.
    """
    cookie_options = {
        "key": SESSION_COOKIE_NAME,
        "value": token,
        "max_age": SESSION_COOKIE_MAX_AGE,
        "httponly": True,
        "samesite": "lax",
        "secure": IS_PRODUCTION,
    }
    response.set_cookie(**cookie_options)
76+
77+
78+
def get_limiter() -> PlaygroundLimiter:
    """Return the limiter that get_playground_limiter yields for redis_client."""
    limiter = get_playground_limiter(redis_client)
    return limiter
Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
"""Anonymous indexing routes for the playground."""
2+
import time
3+
from typing import Optional
4+
from datetime import datetime, timezone
5+
from fastapi import APIRouter, HTTPException, Request, Response, BackgroundTasks
6+
from pydantic import BaseModel, field_validator
7+
8+
from dependencies import indexer, redis_client
9+
from services.observability import logger
10+
from services.anonymous_indexer import AnonymousIndexingJob, run_indexing_job
11+
from routes.playground.helpers import (
12+
ANONYMOUS_FILE_LIMIT,
13+
get_client_ip, get_session_token, set_session_cookie, get_limiter,
14+
)
15+
from routes.playground.validation import (
16+
parse_github_url, fetch_repo_metadata, count_code_files,
17+
)
18+
19+
# Router for the indexing endpoints defined in this module; created without a
# prefix of its own.
router = APIRouter()
20+
21+
22+
class IndexRepoRequest(BaseModel):
    """Body of a request to index a repository anonymously."""

    # URL of the repository to index; checked by the validator below.
    github_url: str
    # Branch to index; None when the caller does not specify one.
    branch: Optional[str] = None
    # Opt-in flag for partial indexing.
    partial: bool = False

    @field_validator("github_url")
    @classmethod
    def validate_github_url_format(cls, v: str) -> str:
        """Trim the URL and run cheap format checks.

        Returns the stripped URL. Raises ValueError when the URL is empty,
        lacks an http:// or https:// scheme, or does not mention github.com.
        """
        url = v.strip()
        if url == "":
            raise ValueError("GitHub URL is required")

        has_scheme = url.startswith("http://") or url.startswith("https://")
        if not has_scheme:
            raise ValueError("URL must start with http:// or https://")

        if url.lower().find("github.com") == -1:
            raise ValueError("URL must be a GitHub repository URL")

        return url
39+
40+
41+
@router.post("/index", status_code=202)
async def start_anonymous_indexing(
    request: IndexRepoRequest,
    req: Request,
    response: Response,
    background_tasks: BackgroundTasks,
):
    """Start indexing a public GitHub repository for anonymous users.

    Steps, in order:
      1. Reuse the session from the cookie, or create one and set its cookie.
      2. Reject the request if the session already has an unexpired indexed
         repo.
      3. Parse the GitHub URL, fetch repo metadata, and reject missing or
         private repositories.
      4. Count code files (falling back to a size-based estimate when the
         count is unavailable) and enforce ANONYMOUS_FILE_LIMIT unless
         ``partial`` was requested.
      5. Create the job record and schedule ``run_indexing_job`` as a
         background task.

    Returns:
        A dict with ``job_id``, ``status`` ("queued"),
        ``estimated_time_seconds``, ``file_count`` and ``message``; partial
        runs additionally carry ``partial`` and ``total_files``.

    Raises:
        HTTPException: 409 when the session already has an indexed repo;
            400 for an invalid URL, a repository that is not found, private,
            or too large; 429 when GitHub rate-limits the metadata lookup;
            502 for any other metadata error.
    """
    start_time = time.time()
    limiter = get_limiter()

    # Session validation
    session_token = get_session_token(req)
    client_ip = get_client_ip(req)

    if not session_token:
        session_token = limiter._generate_session_token()
        limiter.create_session(session_token)
        # NOTE(review): the cookie is set on the injected `response`; if a
        # later check raises HTTPException the cookie is probably not sent
        # with the error response -- confirm against FastAPI's behavior.
        set_session_cookie(response, session_token)
        logger.info("Created new session for indexing",
                    session_token=session_token[:8], client_ip=client_ip)

    # Only one indexed repo is allowed per session while it is unexpired.
    session_data = limiter.get_session_data(session_token)

    if session_data.indexed_repo:
        if not _indexed_repo_is_expired(session_data.indexed_repo):
            logger.info("Session already has indexed repo",
                        session_token=session_token[:8],
                        existing_repo=session_data.indexed_repo.get("repo_id"))
            raise HTTPException(
                status_code=409,
                detail={
                    "error": "already_indexed",
                    "message": "You already have an indexed repository. Only 1 repo per session allowed.",
                    "indexed_repo": session_data.indexed_repo,
                }
            )
        logger.info("Existing indexed repo expired, allowing new indexing",
                    session_token=session_token[:8])

    # Validate GitHub URL
    owner, repo_name, parse_error = parse_github_url(request.github_url)
    if parse_error:
        raise HTTPException(status_code=400, detail={
            "error": "validation_failed", "reason": "invalid_url", "message": parse_error
        })

    metadata = await fetch_repo_metadata(owner, repo_name)
    if "error" in metadata:
        error_type = metadata["error"]
        if error_type == "not_found":
            raise HTTPException(status_code=400, detail={
                "error": "validation_failed", "reason": "not_found",
                "message": "Repository not found. Check the URL or ensure it's public."
            })
        if error_type == "rate_limited":
            raise HTTPException(status_code=429, detail={
                "error": "github_rate_limit", "message": "GitHub API rate limit exceeded. Try again later."
            })
        raise HTTPException(status_code=502, detail={
            "error": "github_error", "message": metadata.get("message", "Failed to fetch repository info")
        })

    if metadata.get("private", False):
        raise HTTPException(status_code=400, detail={
            "error": "validation_failed", "reason": "private",
            "message": "This repository is private. Anonymous indexing only supports public repositories."
        })

    branch = request.branch or metadata.get("default_branch", "main")
    file_count, count_error = await count_code_files(owner, repo_name, branch)

    if count_error:
        # No exact count available: estimate from the repo size (size // 3).
        # A "truncated" count is forced above the anonymous limit; any other
        # error only guarantees at least one file.
        repo_size_kb = metadata.get("size", 0)
        minimum = ANONYMOUS_FILE_LIMIT + 1 if count_error == "truncated" else 1
        file_count = max(repo_size_kb // 3, minimum)

    is_partial = False
    files_to_index = file_count

    if file_count > ANONYMOUS_FILE_LIMIT:
        if not request.partial:
            raise HTTPException(status_code=400, detail={
                "error": "validation_failed", "reason": "too_large",
                "message": f"Repository has {file_count:,} code files. "
                           f"Anonymous limit is {ANONYMOUS_FILE_LIMIT}. "
                           f"Use partial=true to index first {ANONYMOUS_FILE_LIMIT} files.",
                "file_count": file_count, "limit": ANONYMOUS_FILE_LIMIT,
                "hint": "Set partial=true to index a subset of files",
            })
        is_partial = True
        files_to_index = ANONYMOUS_FILE_LIMIT
        logger.info("Partial indexing enabled", total_files=file_count, indexing=files_to_index)

    # Create job and start background indexing
    response_time_ms = int((time.time() - start_time) * 1000)
    job_manager = AnonymousIndexingJob(redis_client)
    job_id = job_manager.generate_job_id()

    # The job record stores the full file count; the background task is given
    # the number of files that will actually be indexed.
    job_manager.create_job(
        job_id=job_id, session_id=session_token, github_url=request.github_url,
        owner=owner, repo_name=repo_name, branch=branch,
        file_count=file_count, is_partial=is_partial, max_files=files_to_index,
    )

    background_tasks.add_task(
        run_indexing_job,
        job_manager=job_manager, indexer=indexer, limiter=limiter,
        job_id=job_id, session_id=session_token, github_url=request.github_url,
        owner=owner, repo_name=repo_name, branch=branch,
        file_count=files_to_index, max_files=files_to_index if is_partial else None,
    )

    logger.info("Indexing job queued", job_id=job_id, owner=owner, repo=repo_name,
                branch=branch, file_count=files_to_index, is_partial=is_partial,
                session_token=session_token[:8], response_time_ms=response_time_ms)

    # Estimate of 0.3 s per file, with a floor of 10 seconds.
    estimated_seconds = max(10, int(files_to_index * 0.3))
    result = {
        "job_id": job_id, "status": "queued",
        "estimated_time_seconds": estimated_seconds, "file_count": files_to_index,
        "message": f"Indexing started. Poll /playground/index/{job_id} for status.",
    }

    if is_partial:
        result["partial"] = True
        result["total_files"] = file_count
        result["message"] = (
            f"Partial indexing started ({files_to_index} of {file_count} files). "
            f"Poll /playground/index/{job_id} for status."
        )

    return result


def _indexed_repo_is_expired(indexed_repo) -> bool:
    """Return True when the session's indexed repo has passed its expiry.

    A missing or empty ``expires_at`` counts as not expired. A value that
    cannot be parsed counts as expired. A timestamp without a UTC offset is
    interpreted as UTC, so comparing it with the aware current time cannot
    raise TypeError.
    """
    expires_at_str = indexed_repo.get("expires_at", "")
    if not expires_at_str:
        return False
    try:
        expires_at = datetime.fromisoformat(expires_at_str.replace("Z", "+00:00"))
        if expires_at.tzinfo is None:
            expires_at = expires_at.replace(tzinfo=timezone.utc)
        return datetime.now(timezone.utc) > expires_at
    except (ValueError, AttributeError, TypeError):
        return True
189+
190+
191+
@router.get("/index/{job_id}")
async def get_indexing_status(job_id: str, req: Request):
    """Report the current state of an anonymous indexing job.

    ``req`` is accepted but not read by this handler.

    Returns a dict with the job id, status, timestamps and repository info,
    plus status-specific fields: a progress summary while processing, the
    repo id and stats once completed, or error details when failed.

    Raises:
        HTTPException: 400 when ``job_id`` is empty or lacks the ``idx_``
            prefix; 404 when no job is found for the id.
    """
    has_valid_format = bool(job_id) and job_id.startswith("idx_")
    if not has_valid_format:
        raise HTTPException(status_code=400, detail={
            "error": "invalid_job_id", "message": "Invalid job ID format"
        })

    job = AnonymousIndexingJob(redis_client).get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail={
            "error": "job_not_found", "message": "Job not found or has expired. Jobs expire after 1 hour."
        })

    status = job.get("status", "unknown")

    # Fields common to every status.
    result = {
        "job_id": job_id,
        "status": status,
        "created_at": job.get("created_at"),
        "updated_at": job.get("updated_at"),
    }
    result["repository"] = {
        "owner": job.get("owner"),
        "name": job.get("repo_name"),
        "branch": job.get("branch"),
        "github_url": job.get("github_url"),
    }

    if job.get("is_partial"):
        result["partial"] = True
        result["max_files"] = job.get("max_files")

    # Status-specific fields; an unrecognised status adds nothing further.
    if status == "queued":
        result["message"] = "Job is queued for processing"
        return result

    if status == "cloning":
        result["message"] = "Cloning repository..."
        return result

    if status == "processing":
        result["message"] = "Indexing files..."
        progress = job.get("progress")
        if progress:
            done = progress.get("files_processed", 0)
            total = progress.get("files_total", 1)
            # Guard against division by zero when the total is not positive.
            percent = 0
            if total > 0:
                percent = round((done / total) * 100)
            result["progress"] = {
                "files_processed": done,
                "files_total": total,
                "functions_found": progress.get("functions_found", 0),
                "percent_complete": percent,
                "current_file": progress.get("current_file"),
            }
        return result

    if status == "completed":
        result["message"] = "Indexing completed successfully"
        result["repo_id"] = job.get("repo_id")
        stats = job.get("stats")
        if stats:
            result["stats"] = stats
        return result

    if status == "failed":
        result["message"] = job.get("error_message", "Indexing failed")
        result["error"] = job.get("error", "unknown_error")
        result["error_message"] = job.get("error_message")

    return result

0 commit comments

Comments
 (0)