Skip to content

Commit fdcc19d

Browse files
authored
Merge pull request #260 from DevanshuNEU/refactor/split-playground-repos
refactor: split playground.py (1306 lines) into 6 focused modules (OPE-78)
2 parents 68ac966 + f1f4d20 commit fdcc19d

9 files changed

Lines changed: 930 additions & 1384 deletions

File tree

backend/routes/playground.py

Lines changed: 0 additions & 1303 deletions
This file was deleted.
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
"""
2+
Playground routes package.
3+
4+
Split from a 1306-line monolith into focused modules:
5+
search.py -- search endpoint, repo resolution
6+
session.py -- session info, rate limits
7+
validation.py -- GitHub URL validation, metadata
8+
indexing.py -- anonymous indexing start + status
9+
helpers.py -- shared constants and utilities
10+
"""
11+
from fastapi import APIRouter
12+
13+
from routes.playground.helpers import load_demo_repos
14+
from routes.playground.search import router as search_router
15+
from routes.playground.session import router as session_router
16+
from routes.playground.validation import router as validation_router
17+
from routes.playground.indexing import router as indexing_router
18+
19+
# Re-export for main.py: from routes.playground import router, load_demo_repos
20+
# Parent router: every sub-router below is served under the /playground prefix.
router = APIRouter(prefix="/playground", tags=["Playground"])
for _child_router in (session_router, search_router, validation_router, indexing_router):
    router.include_router(_child_router)
del _child_router  # keep the loop variable out of the package namespace

# DEMO_REPO_IDS is re-exported so tests can keep importing it from this package.
from routes.playground.helpers import DEMO_REPO_IDS  # noqa: E402

__all__ = ["router", "load_demo_repos", "DEMO_REPO_IDS"]
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
"""
2+
Shared helpers and constants for playground routes.
3+
4+
All playground sub-modules import from here to avoid circular deps.
5+
"""
6+
import os
7+
import re
8+
from typing import Optional
9+
from fastapi import Request, Response
10+
11+
from dependencies import repo_manager, redis_client
12+
from services.observability import logger
13+
from services.playground_limiter import PlaygroundLimiter, get_playground_limiter
14+
15+
# Demo repo mapping (populated on startup via load_demo_repos)
16+
# Maps a demo key ("flask", "fastapi", "express", "react") to a repo id.
# Starts empty and is filled in by load_demo_repos().
DEMO_REPO_IDS = {}

# Session cookie settings
SESSION_COOKIE_NAME = "pg_session"
SESSION_COOKIE_MAX_AGE = 24 * 60 * 60  # 24 hours
IS_PRODUCTION = os.environ.get("ENVIRONMENT", "development").lower() == "production"

# GitHub validation settings
GITHUB_URL_PATTERN = re.compile(
    r"^https?://github\.com/(?P<owner>[a-zA-Z0-9_.-]+)/(?P<repo>[a-zA-Z0-9_.-]+)/?$"
)
ANONYMOUS_FILE_LIMIT = 200
GITHUB_API_BASE = "https://api.github.com"
GITHUB_API_TIMEOUT = 10.0
VALIDATION_CACHE_TTL = 5 * 60  # 5 minutes
31+
32+
33+
async def load_demo_repos() -> None:
    """Populate DEMO_REPO_IDS from the known repos. Called from main.py on startup.

    Each repo returned by ``repo_manager.list_repos()`` is matched against the
    demo keywords by case-insensitive substring of its name. The first keyword
    that matches (in the order flask, fastapi, express, react) wins, and a
    later repo matching the same keyword overwrites the earlier entry.

    Any exception is logged as a warning instead of being raised.
    """
    demo_keywords = ("flask", "fastapi", "express", "react")
    try:
        for repo in repo_manager.list_repos():
            # A missing *or* null name must not abort loading the other repos:
            # it simply matches no keyword.
            name_lower = (repo.get("name") or "").lower()
            matched = next((key for key in demo_keywords if key in name_lower), None)
            if matched is not None:
                DEMO_REPO_IDS[matched] = repo["id"]
        logger.info("Loaded demo repos", repos=list(DEMO_REPO_IDS.keys()))
    except Exception as e:
        logger.warning("Could not load demo repos", error=str(e))
50+
51+
52+
def get_client_ip(req: Request) -> str:
    """Return the client IP for *req*.

    The first entry of the ``X-Forwarded-For`` header is used when it is
    non-blank. Otherwise the socket peer address is returned, or
    ``"unknown"`` when the request carries no client information.

    NOTE(review): the header is client-controlled unless a trusted proxy
    overwrites it. Confirm the deployment sits behind one before relying on
    this value for rate limiting.
    """
    peer_ip = req.client.host if req.client else "unknown"
    forwarded = req.headers.get("x-forwarded-for")
    if not forwarded:
        return peer_ip
    # A blank first entry (e.g. ", 10.0.0.1") must not turn into an empty IP.
    first_hop = forwarded.split(",")[0].strip()
    return first_hop or peer_ip
59+
60+
61+
def get_session_token(req: Request) -> Optional[str]:
    """Return the value of the playground session cookie, or None if it is absent."""
    cookies = req.cookies
    return cookies.get(SESSION_COOKIE_NAME)
64+
65+
66+
def set_session_cookie(response: Response, token: str) -> None:
    """Attach *token* to *response* as the httpOnly playground session cookie."""
    cookie_options = {
        "key": SESSION_COOKIE_NAME,
        "value": token,
        "max_age": SESSION_COOKIE_MAX_AGE,
        "httponly": True,
        "samesite": "lax",
        # The Secure flag follows IS_PRODUCTION.
        "secure": IS_PRODUCTION,
    }
    response.set_cookie(**cookie_options)
76+
77+
78+
def get_limiter() -> PlaygroundLimiter:
    """Return the playground limiter obtained for this module's redis_client."""
    limiter = get_playground_limiter(redis_client)
    return limiter
Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
"""Anonymous indexing routes for the playground."""
2+
import time
3+
from typing import Optional
4+
from datetime import datetime, timezone
5+
from fastapi import APIRouter, HTTPException, Request, Response, BackgroundTasks
6+
from pydantic import BaseModel, field_validator
7+
8+
from dependencies import indexer, redis_client
9+
from services.observability import logger
10+
from services.anonymous_indexer import AnonymousIndexingJob, run_indexing_job
11+
from routes.playground.helpers import (
12+
ANONYMOUS_FILE_LIMIT,
13+
get_client_ip, get_session_token, set_session_cookie, get_limiter,
14+
)
15+
from routes.playground.validation import (
16+
parse_github_url, fetch_repo_metadata, count_code_files,
17+
)
18+
19+
router = APIRouter()
20+
21+
22+
class IndexRepoRequest(BaseModel):
    """Request body for anonymous repository indexing."""

    # URL of the repository to index; sanity-checked by the validator below.
    github_url: str
    # Branch to index; the handler falls back to the repo's default branch.
    branch: Optional[str] = None
    # Opt in to indexing only a subset when the repo exceeds the anonymous limit.
    partial: bool = False

    @field_validator("github_url")
    @classmethod
    def validate_github_url_format(cls, v: str) -> str:
        """Strip the URL and run cheap sanity checks on it.

        Raises:
            ValueError: if the stripped value is empty, does not start with
                http:// or https://, or does not contain "github.com".
        """
        url = v.strip()
        if url == "":
            raise ValueError("GitHub URL is required")

        has_http_scheme = url.startswith("http://") or url.startswith("https://")
        if not has_http_scheme:
            raise ValueError("URL must start with http:// or https://")

        lowered = url.lower()
        if "github.com" not in lowered:
            raise ValueError("URL must be a GitHub repository URL")

        return url
39+
40+
41+
def _indexed_repo_expired(indexed_repo: dict) -> bool:
    """Return True when the session's indexed repo is past its ``expires_at``.

    A missing or empty ``expires_at`` counts as not expired. A value that
    cannot be parsed as an ISO timestamp counts as expired.
    """
    expires_at_str = indexed_repo.get("expires_at", "")
    if not expires_at_str:
        return False
    try:
        expires_at = datetime.fromisoformat(expires_at_str.replace("Z", "+00:00"))
        # Naive timestamps are treated as UTC so the comparison is timezone-aware.
        if expires_at.tzinfo is None:
            expires_at = expires_at.replace(tzinfo=timezone.utc)
        return datetime.now(timezone.utc) > expires_at
    except (ValueError, AttributeError, TypeError):
        return True


def _raise_for_metadata_error(metadata: dict) -> None:
    """Raise the HTTPException matching an ``error`` entry in *metadata*, if any."""
    if "error" not in metadata:
        return

    error_type = metadata["error"]
    if error_type == "not_found":
        raise HTTPException(status_code=400, detail={
            "error": "validation_failed", "reason": "not_found",
            "message": "Repository not found. Check the URL or ensure it's public."
        })
    if error_type == "rate_limited":
        raise HTTPException(status_code=429, detail={
            "error": "github_rate_limit", "message": "GitHub API rate limit exceeded. Try again later."
        })
    raise HTTPException(status_code=502, detail={
        "error": "github_error", "message": metadata.get("message", "Failed to fetch repository info")
    })


def _resolve_file_count(file_count, count_error, metadata: dict):
    """Return *file_count*, or a size-based estimate when counting reported an error.

    The estimate is ``size // 3`` from the repo metadata. A "truncated" count
    is forced above ANONYMOUS_FILE_LIMIT; any other error is floored at 1.
    """
    if not count_error:
        return file_count
    # ``or 0`` also covers a metadata payload whose size is present but null.
    estimate = (metadata.get("size") or 0) // 3
    floor = ANONYMOUS_FILE_LIMIT + 1 if count_error == "truncated" else 1
    return max(estimate, floor)


@router.post("/index", status_code=202)
async def start_anonymous_indexing(
    request: IndexRepoRequest,
    req: Request,
    response: Response,
    background_tasks: BackgroundTasks,
) -> dict:
    """Start indexing a public GitHub repository for anonymous users.

    Steps, in order: ensure a session exists (creating one and setting its
    cookie when the request has none), reject sessions that still hold an
    unexpired indexed repo, validate the URL and repository metadata, apply
    the anonymous file limit, then create a job and queue it as a background
    task.

    Returns:
        A dict with ``job_id``, ``status`` ("queued"), ``estimated_time_seconds``,
        ``file_count`` and ``message``; partial runs also carry ``partial`` and
        ``total_files``.

    Raises:
        HTTPException: 409 if the session already has an unexpired indexed
            repo; 400 for an invalid URL, a repository that is not found or
            private, or one over the file limit without ``partial``; 429 when
            GitHub rate-limits the metadata lookup; 502 for other metadata
            errors; 503 when ``redis_client`` is unavailable.
    """
    start_time = time.time()
    limiter = get_limiter()

    # Session validation
    session_token = get_session_token(req)
    client_ip = get_client_ip(req)

    if not session_token:
        session_token = limiter._generate_session_token()
        limiter.create_session(session_token)
        set_session_cookie(response, session_token)
        logger.info("Created new session for indexing",
                    session_token=session_token[:8], client_ip=client_ip)

    # Only one indexed repo per session: reject unless the existing one expired.
    session_data = limiter.get_session_data(session_token)
    indexed_repo = session_data.indexed_repo
    if indexed_repo:
        if not _indexed_repo_expired(indexed_repo):
            logger.info("Session already has indexed repo",
                        session_token=session_token[:8],
                        existing_repo=indexed_repo.get("repo_id"))
            raise HTTPException(
                status_code=409,
                detail={
                    "error": "already_indexed",
                    "message": "You already have an indexed repository. Only 1 repo per session allowed.",
                    "indexed_repo": indexed_repo,
                }
            )
        logger.info("Existing indexed repo expired, allowing new indexing",
                    session_token=session_token[:8])

    # Validate GitHub URL
    owner, repo_name, parse_error = parse_github_url(request.github_url)
    if parse_error:
        raise HTTPException(status_code=400, detail={
            "error": "validation_failed", "reason": "invalid_url", "message": parse_error
        })

    metadata = await fetch_repo_metadata(owner, repo_name)
    _raise_for_metadata_error(metadata)

    if metadata.get("private", False):
        raise HTTPException(status_code=400, detail={
            "error": "validation_failed", "reason": "private",
            "message": "This repository is private. Anonymous indexing only supports public repositories."
        })

    # ``or "main"`` also covers metadata whose default_branch is present but null.
    branch = request.branch or metadata.get("default_branch") or "main"
    file_count, count_error = await count_code_files(owner, repo_name, branch)
    file_count = _resolve_file_count(file_count, count_error, metadata)

    is_partial = False
    files_to_index = file_count

    if file_count > ANONYMOUS_FILE_LIMIT:
        if not request.partial:
            raise HTTPException(status_code=400, detail={
                "error": "validation_failed", "reason": "too_large",
                "message": f"Repository has {file_count:,} code files. "
                           f"Anonymous limit is {ANONYMOUS_FILE_LIMIT}. "
                           f"Use partial=true to index first {ANONYMOUS_FILE_LIMIT} files.",
                "file_count": file_count, "limit": ANONYMOUS_FILE_LIMIT,
                "hint": "Set partial=true to index a subset of files",
            })
        is_partial = True
        files_to_index = ANONYMOUS_FILE_LIMIT
        logger.info("Partial indexing enabled", total_files=file_count, indexing=files_to_index)

    # Create job and start background indexing
    if not redis_client:
        raise HTTPException(status_code=503, detail="Indexing service unavailable (Redis down)")
    job_manager = AnonymousIndexingJob(redis_client)
    job_id = job_manager.generate_job_id()

    # The job record keeps the full file count; the worker receives the capped one.
    job_manager.create_job(
        job_id=job_id, session_id=session_token, github_url=request.github_url,
        owner=owner, repo_name=repo_name, branch=branch,
        file_count=file_count, is_partial=is_partial, max_files=files_to_index,
    )

    background_tasks.add_task(
        run_indexing_job,
        job_manager=job_manager, indexer=indexer, limiter=limiter,
        job_id=job_id, session_id=session_token, github_url=request.github_url,
        owner=owner, repo_name=repo_name, branch=branch,
        file_count=files_to_index, max_files=files_to_index if is_partial else None,
    )

    # Measured after queuing so the logged time covers the whole handler.
    response_time_ms = int((time.time() - start_time) * 1000)
    logger.info("Indexing job queued", job_id=job_id, owner=owner, repo=repo_name,
                branch=branch, file_count=files_to_index, is_partial=is_partial,
                session_token=session_token[:8], response_time_ms=response_time_ms)

    estimated_seconds = max(10, int(files_to_index * 0.3))
    result = {
        "job_id": job_id, "status": "queued",
        "estimated_time_seconds": estimated_seconds, "file_count": files_to_index,
        "message": f"Indexing started. Poll /playground/index/{job_id} for status.",
    }

    if is_partial:
        result["partial"] = True
        result["total_files"] = file_count
        result["message"] = (
            f"Partial indexing started ({files_to_index} of {file_count} files). "
            f"Poll /playground/index/{job_id} for status."
        )

    return result
194+
195+
196+
@router.get("/index/{job_id}")
async def get_indexing_status(job_id: str, req: Request) -> dict:
    """Report the current state of an anonymous indexing job.

    Returns:
        A dict with the job id, status, timestamps and repository details,
        plus status-specific fields: ``progress`` while processing,
        ``repo_id``/``stats`` once completed, ``error``/``error_message``
        when failed.

    Raises:
        HTTPException: 400 when *job_id* does not start with "idx_", 503 when
            ``redis_client`` is unavailable, 404 when no job is found.
    """
    if not (job_id and job_id.startswith("idx_")):
        raise HTTPException(status_code=400, detail={
            "error": "invalid_job_id", "message": "Invalid job ID format"
        })

    if not redis_client:
        raise HTTPException(status_code=503, detail="Indexing service unavailable (Redis down)")

    job = AnonymousIndexingJob(redis_client).get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail={
            "error": "job_not_found", "message": "Job not found or has expired. Jobs expire after 1 hour."
        })

    status = job.get("status", "unknown")
    result = {
        "job_id": job_id,
        "status": status,
        "created_at": job.get("created_at"),
        "updated_at": job.get("updated_at"),
        "repository": {
            "owner": job.get("owner"),
            "name": job.get("repo_name"),
            "branch": job.get("branch"),
            "github_url": job.get("github_url"),
        },
    }

    if job.get("is_partial"):
        result["partial"] = True
        result["max_files"] = job.get("max_files")

    # The status branches are mutually exclusive; unknown statuses add nothing.
    if status == "failed":
        result["message"] = job.get("error_message", "Indexing failed")
        result["error"] = job.get("error", "unknown_error")
        result["error_message"] = job.get("error_message")
    elif status == "completed":
        result["message"] = "Indexing completed successfully"
        result["repo_id"] = job.get("repo_id")
        stats = job.get("stats")
        if stats:
            result["stats"] = stats
    elif status == "processing":
        result["message"] = "Indexing files..."
        progress = job.get("progress")
        if progress:
            files_processed = progress.get("files_processed", 0)
            files_total = progress.get("files_total", 1)
            # Guard the division: a zero total reports 0%.
            if files_total > 0:
                percent = round((files_processed / files_total) * 100)
            else:
                percent = 0
            result["progress"] = {
                "files_processed": files_processed,
                "files_total": files_total,
                "functions_found": progress.get("functions_found", 0),
                "percent_complete": percent,
                "current_file": progress.get("current_file"),
            }
    elif status == "cloning":
        result["message"] = "Cloning repository..."
    elif status == "queued":
        result["message"] = "Job is queued for processing"

    return result

0 commit comments

Comments
 (0)