From c0ed87aa49af67ae517ecb014516d673e4a75064 Mon Sep 17 00:00:00 2001 From: Devanshu Rajesh Chicholikar Date: Sat, 13 Dec 2025 03:05:30 -0500 Subject: [PATCH 1/4] feat(backend): Add PlaygroundLimiter service with Redis backing - Session-based limiting (50/day per device via httpOnly cookie) - IP-based fallback (100/day for shared networks) - Global circuit breaker (10k/hour for cost protection) - Fail-open design if Redis unavailable - PlaygroundLimitResult dataclass for structured responses Part of #93 --- backend/services/playground_limiter.py | 301 +++++++++++++++++++++++++ 1 file changed, 301 insertions(+) create mode 100644 backend/services/playground_limiter.py diff --git a/backend/services/playground_limiter.py b/backend/services/playground_limiter.py new file mode 100644 index 0000000..59847a4 --- /dev/null +++ b/backend/services/playground_limiter.py @@ -0,0 +1,301 @@ +""" +Playground Rate Limiter +Redis-backed rate limiting for anonymous playground searches. + +Design: +- Layer 1: Session token (httpOnly cookie) - 50 searches/day per device +- Layer 2: IP-based fallback - 100 searches/day (for shared IPs) +- Layer 3: Global circuit breaker - 10,000 searches/hour (cost protection) + +Part of #93 implementation. 
@dataclass
class PlaygroundLimitResult:
    """Outcome of a single playground rate-limit check."""
    allowed: bool  # True when the search may proceed
    remaining: int  # Searches left in the current session window
    limit: int  # Per-session daily limit reported to the client
    resets_at: datetime  # Next midnight UTC, when the daily counters expire
    reason: Optional[str] = None  # Why blocked (if not allowed)
    session_token: Optional[str] = None  # New token if created

    def to_dict(self) -> dict:
        """Return the JSON-friendly fields (the session token is left out)."""
        return {
            "allowed": self.allowed,
            "remaining": self.remaining,
            "limit": self.limit,
            "resets_at": self.resets_at.isoformat(),
            "reason": self.reason,
        }


class PlaygroundLimiter:
    """
    Redis-backed rate limiter for playground searches.

    Layers, in the order they are evaluated:
    1. Global circuit breaker (GLOBAL_LIMIT_PER_HOUR, counts allowed searches)
    2. Session counter (SESSION_LIMIT_PER_DAY) for tokens this service issued
    3. IP counter (IP_LIMIT_PER_DAY) for requests without a known session

    Every layer fails OPEN: if Redis is missing or raises, the search is allowed.

    Usage:
        limiter = PlaygroundLimiter(redis_client)

        # Check before search
        result = limiter.check_and_record(session_token, client_ip)
        if not result.allowed:
            raise HTTPException(429, result.reason)

        # Set cookie if new session
        if result.session_token:
            response.set_cookie("pg_session", result.session_token, ...)

    NOTE(review): the IP counter only counts requests that arrive without a
    known session, so a client that discards its cookie receives a fresh
    session quota until the IP quota is spent - confirm this is acceptable.
    """

    # Limits
    SESSION_LIMIT_PER_DAY = 50  # Per device (generous for conversion)
    IP_LIMIT_PER_DAY = 100  # Per IP (higher for shared networks)
    GLOBAL_LIMIT_PER_HOUR = 10000  # Circuit breaker (cost protection)

    # Redis key prefixes
    KEY_SESSION = "playground:session:"
    KEY_IP = "playground:ip:"
    KEY_GLOBAL = "playground:global:hourly"

    # TTLs
    TTL_DAY = 86400  # 24 hours (upper bound for daily counters)
    TTL_HOUR = 3600  # 1 hour

    # User-facing denial messages
    _REASON_BUSY = "Service is experiencing high demand. Please try again later."
    _REASON_DAILY = "Daily limit reached. Sign up for unlimited searches!"

    def __init__(self, redis_client=None):
        """Store the Redis client; ``None`` disables limiting (fail open)."""
        self.redis = redis_client

    def _get_midnight_utc(self) -> datetime:
        """Return the next midnight UTC (always strictly in the future)."""
        from datetime import timedelta

        now = datetime.now(timezone.utc)
        start_of_today = now.replace(hour=0, minute=0, second=0, microsecond=0)
        return start_of_today + timedelta(days=1)

    def _seconds_until_reset(self) -> int:
        """Return the TTL (1..TTL_DAY seconds) that expires a key at next midnight UTC."""
        delta = self._get_midnight_utc() - datetime.now(timezone.utc)
        return max(1, min(self.TTL_DAY, int(delta.total_seconds())))

    def _hash_ip(self, ip: str) -> str:
        """Hash IP for privacy (first 16 hex chars of its SHA-256)."""
        return hashlib.sha256(ip.encode()).hexdigest()[:16]

    def _generate_session_token(self) -> str:
        """Generate secure session token."""
        return secrets.token_urlsafe(32)

    def _is_known_session(self, session_token: str) -> bool:
        """Return True only if a counter for this token already exists in Redis.

        The cookie value is client-controlled. Trusting unknown values would let
        a caller mint a fresh quota with every random cookie, so only tokens
        whose key was created by this service are honoured.
        """
        return bool(self.redis.exists(f"{self.KEY_SESSION}{session_token}"))

    def _count(
        self,
        key: str,
        limit: int,
        ttl: int,
        record: bool,
        failure_message: str,
    ) -> Tuple[bool, int]:
        """Read or bump the counter at ``key`` and compare it with ``limit``.

        Returns ``(allowed, used)``. When recording, ``used`` includes this hit
        and the hit is allowed while ``used <= limit``. When only peeking, the
        answer is whether one more hit would fit (``used < limit``).
        Any Redis error is logged and treated as ``(True, 0)`` (fail open).
        """
        try:
            if record:
                # SET NX EX creates the key together with its expiry, so a
                # counter can never be left behind without a TTL (which would
                # block the caller forever). INCR keeps the existing TTL.
                self.redis.set(key, 0, ex=ttl, nx=True)
                used = int(self.redis.incr(key))
                return used <= limit, used
            used = int(self.redis.get(key) or 0)
            return used < limit, used
        except Exception as e:
            logger.error(failure_message, error=str(e))
            return True, 0  # Fail open

    def _denied(self, resets_at: datetime, reason: str) -> PlaygroundLimitResult:
        """Build the result returned when a layer rejects the search."""
        return PlaygroundLimitResult(
            allowed=False,
            remaining=0,
            limit=self.SESSION_LIMIT_PER_DAY,
            resets_at=resets_at,
            reason=reason,
        )

    def check_limit(
        self,
        session_token: Optional[str],
        client_ip: str
    ) -> PlaygroundLimitResult:
        """
        Check rate limit without recording a search.
        Use this for GET /playground/limits endpoint.
        """
        return self._check_limits(session_token, client_ip, record=False)

    def check_and_record(
        self,
        session_token: Optional[str],
        client_ip: str
    ) -> PlaygroundLimitResult:
        """
        Check rate limit AND record a search if allowed.
        Use this for POST /playground/search endpoint.
        """
        return self._check_limits(session_token, client_ip, record=True)

    def _check_limits(
        self,
        session_token: Optional[str],
        client_ip: str,
        record: bool = False
    ) -> PlaygroundLimitResult:
        """
        Internal method to check all rate limit layers.

        Order of checks:
        1. Global circuit breaker (read-only peek; protects cost)
        2. Session-based limit (primary, only for tokens known to Redis)
        3. IP-based limit (no token, or an unknown/expired one)

        The global counter is incremented only after a search has been allowed,
        so rejected requests cannot be used to trip the breaker for everyone.
        """
        resets_at = self._get_midnight_utc()

        # If no Redis, fail OPEN (allow all)
        if self.redis is None:
            logger.warning("Redis not available, allowing playground search")
            return PlaygroundLimitResult(
                allowed=True,
                remaining=self.SESSION_LIMIT_PER_DAY,
                limit=self.SESSION_LIMIT_PER_DAY,
                resets_at=resets_at,
            )

        try:
            # Layer 1: Global circuit breaker (peek only, see docstring)
            global_allowed, global_count = self._check_global_limit(record=False)
            if not global_allowed:
                logger.warning("Global circuit breaker triggered", count=global_count)
                return self._denied(resets_at, self._REASON_BUSY)

            # Layer 2: Session-based limit (primary)
            if session_token and self._is_known_session(session_token):
                session_allowed, session_remaining = self._check_session_limit(
                    session_token, record
                )
                if not session_allowed:
                    # Session exhausted
                    return self._denied(resets_at, self._REASON_DAILY)
                if record:
                    self._check_global_limit(record=True)
                return PlaygroundLimitResult(
                    allowed=True,
                    remaining=session_remaining,
                    limit=self.SESSION_LIMIT_PER_DAY,
                    resets_at=resets_at,
                )

            # No (trusted) session token - create new one and check IP
            new_session_token = self._generate_session_token()

            # Layer 3: IP-based limit (for new sessions / fallback)
            ip_allowed, _ip_remaining = self._check_ip_limit(client_ip, record)
            if not ip_allowed:
                # IP exhausted (likely abuse or shared network)
                return self._denied(resets_at, self._REASON_DAILY)

            remaining = self.SESSION_LIMIT_PER_DAY
            if record:
                # Initialize session counter with this first search
                session_key = f"{self.KEY_SESSION}{new_session_token}"
                self.redis.set(session_key, "1", ex=self._seconds_until_reset())
                self._check_global_limit(record=True)
                remaining -= 1

            return PlaygroundLimitResult(
                allowed=True,
                remaining=remaining,
                limit=self.SESSION_LIMIT_PER_DAY,
                resets_at=resets_at,
                session_token=new_session_token,
            )

        except Exception as e:
            logger.error("Playground rate limit check failed", error=str(e))
            capture_exception(e)
            # Fail OPEN - allow search but don't break UX
            return PlaygroundLimitResult(
                allowed=True,
                remaining=self.SESSION_LIMIT_PER_DAY,
                limit=self.SESSION_LIMIT_PER_DAY,
                resets_at=resets_at,
            )

    def _check_global_limit(self, record: bool) -> Tuple[bool, int]:
        """Check global circuit breaker; returns ``(allowed, hourly_count)``."""
        return self._count(
            self.KEY_GLOBAL,
            self.GLOBAL_LIMIT_PER_HOUR,
            self.TTL_HOUR,
            record,
            "Global limit check failed",
        )

    def _check_session_limit(
        self,
        session_token: str,
        record: bool
    ) -> Tuple[bool, int]:
        """Check session-based limit; returns ``(allowed, remaining)``."""
        allowed, used = self._count(
            f"{self.KEY_SESSION}{session_token}",
            self.SESSION_LIMIT_PER_DAY,
            self._seconds_until_reset(),
            record,
            "Session limit check failed",
        )
        return allowed, max(0, self.SESSION_LIMIT_PER_DAY - used)

    def _check_ip_limit(self, client_ip: str, record: bool) -> Tuple[bool, int]:
        """Check IP-based limit; returns ``(allowed, remaining)``."""
        allowed, used = self._count(
            f"{self.KEY_IP}{self._hash_ip(client_ip)}",
            self.IP_LIMIT_PER_DAY,
            self._seconds_until_reset(),
            record,
            "IP limit check failed",
        )
        return allowed, max(0, self.IP_LIMIT_PER_DAY - used)

    def get_usage_stats(self) -> dict:
        """Get current global usage stats (for monitoring)."""
        if self.redis is None:
            return {"global_hourly": 0, "redis_available": False}

        try:
            global_count = int(self.redis.get(self.KEY_GLOBAL) or 0)
        except Exception as e:
            return {"error": str(e), "redis_available": False}
        return {
            "global_hourly": global_count,
            "global_limit": self.GLOBAL_LIMIT_PER_HOUR,
            "redis_available": True,
        }


# Singleton instance
_playground_limiter: Optional[PlaygroundLimiter] = None


def get_playground_limiter(redis_client=None) -> PlaygroundLimiter:
    """Get or create the shared PlaygroundLimiter instance.

    If the singleton was first created without Redis (e.g. Redis was down at
    startup) and a client is supplied later, the client is attached so that
    limiting starts working instead of staying fail-open forever.
    """
    global _playground_limiter
    if _playground_limiter is None:
        _playground_limiter = PlaygroundLimiter(redis_client)
    elif _playground_limiter.redis is None and redis_client is not None:
        _playground_limiter.redis = redis_client
    return _playground_limiter
--git a/backend/dependencies.py b/backend/dependencies.py index 9ccff25..9521a24 100644 --- a/backend/dependencies.py +++ b/backend/dependencies.py @@ -42,6 +42,9 @@ # Repository size validation repo_validator = get_repo_validator() +# Redis client (for playground limiter and other services) +redis_client = cache.redis if cache.redis else None + def get_repo_or_404(repo_id: str, user_id: str) -> dict: """ diff --git a/backend/routes/playground.py b/backend/routes/playground.py index 2f3b2b0..8f19ea7 100644 --- a/backend/routes/playground.py +++ b/backend/routes/playground.py @@ -1,22 +1,29 @@ -"""Playground routes - no auth required, rate limited.""" -from fastapi import APIRouter, HTTPException, Request +""" +Playground routes - no auth required, rate limited via Redis. + +Rate limiting strategy (see #93): +- Session token (httpOnly cookie): 50 searches/day per device +- IP fallback: 100 searches/day for shared networks +- Global circuit breaker: 10k searches/hour (cost protection) +""" +from typing import Optional +from fastapi import APIRouter, HTTPException, Request, Response from pydantic import BaseModel -from collections import defaultdict -import time as time_module +import time -from dependencies import indexer, cache, repo_manager +from dependencies import indexer, cache, repo_manager, redis_client from services.input_validator import InputValidator from services.observability import logger +from services.playground_limiter import PlaygroundLimiter, get_playground_limiter router = APIRouter(prefix="/playground", tags=["Playground"]) # Demo repo mapping (populated on startup) DEMO_REPO_IDS = {} -# Rate limiting config -PLAYGROUND_LIMIT = 10 # searches per hour -PLAYGROUND_WINDOW = 3600 # 1 hour -playground_rate_limits = defaultdict(list) +# Session cookie config +SESSION_COOKIE_NAME = "pg_session" +SESSION_COOKIE_MAX_AGE = 86400 # 24 hours class PlaygroundSearchRequest(BaseModel): @@ -45,21 +52,6 @@ async def load_demo_repos(): logger.warning("Could not 
load demo repos", error=str(e)) -def _check_rate_limit(ip: str) -> tuple[bool, int]: - """Check if IP is within rate limit.""" - now = time_module.time() - playground_rate_limits[ip] = [ - t for t in playground_rate_limits[ip] if now - t < PLAYGROUND_WINDOW - ] - remaining = PLAYGROUND_LIMIT - len(playground_rate_limits[ip]) - return (remaining > 0, max(0, remaining)) - - -def _record_search(ip: str): - """Record a search for rate limiting.""" - playground_rate_limits[ip].append(time_module.time()) - - def _get_client_ip(req: Request) -> str: """Extract client IP from request.""" client_ip = req.client.host if req.client else "unknown" @@ -69,19 +61,82 @@ def _get_client_ip(req: Request) -> str: return client_ip +def _get_session_token(req: Request) -> Optional[str]: + """Get session token from cookie.""" + return req.cookies.get(SESSION_COOKIE_NAME) + + +def _set_session_cookie(response: Response, token: str): + """Set httpOnly session cookie.""" + response.set_cookie( + key=SESSION_COOKIE_NAME, + value=token, + max_age=SESSION_COOKIE_MAX_AGE, + httponly=True, # Can't be accessed by JavaScript + samesite="lax", # CSRF protection + secure=False, # Set True in production with HTTPS + ) + + +def _get_limiter() -> PlaygroundLimiter: + """Get the playground limiter instance.""" + return get_playground_limiter(redis_client) + + +@router.get("/limits") +async def get_playground_limits(req: Request): + """ + Get current rate limit status for this user. + + Frontend should call this on page load to show accurate remaining count. 
+ """ + session_token = _get_session_token(req) + client_ip = _get_client_ip(req) + + limiter = _get_limiter() + result = limiter.check_limit(session_token, client_ip) + + return { + "remaining": result.remaining, + "limit": result.limit, + "resets_at": result.resets_at.isoformat(), + "tier": "anonymous", + } + + @router.post("/search") -async def playground_search(request: PlaygroundSearchRequest, req: Request): - """Public playground search - rate limited by IP.""" +async def playground_search( + request: PlaygroundSearchRequest, + req: Request, + response: Response +): + """ + Public playground search - rate limited by session/IP. + + Sets httpOnly cookie on first request to track device. + """ + session_token = _get_session_token(req) client_ip = _get_client_ip(req) - # Rate limit check - allowed, remaining = _check_rate_limit(client_ip) - if not allowed: + # Rate limit check AND record + limiter = _get_limiter() + limit_result = limiter.check_and_record(session_token, client_ip) + + if not limit_result.allowed: raise HTTPException( status_code=429, - detail="Rate limit exceeded. Sign up for unlimited searches!" 
+ detail={ + "message": limit_result.reason, + "remaining": 0, + "limit": limit_result.limit, + "resets_at": limit_result.resets_at.isoformat(), + } ) + # Set session cookie if new token was created + if limit_result.session_token: + _set_session_cookie(response, limit_result.session_token) + # Validate query valid_query, query_error = InputValidator.validate_search_query(request.query) if not valid_query: @@ -100,7 +155,6 @@ async def playground_search(request: PlaygroundSearchRequest, req: Request): detail=f"Demo repo '{request.demo_repo}' not available" ) - import time start_time = time.time() try: @@ -113,7 +167,8 @@ async def playground_search(request: PlaygroundSearchRequest, req: Request): "results": cached_results, "count": len(cached_results), "cached": True, - "remaining_searches": remaining + "remaining_searches": limit_result.remaining, + "limit": limit_result.limit, } # Search @@ -125,17 +180,23 @@ async def playground_search(request: PlaygroundSearchRequest, req: Request): use_reranking=True ) - # Cache and record + # Cache results cache.set_search_results(sanitized_query, repo_id, results, ttl=3600) - _record_search(client_ip) + + search_time = int((time.time() - start_time) * 1000) return { "results": results, "count": len(results), "cached": False, - "remaining_searches": remaining - 1 + "remaining_searches": limit_result.remaining, + "limit": limit_result.limit, + "search_time_ms": search_time, } + except HTTPException: + raise except Exception as e: + logger.error("Playground search failed", error=str(e)) raise HTTPException(status_code=500, detail=str(e)) @@ -149,3 +210,13 @@ async def list_playground_repos(): {"id": "express", "name": "Express", "description": "Node.js framework", "available": "express" in DEMO_REPO_IDS}, ] } + + +@router.get("/stats") +async def get_playground_stats(): + """ + Get playground usage stats (for monitoring/debugging). 
+ """ + limiter = _get_limiter() + stats = limiter.get_usage_stats() + return stats From fc1690e9a3b31f47597b5e9682c67c244696f89c Mon Sep 17 00:00:00 2001 From: Devanshu Rajesh Chicholikar Date: Sat, 13 Dec 2025 03:22:47 -0500 Subject: [PATCH 3/4] feat(frontend): Update playground to use backend rate limits - Fetch limits on mount from GET /playground/limits - Include credentials for session cookie tracking - Use backend response as source of truth for remaining count - Handle 429 rate limit errors with user-friendly message - Remove client-side only tracking (was bypassable on refresh) - Update both LandingPage.tsx and Playground.tsx Part of #93 --- frontend/src/pages/LandingPage.tsx | 49 +++++++++++++++++++------ frontend/src/pages/Playground.tsx | 59 ++++++++++++++++++++---------- 2 files changed, 78 insertions(+), 30 deletions(-) diff --git a/frontend/src/pages/LandingPage.tsx b/frontend/src/pages/LandingPage.tsx index 7c7db46..1d55ad9 100644 --- a/frontend/src/pages/LandingPage.tsx +++ b/frontend/src/pages/LandingPage.tsx @@ -106,12 +106,24 @@ export function LandingPage() { const [results, setResults] = useState([]) const [loading, setLoading] = useState(false) const [searchTime, setSearchTime] = useState(null) - const [searchCount, setSearchCount] = useState(0) + const [remaining, setRemaining] = useState(50) // Will be updated from backend + const [limit, setLimit] = useState(50) // Total limit from backend const [hasSearched, setHasSearched] = useState(false) const [availableRepos, setAvailableRepos] = useState([]) + const [rateLimitError, setRateLimitError] = useState(null) - const FREE_LIMIT = 5 - const remaining = FREE_LIMIT - searchCount + // Fetch rate limit status on mount (backend is source of truth) + useEffect(() => { + fetch(`${API_URL}/playground/limits`, { + credentials: 'include', // Send cookies for session tracking + }) + .then(res => res.json()) + .then(data => { + setRemaining(data.remaining ?? 50) + setLimit(data.limit ?? 
50) + }) + .catch(console.error) + }, []) useEffect(() => { fetch(`${API_URL}/playground/repos`) @@ -125,23 +137,36 @@ export function LandingPage() { const handleSearch = async (searchQuery?: string) => { const q = searchQuery || query - if (!q.trim() || loading || searchCount >= FREE_LIMIT) return + if (!q.trim() || loading || remaining <= 0) return setLoading(true) setHasSearched(true) + setRateLimitError(null) const startTime = Date.now() try { const response = await fetch(`${API_URL}/playground/search`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, + credentials: 'include', // Send cookies for session tracking body: JSON.stringify({ query: q, demo_repo: selectedRepo, max_results: 10 }) }) const data = await response.json() + if (response.ok) { setResults(data.results || []) - setSearchTime(Date.now() - startTime) - setSearchCount(prev => prev + 1) + setSearchTime(data.search_time_ms || (Date.now() - startTime)) + // Update remaining from backend (source of truth) + if (typeof data.remaining_searches === 'number') { + setRemaining(data.remaining_searches) + } + if (typeof data.limit === 'number') { + setLimit(data.limit) + } + } else if (response.status === 429) { + // Rate limit exceeded + setRateLimitError(data.detail?.message || 'Daily limit reached. 
Sign up for unlimited searches!') + setRemaining(0) } } catch (error) { console.error('Search error:', error) @@ -240,7 +265,7 @@ export function LandingPage() { )} diff --git a/frontend/src/pages/Playground.tsx b/frontend/src/pages/Playground.tsx index ece3d8d..5dd3a99 100644 --- a/frontend/src/pages/Playground.tsx +++ b/frontend/src/pages/Playground.tsx @@ -1,4 +1,4 @@ -import { useState } from 'react' +import { useState, useEffect } from 'react' import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter' import { oneDark } from 'react-syntax-highlighter/dist/esm/styles/prism' import { API_URL } from '../config/api' @@ -29,28 +29,38 @@ export function Playground({ onSignupClick }: PlaygroundProps) { const [results, setResults] = useState([]) const [loading, setLoading] = useState(false) const [searchTime, setSearchTime] = useState(null) - const [searchCount, setSearchCount] = useState(0) + const [remaining, setRemaining] = useState(50) // Will be updated from backend + const [limit, setLimit] = useState(50) // Total limit from backend const [hasSearched, setHasSearched] = useState(false) + const [rateLimitError, setRateLimitError] = useState(null) - const FREE_SEARCH_LIMIT = 5 + // Fetch rate limit status on mount (backend is source of truth) + useEffect(() => { + fetch(`${API_URL}/playground/limits`, { + credentials: 'include', // Send cookies for session tracking + }) + .then(res => res.json()) + .then(data => { + setRemaining(data.remaining ?? 50) + setLimit(data.limit ?? 
50) + }) + .catch(console.error) + }, []) const handleSearch = async (searchQuery?: string) => { const q = searchQuery || query - if (!q.trim()) return - - if (searchCount >= FREE_SEARCH_LIMIT) { - // Show signup prompt - return - } + if (!q.trim() || loading || remaining <= 0) return setLoading(true) setHasSearched(true) + setRateLimitError(null) const startTime = Date.now() try { const response = await fetch(`${API_URL}/playground/search`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, + credentials: 'include', // Send cookies for session tracking body: JSON.stringify({ query: q, demo_repo: selectedRepo, @@ -59,9 +69,22 @@ export function Playground({ onSignupClick }: PlaygroundProps) { }) const data = await response.json() - setResults(data.results || []) - setSearchTime(Date.now() - startTime) - setSearchCount(prev => prev + 1) + + if (response.ok) { + setResults(data.results || []) + setSearchTime(data.search_time_ms || (Date.now() - startTime)) + // Update remaining from backend (source of truth) + if (typeof data.remaining_searches === 'number') { + setRemaining(data.remaining_searches) + } + if (typeof data.limit === 'number') { + setLimit(data.limit) + } + } else if (response.status === 429) { + // Rate limit exceeded + setRateLimitError(data.detail?.message || 'Daily limit reached. Sign up for unlimited searches!') + setRemaining(0) + } } catch (error) { console.error('Search error:', error) } finally { @@ -69,8 +92,6 @@ export function Playground({ onSignupClick }: PlaygroundProps) { } } - const remainingSearches = FREE_SEARCH_LIMIT - searchCount - return (
{/* Minimal Nav */} @@ -171,9 +192,9 @@ export function Playground({ onSignupClick }: PlaygroundProps) { )} {/* Remaining searches indicator */} - {searchCount > 0 && remainingSearches > 0 && ( + {hasSearched && remaining > 0 && remaining < limit && (
- {remainingSearches} free {remainingSearches === 1 ? 'search' : 'searches'} remaining •{' '} + {remaining} free {remaining === 1 ? 'search' : 'searches'} remaining •{' '} @@ -194,11 +215,11 @@ export function Playground({ onSignupClick }: PlaygroundProps) { )} {/* Limit Reached Banner */} - {searchCount >= FREE_SEARCH_LIMIT && ( + {(remaining <= 0 || rateLimitError) && (
-

You've used all free searches

+

You've reached today's limit

- Sign up to get unlimited searches, index your own repos, and more. + {rateLimitError || 'Sign up to get unlimited searches, index your own repos, and more.'}