From 2f17f3f7c571d81e8aefe617a95a36dbd9f6c9bc Mon Sep 17 00:00:00 2001
From: Devanshu Rajesh Chicholikar <chicholikar.d@northeastern.edu>
Date: Thu, 25 Dec 2025 17:18:52 -0600
Subject: [PATCH 1/5] feat(playground): add POST /index endpoint with
 validation (#125)

Checkpoint 1: Request validation and session management

- Add IndexRepoRequest Pydantic model with github_url and branch
- Add POST /playground/index endpoint (returns 202)
- Session validation: get existing or create new session
- Check the session's indexed repo: return 409 Conflict if it already
  has an unexpired one (1 repo per session)
- Handle expired repos: allow re-indexing if the existing repo's
  expires_at has passed or cannot be parsed
- Reuse GitHub validation logic from validate-repo endpoint
- Validate: URL format, repo exists, is public, file count <= 200

Next: Add job management and background indexing task
---
 backend/routes/playground.py | 209 +++++++++++++++++++++++++++++++++++
 1 file changed, 209 insertions(+)

diff --git a/backend/routes/playground.py b/backend/routes/playground.py
index bb11e96..3283250 100644
--- a/backend/routes/playground.py
+++ b/backend/routes/playground.py
@@ -64,6 +64,29 @@ def validate_github_url_format(cls, v: str) -> str:
         return v
 
 
+class IndexRepoRequest(BaseModel):
+    """
+    Request body for anonymous repository indexing.
+
+    Used by POST /playground/index endpoint (#125).
+    """
+    github_url: str
+    branch: Optional[str] = None  # None = use repo's default branch
+
+    @field_validator("github_url")
+    @classmethod
+    def validate_github_url_format(cls, v: str) -> str:
+        """Basic URL format validation (detailed validation in endpoint)."""
+        v = v.strip()
+        if not v:
+            raise ValueError("GitHub URL is required")
+        if not v.startswith(("http://", "https://")):
+            raise ValueError("URL must start with http:// or https://")
+        if "github.com" not in v.lower():
+            raise ValueError("URL must be a GitHub repository URL")
+        return v
+
+
 async def load_demo_repos():
     """Load pre-indexed demo repos. Called from main.py on startup."""
     # Note: We mutate DEMO_REPO_IDS dict, no need for 'global' statement
@@ -591,3 +614,189 @@ async def validate_github_repo(request: ValidateRepoRequest, req: Request):
                 response_time_ms=response_time_ms)
 
     return result
+
+
+# =============================================================================
+# Anonymous Indexing Endpoint (#125)
+# =============================================================================
+
+@router.post("/index", status_code=202)
+async def start_anonymous_indexing(
+    request: IndexRepoRequest,
+    req: Request,
+    response: Response
+):
+    """
+    Start indexing a public GitHub repository for anonymous users.
+
+    This endpoint validates the repository and queues it for indexing.
+    Returns a job_id that can be used to poll for status via GET /index/{job_id}.
+
+    Constraints:
+    - Max 200 code files (anonymous limit)
+    - 1 repo per session (no concurrent indexing)
+    - Public repos only
+    - 24hr TTL on indexed data
+
+    See issue #125 for full specification.
+    """
+    start_time = time.time()
+    limiter = _get_limiter()
+
+    # --- Step 1: Session validation (get existing or create new) ---
+    session_token = _get_session_token(req)
+    client_ip = _get_client_ip(req)
+
+    if not session_token:
+        # Create new session
+        session_token = limiter.create_session()
+        _set_session_cookie(response, session_token)
+        logger.info("Created new session for indexing",
+                    session_token=session_token[:8],
+                    client_ip=client_ip)
+
+    # --- Step 2: Check if session already has an indexed repo ---
+    session_data = limiter.get_session_data(session_token)
+
+    if session_data.indexed_repo:
+        # Check if the existing repo has expired
+        from datetime import datetime, timezone
+
+        expires_at_str = session_data.indexed_repo.get("expires_at", "")
+        is_expired = False
+
+        if expires_at_str:
+            try:
+                expires_at = datetime.fromisoformat(expires_at_str.replace("Z", "+00:00"))
+                is_expired = datetime.now(timezone.utc) > expires_at
+            except (ValueError, AttributeError):
+                is_expired = True  # Treat parse errors as expired
+
+        if not is_expired:
+            # Session already has a valid indexed repo - return 409 Conflict
+            logger.info("Session already has indexed repo",
+                        session_token=session_token[:8],
+                        existing_repo=session_data.indexed_repo.get("repo_id"))
+
+            raise HTTPException(
+                status_code=409,
+                detail={
+                    "error": "already_indexed",
+                    "message": "You already have an indexed repository. "
+                               "Only 1 repo per session allowed.",
+                    "indexed_repo": session_data.indexed_repo
+                }
+            )
+        else:
+            # Existing repo expired - allow new indexing
+            logger.info("Existing indexed repo expired, allowing new indexing",
+                        session_token=session_token[:8])
+
+    # --- Step 3: Validate GitHub URL (reuse existing logic) ---
+    owner, repo_name, parse_error = _parse_github_url(request.github_url)
+    if parse_error:
+        raise HTTPException(
+            status_code=400,
+            detail={
+                "error": "validation_failed",
+                "reason": "invalid_url",
+                "message": parse_error
+            }
+        )
+
+    # Fetch repo metadata from GitHub
+    metadata = await _fetch_repo_metadata(owner, repo_name)
+
+    if "error" in metadata:
+        error_type = metadata["error"]
+        if error_type == "not_found":
+            raise HTTPException(
+                status_code=400,
+                detail={
+                    "error": "validation_failed",
+                    "reason": "not_found",
+                    "message": "Repository not found. Check the URL or ensure it's public."
+                }
+            )
+        elif error_type == "rate_limited":
+            raise HTTPException(
+                status_code=429,
+                detail={
+                    "error": "github_rate_limit",
+                    "message": "GitHub API rate limit exceeded. Try again later."
+                }
+            )
+        else:
+            raise HTTPException(
+                status_code=502,
+                detail={
+                    "error": "github_error",
+                    "message": metadata.get("message", "Failed to fetch repository info")
+                }
+            )
+
+    # Check if private
+    if metadata.get("private", False):
+        raise HTTPException(
+            status_code=400,
+            detail={
+                "error": "validation_failed",
+                "reason": "private",
+                "message": "This repository is private. "
+                           "Anonymous indexing only supports public repositories."
+            }
+        )
+
+    # Determine branch
+    branch = request.branch or metadata.get("default_branch", "main")
+
+    # Get file count
+    file_count, count_error = await _count_code_files(owner, repo_name, branch)
+
+    # Handle truncated tree (very large repo)
+    if count_error == "truncated":
+        repo_size_kb = metadata.get("size", 0)
+        file_count = max(repo_size_kb // 3, ANONYMOUS_FILE_LIMIT + 1)
+    elif count_error:
+        repo_size_kb = metadata.get("size", 0)
+        file_count = max(repo_size_kb // 3, 1)
+
+    # Check file limit
+    if file_count > ANONYMOUS_FILE_LIMIT:
+        raise HTTPException(
+            status_code=400,
+            detail={
+                "error": "validation_failed",
+                "reason": "too_large",
+                "message": f"Repository has {file_count:,} code files. "
+                           f"Anonymous limit is {ANONYMOUS_FILE_LIMIT}.",
+                "file_count": file_count,
+                "limit": ANONYMOUS_FILE_LIMIT
+            }
+        )
+
+    # --- Validation passed! ---
+    # Next checkpoint will add: job creation, background task, Redis tracking
+
+    response_time_ms = int((time.time() - start_time) * 1000)
+
+    # TODO: Checkpoint 2 will add actual indexing logic
+    # For now, return a placeholder to confirm endpoint works
+    logger.info("Index request validated",
+                owner=owner, repo=repo_name, branch=branch,
+                file_count=file_count, session_token=session_token[:8],
+                response_time_ms=response_time_ms)
+
+    return {
+        "status": "checkpoint_1_complete",
+        "message": "Validation passed. Indexing logic will be added in next checkpoint.",
+        "validated": {
+            "owner": owner,
+            "repo_name": repo_name,
+            "branch": branch,
+            "file_count": file_count,
+            "github_url": request.github_url
+        },
+        "session_token": session_token[:8] + "...",
+        "response_time_ms": response_time_ms
+    }

From d4ec4b4eb423d38f6f11753577116520dfa4ddf9 Mon Sep 17 00:00:00 2001
From: Devanshu Rajesh Chicholikar <chicholikar.d@northeastern.edu>
Date: Thu, 25 Dec 2025 17:24:27 -0600
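Subject: [PATCH 2/5] feat(playground): add job manager and background indexing
 (#125)

Checkpoint 2: Job management and background indexing

- Add services/anonymous_indexer.py with AnonymousIndexingJob, which
  tracks jobs in Redis under anon_job:{job_id} (1 hour TTL on metadata)
- Add run_indexing_job background task: shallow-clone the repo (120s
  timeout), index it (300s timeout), record stats, store the indexed
  repo on the session, and always clean up the temp clone directory
- POST /playground/index now creates a job, queues the task via
  BackgroundTasks, and returns job_id, status "queued", file_count and
  an estimated time instead of the checkpoint-1 placeholder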
---
 backend/routes/playground.py          |  73 ++++--
 backend/services/anonymous_indexer.py | 358 ++++++++++++++++++++++++++
 2 files changed, 411 insertions(+), 20 deletions(-)
 create mode 100644 backend/services/anonymous_indexer.py

diff --git a/backend/routes/playground.py b/backend/routes/playground.py
index 3283250..6ae39b5 100644
--- a/backend/routes/playground.py
+++ b/backend/routes/playground.py
@@ -10,7 +10,7 @@
 import re
 import httpx
 from typing import Optional
-from fastapi import APIRouter, HTTPException, Request, Response
+from fastapi import APIRouter, HTTPException, Request, Response, BackgroundTasks
 from pydantic import BaseModel, field_validator
 import time
 
@@ -19,6 +19,10 @@
 from services.repo_validator import RepoValidator
 from services.observability import logger
 from services.playground_limiter import PlaygroundLimiter, get_playground_limiter
+from services.anonymous_indexer import (
+    AnonymousIndexingJob,
+    run_indexing_job,
+)
 
 router = APIRouter(prefix="/playground", tags=["Playground"])
 
@@ -624,7 +628,8 @@ async def validate_github_repo(request: ValidateRepoRequest, req: Request):
 async def start_anonymous_indexing(
     request: IndexRepoRequest,
     req: Request,
-    response: Response
+    response: Response,
+    background_tasks: BackgroundTasks
 ):
     """
     Start indexing a public GitHub repository for anonymous users.
@@ -775,28 +780,56 @@ async def start_anonymous_indexing(
             }
         )
 
-    # --- Validation passed! ---
-    # Next checkpoint will add: job creation, background task, Redis tracking
+    # --- Validation passed! Create job and start background indexing ---
 
     response_time_ms = int((time.time() - start_time) * 1000)
 
-    # TODO: Checkpoint 2 will add actual indexing logic
-    # For now, return a placeholder to confirm endpoint works
-    logger.info("Index request validated",
-                owner=owner, repo=repo_name, branch=branch,
-                file_count=file_count, session_token=session_token[:8],
+    # Initialize job manager
+    job_manager = AnonymousIndexingJob(redis_client)
+    job_id = job_manager.generate_job_id()
+
+    # Create job in Redis
+    job_manager.create_job(
+        job_id=job_id,
+        session_id=session_token,
+        github_url=request.github_url,
+        owner=owner,
+        repo_name=repo_name,
+        branch=branch,
+        file_count=file_count
+    )
+
+    # Queue background task
+    background_tasks.add_task(
+        run_indexing_job,
+        job_manager=job_manager,
+        indexer=indexer,
+        limiter=limiter,
+        job_id=job_id,
+        session_id=session_token,
+        github_url=request.github_url,
+        owner=owner,
+        repo_name=repo_name,
+        branch=branch,
+        file_count=file_count
+    )
+
+    logger.info("Indexing job queued",
+                job_id=job_id,
+                owner=owner,
+                repo=repo_name,
+                branch=branch,
+                file_count=file_count,
+                session_token=session_token[:8],
                 response_time_ms=response_time_ms)
 
+    # Estimate time based on file count (~0.3s per file)
+    estimated_seconds = max(10, int(file_count * 0.3))
+
     return {
-        "status": "checkpoint_1_complete",
-        "message": "Validation passed. Indexing logic will be added in next checkpoint.",
-        "validated": {
-            "owner": owner,
-            "repo_name": repo_name,
-            "branch": branch,
-            "file_count": file_count,
-            "github_url": request.github_url
-        },
-        "session_token": session_token[:8] + "...",
-        "response_time_ms": response_time_ms
+        "job_id": job_id,
+        "status": "queued",
+        "estimated_time_seconds": estimated_seconds,
+        "file_count": file_count,
+        "message": f"Indexing started. Poll /playground/index/{job_id} for status."
     }
diff --git a/backend/services/anonymous_indexer.py b/backend/services/anonymous_indexer.py
new file mode 100644
index 0000000..4fa9d5c
--- /dev/null
+++ b/backend/services/anonymous_indexer.py
@@ -0,0 +1,358 @@
+"""
+Anonymous Indexing Service (#125)
+
+Handles job management and background indexing for anonymous users.
+Jobs are tracked in Redis with progress updates.
+"""
+import uuid
+import json
+import shutil
+import asyncio
+from pathlib import Path
+from datetime import datetime, timezone, timedelta
+from typing import Optional
+from dataclasses import dataclass, asdict
+from enum import Enum
+
+import git
+
+from services.observability import logger, metrics, capture_exception
+
+
+class JobStatus(str, Enum):
+    """Job status values."""
+    QUEUED = "queued"
+    CLONING = "cloning"
+    PROCESSING = "processing"
+    COMPLETED = "completed"
+    FAILED = "failed"
+
+
+@dataclass
+class JobProgress:
+    """Progress tracking for indexing job."""
+    files_total: int = 0
+    files_processed: int = 0
+    functions_found: int = 0
+    current_file: Optional[str] = None
+
+    def to_dict(self) -> dict:
+        return {k: v for k, v in asdict(self).items() if v is not None}
+
+
+@dataclass
+class JobStats:
+    """Final stats for completed job."""
+    files_indexed: int = 0
+    functions_found: int = 0
+    time_taken_seconds: float = 0
+
+    def to_dict(self) -> dict:
+        return asdict(self)
+
+
+class AnonymousIndexingJob:
+    """
+    Manages anonymous indexing jobs in Redis.
+
+    Redis key: anon_job:{job_id}
+    TTL: 1 hour for job metadata
+    """
+
+    REDIS_PREFIX = "anon_job:"
+    JOB_TTL_SECONDS = 3600  # 1 hour for job metadata
+    REPO_TTL_HOURS = 24  # 24 hours for indexed data
+    TEMP_DIR = "/tmp/anon_repos"
+    CLONE_TIMEOUT_SECONDS = 120  # 2 minutes for clone
+    INDEX_TIMEOUT_SECONDS = 300  # 5 minutes for indexing
+
+    def __init__(self, redis_client):
+        self.redis = redis_client
+        # Ensure temp directory exists
+        Path(self.TEMP_DIR).mkdir(parents=True, exist_ok=True)
+
+    @staticmethod
+    def generate_job_id() -> str:
+        """Generate unique job ID."""
+        return f"idx_{uuid.uuid4().hex[:12]}"
+
+    @staticmethod
+    def generate_repo_id(job_id: str) -> str:
+        """Generate repo ID from job ID (for Pinecone namespace)."""
+        return f"anon_{job_id.replace('idx_', '')}"
+
+    def _get_key(self, job_id: str) -> str:
+        """Get Redis key for job."""
+        return f"{self.REDIS_PREFIX}{job_id}"
+
+    def create_job(
+        self,
+        job_id: str,
+        session_id: str,
+        github_url: str,
+        owner: str,
+        repo_name: str,
+        branch: str,
+        file_count: int
+    ) -> dict:
+        """
+        Create a new indexing job in Redis.
+
+        Returns the initial job state.
+        """
+        now = datetime.now(timezone.utc)
+        expires_at = now + timedelta(hours=self.REPO_TTL_HOURS)
+
+        job_data = {
+            "job_id": job_id,
+            "session_id": session_id,
+            "github_url": github_url,
+            "owner": owner,
+            "repo_name": repo_name,
+            "branch": branch,
+            "file_count": file_count,
+            "status": JobStatus.QUEUED.value,
+            "progress": None,
+            "stats": None,
+            "repo_id": None,
+            "error": None,
+            "error_message": None,
+            "created_at": now.isoformat(),
+            "updated_at": now.isoformat(),
+            "expires_at": expires_at.isoformat(),
+        }
+
+        if self.redis:
+            key = self._get_key(job_id)
+            self.redis.setex(key, self.JOB_TTL_SECONDS, json.dumps(job_data))
+            logger.info("Created indexing job", job_id=job_id, session_id=session_id[:8])
+
+        return job_data
+
+    def get_job(self, job_id: str) -> Optional[dict]:
+        """Get job data from Redis."""
+        if not self.redis:
+            return None
+
+        key = self._get_key(job_id)
+        data = self.redis.get(key)
+
+        if not data:
+            return None
+
+        try:
+            return json.loads(data)
+        except json.JSONDecodeError:
+            logger.error("Invalid job data in Redis", job_id=job_id)
+            return None
+
+    def update_status(
+        self,
+        job_id: str,
+        status: JobStatus,
+        progress: Optional[JobProgress] = None,
+        stats: Optional[JobStats] = None,
+        repo_id: Optional[str] = None,
+        error: Optional[str] = None,
+        error_message: Optional[str] = None
+    ) -> bool:
+        """Update job status in Redis."""
+        if not self.redis:
+            return False
+
+        job = self.get_job(job_id)
+        if not job:
+            logger.warning("Job not found for update", job_id=job_id)
+            return False
+
+        job["status"] = status.value
+        job["updated_at"] = datetime.now(timezone.utc).isoformat()
+
+        if progress:
+            job["progress"] = progress.to_dict()
+        if stats:
+            job["stats"] = stats.to_dict()
+        if repo_id:
+            job["repo_id"] = repo_id
+        if error:
+            job["error"] = error
+            job["error_message"] = error_message
+
+        key = self._get_key(job_id)
+        self.redis.setex(key, self.JOB_TTL_SECONDS, json.dumps(job))
+
+        return True
+
+    def update_progress(
+        self,
+        job_id: str,
+        files_processed: int,
+        functions_found: int,
+        files_total: int,
+        current_file: Optional[str] = None
+    ) -> bool:
+        """Update job progress (called during indexing)."""
+        progress = JobProgress(
+            files_total=files_total,
+            files_processed=files_processed,
+            functions_found=functions_found,
+            current_file=current_file
+        )
+        return self.update_status(job_id, JobStatus.PROCESSING, progress=progress)
+
+    def get_temp_path(self, job_id: str) -> Path:
+        """Get temp directory path for job."""
+        return Path(self.TEMP_DIR) / job_id
+
+    def cleanup_temp(self, job_id: str) -> None:
+        """Clean up temp directory for job."""
+        temp_path = self.get_temp_path(job_id)
+        if temp_path.exists():
+            try:
+                shutil.rmtree(temp_path)
+                logger.debug("Cleaned up temp directory", job_id=job_id)
+            except Exception as e:
+                logger.warning("Failed to cleanup temp", job_id=job_id, error=str(e))
+
+
+async def run_indexing_job(
+    job_manager: AnonymousIndexingJob,
+    indexer,
+    limiter,
+    job_id: str,
+    session_id: str,
+    github_url: str,
+    owner: str,
+    repo_name: str,
+    branch: str,
+    file_count: int
+) -> None:
+    """
+    Background task to clone and index a repository.
+
+    This runs asynchronously after the endpoint returns.
+    Updates Redis with progress and final status.
+    """
+    import time
+    start_time = time.time()
+    temp_path = job_manager.get_temp_path(job_id)
+    repo_id = job_manager.generate_repo_id(job_id)
+
+    try:
+        # --- Step 1: Clone repository ---
+        job_manager.update_status(job_id, JobStatus.CLONING)
+        logger.info("Cloning repository", job_id=job_id, url=github_url)
+
+        git_url = f"https://github.com/{owner}/{repo_name}.git"
+
+        # Clone in thread pool (git operations are blocking)
+        loop = asyncio.get_event_loop()
+        try:
+            await asyncio.wait_for(
+                loop.run_in_executor(
+                    None,
+                    lambda: git.Repo.clone_from(
+                        git_url,
+                        temp_path,
+                        branch=branch,
+                        depth=1,  # Shallow clone
+                        single_branch=True
+                    )
+                ),
+                timeout=job_manager.CLONE_TIMEOUT_SECONDS
+            )
+        except asyncio.TimeoutError:
+            raise Exception("Clone timed out")
+        except git.GitCommandError as e:
+            raise Exception(f"Clone failed: {str(e)}")
+
+        logger.info("Clone complete", job_id=job_id)
+
+        # --- Step 2: Index repository ---
+        job_manager.update_status(job_id, JobStatus.PROCESSING)
+
+        # Progress callback for real-time updates
+        async def progress_callback(files_processed: int, functions_found: int, total: int):
+            job_manager.update_progress(
+                job_id,
+                files_processed=files_processed,
+                functions_found=functions_found,
+                files_total=total
+            )
+
+        # Run indexing with timeout
+        try:
+            total_functions = await asyncio.wait_for(
+                indexer.index_repository_with_progress(
+                    repo_id,
+                    str(temp_path),
+                    progress_callback
+                ),
+                timeout=job_manager.INDEX_TIMEOUT_SECONDS
+            )
+        except asyncio.TimeoutError:
+            raise Exception("Indexing timed out")
+
+        # --- Step 3: Mark complete ---
+        elapsed = time.time() - start_time
+        stats = JobStats(
+            files_indexed=file_count,
+            functions_found=total_functions,
+            time_taken_seconds=round(elapsed, 2)
+        )
+
+        job_manager.update_status(
+            job_id,
+            JobStatus.COMPLETED,
+            stats=stats,
+            repo_id=repo_id
+        )
+
+        # Store in session for search access
+        job = job_manager.get_job(job_id)
+        if job and limiter:
+            limiter.set_indexed_repo(session_id, {
+                "repo_id": repo_id,
+                "github_url": github_url,
+                "name": repo_name,
+                "file_count": file_count,
+                "indexed_at": datetime.now(timezone.utc).isoformat(),
+                "expires_at": job.get("expires_at"),
+            })
+
+        metrics.increment("anon_indexing_success")
+        logger.info("Indexing complete",
+                    job_id=job_id,
+                    repo_id=repo_id,
+                    functions=total_functions,
+                    elapsed=f"{elapsed:.2f}s")
+
+    except Exception as e:
+        # --- Handle failure ---
+        error_msg = str(e)
+        error_type = "indexing_failed"
+
+        if "timed out" in error_msg.lower():
+            error_type = "timeout"
+        elif "clone" in error_msg.lower():
+            error_type = "clone_failed"
+        elif "rate limit" in error_msg.lower():
+            error_type = "github_rate_limit"
+
+        job_manager.update_status(
+            job_id,
+            JobStatus.FAILED,
+            error=error_type,
+            error_message=error_msg
+        )
+
+        metrics.increment("anon_indexing_failed")
+        logger.error("Indexing failed",
+                     job_id=job_id,
+                     error_type=error_type,
+                     error=error_msg)
+        capture_exception(e, operation="anonymous_indexing", job_id=job_id)
+
+    finally:
+        # --- Always cleanup ---
+        job_manager.cleanup_temp(job_id)

From 1e1ae2571b96d22d18e488ac398d19942b89f0d5 Mon Sep 17 00:00:00 2001
From: Devanshu Rajesh Chicholikar <chicholikar.d@northeastern.edu>
Date: Thu, 25 Dec 2025 22:43:00 -0600
Subject: [PATCH 3/5] fix(playground): add partial indexing support and fix
 bugs (#125)

- Add partial=true parameter to index first 200 files of large repos
- Fix create_session() call - generate token before creating session
- Fix JSON serialization in validation error handler
- Add missing capture_http_exception function to sentry module
- Add comprehensive tests for anonymous indexing (30 tests)

All 169 tests passing.
---
 backend/main.py                          |  13 +-
 backend/routes/playground.py             |  69 ++-
 backend/services/anonymous_indexer.py    |  15 +-
 backend/services/indexer_optimized.py    |  23 +-
 backend/services/sentry.py               |  62 ++-
 backend/tests/test_anonymous_indexing.py | 514 +++++++++++++++++++++++
 6 files changed, 645 insertions(+), 51 deletions(-)
 create mode 100644 backend/tests/test_anonymous_indexing.py

diff --git a/backend/main.py b/backend/main.py
index 6eccc59..83f4b19 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -98,11 +98,22 @@ async def dispatch(self, request: Request, call_next):
 @app.exception_handler(RequestValidationError)
 async def validation_exception_handler(request: Request, exc: RequestValidationError):
     """Handle validation errors with clear messages."""
+    # Convert errors to JSON-serializable format
+    errors = []
+    for err in exc.errors():
+        error_dict = {
+            "type": err.get("type"),
+            "loc": err.get("loc"),
+            "msg": err.get("msg"),
+            "input": str(err.get("input")) if err.get("input") is not None else None,
+        }
+        errors.append(error_dict)
+
     return JSONResponse(
         status_code=422,
         content={
             "detail": "Validation error",
-            "errors": exc.errors()
+            "errors": errors
         }
     )
 
diff --git a/backend/routes/playground.py b/backend/routes/playground.py
index 6ae39b5..778036d 100644
--- a/backend/routes/playground.py
+++ b/backend/routes/playground.py
@@ -76,6 +76,7 @@ class IndexRepoRequest(BaseModel):
     """
     github_url: str
     branch: Optional[str] = None  # None = use repo's default branch
+    partial: bool = False  # If True, index first 200 files of large repos
 
     @field_validator("github_url")
     @classmethod
@@ -653,8 +654,9 @@ async def start_anonymous_indexing(
     client_ip = _get_client_ip(req)
 
     if not session_token:
-        # Create new session
-        session_token = limiter.create_session()
+        # Create new session - generate token first, then create session
+        session_token = limiter._generate_session_token()
+        limiter.create_session(session_token)
         _set_session_cookie(response, session_token)
         logger.info("Created new session for indexing",
                     session_token=session_token[:8],
@@ -767,18 +769,32 @@ async def start_anonymous_indexing(
         file_count = max(repo_size_kb // 3, 1)
 
     # Check file limit
+    is_partial = False
+    files_to_index = file_count
+
     if file_count > ANONYMOUS_FILE_LIMIT:
-        raise HTTPException(
-            status_code=400,
-            detail={
-                "error": "validation_failed",
-                "reason": "too_large",
-                "message": f"Repository has {file_count:,} code files. "
-                           f"Anonymous limit is {ANONYMOUS_FILE_LIMIT}.",
-                "file_count": file_count,
-                "limit": ANONYMOUS_FILE_LIMIT
-            }
-        )
+        if request.partial:
+            # Partial indexing - cap at limit
+            is_partial = True
+            files_to_index = ANONYMOUS_FILE_LIMIT
+            logger.info("Partial indexing enabled",
+                        total_files=file_count,
+                        indexing=files_to_index)
+        else:
+            # Reject large repos without partial flag
+            raise HTTPException(
+                status_code=400,
+                detail={
+                    "error": "validation_failed",
+                    "reason": "too_large",
+                    "message": f"Repository has {file_count:,} code files. "
+                               f"Anonymous limit is {ANONYMOUS_FILE_LIMIT}. "
+                               f"Use partial=true to index first {ANONYMOUS_FILE_LIMIT} files.",
+                    "file_count": file_count,
+                    "limit": ANONYMOUS_FILE_LIMIT,
+                    "hint": "Set partial=true to index a subset of files"
+                }
+            )
 
     # --- Validation passed! Create job and start background indexing ---
 
@@ -796,7 +812,9 @@ async def start_anonymous_indexing(
         owner=owner,
         repo_name=repo_name,
         branch=branch,
-        file_count=file_count
+        file_count=file_count,
+        is_partial=is_partial,
+        max_files=files_to_index
     )
 
     # Queue background task
@@ -811,7 +829,8 @@ async def start_anonymous_indexing(
         owner=owner,
         repo_name=repo_name,
         branch=branch,
-        file_count=file_count
+        file_count=files_to_index,  # Actual files to index (may be capped)
+        max_files=files_to_index if is_partial else None  # Limit for partial indexing
     )
 
     logger.info("Indexing job queued",
@@ -819,17 +838,29 @@ async def start_anonymous_indexing(
                 owner=owner,
                 repo=repo_name,
                 branch=branch,
-                file_count=file_count,
+                file_count=files_to_index,
+                is_partial=is_partial,
                 session_token=session_token[:8],
                 response_time_ms=response_time_ms)
 
     # Estimate time based on file count (~0.3s per file)
-    estimated_seconds = max(10, int(file_count * 0.3))
+    estimated_seconds = max(10, int(files_to_index * 0.3))
 
-    return {
+    response_data = {
         "job_id": job_id,
         "status": "queued",
         "estimated_time_seconds": estimated_seconds,
-        "file_count": file_count,
+        "file_count": files_to_index,
         "message": f"Indexing started. Poll /playground/index/{job_id} for status."
     }
+
+    # Add partial info if applicable
+    if is_partial:
+        response_data["partial"] = True
+        response_data["total_files"] = file_count
+        response_data["message"] = (
+            f"Partial indexing started ({files_to_index} of {file_count} files). "
+            f"Poll /playground/index/{job_id} for status."
+        )
+
+    return response_data
diff --git a/backend/services/anonymous_indexer.py b/backend/services/anonymous_indexer.py
index 4fa9d5c..b8048c6 100644
--- a/backend/services/anonymous_indexer.py
+++ b/backend/services/anonymous_indexer.py
@@ -93,7 +93,9 @@ def create_job(
         owner: str,
         repo_name: str,
         branch: str,
-        file_count: int
+        file_count: int,
+        is_partial: bool = False,
+        max_files: Optional[int] = None
     ) -> dict:
         """
         Create a new indexing job in Redis.
@@ -111,6 +113,8 @@ def create_job(
             "repo_name": repo_name,
             "branch": branch,
             "file_count": file_count,
+            "is_partial": is_partial,
+            "max_files": max_files,
             "status": JobStatus.QUEUED.value,
             "progress": None,
             "stats": None,
@@ -225,13 +229,17 @@ async def run_indexing_job(
     owner: str,
     repo_name: str,
     branch: str,
-    file_count: int
+    file_count: int,
+    max_files: Optional[int] = None
 ) -> None:
     """
     Background task to clone and index a repository.
 
     This runs asynchronously after the endpoint returns.
     Updates Redis with progress and final status.
+
+    Args:
+        max_files: If set, limit indexing to first N files (for partial indexing)
     """
     import time
     start_time = time.time()
@@ -286,7 +294,8 @@ async def progress_callback(files_processed: int, functions_found: int, total: i
                 indexer.index_repository_with_progress(
                     repo_id,
                     str(temp_path),
-                    progress_callback
+                    progress_callback,
+                    max_files=max_files
                 ),
                 timeout=job_manager.INDEX_TIMEOUT_SECONDS
             )
diff --git a/backend/services/indexer_optimized.py b/backend/services/indexer_optimized.py
index df579ee..4879065 100644
--- a/backend/services/indexer_optimized.py
+++ b/backend/services/indexer_optimized.py
@@ -464,17 +464,30 @@ async def explain_code(
             return f"Error: {str(e)}"
 
     async def index_repository_with_progress(
-        self, 
-        repo_id: str, 
+        self,
+        repo_id: str,
         repo_path: str,
-        progress_callback
+        progress_callback,
+        max_files: int = None
     ):
-        """Index repository with real-time progress updates"""
+        """Index repository with real-time progress updates
+
+        Args:
+            max_files: If set, limit indexing to first N files (for partial indexing)
+        """
         start_time = time.time()
         logger.info("Starting optimized indexing with progress", repo_id=repo_id)
-        
+
         # Discover code files
         code_files = self._discover_code_files(repo_path)
+
+        # Apply file limit if specified (partial indexing)
+        if max_files and len(code_files) > max_files:
+            logger.info("Limiting files for partial indexing",
+                        total_discovered=len(code_files),
+                        max_files=max_files)
+            code_files = code_files[:max_files]
+
         total_files = len(code_files)
         logger.info("Found code files", repo_id=repo_id, total_files=total_files)
         
diff --git a/backend/services/sentry.py b/backend/services/sentry.py
index 12508b2..4f7dbdc 100644
--- a/backend/services/sentry.py
+++ b/backend/services/sentry.py
@@ -12,58 +12,58 @@
 def init_sentry() -> bool:
     """
     Initialize Sentry SDK if SENTRY_DSN is configured.
-    
+
     Returns:
         bool: True if Sentry was initialized, False otherwise
     """
     sentry_dsn = os.getenv("SENTRY_DSN")
-    
+
     if not sentry_dsn:
         print("ℹ️  Sentry DSN not configured - error tracking disabled")
         return False
-    
+
     try:
         import sentry_sdk
         from sentry_sdk.integrations.fastapi import FastApiIntegration
         from sentry_sdk.integrations.starlette import StarletteIntegration
-        
+
         environment = os.getenv("ENVIRONMENT", "development")
-        
+
         sentry_sdk.init(
             dsn=sentry_dsn,
             environment=environment,
-            
+
             # Performance monitoring - sample rate based on environment
             traces_sample_rate=0.1 if environment == "production" else 1.0,
-            
+
             # Profile sampled transactions
             profiles_sample_rate=0.1 if environment == "production" else 1.0,
-            
+
             # Send PII for debugging (user IDs, emails)
             send_default_pii=True,
-            
+
             # Integrations
             integrations=[
                 FastApiIntegration(transaction_style="endpoint"),
                 StarletteIntegration(transaction_style="endpoint"),
             ],
-            
+
             # Filter noisy events
             before_send=_filter_events,
-            
+
             # Debug mode for development
             debug=environment == "development",
-            
+
             # Attach stack traces to messages
             attach_stacktrace=True,
-            
+
             # Include local variables in stack traces
             include_local_variables=True,
         )
-        
+
         print(f"✅ Sentry initialized (environment: {environment})")
         return True
-        
+
     except ImportError:
         print("⚠️  sentry-sdk not installed - error tracking disabled")
         return False
@@ -74,12 +74,12 @@ def init_sentry() -> bool:
 
 def _filter_events(event, hint):
     """Filter out noisy events before sending to Sentry."""
-    
+
     # Don't send health check errors
     request_url = event.get("request", {}).get("url", "")
     if "/health" in request_url:
         return None
-    
+
     # Don't send 404s for common bot paths
     exception_values = event.get("exception", {}).get("values", [])
     if exception_values:
@@ -87,13 +87,13 @@ def _filter_events(event, hint):
         bot_paths = ["/wp-admin", "/wp-login", "/.env", "/config", "/admin", "/phpmyadmin", "/.git"]
         if any(path in exception_value for path in bot_paths):
             return None
-    
+
     # Don't send validation errors (they're expected)
     if exception_values:
         exception_type = exception_values[0].get("type", "")
         if exception_type in ("RequestValidationError", "ValidationError"):
             return None
-    
+
     return event
 
 
@@ -104,7 +104,7 @@ def _filter_events(event, hint):
 def set_user_context(user_id: Optional[str] = None, email: Optional[str] = None):
     """
     Set user context for error tracking.
-    
+
     DEPRECATED: Use from services.observability import set_user_context
     """
     try:
@@ -117,7 +117,7 @@ def set_user_context(user_id: Optional[str] = None, email: Optional[str] = None)
 def capture_exception(error: Exception, **extra_context):
     """
     Manually capture an exception with additional context.
-    
+
     DEPRECATED: Use from services.observability import capture_exception
     """
     try:
@@ -133,7 +133,7 @@ def capture_exception(error: Exception, **extra_context):
 def capture_message(message: str, level: str = "info", **extra_context):
     """
     Capture a message (not an exception) for tracking.
-    
+
     DEPRECATED: Use from services.observability import get_logger
     """
     try:
@@ -149,7 +149,7 @@ def capture_message(message: str, level: str = "info", **extra_context):
 def set_operation_context(operation: str, **tags):
     """
     Set operation context for the current scope.
-    
+
     DEPRECATED: Use from services.observability import trace_operation
     """
     try:
@@ -159,3 +159,19 @@ def set_operation_context(operation: str, **tags):
             sentry_sdk.set_tag(key, str(value))
     except ImportError:
         pass
+
+
+def capture_http_exception(request, exc: Exception, status_code: int):
+    """
+    Capture `exc` in Sentry, attaching status_code plus the request path and method.
+    Does nothing when sentry-sdk is not installed (ImportError is swallowed).
+    """
+    try:
+        import sentry_sdk
+        with sentry_sdk.push_scope() as scope:
+            scope.set_extra("status_code", status_code)
+            scope.set_extra("path", str(request.url.path))
+            scope.set_extra("method", request.method)
+            sentry_sdk.capture_exception(exc)
+    except ImportError:
+        pass
diff --git a/backend/tests/test_anonymous_indexing.py b/backend/tests/test_anonymous_indexing.py
new file mode 100644
index 0000000..c2d31a1
--- /dev/null
+++ b/backend/tests/test_anonymous_indexing.py
@@ -0,0 +1,514 @@
+"""
+Tests for anonymous indexing endpoint (Issue #125).
+Tests the POST /playground/index endpoint and related functionality.
+
+Note: These tests rely on conftest.py for Pinecone/OpenAI/Redis mocking.
+"""
+import pytest
+from unittest.mock import AsyncMock, patch, MagicMock
+from datetime import datetime, timezone, timedelta
+import json
+
+# Import directly - conftest.py handles external service mocking
+from routes.playground import (
+    IndexRepoRequest,
+    ANONYMOUS_FILE_LIMIT,
+)
+from services.anonymous_indexer import (
+    AnonymousIndexingJob,
+    JobStatus,
+    JobProgress,
+    JobStats,
+)
+
+
+# =============================================================================
+# REQUEST MODEL TESTS
+# =============================================================================
+
+class TestIndexRepoRequest:
+    """Tests for IndexRepoRequest validation."""
+
+    def test_valid_request(self):
+        """Valid GitHub URL should pass."""
+        req = IndexRepoRequest(github_url="https://github.com/facebook/react")
+        assert req.github_url == "https://github.com/facebook/react"
+        assert req.branch is None
+        assert req.partial is False
+
+    def test_valid_request_with_branch(self):
+        """Request with branch specified."""
+        req = IndexRepoRequest(
+            github_url="https://github.com/user/repo",
+            branch="develop"
+        )
+        assert req.branch == "develop"
+
+    def test_valid_request_with_partial(self):
+        """Request with partial=True."""
+        req = IndexRepoRequest(
+            github_url="https://github.com/user/repo",
+            partial=True
+        )
+        assert req.partial is True
+
+    def test_invalid_empty_url(self):
+        """Empty URL should fail."""
+        with pytest.raises(ValueError) as exc_info:
+            IndexRepoRequest(github_url="")
+        assert "required" in str(exc_info.value).lower()
+
+    def test_invalid_url_no_scheme(self):
+        """URL without http(s) should fail."""
+        with pytest.raises(ValueError) as exc_info:
+            IndexRepoRequest(github_url="github.com/user/repo")
+        assert "http" in str(exc_info.value).lower()
+
+    def test_invalid_url_wrong_domain(self):
+        """Non-GitHub URL should fail."""
+        with pytest.raises(ValueError) as exc_info:
+            IndexRepoRequest(github_url="https://gitlab.com/user/repo")
+        assert "github" in str(exc_info.value).lower()
+
+    def test_url_whitespace_trimmed(self):
+        """Whitespace should be trimmed."""
+        req = IndexRepoRequest(github_url="  https://github.com/user/repo  ")
+        assert req.github_url == "https://github.com/user/repo"
+
+
+# =============================================================================
+# JOB MANAGER TESTS
+# =============================================================================
+
+class TestAnonymousIndexingJob:
+    """Tests for AnonymousIndexingJob service."""
+
+    @pytest.fixture
+    def mock_redis(self):
+        """Create a mock Redis client."""
+        redis = MagicMock()
+        redis.get.return_value = None
+        redis.setex.return_value = True
+        return redis
+
+    @pytest.fixture
+    def job_manager(self, mock_redis):
+        """Create job manager with mock Redis."""
+        return AnonymousIndexingJob(mock_redis)
+
+    def test_generate_job_id_format(self, job_manager):
+        """Job ID should have correct format."""
+        job_id = job_manager.generate_job_id()
+        assert job_id.startswith("idx_")
+        assert len(job_id) == 16  # idx_ + 12 hex chars
+
+    def test_generate_job_id_unique(self, job_manager):
+        """Each job ID should be unique."""
+        ids = [job_manager.generate_job_id() for _ in range(100)]
+        assert len(set(ids)) == 100
+
+    def test_generate_repo_id(self, job_manager):
+        """Repo ID derived from job ID."""
+        repo_id = job_manager.generate_repo_id("idx_abc123def456")
+        assert repo_id == "anon_abc123def456"
+
+    def test_create_job(self, job_manager, mock_redis):
+        """Create job stores data in Redis."""
+        job_data = job_manager.create_job(
+            job_id="idx_test123456",
+            session_id="session_abc",
+            github_url="https://github.com/user/repo",
+            owner="user",
+            repo_name="repo",
+            branch="main",
+            file_count=50
+        )
+
+        # Check return data
+        assert job_data["job_id"] == "idx_test123456"
+        assert job_data["session_id"] == "session_abc"
+        assert job_data["status"] == "queued"
+        assert job_data["file_count"] == 50
+
+        # Check Redis was called
+        mock_redis.setex.assert_called_once()
+        call_args = mock_redis.setex.call_args
+        assert "anon_job:idx_test123456" in call_args[0]
+
+    def test_get_job_exists(self, job_manager, mock_redis):
+        """Get existing job from Redis."""
+        mock_redis.get.return_value = json.dumps({
+            "job_id": "idx_test123456",
+            "status": "processing"
+        })
+
+        job = job_manager.get_job("idx_test123456")
+        assert job is not None
+        assert job["status"] == "processing"
+
+    def test_get_job_not_found(self, job_manager, mock_redis):
+        """Get non-existent job returns None."""
+        mock_redis.get.return_value = None
+        job = job_manager.get_job("idx_nonexistent")
+        assert job is None
+
+    def test_update_status(self, job_manager, mock_redis):
+        """Update job status in Redis."""
+        # Setup existing job
+        mock_redis.get.return_value = json.dumps({
+            "job_id": "idx_test123456",
+            "status": "queued",
+            "updated_at": "2025-01-01T00:00:00Z"
+        })
+
+        result = job_manager.update_status(
+            "idx_test123456",
+            JobStatus.PROCESSING
+        )
+
+        assert result is True
+        # Check Redis setex was called to update
+        assert mock_redis.setex.called
+
+    def test_update_status_with_progress(self, job_manager, mock_redis):
+        """Update status with progress data."""
+        mock_redis.get.return_value = json.dumps({
+            "job_id": "idx_test123456",
+            "status": "cloning"
+        })
+
+        progress = JobProgress(
+            files_total=100,
+            files_processed=50,
+            functions_found=200
+        )
+
+        result = job_manager.update_status(
+            "idx_test123456",
+            JobStatus.PROCESSING,
+            progress=progress
+        )
+
+        assert result is True
+
+    def test_update_status_completed_with_stats(self, job_manager, mock_redis):
+        """Update status to completed with stats."""
+        mock_redis.get.return_value = json.dumps({
+            "job_id": "idx_test123456",
+            "status": "processing"
+        })
+
+        stats = JobStats(
+            files_indexed=100,
+            functions_found=500,
+            time_taken_seconds=45.5
+        )
+
+        result = job_manager.update_status(
+            "idx_test123456",
+            JobStatus.COMPLETED,
+            stats=stats,
+            repo_id="anon_test123456"
+        )
+
+        assert result is True
+
+    def test_update_status_failed_with_error(self, job_manager, mock_redis):
+        """Update status to failed with error."""
+        mock_redis.get.return_value = json.dumps({
+            "job_id": "idx_test123456",
+            "status": "cloning"
+        })
+
+        result = job_manager.update_status(
+            "idx_test123456",
+            JobStatus.FAILED,
+            error="clone_failed",
+            error_message="Repository not found"
+        )
+
+        assert result is True
+
+
+# =============================================================================
+# JOB DATACLASS TESTS
+# =============================================================================
+
+class TestJobDataclasses:
+    """Tests for JobProgress and JobStats."""
+
+    def test_job_progress_to_dict(self):
+        """JobProgress converts to dict correctly."""
+        progress = JobProgress(
+            files_total=100,
+            files_processed=50,
+            functions_found=200,
+            current_file="src/index.ts"
+        )
+        d = progress.to_dict()
+        assert d["files_total"] == 100
+        assert d["files_processed"] == 50
+        assert d["current_file"] == "src/index.ts"
+
+    def test_job_progress_none_excluded(self):
+        """JobProgress excludes None values."""
+        progress = JobProgress(files_total=100)
+        d = progress.to_dict()
+        assert "current_file" not in d
+
+    def test_job_stats_to_dict(self):
+        """JobStats converts to dict correctly."""
+        stats = JobStats(
+            files_indexed=100,
+            functions_found=500,
+            time_taken_seconds=45.5
+        )
+        d = stats.to_dict()
+        assert d["files_indexed"] == 100
+        assert d["time_taken_seconds"] == 45.5
+
+
+# =============================================================================
+# ENDPOINT TESTS (Integration)
+# =============================================================================
+
+class TestIndexEndpoint:
+    """Integration tests for POST /playground/index."""
+
+    @pytest.fixture
+    def client(self):
+        """Create test client."""
+        from fastapi.testclient import TestClient
+        from main import app
+        return TestClient(app)
+
+    def test_invalid_url_returns_400(self, client):
+        """Invalid GitHub URL is rejected with 422 by Pydantic validation."""
+        response = client.post(
+            "/api/v1/playground/index",
+            json={"github_url": "not-a-valid-url"}
+        )
+        assert response.status_code == 422  # Pydantic validation
+
+    def test_missing_url_returns_422(self, client):
+        """Missing github_url returns 422."""
+        response = client.post(
+            "/api/v1/playground/index",
+            json={}
+        )
+        assert response.status_code == 422
+
+    @patch('routes.playground._fetch_repo_metadata')
+    @patch('routes.playground._count_code_files')
+    def test_private_repo_returns_400(
+        self, mock_count, mock_metadata, client
+    ):
+        """Private repository returns 400."""
+        mock_metadata.return_value = {"private": True, "name": "repo"}
+        mock_count.return_value = (50, None)
+
+        response = client.post(
+            "/api/v1/playground/index",
+            json={"github_url": "https://github.com/user/private-repo"}
+        )
+
+        assert response.status_code == 400
+        assert "private" in response.json()["detail"]["reason"]
+
+    @patch('routes.playground._fetch_repo_metadata')
+    @patch('routes.playground._count_code_files')
+    def test_too_large_repo_without_partial_returns_400(
+        self, mock_count, mock_metadata, client
+    ):
+        """Large repo without partial=true returns 400 with hint."""
+        mock_metadata.return_value = {
+            "private": False,
+            "name": "large-repo",
+            "default_branch": "main"
+        }
+        mock_count.return_value = (500, None)  # Over 200 limit
+
+        response = client.post(
+            "/api/v1/playground/index",
+            json={"github_url": "https://github.com/user/large-repo"}
+        )
+
+        assert response.status_code == 400
+        detail = response.json()["detail"]
+        assert detail["reason"] == "too_large"
+        assert "partial" in detail.get("hint", "").lower()
+
+    @patch('routes.playground._fetch_repo_metadata')
+    @patch('routes.playground._count_code_files')
+    @patch('routes.playground.AnonymousIndexingJob')
+    def test_large_repo_with_partial_returns_202(
+        self, mock_job_class, mock_count, mock_metadata, client
+    ):
+        """Large repo with partial=true returns 202."""
+        mock_metadata.return_value = {
+            "private": False,
+            "name": "large-repo",
+            "default_branch": "main"
+        }
+        mock_count.return_value = (500, None)
+
+        # Mock job manager
+        mock_job_manager = MagicMock()
+        mock_job_manager.generate_job_id.return_value = "idx_test123456"
+        mock_job_manager.create_job.return_value = {"job_id": "idx_test123456"}
+        mock_job_class.return_value = mock_job_manager
+
+        response = client.post(
+            "/api/v1/playground/index",
+            json={
+                "github_url": "https://github.com/user/large-repo",
+                "partial": True
+            }
+        )
+
+        assert response.status_code == 202
+        data = response.json()
+        assert data["job_id"] == "idx_test123456"
+        assert data["partial"] is True
+        assert data["file_count"] == ANONYMOUS_FILE_LIMIT  # Capped at 200
+
+    @patch('routes.playground._fetch_repo_metadata')
+    @patch('routes.playground._count_code_files')
+    @patch('routes.playground.AnonymousIndexingJob')
+    def test_valid_request_returns_202_with_job_id(
+        self, mock_job_class, mock_count, mock_metadata, client
+    ):
+        """Valid request returns 202 with job_id."""
+        mock_metadata.return_value = {
+            "private": False,
+            "name": "repo",
+            "default_branch": "main"
+        }
+        mock_count.return_value = (50, None)
+
+        mock_job_manager = MagicMock()
+        mock_job_manager.generate_job_id.return_value = "idx_abc123def456"
+        mock_job_manager.create_job.return_value = {"job_id": "idx_abc123def456"}
+        mock_job_class.return_value = mock_job_manager
+
+        response = client.post(
+            "/api/v1/playground/index",
+            json={"github_url": "https://github.com/user/repo"}
+        )
+
+        assert response.status_code == 202
+        data = response.json()
+        assert data["job_id"] == "idx_abc123def456"
+        assert data["status"] == "queued"
+        assert "estimated_time_seconds" in data
+
+    @patch('routes.playground._fetch_repo_metadata')
+    def test_repo_not_found_returns_400(self, mock_metadata, client):
+        """Repository not found returns 400."""
+        mock_metadata.return_value = {"error": "not_found"}
+
+        response = client.post(
+            "/api/v1/playground/index",
+            json={"github_url": "https://github.com/user/nonexistent"}
+        )
+
+        assert response.status_code == 400
+        assert response.json()["detail"]["reason"] == "not_found"
+
+    @patch('routes.playground._fetch_repo_metadata')
+    def test_github_rate_limit_returns_429(self, mock_metadata, client):
+        """GitHub rate limit returns 429."""
+        mock_metadata.return_value = {"error": "rate_limited"}
+
+        response = client.post(
+            "/api/v1/playground/index",
+            json={"github_url": "https://github.com/user/repo"}
+        )
+
+        assert response.status_code == 429
+
+
+# =============================================================================
+# SESSION CONFLICT TESTS
+# =============================================================================
+
+class TestSessionConflict:
+    """Tests for session-already-has-repo behavior."""
+
+    @pytest.fixture
+    def client(self):
+        from fastapi.testclient import TestClient
+        from main import app
+        return TestClient(app)
+
+    @patch('routes.playground._fetch_repo_metadata')
+    @patch('routes.playground._count_code_files')
+    @patch('routes.playground._get_limiter')
+    def test_session_with_existing_repo_returns_409(
+        self, mock_get_limiter, mock_count, mock_metadata, client
+    ):
+        """Session with existing indexed repo returns 409."""
+        mock_metadata.return_value = {
+            "private": False,
+            "name": "repo",
+            "default_branch": "main"
+        }
+        mock_count.return_value = (50, None)
+
+        # Mock limiter with existing indexed repo
+        mock_limiter = MagicMock()
+        mock_session_data = MagicMock()
+        mock_session_data.indexed_repo = {
+            "repo_id": "existing_repo",
+            "expires_at": (datetime.now(timezone.utc) + timedelta(hours=12)).isoformat()
+        }
+        mock_limiter.get_session_data.return_value = mock_session_data
+        mock_limiter.create_session.return_value = "test_session"
+        mock_get_limiter.return_value = mock_limiter
+
+        response = client.post(
+            "/api/v1/playground/index",
+            json={"github_url": "https://github.com/user/repo"}
+        )
+
+        assert response.status_code == 409
+        assert response.json()["detail"]["error"] == "already_indexed"
+
+    @patch('routes.playground._fetch_repo_metadata')
+    @patch('routes.playground._count_code_files')
+    @patch('routes.playground._get_limiter')
+    @patch('routes.playground.AnonymousIndexingJob')
+    def test_expired_repo_allows_new_indexing(
+        self, mock_job_class, mock_get_limiter, mock_count, mock_metadata, client
+    ):
+        """Expired indexed repo allows new indexing."""
+        mock_metadata.return_value = {
+            "private": False,
+            "name": "repo",
+            "default_branch": "main"
+        }
+        mock_count.return_value = (50, None)
+
+        # Mock limiter with expired indexed repo
+        mock_limiter = MagicMock()
+        mock_session_data = MagicMock()
+        mock_session_data.indexed_repo = {
+            "repo_id": "old_repo",
+            "expires_at": (datetime.now(timezone.utc) - timedelta(hours=1)).isoformat()
+        }
+        mock_limiter.get_session_data.return_value = mock_session_data
+        mock_limiter.create_session.return_value = "test_session"
+        mock_get_limiter.return_value = mock_limiter
+
+        mock_job_manager = MagicMock()
+        mock_job_manager.generate_job_id.return_value = "idx_new123456"
+        mock_job_manager.create_job.return_value = {"job_id": "idx_new123456"}
+        mock_job_class.return_value = mock_job_manager
+
+        response = client.post(
+            "/api/v1/playground/index",
+            json={"github_url": "https://github.com/user/repo"}
+        )
+
+        assert response.status_code == 202
+        assert response.json()["job_id"] == "idx_new123456"

From 5dd044c211d290b1963a99df421a57a2d9aa4460 Mon Sep 17 00:00:00 2001
From: Devanshu Rajesh Chicholikar <chicholikar.d@northeastern.edu>
Date: Thu, 25 Dec 2025 23:52:29 -0600
Subject: [PATCH 4/5] feat(playground): add GET /index/{job_id} status endpoint
 (#125)

- Poll endpoint for job status (queued/cloning/processing/completed/failed)
- Returns progress with percent_complete during processing
- Returns repo_id on completion for search access
- Returns error details on failure
- Handles partial indexing info
- 7 new tests for status endpoint

Checkpoint 3 complete. 176 tests passing.
---
 backend/routes/playground.py             | 158 +++++++++++++++++++++
 backend/tests/test_anonymous_indexing.py | 169 ++++++++++++++++++++++-
 2 files changed, 326 insertions(+), 1 deletion(-)

diff --git a/backend/routes/playground.py b/backend/routes/playground.py
index 778036d..7f306c4 100644
--- a/backend/routes/playground.py
+++ b/backend/routes/playground.py
@@ -864,3 +864,161 @@ async def start_anonymous_indexing(
         )
 
     return response_data
+
+
+# =============================================================================
+# GET /playground/index/{job_id} - Check indexing job status (#126)
+# =============================================================================
+
+@router.get(
+    "/index/{job_id}",
+    summary="Check indexing job status",
+    description="Poll this endpoint to check the status of an indexing job.",
+    responses={
+        200: {
+            "description": "Job status",
+            "content": {
+                "application/json": {
+                    "examples": {
+                        "queued": {
+                            "value": {
+                                "job_id": "idx_abc123",
+                                "status": "queued",
+                                "message": "Job is queued for processing"
+                            }
+                        },
+                        "processing": {
+                            "value": {
+                                "job_id": "idx_abc123",
+                                "status": "processing",
+                                "progress": {
+                                    "files_processed": 50,
+                                    "files_total": 100,
+                                    "functions_found": 250,
+                                    "percent_complete": 50
+                                }
+                            }
+                        },
+                        "completed": {
+                            "value": {
+                                "job_id": "idx_abc123",
+                                "status": "completed",
+                                "repo_id": "anon_abc123",
+                                "stats": {
+                                    "files_indexed": 100,
+                                    "functions_found": 500,
+                                    "time_taken_seconds": 45.2
+                                }
+                            }
+                        },
+                        "failed": {
+                            "value": {
+                                "job_id": "idx_abc123",
+                                "status": "failed",
+                                "error": "clone_failed",
+                                "error_message": "Repository not found"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        400: {"description": "Invalid job ID format"},
+        404: {"description": "Job not found or expired"}
+    }
+)
+async def get_indexing_status(job_id: str, req: Request):
+    """
+    Check the status of an anonymous indexing job.
+
+    Poll this endpoint after starting an indexing job; jobs expire after 1 hour.
+
+    Status values:
+    - queued: Job is waiting to start
+    - cloning: Repository is being cloned
+    - processing: Files are being indexed
+    - completed: Indexing finished successfully
+    - failed: Indexing failed (check error field)
+
+    Raises HTTPException 400 for a malformed job_id and 404 for an unknown job.
+    """
+    # Validate job_id format
+    if not job_id or not job_id.startswith("idx_"):
+        raise HTTPException(
+            status_code=400,
+            detail={
+                "error": "invalid_job_id",
+                "message": "Invalid job ID format"
+            }
+        )
+
+    # Get job from Redis
+    job_manager = AnonymousIndexingJob(redis_client)
+    job = job_manager.get_job(job_id)
+
+    if not job:
+        raise HTTPException(
+            status_code=404,
+            detail={
+                "error": "job_not_found",
+                "message": "Job not found or has expired. Jobs expire after 1 hour."
+            }
+        )
+
+    # Build response based on status
+    status = job.get("status", "unknown")
+    response = {
+        "job_id": job_id,
+        "status": status,
+        "created_at": job.get("created_at"),
+        "updated_at": job.get("updated_at"),
+    }
+
+    # Add repo info
+    response["repository"] = {
+        "owner": job.get("owner"),
+        "name": job.get("repo_name"),
+        "branch": job.get("branch"),
+        "github_url": job.get("github_url"),
+    }
+
+    # Add partial info if applicable
+    if job.get("is_partial"):
+        response["partial"] = True
+        response["max_files"] = job.get("max_files")
+
+    # Status-specific fields
+    if status == "queued":
+        response["message"] = "Job is queued for processing"
+
+    elif status == "cloning":
+        response["message"] = "Cloning repository..."
+
+    elif status == "processing":
+        response["message"] = "Indexing files..."
+        if job.get("progress"):
+            progress = job["progress"]
+            files_processed = progress.get("files_processed", 0)
+            files_total = progress.get("files_total", 1)
+            percent = round((files_processed / files_total) * 100) if files_total > 0 else 0
+            response["progress"] = {
+                "files_processed": files_processed,
+                "files_total": files_total,
+                "functions_found": progress.get("functions_found", 0),
+                "percent_complete": percent,
+                "current_file": progress.get("current_file")
+            }
+
+    elif status == "completed":
+        response["message"] = "Indexing completed successfully"
+        response["repo_id"] = job.get("repo_id")
+        if job.get("stats"):
+            response["stats"] = job["stats"]
+
+    elif status == "failed":
+        # Use `or`, not a .get() default: a key stored as null must still fall back
+        response["message"] = job.get("error_message") or "Indexing failed"
+        response["error"] = job.get("error") or "unknown_error"
+        response["error_message"] = job.get("error_message")
+
+    return response
diff --git a/backend/tests/test_anonymous_indexing.py b/backend/tests/test_anonymous_indexing.py
index c2d31a1..ea36007 100644
--- a/backend/tests/test_anonymous_indexing.py
+++ b/backend/tests/test_anonymous_indexing.py
@@ -5,7 +5,7 @@
 Note: These tests rely on conftest.py for Pinecone/OpenAI/Redis mocking.
 """
 import pytest
-from unittest.mock import AsyncMock, patch, MagicMock
+from unittest.mock import patch, MagicMock
 from datetime import datetime, timezone, timedelta
 import json
 
@@ -512,3 +512,170 @@ def test_expired_repo_allows_new_indexing(
 
         assert response.status_code == 202
         assert response.json()["job_id"] == "idx_new123456"
+
+
+# =============================================================================
+# STATUS ENDPOINT TESTS (GET /playground/index/{job_id})
+# =============================================================================
+
+class TestStatusEndpoint:
+    """Exercise GET /playground/index/{job_id}: ID validation, lookup, and per-status payloads."""
+
+    @pytest.fixture
+    def client(self):
+        """Return a FastAPI TestClient wrapping the application imported from main."""
+        from fastapi.testclient import TestClient
+        from main import app
+        return TestClient(app)
+
+    def test_invalid_job_id_format_returns_400(self, client):
+        """A job ID that fails format validation yields 400 with error 'invalid_job_id'."""
+        response = client.get("/api/v1/playground/index/invalid_format")
+        assert response.status_code == 400
+        assert response.json()["detail"]["error"] == "invalid_job_id"
+
+    def test_job_not_found_returns_404(self, client):
+        """An unknown job ID yields 404 with error 'job_not_found' (no job manager patching here)."""
+        response = client.get("/api/v1/playground/index/idx_nonexistent123")
+        assert response.status_code == 404
+        assert response.json()["detail"]["error"] == "job_not_found"
+
+    @patch('routes.playground.AnonymousIndexingJob')
+    def test_queued_job_returns_status(self, mock_job_class, client):
+        """A queued job yields 200 with status 'queued' and the queued message."""
+        mock_job_manager = MagicMock()
+        mock_job_manager.get_job.return_value = {
+            "job_id": "idx_test123456",
+            "status": "queued",
+            "owner": "user",
+            "repo_name": "repo",
+            "branch": "main",
+            "github_url": "https://github.com/user/repo",
+            "created_at": "2024-01-01T00:00:00Z",
+            "updated_at": "2024-01-01T00:00:00Z",
+        }
+        mock_job_class.return_value = mock_job_manager  # calling the patched class returns the mock
+
+        response = client.get("/api/v1/playground/index/idx_test123456")
+
+        assert response.status_code == 200
+        data = response.json()
+        assert data["status"] == "queued"
+        assert data["message"] == "Job is queued for processing"
+
+    @patch('routes.playground.AnonymousIndexingJob')
+    def test_processing_job_returns_progress(self, mock_job_class, client):
+        """A processing job yields 200 with a progress object including percent_complete."""
+        mock_job_manager = MagicMock()
+        mock_job_manager.get_job.return_value = {
+            "job_id": "idx_test123456",
+            "status": "processing",
+            "owner": "user",
+            "repo_name": "repo",
+            "branch": "main",
+            "github_url": "https://github.com/user/repo",
+            "created_at": "2024-01-01T00:00:00Z",
+            "updated_at": "2024-01-01T00:00:01Z",
+            "progress": {
+                "files_processed": 50,
+                "files_total": 100,
+                "functions_found": 250,
+                "current_file": "src/index.ts"
+            }
+        }
+        mock_job_class.return_value = mock_job_manager
+
+        response = client.get("/api/v1/playground/index/idx_test123456")
+
+        assert response.status_code == 200
+        data = response.json()
+        assert data["status"] == "processing"
+        assert data["progress"]["files_processed"] == 50
+        assert data["progress"]["percent_complete"] == 50  # 50 of 100 files processed
+
+    @patch('routes.playground.AnonymousIndexingJob')
+    def test_completed_job_returns_repo_id(self, mock_job_class, client):
+        """A completed job yields 200 with the job's repo_id and its stats passed through."""
+        mock_job_manager = MagicMock()
+        mock_job_manager.get_job.return_value = {
+            "job_id": "idx_test123456",
+            "status": "completed",
+            "owner": "user",
+            "repo_name": "repo",
+            "branch": "main",
+            "github_url": "https://github.com/user/repo",
+            "repo_id": "anon_idx_test123456",
+            "created_at": "2024-01-01T00:00:00Z",
+            "updated_at": "2024-01-01T00:01:00Z",
+            "stats": {
+                "files_indexed": 100,
+                "functions_found": 500,
+                "time_taken_seconds": 45.2
+            }
+        }
+        mock_job_class.return_value = mock_job_manager
+
+        response = client.get("/api/v1/playground/index/idx_test123456")
+
+        assert response.status_code == 200
+        data = response.json()
+        assert data["status"] == "completed"
+        assert data["repo_id"] == "anon_idx_test123456"
+        assert data["stats"]["files_indexed"] == 100
+
+    @patch('routes.playground.AnonymousIndexingJob')
+    def test_failed_job_returns_error(self, mock_job_class, client):
+        """A failed job still yields HTTP 200, carrying the error code and error message."""
+        mock_job_manager = MagicMock()
+        mock_job_manager.get_job.return_value = {
+            "job_id": "idx_test123456",
+            "status": "failed",
+            "owner": "user",
+            "repo_name": "repo",
+            "branch": "main",
+            "github_url": "https://github.com/user/repo",
+            "error": "clone_failed",
+            "error_message": "Repository not found or access denied",
+            "created_at": "2024-01-01T00:00:00Z",
+            "updated_at": "2024-01-01T00:00:30Z",
+        }
+        mock_job_class.return_value = mock_job_manager
+
+        response = client.get("/api/v1/playground/index/idx_test123456")
+
+        assert response.status_code == 200  # the failure is reported in the body, not the HTTP status
+        data = response.json()
+        assert data["status"] == "failed"
+        assert data["error"] == "clone_failed"
+        assert "not found" in data["error_message"].lower()
+
+    @patch('routes.playground.AnonymousIndexingJob')
+    def test_partial_job_includes_partial_info(self, mock_job_class, client):
+        """A job flagged is_partial yields 'partial': True and echoes max_files in the response."""
+        mock_job_manager = MagicMock()
+        mock_job_manager.get_job.return_value = {
+            "job_id": "idx_test123456",
+            "status": "processing",
+            "owner": "user",
+            "repo_name": "large-repo",
+            "branch": "main",
+            "github_url": "https://github.com/user/large-repo",
+            "is_partial": True,
+            "max_files": 200,
+            "file_count": 500,
+            "created_at": "2024-01-01T00:00:00Z",
+            "updated_at": "2024-01-01T00:00:10Z",
+            "progress": {
+                "files_processed": 100,
+                "files_total": 200,
+                "functions_found": 400
+            }
+        }
+        mock_job_class.return_value = mock_job_manager
+
+        response = client.get("/api/v1/playground/index/idx_test123456")
+
+        assert response.status_code == 200
+        data = response.json()
+        assert data["partial"] is True
+        assert data["max_files"] == 200

From ae605cc62c305661d635e8c62120a0d61d7a825c Mon Sep 17 00:00:00 2001
From: Devanshu Rajesh Chicholikar <chicholikar.d@northeastern.edu>
Date: Fri, 26 Dec 2025 00:10:36 -0600
Subject: [PATCH 5/5] fix(security): don't expose exception details to client

CodeQL flagged information exposure through an exception message.
Return the generic string 'error' instead of str(e) so that internal
details are not leaked to the client. The detailed error is still logged server-side.
---
 backend/routes/playground.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/backend/routes/playground.py b/backend/routes/playground.py
index 7f306c4..f12556f 100644
--- a/backend/routes/playground.py
+++ b/backend/routes/playground.py
@@ -483,8 +483,9 @@ async def _count_code_files(
         except httpx.TimeoutException:
             return 0, "GitHub API request timed out"
         except Exception as e:
+            # Log detailed error server-side, but don't expose to client
             logger.error("GitHub tree API failed", error=str(e))
-            return 0, str(e)
+            return 0, "error"
 
 
 @router.post("/validate-repo")