diff --git a/.env.example b/.env.example
index 1fb5b44..1b9bb71 100644
--- a/.env.example
+++ b/.env.example
@@ -27,6 +27,13 @@ SUPABASE_SERVICE_ROLE_KEY=eyJ...
 # Backend API
 API_KEY=change-this-secret-key-for-production
 BACKEND_API_URL=http://backend:8000
+FRONTEND_URL=http://localhost:3000
+
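+# GitHub OAuth (required for GitHub repo import; separate from the Supabase GitHub login OAuth app)
+# Create an OAuth App at https://github.com/settings/developers (its callback URL must match GITHUB_REDIRECT_URI exactly)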
+GITHUB_CLIENT_ID=
+GITHUB_CLIENT_SECRET=
+GITHUB_REDIRECT_URI=http://localhost:3000/auth/github/callback
 
 # CORS Configuration (Security)
 # Comma-separated list of allowed origins
@@ -39,10 +46,15 @@ REDIS_HOST=redis
 REDIS_PORT=6379
 
 # Sentry Error Tracking (Optional but recommended for production)
-# Get DSN from: https://sentry.io → Settings → Projects → Client Keys
+# Get DSN from: https://sentry.io -> Settings -> Projects -> Client Keys
 SENTRY_DSN=
+SENTRY_SEND_PII=false
+SENTRY_INCLUDE_LOCAL_VARS=false
 ENVIRONMENT=development  # development, staging, production
 
+# Discord Webhook (Optional - receives user feedback and waitlist signups; create under Channel Settings -> Integrations -> Webhooks)
+DISCORD_FEEDBACK_WEBHOOK=
+
 # Search V2 Configuration
 # Cohere API for reranking (Optional - improves search quality)
 # Get from: https://dashboard.cohere.com/api-keys
diff --git a/.gitignore b/.gitignore
index a0ef7da..2b2dd7b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,11 @@ __pycache__/
 venv/
 env/
 
+# Test/coverage artifacts
+.coverage
+htmlcov/
+.pytest_cache/
+
 # Node
 node_modules/
 dist/
diff --git a/backend/.env.example b/backend/.env.example
deleted file mode 100644
index dc8f3b1..0000000
--- a/backend/.env.example
+++ /dev/null
@@ -1,96 +0,0 @@
-# =============================================================================
-# OpenCodeIntel Backend Environment Variables
-# =============================================================================
-
-# -----------------------------------------------------------------------------
-# AI & Search APIs
-# -----------------------------------------------------------------------------
-
-# OpenAI - for embeddings and AI features
-OPENAI_API_KEY=your_openai_api_key_here
-
-# Pinecone - vector database for semantic search
-PINECONE_API_KEY=your_pinecone_api_key_here
-PINECONE_INDEX_NAME=codeintel
-
-# Cohere - reranking for search quality (optional but recommended)
-COHERE_API_KEY=your_cohere_api_key_here
-
-# Voyage AI - code-specific embeddings (recommended for code search)
-# Get API key from https://dash.voyageai.com/
-VOYAGE_API_KEY=your_voyage_api_key_here
-
-# -----------------------------------------------------------------------------
-# Supabase - Authentication & Database
-# -----------------------------------------------------------------------------
-
-SUPABASE_URL=https://your-project.supabase.co
-SUPABASE_ANON_KEY=your_supabase_anon_key_here
-SUPABASE_JWT_SECRET=your_jwt_secret_here
-
-# Service role key - required for server-side database access (e.g., storing GitHub tokens)
-# Get from Supabase Dashboard → Settings → API → service_role key
-SUPABASE_SERVICE_ROLE_KEY=your_service_role_key_here
-
-# -----------------------------------------------------------------------------
-# GitHub OAuth - One-Click Repo Import Feature
-# -----------------------------------------------------------------------------
-# IMPORTANT: This is SEPARATE from Supabase GitHub login!
-# 
-# Supabase login uses its own OAuth app (configured in Supabase Dashboard).
-# This OAuth app is ONLY for importing repositories from GitHub.
-#
-# Setup:
-# 1. Go to GitHub → Settings → Developer settings → OAuth Apps → New OAuth App
-# 2. Create app with name like "YourApp Repo Import"
-# 3. Set callback URL based on environment:
-#    - Development: http://localhost:3000/github/callback
-#    - Production:  https://yourdomain.com/github/callback
-# 4. Copy Client ID and generate Client Secret
-#
-# You may need separate OAuth apps for dev and production (different callback URLs)
-
-GITHUB_CLIENT_ID=your_github_oauth_app_client_id
-GITHUB_CLIENT_SECRET=your_github_oauth_app_client_secret
-
-# Must match EXACTLY what you set in GitHub OAuth App settings
-GITHUB_REDIRECT_URI=http://localhost:3000/github/callback
-
-# Frontend URL for redirects after OAuth
-FRONTEND_URL=http://localhost:3000
-
-# -----------------------------------------------------------------------------
-# Backend Configuration
-# -----------------------------------------------------------------------------
-
-BACKEND_API_URL=http://localhost:8000
-API_KEY=dev-secret-key
-
-# CORS - allowed frontend origins (comma-separated for multiple)
-ALLOWED_ORIGINS=http://localhost:3000
-
-# -----------------------------------------------------------------------------
-# Redis Cache (Optional)
-# -----------------------------------------------------------------------------
-
-REDIS_HOST=localhost
-REDIS_PORT=6379
-
-# -----------------------------------------------------------------------------
-# Monitoring & Debugging
-# -----------------------------------------------------------------------------
-
-# Sentry error tracking (optional)
-# Get DSN from https://sentry.io → Settings → Projects → Client Keys
-SENTRY_DSN=
-
-# Environment identifier
-ENVIRONMENT=development
-
-# -----------------------------------------------------------------------------
-# Discord Webhooks - Feedback & Waitlist
-# -----------------------------------------------------------------------------
-
-# Discord webhook for receiving user feedback and waitlist signups
-# Create at: Discord Server → Channel Settings → Integrations → Webhooks
-DISCORD_FEEDBACK_WEBHOOK=https://discord.com/api/webhooks/your_webhook_id/your_webhook_token
diff --git a/backend/config/api.py b/backend/config/api.py
index 1c2a770..d231006 100644
--- a/backend/config/api.py
+++ b/backend/config/api.py
@@ -2,43 +2,7 @@
 API Configuration - Single Source of Truth for API Versioning
 
 Change API_VERSION here to update all routes across the application.
-Example: "v1" -> "v2" will change /api/v1/* to /api/v2/*
 """
 
-# API VERSION CONFIGURATION
-
 API_VERSION = "v1"
-
-# DERIVED PREFIXES (auto-calculated from version)
-
-# Current versioned API prefix: /api/v1
 API_PREFIX = f"/api/{API_VERSION}"
-
-# Legacy prefix for backward compatibility: /api
-# Routes here will be deprecated but still functional
-LEGACY_API_PREFIX = "/api"
-
-# DEPRECATION SETTINGS
-
-# When True, legacy routes (/api/*) will include deprecation warning headers
-LEGACY_DEPRECATION_ENABLED = True
-
-# Header to add on deprecated routes
-DEPRECATION_HEADER = "X-API-Deprecated"
-DEPRECATION_MESSAGE = f"This endpoint is deprecated. Please use {API_PREFIX} instead."
-
-# HELPER FUNCTIONS
-
-def get_versioned_prefix() -> str:
-    """Get the current versioned API prefix."""
-    return API_PREFIX
-
-
-def get_legacy_prefix() -> str:
-    """Get the legacy (deprecated) API prefix."""
-    return LEGACY_API_PREFIX
-
-
-def is_legacy_route(path: str) -> bool:
-    """Check if a route path is using the legacy prefix."""
-    return path.startswith(LEGACY_API_PREFIX) and not path.startswith(API_PREFIX)
diff --git a/backend/routes/__init__.py b/backend/routes/__init__.py
index 993ab14..fe01b3d 100644
--- a/backend/routes/__init__.py
+++ b/backend/routes/__init__.py
@@ -1,4 +1 @@
-"""API Routes package"""
-from .auth import router as auth_router
-
-__all__ = ["auth_router"]
+"""API Routes package."""
diff --git a/backend/scripts/benchmark_search_v3.py b/backend/scripts/benchmark_search_v3.py
deleted file mode 100644
index 8da31de..0000000
--- a/backend/scripts/benchmark_search_v3.py
+++ /dev/null
@@ -1,247 +0,0 @@
-#!/usr/bin/env python3
-"""
-Search V3 vs V2 Benchmark
-Run with: python3 scripts/benchmark_search_v3.py
-
-Compares:
-- V2 (OpenAI embeddings + Cohere reranking)
-- V3 (Voyage AI embeddings + Query Understanding + Code Graph + Cohere reranking)
-"""
-import asyncio
-import os
-import sys
-import time
-from typing import List, Dict, Tuple
-
-# add parent to path
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from dotenv import load_dotenv
-load_dotenv()
-
-from services.indexer_optimized import OptimizedCodeIndexer
-
-# Test queries representing real developer scenarios
-TEST_QUERIES = [
-    {
-        "query": "how to add authentication",
-        "expected_keywords": ["auth", "middleware", "authenticate", "credential"],
-        "description": "Developer wants to add auth to their app"
-    },
-    {
-        "query": "handle websocket messages",
-        "expected_keywords": ["websocket", "message", "send", "receive", "on_"],
-        "description": "Developer working with WebSockets"
-    },
-    {
-        "query": "return json from endpoint",
-        "expected_keywords": ["json", "response", "jsonresponse", "return"],
-        "description": "Developer wants to return JSON data"
-    },
-    {
-        "query": "validate request data",
-        "expected_keywords": ["valid", "request", "data", "schema"],
-        "description": "Developer needs input validation"
-    },
-    {
-        "query": "middleware that runs before request",
-        "expected_keywords": ["middleware", "before", "dispatch", "call_next"],
-        "description": "Developer needs pre-request processing"
-    },
-    {
-        "query": "error handling",
-        "expected_keywords": ["error", "exception", "handler", "catch"],
-        "description": "Looking for error handling patterns"
-    },
-    {
-        "query": "route decorator",
-        "expected_keywords": ["route", "decorator", "path", "endpoint"],
-        "description": "Developer needs routing functionality"
-    },
-    {
-        "query": "database session",
-        "expected_keywords": ["database", "session", "db", "connection"],
-        "description": "Working with database sessions"
-    },
-]
-
-
-def score_results(results: List[Dict], expected_keywords: List[str]) -> Tuple[float, int, bool]:
-    """
-    Score search results based on expected keywords
-    Returns: (score 0-10, matches count, is_test_in_top_3)
-    """
-    if not results:
-        return 0.0, 0, False
-    
-    # combine text from top 3 results
-    top_3_text = ""
-    has_test_in_top_3 = False
-    
-    for r in results[:3]:
-        name = r.get("name", "").lower()
-        qualified = r.get("qualified_name", "").lower()
-        summary = (r.get("summary") or "").lower()
-        file_path = r.get("file_path", "").lower()
-        
-        top_3_text += f" {name} {qualified} {summary} "
-        
-        # check for test files
-        if "test" in file_path or "test" in name:
-            has_test_in_top_3 = True
-    
-    # count keyword matches
-    matches = sum(1 for kw in expected_keywords if kw.lower() in top_3_text)
-    score = min(10.0, (matches / len(expected_keywords)) * 10)
-    
-    return score, matches, has_test_in_top_3
-
-
-async def run_benchmark(repo_id: str):
-    """Run benchmark comparing V2 vs V3"""
-    print("=" * 80)
-    print("🧪 SEARCH V3 vs V2 BENCHMARK")
-    print("=" * 80)
-    print()
-    
-    indexer = OptimizedCodeIndexer()
-    
-    v2_scores = []
-    v3_scores = []
-    v2_times = []
-    v3_times = []
-    v2_test_count = 0
-    v3_test_count = 0
-    
-    for tc in TEST_QUERIES:
-        query = tc["query"]
-        expected = tc["expected_keywords"]
-        desc = tc["description"]
-        
-        print(f"📝 Query: \"{query}\"")
-        print(f"   Scenario: {desc}")
-        print()
-        
-        # V2 Search
-        start = time.time()
-        try:
-            v2_results = await indexer.search_v2(
-                query=query,
-                repo_id=repo_id,
-                top_k=5,
-                use_reranking=True
-            )
-            v2_time = (time.time() - start) * 1000
-        except Exception as e:
-            print(f"   ❌ V2 Error: {e}")
-            v2_results = []
-            v2_time = 0
-        
-        v2_score, v2_matches, v2_has_test = score_results(v2_results, expected)
-        v2_scores.append(v2_score)
-        v2_times.append(v2_time)
-        if v2_has_test:
-            v2_test_count += 1
-        
-        # V3 Search
-        start = time.time()
-        try:
-            v3_results = await indexer.search_v3(
-                query=query,
-                repo_id=repo_id,
-                top_k=5,
-                include_tests=False,
-                use_reranking=True
-            )
-            v3_time = (time.time() - start) * 1000
-        except Exception as e:
-            print(f"   ❌ V3 Error: {e}")
-            v3_results = []
-            v3_time = 0
-        
-        v3_score, v3_matches, v3_has_test = score_results(v3_results, expected)
-        v3_scores.append(v3_score)
-        v3_times.append(v3_time)
-        if v3_has_test:
-            v3_test_count += 1
-        
-        # Print comparison
-        print(f"   V2: Score {v2_score:.1f}/10 ({v2_matches}/{len(expected)} keywords) | {v2_time:.0f}ms")
-        if v2_results:
-            print(f"       Top result: {v2_results[0].get('name', 'unknown')}")
-        
-        print(f"   V3: Score {v3_score:.1f}/10 ({v3_matches}/{len(expected)} keywords) | {v3_time:.0f}ms")
-        if v3_results:
-            print(f"       Top result: {v3_results[0].get('name', 'unknown')}")
-        
-        # Winner
-        if v3_score > v2_score:
-            print(f"   🏆 V3 WINS (+{v3_score - v2_score:.1f})")
-        elif v2_score > v3_score:
-            print(f"   🏆 V2 WINS (+{v2_score - v3_score:.1f})")
-        else:
-            print(f"   🤝 TIE")
-        
-        print()
-    
-    # Summary
-    print("=" * 80)
-    print("📊 BENCHMARK RESULTS")
-    print("=" * 80)
-    
-    v2_avg = sum(v2_scores) / len(v2_scores)
-    v3_avg = sum(v3_scores) / len(v3_scores)
-    v2_total_time = sum(v2_times)
-    v3_total_time = sum(v3_times)
-    
-    v2_wins = sum(1 for v2, v3 in zip(v2_scores, v3_scores) if v2 > v3)
-    v3_wins = sum(1 for v2, v3 in zip(v2_scores, v3_scores) if v3 > v2)
-    ties = len(v2_scores) - v2_wins - v3_wins
-    
-    print(f"""
-┌─────────────────────────────────────────────────────────┐
-│ METRIC                    │    V2     │    V3     │     │
-├─────────────────────────────────────────────────────────┤
-│ Average Score             │ {v2_avg:>6.1f}/10 │ {v3_avg:>6.1f}/10 │ {"V3 ✓" if v3_avg > v2_avg else "V2 ✓" if v2_avg > v3_avg else "TIE":<5}│
-│ Total Time                │ {v2_total_time:>6.0f}ms │ {v3_total_time:>6.0f}ms │ {"V3 ✓" if v3_total_time < v2_total_time else "V2 ✓":<5}│
-│ Queries with test in top3 │ {v2_test_count:>6}   │ {v3_test_count:>6}   │ {"V3 ✓" if v3_test_count < v2_test_count else "V2 ✓" if v2_test_count < v3_test_count else "TIE":<5}│
-│ Wins                      │ {v2_wins:>6}   │ {v3_wins:>6}   │     │
-│ Ties                      │ {ties:>6}   │ {ties:>6}   │     │
-└─────────────────────────────────────────────────────────┘
-    """)
-    
-    # Final verdict
-    print()
-    if v3_avg >= v2_avg + 1.0:
-        print("✅ VERDICT: V3 is SIGNIFICANTLY BETTER - Ready for production!")
-    elif v3_avg > v2_avg:
-        print("✅ VERDICT: V3 is BETTER - Consider shipping!")
-    elif v3_avg == v2_avg:
-        print("⚠️ VERDICT: V3 is EQUAL to V2 - Need more optimization")
-    else:
-        print("❌ VERDICT: V3 is WORSE than V2 - Needs more work")
-    
-    print()
-    
-    # Check for Voyage
-    try:
-        from services.search_v3.integration import get_search_v3
-        v3 = get_search_v3()
-        if v3.is_voyage_enabled:
-            print("🚀 Using Voyage AI code-specific embeddings")
-        else:
-            print("⚠️ Voyage AI not enabled - using OpenAI embeddings")
-            print("   Set VOYAGE_API_KEY for better code search accuracy!")
-    except Exception as e:
-        print(f"⚠️ Could not check Voyage status: {e}")
-
-
-if __name__ == "__main__":
-    # default repo ID (starlette) - change as needed
-    REPO_ID = os.getenv("BENCHMARK_REPO_ID", "0323a08f-9d21-4c59-b567-e0629a9bbb24")
-    
-    print(f"Using repo_id: {REPO_ID}")
-    print("Set BENCHMARK_REPO_ID env var to use a different repo")
-    print()
-    
-    asyncio.run(run_benchmark(REPO_ID))
diff --git a/backend/scripts/cross_repo_test.py b/backend/scripts/cross_repo_test.py
deleted file mode 100644
index 52775ca..0000000
--- a/backend/scripts/cross_repo_test.py
+++ /dev/null
@@ -1,128 +0,0 @@
-#!/usr/bin/env python3
-"""
-Cross-Repo Test - Test V3 on multiple repositories
-"""
-import asyncio
-import os
-import sys
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-# Load from environment (set in .env or export manually)
-if not os.environ.get("VOYAGE_API_KEY"):
-    print("❌ VOYAGE_API_KEY not set. Export it or add to .env file.")
-    sys.exit(1)
-
-from services.indexer_optimized import OptimizedCodeIndexer
-
-REPOS = [
-    {"id": "b0d22b4c-9d05-426e-8d9c-7278cce0f4c7", "name": "Flask"},
-    {"id": "778333ff-6532-4c05-b73a-d54d44c6917d", "name": "Jotai"},
-    {"id": "409fbeac-376f-4593-99a2-882d74e2cae6", "name": "Bun"},
-]
-
-QUERIES = [
-    {"query": "routing", "good": ["route", "router", "path", "url"]},
-    {"query": "middleware", "good": ["middleware", "dispatch", "handler"]},
-    {"query": "request", "good": ["request", "req"]},
-    {"query": "response", "good": ["response", "res", "reply"]},
-    {"query": "error handling", "good": ["error", "exception", "handler"]},
-]
-
-
-def has_test_in_top3(results):
-    for r in results[:3]:
-        name = r.get("name", "").lower()
-        file_path = r.get("file_path", "").lower()
-        if "test" in name or "test" in file_path:
-            return True
-    return False
-
-
-async def test_repo(indexer, repo):
-    print(f"\n{'='*60}")
-    print(f"📦 Testing: {repo['name']}")
-    print(f"{'='*60}")
-    
-    v2_test_count = 0
-    v3_test_count = 0
-    v2_wins = 0
-    v3_wins = 0
-    
-    for q in QUERIES:
-        query = q["query"]
-        
-        try:
-            v2_results = await indexer.search_v2(query, repo["id"], top_k=5)
-            v2_has_test = has_test_in_top3(v2_results)
-            v2_top = v2_results[0].get("name", "?")[:20] if v2_results else "-"
-        except Exception as e:
-            v2_has_test = False
-            v2_top = f"error"
-            v2_results = []
-        
-        try:
-            v3_results = await indexer.search_v3(query, repo["id"], top_k=5, include_tests=False)
-            v3_has_test = has_test_in_top3(v3_results)
-            v3_top = v3_results[0].get("name", "?")[:20] if v3_results else "-"
-        except Exception as e:
-            v3_has_test = False
-            v3_top = f"error"
-            v3_results = []
-        
-        if v2_has_test:
-            v2_test_count += 1
-        if v3_has_test:
-            v3_test_count += 1
-        
-        # Simple win: no test pollution = better
-        if not v3_has_test and v2_has_test:
-            v3_wins += 1
-            winner = "V3"
-        elif not v2_has_test and v3_has_test:
-            v2_wins += 1
-            winner = "V2"
-        else:
-            winner = "TIE"
-        
-        v2_marker = "❌" if v2_has_test else "✅"
-        v3_marker = "❌" if v3_has_test else "✅"
-        
-        print(f"  \"{query}\"")
-        print(f"    V2: {v2_marker} {v2_top:<20} | V3: {v3_marker} {v3_top:<20} | {winner}")
-    
-    print(f"\n  Summary: V2 test pollution={v2_test_count}, V3 test pollution={v3_test_count}")
-    return {"v2_tests": v2_test_count, "v3_tests": v3_test_count, "v2_wins": v2_wins, "v3_wins": v3_wins}
-
-
-async def main():
-    print("🧪 CROSS-REPOSITORY TEST - V2 vs V3")
-    
-    indexer = OptimizedCodeIndexer()
-    
-    total_v2_tests = 0
-    total_v3_tests = 0
-    
-    for repo in REPOS:
-        try:
-            result = await test_repo(indexer, repo)
-            total_v2_tests += result["v2_tests"]
-            total_v3_tests += result["v3_tests"]
-        except Exception as e:
-            print(f"  ⚠️ Error testing {repo['name']}: {e}")
-    
-    print(f"\n{'='*60}")
-    print(f"📊 CROSS-REPO SUMMARY")
-    print(f"{'='*60}")
-    print(f"  Total V2 test pollution: {total_v2_tests}")
-    print(f"  Total V3 test pollution: {total_v3_tests}")
-    print(f"  V3 reduction: {total_v2_tests - total_v3_tests} fewer test files")
-    
-    if total_v3_tests < total_v2_tests:
-        print(f"\n✅ V3 WINS across multiple repos!")
-    else:
-        print(f"\n⚠️ Results mixed")
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
diff --git a/backend/scripts/edge_case_test.py b/backend/scripts/edge_case_test.py
deleted file mode 100644
index e9da277..0000000
--- a/backend/scripts/edge_case_test.py
+++ /dev/null
@@ -1,110 +0,0 @@
-#!/usr/bin/env python3
-"""
-Edge Case Test - Weird queries, typos, edge cases
-"""
-import asyncio
-import os
-import sys
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-# Load from environment (set in .env or export manually)
-if not os.environ.get("VOYAGE_API_KEY"):
-    print("❌ VOYAGE_API_KEY not set. Export it or add to .env file.")
-    sys.exit(1)
-
-from services.indexer_optimized import OptimizedCodeIndexer
-
-repo_id = "0323a08f-9d21-4c59-b567-e0629a9bbb24"  # starlette
-
-EDGE_CASES = [
-    # Typos
-    {"query": "authnetication", "desc": "typo in authentication"},
-    {"query": "midleware", "desc": "typo in middleware"},
-    
-    # Very short queries
-    {"query": "ws", "desc": "abbreviation for websocket"},
-    {"query": "req", "desc": "abbreviation for request"},
-    {"query": "res", "desc": "abbreviation for response"},
-    
-    # Very long queries
-    {"query": "how do i create a custom middleware that logs all requests and responses", "desc": "long natural language"},
-    
-    # Code-like queries
-    {"query": "async def", "desc": "code pattern"},
-    {"query": "@app.route", "desc": "decorator pattern"},
-    {"query": "raise HTTPException", "desc": "exception pattern"},
-    
-    # Empty-ish queries
-    {"query": "the", "desc": "common word"},
-    {"query": "a function that", "desc": "vague query"},
-    
-    # Include test keyword (should include tests)
-    {"query": "test authentication", "desc": "explicitly wants tests"},
-]
-
-
-async def main():
-    print("🧪 EDGE CASE TEST - V3 Robustness")
-    print("=" * 70)
-    
-    indexer = OptimizedCodeIndexer()
-    
-    passed = 0
-    failed = 0
-    
-    for case in EDGE_CASES:
-        query = case["query"]
-        desc = case["desc"]
-        
-        print(f"\n📝 \"{query}\" ({desc})")
-        
-        try:
-            # Check if query should include tests
-            include_tests = "test" in query.lower()
-            
-            results = await indexer.search_v3(
-                query, repo_id, top_k=3, 
-                include_tests=include_tests
-            )
-            
-            if results:
-                top = results[0]
-                name = top.get("name", "?")[:25]
-                file = top.get("file_path", "?").split("/")[-1][:20]
-                score = top.get("score", 0)
-                
-                has_test = "test" in file.lower() or "test" in name.lower()
-                
-                # If we asked for tests, having tests is OK
-                if include_tests:
-                    status = "✅ PASS" if has_test else "✅ PASS (no tests found)"
-                else:
-                    status = "✅ PASS" if not has_test else "⚠️ test leak"
-                
-                print(f"   Result: {name} ({file}) | score={score:.2f}")
-                print(f"   Status: {status}")
-                passed += 1
-            else:
-                print(f"   Result: No results")
-                print(f"   Status: ⚠️ empty (may be OK for weird queries)")
-                passed += 1  # Empty is OK for edge cases
-                
-        except Exception as e:
-            print(f"   ❌ ERROR: {str(e)[:50]}")
-            failed += 1
-    
-    print(f"\n{'='*70}")
-    print(f"📊 EDGE CASE RESULTS")
-    print(f"{'='*70}")
-    print(f"   Passed: {passed}/{len(EDGE_CASES)}")
-    print(f"   Failed: {failed}/{len(EDGE_CASES)}")
-    
-    if failed == 0:
-        print(f"\n✅ V3 handles all edge cases!")
-    else:
-        print(f"\n⚠️ {failed} edge cases need attention")
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
diff --git a/backend/scripts/extended_query_test.py b/backend/scripts/extended_query_test.py
deleted file mode 100644
index 6b1f533..0000000
--- a/backend/scripts/extended_query_test.py
+++ /dev/null
@@ -1,176 +0,0 @@
-#!/usr/bin/env python3
-"""
-Extended Human Query Test - More realistic developer queries
-"""
-import asyncio
-import os
-import sys
-import time
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-# Load from environment (set in .env or export manually)
-if not os.environ.get("VOYAGE_API_KEY"):
-    print("❌ VOYAGE_API_KEY not set. Export it or add to .env file.")
-    sys.exit(1)
-
-from services.indexer_optimized import OptimizedCodeIndexer
-
-# More realistic queries developers would type
-EXTENDED_QUERIES = [
-    # Natural language questions
-    {"query": "how to validate input", "wants": "validation logic", "good": ["valid", "check", "schema"], "bad": ["test_"]},
-    {"query": "send response to client", "wants": "response handling", "good": ["response", "send", "return"], "bad": ["test_"]},
-    {"query": "parse cookies", "wants": "cookie handling", "good": ["cookie", "parse", "get"], "bad": ["test_"]},
-    {"query": "handle file uploads", "wants": "file upload logic", "good": ["file", "upload", "form", "multipart"], "bad": ["test_"]},
-    {"query": "cors settings", "wants": "CORS middleware", "good": ["cors", "origin", "header"], "bad": ["test_"]},
-    
-    # Short keyword searches
-    {"query": "session", "wants": "session management", "good": ["session"], "bad": ["test_session"]},
-    {"query": "redirect", "wants": "redirect response", "good": ["redirect", "location"], "bad": ["test_redirect"]},
-    {"query": "template", "wants": "template rendering", "good": ["template", "render", "jinja"], "bad": ["test_template"]},
-    {"query": "background task", "wants": "async background tasks", "good": ["background", "task", "async"], "bad": ["test_"]},
-    {"query": "lifespan", "wants": "app lifespan events", "good": ["lifespan", "startup", "shutdown"], "bad": ["test_"]},
-    
-    # Specific patterns
-    {"query": "404 not found", "wants": "404 error handling", "good": ["404", "not_found", "notfound"], "bad": ["test_"]},
-    {"query": "rate limit", "wants": "rate limiting", "good": ["rate", "limit", "throttle"], "bad": ["test_"]},
-    {"query": "database connection", "wants": "DB connection", "good": ["database", "db", "connection", "pool"], "bad": ["test_"]},
-    {"query": "form data", "wants": "form parsing", "good": ["form", "data", "parse", "multipart"], "bad": ["test_"]},
-    {"query": "headers", "wants": "HTTP headers", "good": ["header", "headers"], "bad": ["test_header"]},
-]
-
-
-def score_result(result, good_keywords, bad_keywords):
-    name = result.get("name", "").lower()
-    file_path = result.get("file_path", "").lower()
-    qualified = result.get("qualified_name", "").lower()
-    text = f"{name} {file_path} {qualified}"
-    
-    for bad in bad_keywords:
-        if bad in text:
-            return -1, True
-    
-    matches = sum(1 for good in good_keywords if good in text)
-    return matches, False
-
-
-def evaluate_results(results, query_info):
-    if not results:
-        return {"score": 0, "test_count": 0, "top_3": []}
-    
-    good = query_info["good"]
-    bad = query_info["bad"]
-    
-    total_score = 0
-    test_count = 0
-    top_3 = []
-    
-    for i, r in enumerate(results[:5]):
-        match_score, is_test = score_result(r, good, bad)
-        
-        if i < 3:
-            top_3.append({
-                "name": r.get("name", "?")[:25],
-                "file": r.get("file_path", "?").split("/")[-1][:20],
-                "is_test": is_test
-            })
-            if is_test:
-                test_count += 1
-        
-        position_weight = 6 - (i + 1)
-        if is_test:
-            total_score -= position_weight
-        else:
-            total_score += match_score * position_weight
-    
-    return {"score": max(0, total_score), "test_count": test_count, "top_3": top_3}
-
-
-async def run_extended_test():
-    print("=" * 70)
-    print("🧪 EXTENDED HUMAN QUERY TEST - V2 vs V3")
-    print("=" * 70)
-    print()
-    
-    indexer = OptimizedCodeIndexer()
-    repo_id = "0323a08f-9d21-4c59-b567-e0629a9bbb24"  # starlette
-    
-    v2_total, v3_total = 0, 0
-    v2_tests, v3_tests = 0, 0
-    v2_wins, v3_wins, ties = 0, 0, 0
-    
-    for q in EXTENDED_QUERIES:
-        query = q["query"]
-        
-        # V2
-        try:
-            v2_results = await indexer.search_v2(query, repo_id, top_k=5)
-        except Exception as e:
-            print(f"  V2 error for '{query}': {e}")
-            v2_results = []
-        v2_eval = evaluate_results(v2_results, q)
-        
-        # V3
-        try:
-            v3_results = await indexer.search_v3(query, repo_id, top_k=5, include_tests=False)
-        except Exception as e:
-            print(f"  V3 error for '{query}': {e}")
-            v3_results = []
-        v3_eval = evaluate_results(v3_results, q)
-        
-        v2_total += v2_eval["score"]
-        v3_total += v3_eval["score"]
-        v2_tests += v2_eval["test_count"]
-        v3_tests += v3_eval["test_count"]
-        
-        if v3_eval["score"] > v2_eval["score"]:
-            winner = "V3 ✓"
-            v3_wins += 1
-        elif v2_eval["score"] > v3_eval["score"]:
-            winner = "V2 ✓"
-            v2_wins += 1
-        else:
-            winner = "TIE"
-            ties += 1
-        
-        # Compact output
-        print(f"📝 \"{query}\"")
-        print(f"   V2: {v2_eval['score']:>2} | V3: {v3_eval['score']:>2} | {winner}")
-        
-        # Show top result comparison
-        v2_top = v2_eval["top_3"][0] if v2_eval["top_3"] else {"name": "-", "is_test": False}
-        v3_top = v3_eval["top_3"][0] if v3_eval["top_3"] else {"name": "-", "is_test": False}
-        v2_marker = "❌" if v2_top.get("is_test") else "✅"
-        v3_marker = "❌" if v3_top.get("is_test") else "✅"
-        print(f"   V2 top: {v2_marker} {v2_top['name']}")
-        print(f"   V3 top: {v3_marker} {v3_top['name']}")
-        print()
-    
-    # Summary
-    print("=" * 70)
-    print("📊 EXTENDED TEST RESULTS")
-    print("=" * 70)
-    print(f"""
-    Metric              V2          V3          Winner
-    ─────────────────────────────────────────────────────
-    Total Score         {v2_total:>3}         {v3_total:>3}         {"V3 ✓" if v3_total > v2_total else "V2 ✓" if v2_total > v3_total else "TIE"}
-    Test Pollution      {v2_tests:>3}         {v3_tests:>3}         {"V3 ✓" if v3_tests < v2_tests else "V2 ✓" if v2_tests < v3_tests else "TIE"}
-    Queries Won         {v2_wins:>3}         {v3_wins:>3}
-    Ties                {ties:>3}         {ties:>3}
-    """)
-    
-    improvement = ((v3_total - v2_total) / max(v2_total, 1)) * 100
-    print(f"    V3 improvement: {improvement:.0f}%")
-    print()
-    
-    if v3_total > v2_total * 1.2:
-        print("✅ V3 SIGNIFICANTLY BETTER!")
-    elif v3_total > v2_total:
-        print("✅ V3 is better")
-    else:
-        print("⚠️ Results inconclusive")
-
-
-if __name__ == "__main__":
-    asyncio.run(run_extended_test())
diff --git a/backend/scripts/extended_v3_test.py b/backend/scripts/extended_v3_test.py
deleted file mode 100644
index a5655ea..0000000
--- a/backend/scripts/extended_v3_test.py
+++ /dev/null
@@ -1,138 +0,0 @@
-#!/usr/bin/env python3
-"""
-Extended Search V3 Testing Suite
-More human-like queries across different patterns
-"""
-import asyncio
-import os
-import sys
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-# Load from environment (set in .env or export manually)
-if not os.environ.get("VOYAGE_API_KEY"):
-    print("❌ VOYAGE_API_KEY not set. Export it or add to .env file.")
-    sys.exit(1)
-
-from services.indexer_optimized import OptimizedCodeIndexer
-
-# More natural human queries - how devs ACTUALLY search
-EXTENDED_QUERIES = [
-    # Natural language questions
-    {"query": "how to send a response", "wants": "Response classes"},
-    {"query": "validate input", "wants": "Input validation"},
-    {"query": "cookies", "wants": "Cookie handling"},
-    {"query": "session management", "wants": "Session handling"},
-    {"query": "cors", "wants": "CORS middleware"},
-    
-    # Typos and variations  
-    {"query": "http request", "wants": "Request handling"},
-    {"query": "url parameters", "wants": "Path/query params"},
-    {"query": "background tasks", "wants": "BackgroundTask class"},
-    
-    # Implementation patterns
-    {"query": "async function", "wants": "Async handlers"},
-    {"query": "decorator", "wants": "Route decorators"},
-    {"query": "exception", "wants": "Exception classes"},
-    
-    # Specific features
-    {"query": "file upload", "wants": "File handling"},
-    {"query": "template", "wants": "Template rendering"},
-    {"query": "redirect", "wants": "Redirect responses"},
-    {"query": "headers", "wants": "Header handling"},
-]
-
-
-async def run_extended_tests():
-    print("=" * 70)
-    print("🧪 EXTENDED V3 TESTING - More Human Queries")
-    print("=" * 70)
-    print()
-    
-    indexer = OptimizedCodeIndexer()
-    repo_id = "0323a08f-9d21-4c59-b567-e0629a9bbb24"  # starlette
-    
-    v2_wins = 0
-    v3_wins = 0
-    ties = 0
-    v2_test_pollution = 0
-    v3_test_pollution = 0
-    
-    for q in EXTENDED_QUERIES:
-        query = q["query"]
-        wants = q["wants"]
-        
-        # V2
-        try:
-            v2_results = await indexer.search_v2(query, repo_id, top_k=3)
-        except Exception as e:
-            print(f"  V2 error: {e}")
-            v2_results = []
-        
-        # V3
-        try:
-            v3_results = await indexer.search_v3(query, repo_id, top_k=3, include_tests=False)
-        except Exception as e:
-            print(f"  V3 error: {e}")
-            v3_results = []
-        
-        # Check for test files in top 3
-        v2_tests = sum(1 for r in v2_results[:3] if "test" in r.get("file_path", "").lower())
-        v3_tests = sum(1 for r in v3_results[:3] if "test" in r.get("file_path", "").lower())
-        v2_test_pollution += v2_tests
-        v3_test_pollution += v3_tests
-        
-        # Simple scoring: penalize test files heavily
-        v2_score = len(v2_results) - (v2_tests * 2)
-        v3_score = len(v3_results) - (v3_tests * 2)
-        
-        if v3_score > v2_score:
-            v3_wins += 1
-            winner = "V3 ✓"
-        elif v2_score > v3_score:
-            v2_wins += 1
-            winner = "V2 ✓"
-        else:
-            ties += 1
-            winner = "TIE"
-        
-        # Print results
-        v2_top = v2_results[0].get("name", "?")[:25] if v2_results else "none"
-        v3_top = v3_results[0].get("name", "?")[:25] if v3_results else "none"
-        v2_file = v2_results[0].get("file_path", "").split("/")[-1][:20] if v2_results else ""
-        v3_file = v3_results[0].get("file_path", "").split("/")[-1][:20] if v3_results else ""
-        
-        test_marker_v2 = "❌" if v2_tests > 0 else "✅"
-        test_marker_v3 = "❌" if v3_tests > 0 else "✅"
-        
-        print(f"🔍 \"{query}\" (wants: {wants})")
-        print(f"   V2: {test_marker_v2} {v2_top:<25} ({v2_file})")
-        print(f"   V3: {test_marker_v3} {v3_top:<25} ({v3_file})")
-        print(f"   Winner: {winner}")
-        print()
-    
-    # Summary
-    print("=" * 70)
-    print("📊 EXTENDED TEST RESULTS")
-    print("=" * 70)
-    print(f"""
-    V2 Wins:              {v2_wins}
-    V3 Wins:              {v3_wins}
-    Ties:                 {ties}
-    
-    V2 Test Pollution:    {v2_test_pollution} test files in results
-    V3 Test Pollution:    {v3_test_pollution} test files in results
-    
-    V3 Win Rate:          {v3_wins}/{len(EXTENDED_QUERIES)} = {v3_wins/len(EXTENDED_QUERIES)*100:.0f}%
-    """)
-    
-    if v3_wins > v2_wins:
-        print("✅ V3 WINS EXTENDED TESTING!")
-    elif v2_wins > v3_wins:
-        print("❌ V2 performed better - needs investigation")
-    else:
-        print("🤝 TIE - V3 matches V2")
-
-
-if __name__ == "__main__":
-    asyncio.run(run_extended_tests())
diff --git a/backend/scripts/final_v3_test.py b/backend/scripts/final_v3_test.py
deleted file mode 100644
index 9d8066a..0000000
--- a/backend/scripts/final_v3_test.py
+++ /dev/null
@@ -1,168 +0,0 @@
-#!/usr/bin/env python3
-"""
-Final Comprehensive V3 Test - Summary Report for CEO
-"""
-import asyncio
-import os
-import sys
-import time
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-# Load .env file if present
-try:
-    from dotenv import load_dotenv
-    load_dotenv()
-except ImportError:
-    pass  # dotenv not installed, rely on exported env vars
-
-# Load from environment (set in .env or export manually)
-if not os.environ.get("VOYAGE_API_KEY"):
-    print("❌ VOYAGE_API_KEY not set. Export it or add to .env file.")
-    sys.exit(1)
-
-from services.indexer_optimized import OptimizedCodeIndexer
-from utils.test_detection import has_test_file_in_top_n as has_test_file
-
-# All query types combined
-ALL_QUERIES = [
-    # Core functionality
-    "authentication", "middleware", "routing", "websocket", "session",
-    # Natural language
-    "how to return json", "handle errors", "send response", "validate input",
-    # Features
-    "static files", "file upload", "cookies", "headers", "redirect",
-    # Implementation
-    "request body", "background task", "exception handler", "form data",
-    # Short keywords
-    "cors", "template", "lifespan",
-]
-
-REPOS = [
-    {"id": "0323a08f-9d21-4c59-b567-e0629a9bbb24", "name": "Starlette"},
-    {"id": "b0d22b4c-9d05-426e-8d9c-7278cce0f4c7", "name": "Flask"},
-]
-
-
-async def run_final_test():
-    print()
-    print("╔" + "═" * 68 + "╗")
-    print("║" + " 🧪 FINAL V3 COMPREHENSIVE TEST REPORT ".center(68) + "║")
-    print("╚" + "═" * 68 + "╝")
-    print()
-    
-    indexer = OptimizedCodeIndexer()
-    
-    total_v2_wins = 0
-    total_v3_wins = 0
-    total_ties = 0
-    total_v2_test_pollution = 0
-    total_v3_test_pollution = 0
-    total_v2_time = 0
-    total_v3_time = 0
-    total_queries = 0
-    
-    for repo in REPOS:
-        print(f"📦 Repository: {repo['name']}")
-        print("-" * 50)
-        
-        repo_v2_tests = 0
-        repo_v3_tests = 0
-        repo_v3_wins = 0
-        
-        for query in ALL_QUERIES:
-            total_queries += 1
-            
-            # V2
-            start = time.time()
-            try:
-                v2_results = await indexer.search_v2(query, repo["id"], top_k=3)
-            except Exception as e:
-                print(f"  V2 error [{repo['name']}] '{query}': {e}")
-                v2_results = []
-            v2_time = (time.time() - start) * 1000
-            total_v2_time += v2_time
-            
-            # V3  
-            start = time.time()
-            try:
-                v3_results = await indexer.search_v3(query, repo["id"], top_k=3, include_tests=False)
-            except Exception as e:
-                print(f"  V3 error [{repo['name']}] '{query}': {e}")
-                v3_results = []
-            v3_time = (time.time() - start) * 1000
-            total_v3_time += v3_time
-            
-            v2_has_test = has_test_file(v2_results)
-            v3_has_test = has_test_file(v3_results)
-            
-            if v2_has_test:
-                total_v2_test_pollution += 1
-                repo_v2_tests += 1
-            if v3_has_test:
-                total_v3_test_pollution += 1
-                repo_v3_tests += 1
-            
-            # Win logic: V3 wins if it has no test but V2 does
-            if not v3_has_test and v2_has_test:
-                total_v3_wins += 1
-                repo_v3_wins += 1
-            elif v3_has_test and not v2_has_test:
-                total_v2_wins += 1
-            else:
-                total_ties += 1
-        
-        print(f"   V2 test pollution: {repo_v2_tests}/{len(ALL_QUERIES)}")
-        print(f"   V3 test pollution: {repo_v3_tests}/{len(ALL_QUERIES)}")
-        print(f"   V3 wins: {repo_v3_wins}/{len(ALL_QUERIES)}")
-        print()
-    
-    # Final Summary
-    print("╔" + "═" * 68 + "╗")
-    print("║" + " 📊 FINAL RESULTS ".center(68) + "║")
-    print("╠" + "═" * 68 + "╣")
-    
-    print(f"║  {'Metric':<35} {'V2':>10} {'V3':>10} {'Winner':>8} ║")
-    print("╠" + "═" * 68 + "╣")
-    
-    # Test pollution
-    winner = "V3 ✓" if total_v3_test_pollution < total_v2_test_pollution else "V2" if total_v2_test_pollution < total_v3_test_pollution else "TIE"
-    print(f"║  {'Test Files in Top 3':<35} {total_v2_test_pollution:>10} {total_v3_test_pollution:>10} {winner:>8} ║")
-    
-    # Wins
-    winner = "V3 ✓" if total_v3_wins > total_v2_wins else "V2" if total_v2_wins > total_v3_wins else "TIE"
-    print(f"║  {'Query Wins':<35} {total_v2_wins:>10} {total_v3_wins:>10} {winner:>8} ║")
-    
-    # Avg latency
-    avg_v2 = total_v2_time / total_queries
-    avg_v3 = total_v3_time / total_queries
-    winner = "V3 ✓" if avg_v3 < avg_v2 else "V2" if avg_v2 < avg_v3 else "TIE"
-    print(f"║  {'Avg Latency (ms)':<35} {avg_v2:>10.0f} {avg_v3:>10.0f} {winner:>8} ║")
-    
-    print("╠" + "═" * 68 + "╣")
-    
-    # Improvement stats
-    test_reduction = total_v2_test_pollution - total_v3_test_pollution
-    test_reduction_pct = (test_reduction / max(total_v2_test_pollution, 1)) * 100
-    
-    print(f"║  {'Total Queries Tested':<35} {total_queries:>21} ║")
-    print(f"║  {'Test Pollution Reduction':<35} {test_reduction:>10} ({test_reduction_pct:.0f}%) ║")
-    print(f"║  {'V3 Win Rate':<35} {total_v3_wins/total_queries*100:>20.0f}% ║")
-    
-    print("╚" + "═" * 68 + "╝")
-    print()
-    
-    # Final verdict
-    if total_v3_test_pollution < total_v2_test_pollution and total_v3_wins > total_v2_wins:
-        print("🎯 VERDICT: V3 'Project Brain' is READY TO SHIP! 🚀")
-        print()
-        print("   ✅ Significantly reduced test file pollution")
-        print("   ✅ Better relevance for human-like queries")
-        print("   ✅ Works across multiple repositories")
-        print("   ✅ Query understanding + code graph ranking working")
-    else:
-        print("⚠️  VERDICT: Results inconclusive, needs review")
-
-
-if __name__ == "__main__":
-    asyncio.run(run_final_test())
diff --git a/backend/scripts/human_query_test.py b/backend/scripts/human_query_test.py
deleted file mode 100644
index 7031dd9..0000000
--- a/backend/scripts/human_query_test.py
+++ /dev/null
@@ -1,267 +0,0 @@
-#!/usr/bin/env python3
-"""
-Real-World Human Query Test - V2 vs V3
-Tests with queries that REAL developers would actually type
-"""
-import asyncio
-import os
-import sys
-import time
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-# Load .env file if present
-try:
-    from dotenv import load_dotenv
-    load_dotenv()
-except ImportError:
-    pass  # dotenv not installed, rely on exported env vars
-
-# Load from environment (set in .env or export manually)
-if not os.environ.get("VOYAGE_API_KEY"):
-    print("❌ VOYAGE_API_KEY not set. Export it or add to .env file.")
-    sys.exit(1)
-
-from services.indexer_optimized import OptimizedCodeIndexer
-
-# Real human queries - how developers ACTUALLY search
-HUMAN_QUERIES = [
-    {
-        "query": "authentication",
-        "what_user_wants": "Auth middleware/decorators",
-        "good_results": ["auth", "middleware", "authenticate", "login", "session"],
-        "bad_results": ["test_", "_test", "mock", "fixture"],
-    },
-    {
-        "query": "how do I return json",
-        "what_user_wants": "JSONResponse or json return patterns",
-        "good_results": ["json", "response", "jsonresponse", "return"],
-        "bad_results": ["test_", "_test"],
-    },
-    {
-        "query": "handle errors",
-        "what_user_wants": "Error handlers, exception handling",
-        "good_results": ["error", "exception", "handler", "catch"],
-        "bad_results": ["test_error", "mock"],
-    },
-    {
-        "query": "websocket",
-        "what_user_wants": "WebSocket connection handling",
-        "good_results": ["websocket", "socket", "ws", "connect"],
-        "bad_results": ["test_websocket"],
-    },
-    {
-        "query": "middleware",
-        "what_user_wants": "Middleware classes/functions",
-        "good_results": ["middleware", "dispatch", "call_next"],
-        "bad_results": ["test_middleware"],
-    },
-    {
-        "query": "request body",
-        "what_user_wants": "How to read request body/data",
-        "good_results": ["request", "body", "data", "json", "form"],
-        "bad_results": ["test_request"],
-    },
-    {
-        "query": "routing",
-        "what_user_wants": "Route definitions, URL patterns",
-        "good_results": ["route", "router", "path", "endpoint", "url"],
-        "bad_results": ["test_route"],
-    },
-    {
-        "query": "static files",
-        "what_user_wants": "Serving static files",
-        "good_results": ["static", "file", "serve", "mount"],
-        "bad_results": ["test_static"],
-    },
-]
-
-
-def score_result(result, good_keywords, bad_keywords):
-    """Score a single result"""
-    name = result.get("name", "").lower()
-    file_path = result.get("file_path", "").lower()
-    qualified = result.get("qualified_name", "").lower()
-    text = f"{name} {file_path} {qualified}"
-    
-    # Check for bad results (test files)
-    for bad in bad_keywords:
-        if bad in text:
-            return -1, "test_file"
-    
-    # Check for good results
-    matches = sum(1 for good in good_keywords if good in text)
-    return matches, "ok"
-
-
-def evaluate_results(results, query_info):
-    """Evaluate search results quality"""
-    if not results:
-        return {"score": 0, "reason": "no_results", "top_3": []}
-    
-    good = query_info["good_results"]
-    bad = query_info["bad_results"]
-    
-    total_score = 0
-    test_files_in_top_3 = 0
-    top_3 = []
-    
-    for i, r in enumerate(results[:5]):  # Check top 5
-        match_score, status = score_result(r, good, bad)
-        
-        if i < 3:  # Track top 3
-            top_3.append({
-                "name": r.get("name", "?"),
-                "file": r.get("file_path", "?").split("/")[-1],
-                "score": r.get("score", 0),
-                "is_test": status == "test_file"
-            })
-            
-            if status == "test_file":
-                test_files_in_top_3 += 1
-        
-        # Weight by position (position 1 = 5pts, position 5 = 1pt)
-        position_weight = 6 - (i + 1)
-        
-        if status == "test_file":
-            total_score -= position_weight  # Penalty for test files
-        else:
-            total_score += match_score * position_weight
-    
-    return {
-        "score": max(0, total_score),
-        "test_files_in_top_3": test_files_in_top_3,
-        "top_3": top_3
-    }
-
-
-async def run_comparison():
-    print("=" * 80)
-    print("🧪 REAL HUMAN QUERY TEST: V2 vs V3 (with Voyage AI)")
-    print("=" * 80)
-    print()
-    
-    indexer = OptimizedCodeIndexer()
-    
-    # Use starlette repo
-    repo_id = "0323a08f-9d21-4c59-b567-e0629a9bbb24"
-    
-    v2_total = 0
-    v3_total = 0
-    v2_test_pollution = 0
-    v3_test_pollution = 0
-    
-    results_table = []
-    
-    for q in HUMAN_QUERIES:
-        query = q["query"]
-        print(f"🔍 Query: \"{query}\"")
-        print(f"   User wants: {q['what_user_wants']}")
-        
-        # V2
-        start = time.time()
-        try:
-            v2_results = await indexer.search_v2(query, repo_id, top_k=5)
-            v2_time = (time.time() - start) * 1000
-        except Exception as e:
-            print(f"   V2 Error: {e}")
-            v2_results = []
-            v2_time = 0
-        
-        v2_eval = evaluate_results(v2_results, q)
-        
-        # V3
-        start = time.time()
-        try:
-            v3_results = await indexer.search_v3(query, repo_id, top_k=5, include_tests=False)
-            v3_time = (time.time() - start) * 1000
-        except Exception as e:
-            print(f"   V3 Error: {e}")
-            v3_results = []
-            v3_time = 0
-        
-        v3_eval = evaluate_results(v3_results, q)
-        
-        # Compare
-        v2_total += v2_eval["score"]
-        v3_total += v3_eval["score"]
-        v2_test_pollution += v2_eval.get("test_files_in_top_3", 0)
-        v3_test_pollution += v3_eval.get("test_files_in_top_3", 0)
-        
-        # Print results
-        print(f"\n   V2 (OpenAI): Score={v2_eval['score']:>2} | {v2_time:>4.0f}ms | Tests in top3: {v2_eval.get('test_files_in_top_3', 0)}")
-        for r in v2_eval["top_3"]:
-            marker = "❌" if r["is_test"] else "✅"
-            print(f"      {marker} {r['name'][:30]:<30} ({r['file'][:25]})")
-        
-        print(f"\n   V3 (Voyage): Score={v3_eval['score']:>2} | {v3_time:>4.0f}ms | Tests in top3: {v3_eval.get('test_files_in_top_3', 0)}")
-        for r in v3_eval["top_3"]:
-            marker = "❌" if r["is_test"] else "✅"
-            print(f"      {marker} {r['name'][:30]:<30} ({r['file'][:25]})")
-        
-        # Winner
-        if v3_eval["score"] > v2_eval["score"]:
-            print(f"\n   🏆 V3 WINS (+{v3_eval['score'] - v2_eval['score']})")
-        elif v2_eval["score"] > v3_eval["score"]:
-            print(f"\n   🏆 V2 WINS (+{v2_eval['score'] - v3_eval['score']})")
-        else:
-            print(f"\n   🤝 TIE")
-        
-        results_table.append({
-            "query": query,
-            "v2_score": v2_eval["score"],
-            "v3_score": v3_eval["score"],
-            "v2_tests": v2_eval.get("test_files_in_top_3", 0),
-            "v3_tests": v3_eval.get("test_files_in_top_3", 0),
-        })
-        
-        print()
-        print("-" * 80)
-        print()
-    
-    # Final Summary
-    print()
-    print("=" * 80)
-    print("📊 FINAL RESULTS")
-    print("=" * 80)
-    
-    v2_wins = sum(1 for r in results_table if r["v2_score"] > r["v3_score"])
-    v3_wins = sum(1 for r in results_table if r["v3_score"] > r["v2_score"])
-    ties = len(results_table) - v2_wins - v3_wins
-    
-    print(f"""
-┌────────────────────────────────────────────────────────────────┐
-│                    V2 (OpenAI)    V3 (Voyage)    WINNER        │
-├────────────────────────────────────────────────────────────────┤
-│ Total Score            {v2_total:>4}           {v3_total:>4}         {"V3 ✓" if v3_total > v2_total else "V2 ✓" if v2_total > v3_total else "TIE":<10}   │
-│ Test Files in Top 3    {v2_test_pollution:>4}           {v3_test_pollution:>4}         {"V3 ✓" if v3_test_pollution < v2_test_pollution else "V2 ✓" if v2_test_pollution < v3_test_pollution else "TIE":<10}   │
-│ Query Wins             {v2_wins:>4}           {v3_wins:>4}         {"V3 ✓" if v3_wins > v2_wins else "V2 ✓" if v2_wins > v3_wins else "TIE":<10}   │
-│ Ties                   {ties:>4}           {ties:>4}                      │
-└────────────────────────────────────────────────────────────────┘
-    """)
-    
-    # Per-query breakdown
-    print("\nPer-Query Breakdown:")
-    print(f"{'Query':<20} {'V2':>6} {'V3':>6} {'Winner':>10}")
-    print("-" * 45)
-    for r in results_table:
-        winner = "V3" if r["v3_score"] > r["v2_score"] else "V2" if r["v2_score"] > r["v3_score"] else "TIE"
-        print(f"{r['query']:<20} {r['v2_score']:>6} {r['v3_score']:>6} {winner:>10}")
-    
-    # Final verdict
-    print()
-    if v3_total > v2_total * 1.2:  # 20% better
-        print("✅ VERDICT: V3 is SIGNIFICANTLY BETTER - Ship it! 🚀")
-    elif v3_total > v2_total:
-        print("✅ VERDICT: V3 is BETTER - Ready to ship!")
-    elif v3_total == v2_total:
-        print("⚠️ VERDICT: V3 is EQUAL to V2")
-    else:
-        print("❌ VERDICT: V3 needs more work")
-    
-    if v3_test_pollution < v2_test_pollution:
-        print(f"✅ V3 has {v2_test_pollution - v3_test_pollution} fewer test files polluting results!")
-
-
-if __name__ == "__main__":
-    asyncio.run(run_comparison())
diff --git a/backend/scripts/manual_ws_test.py b/backend/scripts/manual_ws_test.py
deleted file mode 100644
index 07c7874..0000000
--- a/backend/scripts/manual_ws_test.py
+++ /dev/null
@@ -1,175 +0,0 @@
-#!/usr/bin/env python3
-"""
-MANUAL WebSocket E2E test for playground indexing.
-
-NOT run in CI - requires:
-  - Running backend server (uvicorn main:app)
-  - Redis running
-  - aiohttp installed (pip install aiohttp)
-
-This script:
-1. Creates an indexing job via the REST API
-2. Connects to the WebSocket endpoint
-3. Listens for all events until completion/error
-4. Reports what we received
-
-Usage: 
-  cd backend
-  pip install aiohttp  # if not installed
-  python3 scripts/manual_ws_test.py
-"""
-import asyncio
-import aiohttp
-import json
-import sys
-from datetime import datetime
-
-# Config
-BASE_URL = "http://localhost:8000/api/v1"
-WS_URL = "ws://localhost:8000/api/v1"
-TEST_REPO = "https://github.com/pmndrs/zustand"  # Small, fast to index
-
-
-def log(msg: str, level: str = "INFO"):
-    """Print timestamped log message."""
-    ts = datetime.now().strftime("%H:%M:%S.%f")[:-3]
-    icon = {"INFO": "ℹ️", "OK": "✅", "ERR": "❌", "WS": "🔌", "EVENT": "📨"}.get(level, "•")
-    print(f"[{ts}] {icon} {msg}")
-
-
-async def create_indexing_job(session: aiohttp.ClientSession) -> dict:
-    """Create a new indexing job via REST API."""
-    log("Creating indexing job for zustand...")
-    
-    async with session.post(
-        f"{BASE_URL}/playground/index",
-        json={"github_url": TEST_REPO}
-    ) as resp:
-        # 202 Accepted is the expected status for async job creation
-        if resp.status not in (200, 202):
-            text = await resp.text()
-            log(f"Failed to create job: {resp.status} - {text}", "ERR")
-            return None
-        
-        data = await resp.json()
-        job_id = data.get("job_id")
-        log(f"Job created: {job_id} (status: {resp.status})", "OK")
-        return data
-
-
-async def listen_websocket(job_id: str) -> list:
-    """Connect to WebSocket and collect all events."""
-    events = []
-    ws_endpoint = f"{WS_URL}/ws/playground/{job_id}"
-    
-    log(f"Connecting to WebSocket: {ws_endpoint}", "WS")
-    
-    async with aiohttp.ClientSession() as session:
-        try:
-            async with session.ws_connect(ws_endpoint, timeout=120) as ws:
-                log("WebSocket connected!", "OK")
-                
-                async for msg in ws:
-                    if msg.type == aiohttp.WSMsgType.TEXT:
-                        event = json.loads(msg.data)
-                        events.append(event)
-                        
-                        event_type = event.get("type", "unknown")
-                        
-                        # Log based on event type
-                        if event_type == "connected":
-                            log(f"Server acknowledged connection", "EVENT")
-                        elif event_type == "ping":
-                            log("Received keepalive ping", "EVENT")
-                        elif event_type == "cloning":
-                            repo = event.get("repo_name", "?")
-                            log(f"Cloning: {repo}", "EVENT")
-                        elif event_type == "progress":
-                            pct = event.get("percent", 0)
-                            files = event.get("files_processed", 0)
-                            total = event.get("files_total", 0)
-                            current = event.get("current_file") or ""
-                            funcs = event.get("functions_found", 0)
-                            # Truncate long paths
-                            if current and len(current) > 40:
-                                current = "..." + current[-37:]
-                            log(f"Progress: {pct}% ({files}/{total}) | {funcs} funcs | {current}", "EVENT")
-                        elif event_type == "completed":
-                            stats = event.get("stats", {})
-                            log(f"COMPLETED! Functions: {stats.get('functions_found', '?')}, Time: {stats.get('time_taken_seconds', '?')}s", "OK")
-                            break
-                        elif event_type == "error":
-                            log(f"ERROR: {event.get('message', 'Unknown error')}", "ERR")
-                            break
-                        else:
-                            log(f"Unknown event: {event_type}", "EVENT")
-                    
-                    elif msg.type == aiohttp.WSMsgType.ERROR:
-                        log(f"WebSocket error: {ws.exception()}", "ERR")
-                        break
-                    elif msg.type == aiohttp.WSMsgType.CLOSED:
-                        log("WebSocket closed by server", "WS")
-                        break
-                        
-        except asyncio.TimeoutError:
-            log("WebSocket connection timed out", "ERR")
-        except Exception as e:
-            log(f"WebSocket error: {e}", "ERR")
-    
-    return events
-
-
-async def main():
-    """Run the end-to-end test."""
-    print("\n" + "="*60)
-    print("  WebSocket E2E Test - Playground Indexing")
-    print("="*60 + "\n")
-    
-    async with aiohttp.ClientSession() as session:
-        # Step 1: Create job
-        job_data = await create_indexing_job(session)
-        if not job_data:
-            sys.exit(1)
-        
-        job_id = job_data.get("job_id")
-        if not job_id:
-            log("No job_id in response", "ERR")
-            sys.exit(1)
-    
-    # Step 2: Listen to WebSocket
-    print()
-    events = await listen_websocket(job_id)
-    
-    # Step 3: Summary
-    print("\n" + "="*60)
-    print("  Test Summary")
-    print("="*60)
-    
-    event_types = [e.get("type") for e in events]
-    print(f"\nTotal events received: {len(events)}")
-    print(f"Event types: {' → '.join(event_types)}")
-    
-    # Check expected flow
-    # Note: "cloning" may be skipped if repo was recently cloned
-    required = ["connected", "completed"]
-    has_required = all(t in event_types for t in required)
-    has_progress = "progress" in event_types
-    
-    print()
-    if has_required and has_progress:
-        log("TEST PASSED - Full event flow received!", "OK")
-        print()
-        return 0
-    elif "error" in event_types:
-        log("TEST COMPLETED WITH ERROR - Error event received (may be expected)", "ERR")
-        print()
-        return 1
-    else:
-        log(f"TEST INCOMPLETE - Missing events. Got: {event_types}", "ERR")
-        print()
-        return 1
-
-
-if __name__ == "__main__":
-    exit_code = asyncio.run(main())
-    sys.exit(exit_code)
diff --git a/backend/scripts/validate_cohere_rerank.py b/backend/scripts/validate_cohere_rerank.py
deleted file mode 100644
index 238788d..0000000
--- a/backend/scripts/validate_cohere_rerank.py
+++ /dev/null
@@ -1,180 +0,0 @@
-#!/usr/bin/env python3
-"""
-Phase 3: Cohere Reranking Validation Test
-Compare V3 with reranking ON vs OFF
-"""
-import asyncio
-import os
-import sys
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-# load env
-try:
-    with open('.env', 'r') as f:
-        for line in f:
-            if '=' in line and not line.startswith('#'):
-                key, val = line.strip().split('=', 1)
-                os.environ[key] = val
-except:
-    pass
-
-from services.indexer_optimized import OptimizedCodeIndexer
-
-QUERIES = [
-    "authentication",
-    "how to return json",
-    "handle errors",
-    "middleware",
-    "websocket connection",
-    "static files",
-    "request body",
-    "redirect response",
-]
-
-repo_id = "0323a08f-9d21-4c59-b567-e0629a9bbb24"  # starlette
-
-
-def has_test_in_top3(results):
-    for r in results[:3]:
-        fp = r.get("file_path", "").lower()
-        if "test" in fp:
-            return True
-    return False
-
-
-def score_results(results, query):
-    """Simple relevance scoring based on name/file matching query terms"""
-    if not results:
-        return 0
-    
-    score = 0
-    terms = query.lower().split()
-    
-    for i, r in enumerate(results[:5]):
-        name = r.get("name", "").lower()
-        file_path = r.get("file_path", "").lower()
-        
-        # penalize test files heavily
-        if "test" in file_path:
-            score -= (5 - i)
-            continue
-        
-        # reward matches
-        for term in terms:
-            if term in name:
-                score += (5 - i) * 2
-            if term in file_path:
-                score += (5 - i)
-    
-    return max(0, score)
-
-
-async def run_validation():
-    print("=" * 70)
-    print("🧪 COHERE RERANKING VALIDATION TEST")
-    print("=" * 70)
-    print()
-    
-    indexer = OptimizedCodeIndexer()
-    
-    # check if Cohere is working
-    from services.search_v3.integration import get_search_v3
-    v3 = get_search_v3()
-    v3._ensure_initialized()
-    has_cohere = v3._search_engine.cohere_client is not None
-    print(f"Cohere Status: {'✅ ENABLED' if has_cohere else '❌ DISABLED'}")
-    print()
-    
-    if not has_cohere:
-        print("⚠️ Cohere not available - cannot test reranking")
-        return
-    
-    # test with reranking ON vs OFF
-    rerank_on_score = 0
-    rerank_off_score = 0
-    rerank_on_tests = 0
-    rerank_off_tests = 0
-    
-    for query in QUERIES:
-        print(f"📝 \"{query}\"")
-        
-        # V3 with reranking OFF
-        try:
-            results_off = await indexer.search_v3(
-                query, repo_id, top_k=5, 
-                include_tests=False,
-                use_reranking=False  # disable reranking
-            )
-            off_score = score_results(results_off, query)
-            off_test = has_test_in_top3(results_off)
-            off_top = results_off[0].get("name", "?")[:25] if results_off else "none"
-        except Exception as e:
-            print(f"   ❌ OFF error: {e}")
-            off_score, off_test, off_top = 0, False, "error"
-            results_off = []
-        
-        # V3 with reranking ON
-        try:
-            results_on = await indexer.search_v3(
-                query, repo_id, top_k=5,
-                include_tests=False,
-                use_reranking=True  # enable reranking
-            )
-            on_score = score_results(results_on, query)
-            on_test = has_test_in_top3(results_on)
-            on_top = results_on[0].get("name", "?")[:25] if results_on else "none"
-            
-            # show rerank scores if available
-            if results_on and 'rerank_score' in results_on[0]:
-                top_rerank = results_on[0].get('rerank_score', 0)
-                print(f"   Cohere relevance: {top_rerank:.3f}")
-        except Exception as e:
-            print(f"   ❌ ON error: {e}")
-            on_score, on_test, on_top = 0, False, "error"
-            results_on = []
-        
-        rerank_off_score += off_score
-        rerank_on_score += on_score
-        if off_test: rerank_off_tests += 1
-        if on_test: rerank_on_tests += 1
-        
-        # determine winner
-        if on_score > off_score:
-            winner = "RERANK ✓"
-        elif off_score > on_score:
-            winner = "NO-RERANK"
-        else:
-            winner = "TIE"
-        
-        off_marker = "❌" if off_test else "✅"
-        on_marker = "❌" if on_test else "✅"
-        
-        print(f"   OFF: {off_marker} {off_top:<25} (score={off_score})")
-        print(f"   ON:  {on_marker} {on_top:<25} (score={on_score})")
-        print(f"   Winner: {winner}")
-        print()
-    
-    # Summary
-    print("=" * 70)
-    print("📊 RERANKING IMPACT SUMMARY")
-    print("=" * 70)
-    print(f"""
-    Metric                    Rerank OFF     Rerank ON     Better?
-    ──────────────────────────────────────────────────────────────
-    Total Score               {rerank_off_score:>10}     {rerank_on_score:>10}     {"✅ ON" if rerank_on_score > rerank_off_score else "❌ OFF"}
-    Test Pollution            {rerank_off_tests:>10}     {rerank_on_tests:>10}     {"✅ ON" if rerank_on_tests < rerank_off_tests else "TIE" if rerank_on_tests == rerank_off_tests else "❌ OFF"}
-    """)
-    
-    improvement = ((rerank_on_score - rerank_off_score) / max(rerank_off_score, 1)) * 100
-    print(f"    Reranking improvement: {improvement:+.0f}%")
-    print()
-    
-    if rerank_on_score >= rerank_off_score and rerank_on_tests <= rerank_off_tests:
-        print("✅ COHERE RERANKING IS WORKING AND IMPROVING RESULTS!")
-    else:
-        print("⚠️ Reranking needs tuning")
-
-
-if __name__ == "__main__":
-    asyncio.run(run_validation())