OpenCodeIntel
diff --git a/‎backend/.env.example‎
Lines changed: 4 additions & 0 deletions b/‎backend/.env.example‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎backend/requirements.txt‎
Lines changed: 3 additions & 0 deletions b/‎backend/requirements.txt‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎backend/routes/playground.py‎
Lines changed: 22 additions & 8 deletions b/‎backend/routes/playground.py‎
Lines changed: 22 additions & 8 deletions
diff --git a/‎backend/scripts/benchmark_search_v3.py‎
Lines changed: 247 additions & 0 deletions b/‎backend/scripts/benchmark_search_v3.py‎
Lines changed: 247 additions & 0 deletions
diff --git a/‎backend/services/indexer_optimized.py‎
Lines changed: 63 additions & 0 deletions b/‎backend/services/indexer_optimized.py‎
Lines changed: 63 additions & 0 deletions
@@ -26,3 +26,7 @@ REDIS_PORT=6379
 # Get DSN from https://sentry.io → Settings → Projects → Client Keys
 SENTRY_DSN=
 ENVIRONMENT=development
+
+# Search V3 - Voyage AI Code Embeddings (recommended for code search)
+# Get API key from https://dash.voyageai.com/
+VOYAGE_API_KEY=your_voyage_api_key_here
@@ -43,3 +43,6 @@ sentry-sdk[fastapi]>=2.0.0
 # Search V2 - Hybrid search
 rank-bm25>=0.2.2
 cohere>=5.0.0
+
+# Search V3 - Code-optimized embeddings
+voyageai>=0.3.0
@@ -49,6 +49,9 @@ class PlaygroundSearchRequest(BaseModel):
     demo_repo: Optional[str] = None  # Keep for backward compat
     repo_id: Optional[str] = None    # Direct repo_id (user-indexed repos)
     max_results: int = 10
+    # V3 options
+    use_v3: bool = True              # Use Search V3 by default (better accuracy)
+    include_tests: bool = False      # Include test files in results
 
 
 class ValidateRepoRequest(BaseModel):
@@ -429,17 +432,26 @@ async def playground_search(
                 "limit": limit_result.limit,
             }
 
-        # Search V2: Hybrid search with BM25 + Cohere reranking
-        v2_results = await indexer.search_v2(
-            query=sanitized_query,
-            repo_id=repo_id,
-            top_k=min(request.max_results, 10),
-            use_reranking=True
-        )
+        # Search V3 (default) or V2 (fallback)
+        if request.use_v3:
+            search_results = await indexer.search_v3(
+                query=sanitized_query,
+                repo_id=repo_id,
+                top_k=min(request.max_results, 10),
+                include_tests=request.include_tests,
+                use_reranking=True
+            )
+        else:
+            search_results = await indexer.search_v2(
+                query=sanitized_query,
+                repo_id=repo_id,
+                top_k=min(request.max_results, 10),
+                use_reranking=True
+            )
 
         # Format results for frontend compatibility
         results = []
-        for r in v2_results:
+        for r in search_results:
             results.append({
                 "name": r.get("name", ""),
                 "qualified_name": r.get("qualified_name", r.get("name", "")),
@@ -453,6 +465,7 @@ async def playground_search(
                 "type": "function",  # backward compat with V1
                 "summary": r.get("summary"),
                 "class_name": r.get("class_name"),
+                "is_test_file": r.get("is_test_file", False),  # V3 feature
             })
 
         # Cache results
@@ -467,6 +480,7 @@ async def playground_search(
             "remaining_searches": limit_result.remaining,
             "limit": limit_result.limit,
             "search_time_ms": search_time,
+            "search_version": "v3" if request.use_v3 else "v2",
         }
     except HTTPException:
         raise
 
@@ -0,0 +1,247 @@
+#!/usr/bin/env python3
+"""
+Search V3 vs V2 Benchmark
+Run with: python3 scripts/benchmark_search_v3.py
+
+Compares:
+- V2 (OpenAI embeddings + Cohere reranking)
+- V3 (Voyage AI embeddings + Query Understanding + Code Graph + Cohere reranking)
+"""
+import asyncio
+import os
+import sys
+import time
+from typing import List, Dict, Tuple
+
+# add parent to path
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from dotenv import load_dotenv
+load_dotenv()
+
+from services.indexer_optimized import OptimizedCodeIndexer
+
+# Test queries representing real developer scenarios; each entry drives one V2-vs-V3 comparison
+TEST_QUERIES = [
+    {
+        "query": "how to add authentication",  # text sent to both search_v2 and search_v3
+        "expected_keywords": ["auth", "middleware", "authenticate", "credential"],  # substrings score_results looks for in the top 3 results
+        "description": "Developer wants to add auth to their app"  # printed as the "Scenario" line
+    },
+    {
+        "query": "handle websocket messages",
+        "expected_keywords": ["websocket", "message", "send", "receive", "on_"],
+        "description": "Developer working with WebSockets"
+    },
+    {
+        "query": "return json from endpoint",
+        "expected_keywords": ["json", "response", "jsonresponse", "return"],
+        "description": "Developer wants to return JSON data"
+    },
+    {
+        "query": "validate request data",
+        "expected_keywords": ["valid", "request", "data", "schema"],
+        "description": "Developer needs input validation"
+    },
+    {
+        "query": "middleware that runs before request",
+        "expected_keywords": ["middleware", "before", "dispatch", "call_next"],
+        "description": "Developer needs pre-request processing"
+    },
+    {
+        "query": "error handling",
+        "expected_keywords": ["error", "exception", "handler", "catch"],
+        "description": "Looking for error handling patterns"
+    },
+    {
+        "query": "route decorator",
+        "expected_keywords": ["route", "decorator", "path", "endpoint"],
+        "description": "Developer needs routing functionality"
+    },
+    {
+        "query": "database session",
+        "expected_keywords": ["database", "session", "db", "connection"],
+        "description": "Working with database sessions"
+    },
+]
+
+
+def score_results(results: List[Dict], expected_keywords: List[str]) -> Tuple[float, int, bool]:
+    """
+    Score the top 3 search results against a list of expected keywords.
+
+    Keywords are matched case-insensitively as substrings of the combined
+    name, qualified_name and summary of the top 3 results. A result counts
+    as a test hit when "test" appears in its file_path or name.
+
+    Returns: (score 0-10, matches count, is_test_in_top_3). Empty results or
+    an empty keyword list score 0.0 with 0 matches instead of dividing by zero.
+    """
+    if not results:
+        return 0.0, 0, False
+
+    def text_of(result: Dict, key: str) -> str:
+        # a field may be missing or explicitly None; treat both as empty text
+        return (result.get(key) or "").lower()
+
+    top_3 = results[:3]
+    top_3_text = "".join(
+        f" {text_of(r, 'name')} {text_of(r, 'qualified_name')} {text_of(r, 'summary')} "
+        for r in top_3
+    )
+    has_test_in_top_3 = any("test" in text_of(r, "file_path") or "test" in text_of(r, "name") for r in top_3)
+    if not expected_keywords:
+        return 0.0, 0, has_test_in_top_3
+
+    matches = sum(1 for kw in expected_keywords if kw.lower() in top_3_text)
+    return min(10.0, (matches / len(expected_keywords)) * 10), matches, has_test_in_top_3
+
+
+async def _timed_search(label: str, search, **kwargs) -> Tuple[List[Dict], float, bool]:
+    """
+    Await one search call and time it.
+
+    Returns: (results, elapsed ms, succeeded). A failed call prints the error
+    and yields ([], 0.0, False) so the caller can keep it out of timing totals.
+    """
+    start = time.perf_counter()
+    try:
+        results = await search(**kwargs)
+    except Exception as e:
+        print(f"   ❌ {label} Error: {e}")
+        return [], 0.0, False
+    return results, (time.perf_counter() - start) * 1000, True
+
+
+async def run_benchmark(repo_id: str):
+    """
+    Run each TEST_QUERIES entry through search_v2 and search_v3 and print a comparison.
+
+    Args:
+        repo_id: repository ID passed to both search versions.
+
+    Prints per-query scores and timings, then a summary table and a verdict.
+    Queries where either version raised are left out of the "Total Time" row,
+    since a failed call has no meaningful latency to compare.
+    """
+    print("=" * 80)
+    print("🧪 SEARCH V3 vs V2 BENCHMARK")
+    print("=" * 80)
+    print()
+
+    indexer = OptimizedCodeIndexer()
+    v2_scores, v3_scores = [], []
+    v2_times, v3_times = [], []
+    v2_test_count = v3_test_count = 0
+
+    for tc in TEST_QUERIES:
+        query = tc["query"]
+        expected = tc["expected_keywords"]
+        print(f"📝 Query: \"{query}\"")
+        print(f"   Scenario: {tc['description']}")
+        print()
+
+        v2_results, v2_time, v2_ok = await _timed_search(
+            "V2", indexer.search_v2,
+            query=query, repo_id=repo_id, top_k=5, use_reranking=True,
+        )
+        v3_results, v3_time, v3_ok = await _timed_search(
+            "V3", indexer.search_v3,
+            query=query, repo_id=repo_id, top_k=5, include_tests=False, use_reranking=True,
+        )
+
+        v2_score, v2_matches, v2_has_test = score_results(v2_results, expected)
+        v3_score, v3_matches, v3_has_test = score_results(v3_results, expected)
+        v2_scores.append(v2_score)
+        v3_scores.append(v3_score)
+        v2_test_count += int(v2_has_test)
+        v3_test_count += int(v3_has_test)
+        # only time queries that both versions completed, so the totals stay comparable
+        if v2_ok and v3_ok:
+            v2_times.append(v2_time)
+            v3_times.append(v3_time)
+
+        # Print comparison
+        print(f"   V2: Score {v2_score:.1f}/10 ({v2_matches}/{len(expected)} keywords) | {v2_time:.0f}ms")
+        if v2_results:
+            print(f"       Top result: {v2_results[0].get('name', 'unknown')}")
+
+        print(f"   V3: Score {v3_score:.1f}/10 ({v3_matches}/{len(expected)} keywords) | {v3_time:.0f}ms")
+        if v3_results:
+            print(f"       Top result: {v3_results[0].get('name', 'unknown')}")
+
+        # Winner
+        if v3_score > v2_score:
+            print(f"   🏆 V3 WINS (+{v3_score - v2_score:.1f})")
+        elif v2_score > v3_score:
+            print(f"   🏆 V2 WINS (+{v2_score - v3_score:.1f})")
+        else:
+            print("   🤝 TIE")
+
+        print()
+
+    if not v2_scores:
+        print("No test queries to run - nothing to summarize")
+        return
+
+    # Summary
+    print("=" * 80)
+    print("📊 BENCHMARK RESULTS")
+    print("=" * 80)
+
+    v2_avg = sum(v2_scores) / len(v2_scores)
+    v3_avg = sum(v3_scores) / len(v3_scores)
+    v2_total_time = sum(v2_times)
+    v3_total_time = sum(v3_times)
+
+    v2_wins = sum(1 for v2, v3 in zip(v2_scores, v3_scores) if v2 > v3)
+    v3_wins = sum(1 for v2, v3 in zip(v2_scores, v3_scores) if v3 > v2)
+    ties = len(v2_scores) - v2_wins - v3_wins
+
+    print(f"""
+┌─────────────────────────────────────────────────────────┐
+│ METRIC                    │    V2     │    V3     │     │
+├─────────────────────────────────────────────────────────┤
+│ Average Score             │ {v2_avg:>6.1f}/10 │ {v3_avg:>6.1f}/10 │ {"V3 ✓" if v3_avg > v2_avg else "V2 ✓" if v2_avg > v3_avg else "TIE":<5}│
+│ Total Time                │ {v2_total_time:>6.0f}ms │ {v3_total_time:>6.0f}ms │ {"V3 ✓" if v3_total_time < v2_total_time else "V2 ✓" if v2_total_time < v3_total_time else "TIE":<5}│
+│ Queries with test in top3 │ {v2_test_count:>6}   │ {v3_test_count:>6}   │ {"V3 ✓" if v3_test_count < v2_test_count else "V2 ✓" if v2_test_count < v3_test_count else "TIE":<5}│
+│ Wins                      │ {v2_wins:>6}   │ {v3_wins:>6}   │     │
+│ Ties                      │ {ties:>6}   │ {ties:>6}   │     │
+└─────────────────────────────────────────────────────────┘
+    """)
+
+    # Final verdict
+    print()
+    if v3_avg >= v2_avg + 1.0:
+        print("✅ VERDICT: V3 is SIGNIFICANTLY BETTER - Ready for production!")
+    elif v3_avg > v2_avg:
+        print("✅ VERDICT: V3 is BETTER - Consider shipping!")
+    elif v3_avg == v2_avg:
+        print("⚠️ VERDICT: V3 is EQUAL to V2 - Need more optimization")
+    else:
+        print("❌ VERDICT: V3 is WORSE than V2 - Needs more work")
+
+    print()
+    # Check for Voyage
+    try:
+        from services.search_v3.integration import get_search_v3
+        v3 = get_search_v3()
+        if v3.is_voyage_enabled:
+            print("🚀 Using Voyage AI code-specific embeddings")
+        else:
+            print("⚠️ Voyage AI not enabled - using OpenAI embeddings")
+            print("   Set VOYAGE_API_KEY for better code search accuracy!")
+    except Exception as e:
+        # best-effort status line; the benchmark results above do not depend on it
+        print(f"⚠️ Could not check Voyage AI status: {e}")
+
+
+if __name__ == "__main__":
+    # Repo to benchmark: BENCHMARK_REPO_ID env var, else the default repo ID (starlette)
+    REPO_ID = os.environ.get("BENCHMARK_REPO_ID", "0323a08f-9d21-4c59-b567-e0629a9bbb24")
+
+    # one print call emits the two banner lines followed by a blank line
+    banner = (f"Using repo_id: {REPO_ID}", "Set BENCHMARK_REPO_ID env var to use a different repo", "")
+    print(*banner, sep="\n")
+
+    asyncio.run(run_benchmark(REPO_ID))
@@ -568,6 +568,69 @@ async def embed_query(q: str) -> List[float]:
             metrics.increment("search_v2_errors")
             return []
 
+    async def search_v3(
+        self,
+        query: str,
+        repo_id: str,
+        top_k: int = 10,
+        include_tests: bool = False,
+        use_reranking: bool = True,
+    ) -> List[Dict]:
+        """
+        Search V3 - "Project Brain" search with:
+        - Voyage AI code-optimized embeddings (if available)
+        - Query understanding & intent classification
+        - Code graph importance ranking
+        - Test file filtering
+        - Cohere reranking
+
+        Delegates to the search_v3 integration, passing cached file
+        dependencies when they can be loaded. Any failure, including a
+        failed import of the integration, is reported and answered with
+        search_v2 results instead (include_tests is not passed to V2).
+        """
+        start_time = time.time()
+        metrics.increment("search_v3_requests")
+
+        try:
+            # imported inside the try so a missing or broken V3 stack still falls back to V2
+            from services.search_v3.integration import get_search_v3
+            v3 = get_search_v3()
+
+            # load file dependencies for code graph ranking (optional: search runs without them)
+            file_dependencies = None
+            try:
+                from services.dependency_analyzer import DependencyAnalyzer
+                cached = DependencyAnalyzer().load_from_cache(repo_id)
+                if cached:
+                    file_dependencies = cached.get("dependencies", {})
+            except Exception as e:
+                logger.warning("Could not load dependencies for V3 search", error=str(e))
+
+            results = await v3.search(
+                query=query, repo_id=repo_id, pinecone_index=self.index,
+                file_dependencies=file_dependencies, include_tests=include_tests,
+                top_k=top_k, use_reranking=use_reranking,
+            )
+
+            elapsed = time.time() - start_time
+            logger.info("Search V3 complete", repo_id=repo_id, results=len(results),
+                        duration_ms=round(elapsed * 1000), voyage_enabled=v3.is_voyage_enabled)
+            metrics.timing("search_v3_latency_ms", elapsed * 1000)
+
+            return results
+
+        except Exception as e:
+            capture_exception(e, operation="search_v3", repo_id=repo_id, query=query[:100])
+            logger.error("Search V3 failed", error=str(e))
+            metrics.increment("search_v3_errors")
+            # fallback to V2; keyword args so the call does not rely on search_v2's parameter order
+            logger.info("Falling back to search_v2")
+            return await self.search_v2(
+                query=query, repo_id=repo_id, top_k=top_k, use_reranking=use_reranking
+            )
+
+
     async def explain_code(
         self,
         repo_id: str,