Skip to content

Commit c19fab8

Browse files
committed
feat(search): Search V3 'Project Brain' - Full Overhaul
🎯 Major Features: - Voyage AI code-specific embeddings (voyage-code-3) - 13.8% better accuracy than OpenAI for code search - Auto-fallback to OpenAI if Voyage unavailable - Query Understanding & Intent Classification - Detects: FIND, EXPLAIN, USAGE, DEBUG intents - Smart query expansion with code synonyms - 'json' → 'JSONResponse, json_response, application/json' - Code Graph Ranking (PageRank-style) - Integrates with existing dependency analyzer - Importance scoring based on usage/references - Files depended on by many others rank higher - Test File Filtering (CEO request!) - Auto-detection of test files - User-configurable via include_tests parameter - Default: exclude tests (-70% penalty when included) - Core files boost (+30%): main.py, index.js, etc. 📁 New Files: - services/search_v3/embedding_provider.py - Voyage/OpenAI abstraction - services/search_v3/query_understanding.py - Intent classification - services/search_v3/code_graph_ranker.py - Importance ranking - services/search_v3/search_engine.py - Main orchestrator - services/search_v3/integration.py - Bridge to indexer - tests/test_search_v3.py - 14 passing unit tests - scripts/benchmark_search_v3.py - V2 vs V3 comparison 📝 Modified: - routes/playground.py - Added use_v3 and include_tests params - services/indexer_optimized.py - Added search_v3() method - requirements.txt - Added voyageai>=0.3.0 - .env.example - Added VOYAGE_API_KEY 🔧 API Changes: - Search now uses V3 by default (use_v3=True) - New response field: search_version ('v2' or 'v3') - New result field: is_test_file (boolean) Tests: 14/14 passing
1 parent f7ea9cd commit c19fab8

12 files changed

Lines changed: 1778 additions & 8 deletions

backend/.env.example

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,7 @@ REDIS_PORT=6379
2626
# Get DSN from https://sentry.io → Settings → Projects → Client Keys
2727
SENTRY_DSN=
2828
ENVIRONMENT=development
29+
30+
# Search V3 - Voyage AI Code Embeddings (recommended for code search)
31+
# Get API key from https://dash.voyageai.com/
32+
VOYAGE_API_KEY=your_voyage_api_key_here

backend/requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,6 @@ sentry-sdk[fastapi]>=2.0.0
4343
# Search V2 - Hybrid search
4444
rank-bm25>=0.2.2
4545
cohere>=5.0.0
46+
47+
# Search V3 - Code-optimized embeddings
48+
voyageai>=0.3.0

backend/routes/playground.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ class PlaygroundSearchRequest(BaseModel):
4949
demo_repo: Optional[str] = None # Keep for backward compat
5050
repo_id: Optional[str] = None # Direct repo_id (user-indexed repos)
5151
max_results: int = 10
52+
# V3 options
53+
use_v3: bool = True # Use Search V3 by default (better accuracy)
54+
include_tests: bool = False # Include test files in results
5255

5356

5457
class ValidateRepoRequest(BaseModel):
@@ -429,17 +432,26 @@ async def playground_search(
429432
"limit": limit_result.limit,
430433
}
431434

432-
# Search V2: Hybrid search with BM25 + Cohere reranking
433-
v2_results = await indexer.search_v2(
434-
query=sanitized_query,
435-
repo_id=repo_id,
436-
top_k=min(request.max_results, 10),
437-
use_reranking=True
438-
)
435+
# Search V3 (default) or V2 (fallback)
436+
if request.use_v3:
437+
search_results = await indexer.search_v3(
438+
query=sanitized_query,
439+
repo_id=repo_id,
440+
top_k=min(request.max_results, 10),
441+
include_tests=request.include_tests,
442+
use_reranking=True
443+
)
444+
else:
445+
search_results = await indexer.search_v2(
446+
query=sanitized_query,
447+
repo_id=repo_id,
448+
top_k=min(request.max_results, 10),
449+
use_reranking=True
450+
)
439451

440452
# Format results for frontend compatibility
441453
results = []
442-
for r in v2_results:
454+
for r in search_results:
443455
results.append({
444456
"name": r.get("name", ""),
445457
"qualified_name": r.get("qualified_name", r.get("name", "")),
@@ -453,6 +465,7 @@ async def playground_search(
453465
"type": "function", # backward compat with V1
454466
"summary": r.get("summary"),
455467
"class_name": r.get("class_name"),
468+
"is_test_file": r.get("is_test_file", False), # V3 feature
456469
})
457470

458471
# Cache results
@@ -467,6 +480,7 @@ async def playground_search(
467480
"remaining_searches": limit_result.remaining,
468481
"limit": limit_result.limit,
469482
"search_time_ms": search_time,
483+
"search_version": "v3" if request.use_v3 else "v2",
470484
}
471485
except HTTPException:
472486
raise
Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Search V3 vs V2 Benchmark
4+
Run with: python3 scripts/benchmark_search_v3.py
5+
6+
Compares:
7+
- V2 (OpenAI embeddings + Cohere reranking)
8+
- V3 (Voyage AI embeddings + Query Understanding + Code Graph + Cohere reranking)
9+
"""
10+
import asyncio
11+
import os
12+
import sys
13+
import time
14+
from typing import List, Dict, Tuple
15+
16+
# add parent to path
17+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
18+
19+
from dotenv import load_dotenv
20+
load_dotenv()
21+
22+
from services.indexer_optimized import OptimizedCodeIndexer
23+
24+
# Test queries representing real developer scenarios
25+
# Each row is (query, expected keywords, scenario description). The rows are
# expanded below into the dict shape the benchmark loop reads.
_QUERY_SPECS = (
    (
        "how to add authentication",
        ["auth", "middleware", "authenticate", "credential"],
        "Developer wants to add auth to their app",
    ),
    (
        "handle websocket messages",
        ["websocket", "message", "send", "receive", "on_"],
        "Developer working with WebSockets",
    ),
    (
        "return json from endpoint",
        ["json", "response", "jsonresponse", "return"],
        "Developer wants to return JSON data",
    ),
    (
        "validate request data",
        ["valid", "request", "data", "schema"],
        "Developer needs input validation",
    ),
    (
        "middleware that runs before request",
        ["middleware", "before", "dispatch", "call_next"],
        "Developer needs pre-request processing",
    ),
    (
        "error handling",
        ["error", "exception", "handler", "catch"],
        "Looking for error handling patterns",
    ),
    (
        "route decorator",
        ["route", "decorator", "path", "endpoint"],
        "Developer needs routing functionality",
    ),
    (
        "database session",
        ["database", "session", "db", "connection"],
        "Working with database sessions",
    ),
)

# One dict per benchmark case, keyed "query" / "expected_keywords" / "description".
TEST_QUERIES = [
    {"query": text, "expected_keywords": keywords, "description": scenario}
    for text, keywords, scenario in _QUERY_SPECS
]
67+
68+
69+
def score_results(results: List[Dict], expected_keywords: List[str]) -> Tuple[float, int, bool]:
    """
    Score search results based on expected keywords.

    Only the first three results are inspected. A keyword counts as matched
    when it occurs, case-insensitively and as a substring, in the combined
    name, qualified name and summary of those results.

    Args:
        results: Search hits as dicts. The keys read are "name",
            "qualified_name", "summary" and "file_path"; a missing key or a
            None value is treated as an empty string.
        expected_keywords: Keywords a good result set should mention. An
            empty list yields a score of 0.0 instead of dividing by zero.

    Returns:
        (score 0-10, matches count, is_test_in_top_3)
    """
    if not results:
        return 0.0, 0, False

    text_parts = []
    has_test_in_top_3 = False

    for hit in results[:3]:
        # `or ""` guards against keys that are present but hold None
        name = (hit.get("name") or "").lower()
        qualified = (hit.get("qualified_name") or "").lower()
        summary = (hit.get("summary") or "").lower()
        file_path = (hit.get("file_path") or "").lower()

        text_parts.append(f" {name} {qualified} {summary} ")

        # Substring heuristic: any "test" in the path or symbol name counts
        if "test" in file_path or "test" in name:
            has_test_in_top_3 = True

    top_3_text = "".join(text_parts)

    # count keyword matches
    matches = sum(1 for kw in expected_keywords if kw.lower() in top_3_text)
    if expected_keywords:
        score = min(10.0, (matches / len(expected_keywords)) * 10)
    else:
        score = 0.0

    return score, matches, has_test_in_top_3
98+
99+
100+
async def _timed_search(search_fn, label: str, **search_kwargs) -> Tuple[List[Dict], float]:
    """Await one search call; return (results, elapsed time in milliseconds)."""
    started = time.perf_counter()
    try:
        results = await search_fn(**search_kwargs)
    except Exception as e:
        # Best-effort: a failing query is reported and scored as "no results"
        print(f" ❌ {label} Error: {e}")
        results = []
    # Measured on failure too, so an erroring engine is not credited with 0ms
    elapsed_ms = (time.perf_counter() - started) * 1000
    return results, elapsed_ms


def _pick_winner(v2_value: float, v3_value: float, lower_is_better: bool = False) -> str:
    """Return the summary-table marker for whichever version has the better value."""
    if v2_value == v3_value:
        return "TIE"
    v3_better = (v3_value < v2_value) if lower_is_better else (v3_value > v2_value)
    return "V3 ✓" if v3_better else "V2 ✓"


def _table_row(label: str, v2_cell: str, v3_cell: str, winner: str = "") -> str:
    """Format one fixed-width row of the summary table."""
    return f"│ {label:<25} │ {v2_cell:>9} │ {v3_cell:>9} │ {winner:<5} │"


async def run_benchmark(repo_id: str):
    """
    Run benchmark comparing V2 vs V3.

    For every entry in TEST_QUERIES this runs search_v2 and search_v3 on a
    fresh OptimizedCodeIndexer with top_k=5 and reranking enabled (V3 with
    include_tests=False), scores both result sets with score_results, and
    prints a per-query comparison followed by a summary table and a verdict.

    Args:
        repo_id: ID of the indexed repository to search.

    Returns:
        None. All output goes to stdout.
    """
    print("=" * 80)
    print("🧪 SEARCH V3 vs V2 BENCHMARK")
    print("=" * 80)
    print()

    indexer = OptimizedCodeIndexer()

    v2_scores = []
    v3_scores = []
    v2_times = []
    v3_times = []
    v2_test_count = 0
    v3_test_count = 0

    for tc in TEST_QUERIES:
        query = tc["query"]
        expected = tc["expected_keywords"]
        desc = tc["description"]

        print(f"📝 Query: \"{query}\"")
        print(f" Scenario: {desc}")
        print()

        # V2 Search
        v2_results, v2_time = await _timed_search(
            indexer.search_v2,
            "V2",
            query=query,
            repo_id=repo_id,
            top_k=5,
            use_reranking=True,
        )
        v2_score, v2_matches, v2_has_test = score_results(v2_results, expected)
        v2_scores.append(v2_score)
        v2_times.append(v2_time)
        if v2_has_test:
            v2_test_count += 1

        # V3 Search
        v3_results, v3_time = await _timed_search(
            indexer.search_v3,
            "V3",
            query=query,
            repo_id=repo_id,
            top_k=5,
            include_tests=False,
            use_reranking=True,
        )
        v3_score, v3_matches, v3_has_test = score_results(v3_results, expected)
        v3_scores.append(v3_score)
        v3_times.append(v3_time)
        if v3_has_test:
            v3_test_count += 1

        # Print comparison
        print(f" V2: Score {v2_score:.1f}/10 ({v2_matches}/{len(expected)} keywords) | {v2_time:.0f}ms")
        if v2_results:
            print(f" Top result: {v2_results[0].get('name', 'unknown')}")

        print(f" V3: Score {v3_score:.1f}/10 ({v3_matches}/{len(expected)} keywords) | {v3_time:.0f}ms")
        if v3_results:
            print(f" Top result: {v3_results[0].get('name', 'unknown')}")

        # Winner
        if v3_score > v2_score:
            print(f" 🏆 V3 WINS (+{v3_score - v2_score:.1f})")
        elif v2_score > v3_score:
            print(f" 🏆 V2 WINS (+{v2_score - v3_score:.1f})")
        else:
            print(" 🤝 TIE")

        print()

    # Summary
    print("=" * 80)
    print("📊 BENCHMARK RESULTS")
    print("=" * 80)

    if not v2_scores:
        # Nothing was run; the averages below would divide by zero
        print("No test queries defined - nothing to benchmark")
        return

    v2_avg = sum(v2_scores) / len(v2_scores)
    v3_avg = sum(v3_scores) / len(v3_scores)
    v2_total_time = sum(v2_times)
    v3_total_time = sum(v3_times)

    v2_wins = sum(1 for v2, v3 in zip(v2_scores, v3_scores) if v2 > v3)
    v3_wins = sum(1 for v2, v3 in zip(v2_scores, v3_scores) if v3 > v2)
    ties = len(v2_scores) - v2_wins - v3_wins

    # Inner width of _table_row: 25 + 9 + 9 + 5 columns plus 11 padding/separator chars
    rule = "─" * 59
    table_lines = [
        f"┌{rule}┐",
        _table_row("METRIC", "V2", "V3"),
        f"├{rule}┤",
        _table_row(
            "Average Score",
            f"{v2_avg:.1f}/10",
            f"{v3_avg:.1f}/10",
            _pick_winner(v2_avg, v3_avg),
        ),
        _table_row(
            "Total Time",
            f"{v2_total_time:.0f}ms",
            f"{v3_total_time:.0f}ms",
            _pick_winner(v2_total_time, v3_total_time, lower_is_better=True),
        ),
        _table_row(
            "Queries with test in top3",
            str(v2_test_count),
            str(v3_test_count),
            _pick_winner(v2_test_count, v3_test_count, lower_is_better=True),
        ),
        _table_row("Wins", str(v2_wins), str(v3_wins)),
        _table_row("Ties", str(ties), str(ties)),
        f"└{rule}┘",
    ]
    # Blank line before and after the table, as in the original layout
    print("\n" + "\n".join(table_lines) + "\n")

    # Final verdict
    print()
    if v3_avg >= v2_avg + 1.0:
        print("✅ VERDICT: V3 is SIGNIFICANTLY BETTER - Ready for production!")
    elif v3_avg > v2_avg:
        print("✅ VERDICT: V3 is BETTER - Consider shipping!")
    elif v3_avg == v2_avg:
        print("⚠️ VERDICT: V3 is EQUAL to V2 - Need more optimization")
    else:
        print("❌ VERDICT: V3 is WORSE than V2 - Needs more work")

    print()

    # Check for Voyage
    try:
        from services.search_v3.integration import get_search_v3
        v3 = get_search_v3()
        if v3.is_voyage_enabled:
            print("🚀 Using Voyage AI code-specific embeddings")
        else:
            print("⚠️ Voyage AI not enabled - using OpenAI embeddings")
            print(" Set VOYAGE_API_KEY for better code search accuracy!")
    except Exception as e:
        # Informational only: report why the provider check failed, don't abort
        print(f"⚠️ Could not determine embedding provider: {e}")
237+
238+
239+
if __name__ == "__main__":
    # The fallback is the default (starlette) repo ID; override it through
    # the BENCHMARK_REPO_ID environment variable.
    REPO_ID = os.environ.get(
        "BENCHMARK_REPO_ID",
        "0323a08f-9d21-4c59-b567-e0629a9bbb24",
    )

    # One call emits the two banner lines followed by a blank line
    print(
        f"Using repo_id: {REPO_ID}",
        "Set BENCHMARK_REPO_ID env var to use a different repo",
        "",
        sep="\n",
    )

    asyncio.run(run_benchmark(REPO_ID))

backend/services/indexer_optimized.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,69 @@ async def embed_query(q: str) -> List[float]:
568568
metrics.increment("search_v2_errors")
569569
return []
570570

571+
async def search_v3(
    self,
    query: str,
    repo_id: str,
    top_k: int = 10,
    include_tests: bool = False,
    use_reranking: bool = True,
) -> List[Dict]:
    """
    Search V3 - "Project Brain" search with:
    - Voyage AI code-optimized embeddings (if available)
    - Query understanding & intent classification
    - Code graph importance ranking
    - Test file filtering
    - Cohere reranking

    Args:
        query: Search query text.
        repo_id: ID of the indexed repository to search.
        top_k: Maximum number of results requested.
        include_tests: Forwarded to the V3 engine to control test-file results.
        use_reranking: Forwarded to the V3 engine (and to V2 on fallback).

    Returns:
        The V3 engine's results. If anything in the V3 path raises, the
        error is reported and the results of search_v2 are returned instead.
    """
    # NOTE(review): imported at call time rather than module level -
    # presumably to avoid a circular import or a hard dependency; confirm.
    from services.search_v3.integration import get_search_v3

    start_time = time.time()
    metrics.increment("search_v3_requests")

    try:
        v3 = get_search_v3()

        # load file dependencies for code graph ranking
        file_dependencies = None
        try:
            from services.dependency_analyzer import DependencyAnalyzer
            analyzer = DependencyAnalyzer()
            cached = analyzer.load_from_cache(repo_id)
            if cached:
                file_dependencies = cached.get("dependencies", {})
        except Exception as e:
            # Best-effort: the search still runs with file_dependencies=None
            logger.warning("Could not load dependencies for V3 search", error=str(e))

        results = await v3.search(
            query=query,
            repo_id=repo_id,
            pinecone_index=self.index,
            file_dependencies=file_dependencies,
            include_tests=include_tests,
            top_k=top_k,
            use_reranking=use_reranking
        )

        elapsed = time.time() - start_time
        logger.info("Search V3 complete",
                    repo_id=repo_id,
                    results=len(results),
                    duration_ms=round(elapsed*1000),
                    voyage_enabled=v3.is_voyage_enabled)
        metrics.timing("search_v3_latency_ms", elapsed * 1000)

        return results

    except Exception as e:
        capture_exception(e, operation="search_v3", repo_id=repo_id, query=query[:100])
        logger.error("Search V3 failed", error=str(e))
        metrics.increment("search_v3_errors")
        # fallback to V2
        logger.info("Falling back to search_v2")
        # Keyword arguments, matching the other search_v2 call sites, so the
        # fallback cannot mis-bind if the positional order ever differs.
        # NOTE(review): include_tests is not passed to V2, so test-file
        # filtering presumably does not apply on this path - confirm.
        return await self.search_v2(
            query=query,
            repo_id=repo_id,
            top_k=top_k,
            use_reranking=use_reranking,
        )
633+
571634
async def explain_code(
572635
self,
573636
repo_id: str,

0 commit comments

Comments
 (0)