Skip to content

Commit b6acf5d

Browse files
authored
Merge pull request #224 from DevanshuNEU/feature/search-v3-project-brain
feat(search): Search V3 'Project Brain' with Cohere Reranking
2 parents f4cd7a2 + ba3370f commit b6acf5d

24 files changed

Lines changed: 3198 additions & 45 deletions

backend/.env.example

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ OPENAI_API_KEY=your_openai_api_key_here
33
PINECONE_API_KEY=your_pinecone_api_key_here
44
PINECONE_INDEX_NAME=codeintel
55

6+
# Search V2 - Cohere Reranking (optional but recommended)
7+
COHERE_API_KEY=your_cohere_api_key_here
8+
69
# Supabase
710
SUPABASE_URL=https://your-project.supabase.co
811
SUPABASE_ANON_KEY=your_supabase_anon_key_here
@@ -23,3 +26,7 @@ REDIS_PORT=6379
2326
# Get DSN from https://sentry.io → Settings → Projects → Client Keys
2427
SENTRY_DSN=
2528
ENVIRONMENT=development
29+
30+
# Search V3 - Voyage AI Code Embeddings (recommended for code search)
31+
# Get API key from https://dash.voyageai.com/
32+
VOYAGE_API_KEY=your_voyage_api_key_here

backend/requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,6 @@ sentry-sdk[fastapi]>=2.0.0
4343
# Search V2 - Hybrid search
4444
rank-bm25>=0.2.2
4545
cohere>=5.0.0
46+
47+
# Search V3 - Code-optimized embeddings
48+
voyageai>=0.3.0

backend/routes/playground.py

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ class PlaygroundSearchRequest(BaseModel):
4949
demo_repo: Optional[str] = None # Keep for backward compat
5050
repo_id: Optional[str] = None # Direct repo_id (user-indexed repos)
5151
max_results: int = 10
52+
# V3 options
53+
use_v3: bool = True # Use Search V3 by default (better accuracy)
54+
include_tests: bool = False # Include test files in results
5255

5356

5457
class ValidateRepoRequest(BaseModel):
@@ -418,8 +421,9 @@ async def playground_search(
418421
try:
419422
sanitized_query = InputValidator.sanitize_string(request.query, max_length=200)
420423

421-
# Check cache
422-
cached_results = cache.get_search_results(sanitized_query, repo_id)
424+
# Check cache (include flags in key to avoid returning wrong results)
425+
cache_key = f"{sanitized_query}:v3={request.use_v3}:tests={request.include_tests}"
426+
cached_results = cache.get_search_results(cache_key, repo_id)
423427
if cached_results:
424428
return {
425429
"results": cached_results,
@@ -429,17 +433,44 @@ async def playground_search(
429433
"limit": limit_result.limit,
430434
}
431435

432-
# Search
433-
results = await indexer.semantic_search(
434-
query=sanitized_query,
435-
repo_id=repo_id,
436-
max_results=min(request.max_results, 10),
437-
use_query_expansion=True,
438-
use_reranking=True
439-
)
436+
# Search V3 (default) or V2 (fallback)
437+
if request.use_v3:
438+
search_results = await indexer.search_v3(
439+
query=sanitized_query,
440+
repo_id=repo_id,
441+
top_k=min(request.max_results, 10),
442+
include_tests=request.include_tests,
443+
use_reranking=True
444+
)
445+
else:
446+
search_results = await indexer.search_v2(
447+
query=sanitized_query,
448+
repo_id=repo_id,
449+
top_k=min(request.max_results, 10),
450+
use_reranking=True
451+
)
440452

441-
# Cache results
442-
cache.set_search_results(sanitized_query, repo_id, results, ttl=3600)
453+
# Format results for frontend compatibility
454+
results = []
455+
for r in search_results:
456+
results.append({
457+
"name": r.get("name", ""),
458+
"qualified_name": r.get("qualified_name", r.get("name", "")),
459+
"file_path": r.get("file_path", ""),
460+
"code": r.get("code", ""),
461+
"signature": r.get("signature", ""),
462+
"language": r.get("language", ""),
463+
"score": r.get("score", 0),
464+
"line_start": r.get("line_start", 0),
465+
"line_end": r.get("line_end", 0),
466+
"type": "function", # backward compat with V1
467+
"summary": r.get("summary"),
468+
"class_name": r.get("class_name"),
469+
"is_test_file": r.get("is_test_file", False), # V3 feature
470+
})
471+
472+
# Cache results (using same key that includes flags)
473+
cache.set_search_results(cache_key, repo_id, results, ttl=3600)
443474

444475
search_time = int((time.time() - start_time) * 1000)
445476

@@ -450,6 +481,7 @@ async def playground_search(
450481
"remaining_searches": limit_result.remaining,
451482
"limit": limit_result.limit,
452483
"search_time_ms": search_time,
484+
"search_version": "v3" if request.use_v3 else "v2",
453485
}
454486
except HTTPException:
455487
raise
Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Search V3 vs V2 Benchmark
4+
Run with: python3 scripts/benchmark_search_v3.py
5+
6+
Compares:
7+
- V2 (OpenAI embeddings + Cohere reranking)
8+
- V3 (Voyage AI embeddings + Query Understanding + Code Graph + Cohere reranking)
9+
"""
10+
import asyncio
11+
import os
12+
import sys
13+
import time
14+
from typing import List, Dict, Tuple
15+
16+
# add parent to path
17+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
18+
19+
from dotenv import load_dotenv
20+
load_dotenv()
21+
22+
from services.indexer_optimized import OptimizedCodeIndexer
23+
24+
# Test queries representing real developer scenarios
25+
# Benchmark scenarios. Each row is (query text, keywords expected to appear in
# the top results, human-readable scenario); rows are expanded into the dict
# shape the benchmark loop reads ("query", "expected_keywords", "description").
TEST_QUERIES = [
    {"query": text, "expected_keywords": keywords, "description": scenario}
    for text, keywords, scenario in (
        (
            "how to add authentication",
            ["auth", "middleware", "authenticate", "credential"],
            "Developer wants to add auth to their app",
        ),
        (
            "handle websocket messages",
            ["websocket", "message", "send", "receive", "on_"],
            "Developer working with WebSockets",
        ),
        (
            "return json from endpoint",
            ["json", "response", "jsonresponse", "return"],
            "Developer wants to return JSON data",
        ),
        (
            "validate request data",
            ["valid", "request", "data", "schema"],
            "Developer needs input validation",
        ),
        (
            "middleware that runs before request",
            ["middleware", "before", "dispatch", "call_next"],
            "Developer needs pre-request processing",
        ),
        (
            "error handling",
            ["error", "exception", "handler", "catch"],
            "Looking for error handling patterns",
        ),
        (
            "route decorator",
            ["route", "decorator", "path", "endpoint"],
            "Developer needs routing functionality",
        ),
        (
            "database session",
            ["database", "session", "db", "connection"],
            "Working with database sessions",
        ),
    )
]
67+
68+
69+
def score_results(results: List[Dict], expected_keywords: List[str]) -> Tuple[float, int, bool]:
    """
    Score search results based on expected keywords.

    Only the first three results are inspected. Their name, qualified name and
    summary are lower-cased and searched (substring match, case-insensitive)
    for each expected keyword.

    Args:
        results: Search hits as dicts; the keys "name", "qualified_name",
            "summary" and "file_path" are read, and each may be missing or None.
        expected_keywords: Keywords a good answer should mention.

    Returns: (score 0-10, matches count, is_test_in_top_3)
        The score is the fraction of keywords found, scaled to 0-10. With no
        results the function returns (0.0, 0, False); with no expected
        keywords the score and match count are 0 but the test flag is still
        computed from the results.
    """
    if not results:
        return 0.0, 0, False

    text_parts = []
    has_test_in_top_3 = False

    for r in results[:3]:
        # `or ""` covers keys that are present but explicitly set to None.
        name = (r.get("name") or "").lower()
        qualified = (r.get("qualified_name") or "").lower()
        summary = (r.get("summary") or "").lower()
        file_path = (r.get("file_path") or "").lower()

        text_parts.append(f" {name} {qualified} {summary} ")

        # check for test files
        if "test" in file_path or "test" in name:
            has_test_in_top_3 = True

    # Nothing to match against: avoid dividing by zero below.
    if not expected_keywords:
        return 0.0, 0, has_test_in_top_3

    top_3_text = "".join(text_parts)

    # count keyword matches
    matches = sum(1 for kw in expected_keywords if kw.lower() in top_3_text)
    score = min(10.0, (matches / len(expected_keywords)) * 10)

    return score, matches, has_test_in_top_3
98+
99+
100+
async def _timed_search(label: str, search_fn, **search_kwargs) -> Tuple[List[Dict], float, bool]:
    """Await one search call and time it.

    Returns (results, elapsed_ms, failed). On any exception the error is
    printed, results are empty and `failed` is True; the elapsed time is still
    the real wall time so a crashing engine is not credited with a 0 ms run.
    """
    start = time.perf_counter()
    try:
        results = await search_fn(**search_kwargs)
        failed = False
    except Exception as e:  # keep benchmarking the other queries/engine
        print(f" ❌ {label} Error: {e}")
        results = []
        failed = True
    return results, (time.perf_counter() - start) * 1000, failed


def _pick_winner(v2_value, v3_value, lower_is_better: bool = False) -> str:
    """Return the summary-table verdict cell for one metric: "V3 ✓", "V2 ✓" or "TIE"."""
    if v2_value == v3_value:
        return "TIE"
    v3_better = (v3_value < v2_value) if lower_is_better else (v3_value > v2_value)
    return "V3 ✓" if v3_better else "V2 ✓"


async def run_benchmark(repo_id: str):
    """Run benchmark comparing V2 vs V3.

    Every entry of TEST_QUERIES is searched with both `search_v2` and
    `search_v3` (top_k=5, reranking on, tests excluded for V3). Each result
    list is scored with `score_results`, a per-query comparison is printed,
    and a summary table plus verdict follow.

    Args:
        repo_id: Identifier of the indexed repository to search.
    """
    print("=" * 80)
    print("🧪 SEARCH V3 vs V2 BENCHMARK")
    print("=" * 80)
    print()

    if not TEST_QUERIES:
        # Averages below would divide by zero.
        print("No test queries defined - nothing to benchmark")
        return

    indexer = OptimizedCodeIndexer()

    v2_scores = []
    v3_scores = []
    v2_times = []
    v3_times = []
    v2_test_count = 0
    v3_test_count = 0
    v2_failures = 0
    v3_failures = 0

    for tc in TEST_QUERIES:
        query = tc["query"]
        expected = tc["expected_keywords"]
        desc = tc["description"]

        print(f"📝 Query: \"{query}\"")
        print(f" Scenario: {desc}")
        print()

        # V2 Search
        v2_results, v2_time, v2_failed = await _timed_search(
            "V2",
            indexer.search_v2,
            query=query,
            repo_id=repo_id,
            top_k=5,
            use_reranking=True,
        )
        v2_score, v2_matches, v2_has_test = score_results(v2_results, expected)
        v2_scores.append(v2_score)
        v2_times.append(v2_time)
        if v2_has_test:
            v2_test_count += 1
        if v2_failed:
            v2_failures += 1

        # V3 Search
        v3_results, v3_time, v3_failed = await _timed_search(
            "V3",
            indexer.search_v3,
            query=query,
            repo_id=repo_id,
            top_k=5,
            include_tests=False,
            use_reranking=True,
        )
        v3_score, v3_matches, v3_has_test = score_results(v3_results, expected)
        v3_scores.append(v3_score)
        v3_times.append(v3_time)
        if v3_has_test:
            v3_test_count += 1
        if v3_failed:
            v3_failures += 1

        # Print comparison
        print(f" V2: Score {v2_score:.1f}/10 ({v2_matches}/{len(expected)} keywords) | {v2_time:.0f}ms")
        if v2_results:
            print(f" Top result: {v2_results[0].get('name', 'unknown')}")

        print(f" V3: Score {v3_score:.1f}/10 ({v3_matches}/{len(expected)} keywords) | {v3_time:.0f}ms")
        if v3_results:
            print(f" Top result: {v3_results[0].get('name', 'unknown')}")

        # Winner
        if v3_score > v2_score:
            print(f" 🏆 V3 WINS (+{v3_score - v2_score:.1f})")
        elif v2_score > v3_score:
            print(f" 🏆 V2 WINS (+{v2_score - v3_score:.1f})")
        else:
            print(" 🤝 TIE")

        print()

    # Summary
    print("=" * 80)
    print("📊 BENCHMARK RESULTS")
    print("=" * 80)

    v2_avg = sum(v2_scores) / len(v2_scores)
    v3_avg = sum(v3_scores) / len(v3_scores)
    v2_total_time = sum(v2_times)
    v3_total_time = sum(v3_times)

    v2_wins = sum(1 for v2, v3 in zip(v2_scores, v3_scores) if v2 > v3)
    v3_wins = sum(1 for v2, v3 in zip(v2_scores, v3_scores) if v3 > v2)
    ties = len(v2_scores) - v2_wins - v3_wins

    # (metric, V2 cell, V3 cell, verdict) - every row goes through the same
    # format string so the column separators always line up.
    rows = [
        ("Average Score", f"{v2_avg:.1f}/10", f"{v3_avg:.1f}/10", _pick_winner(v2_avg, v3_avg)),
        (
            "Total Time",
            f"{v2_total_time:.0f}ms",
            f"{v3_total_time:.0f}ms",
            _pick_winner(v2_total_time, v3_total_time, lower_is_better=True),
        ),
        (
            "Queries with test in top3",
            str(v2_test_count),
            str(v3_test_count),
            _pick_winner(v2_test_count, v3_test_count, lower_is_better=True),
        ),
        ("Wins", str(v2_wins), str(v3_wins), ""),
        ("Ties", str(ties), str(ties), ""),
        ("Failed queries", str(v2_failures), str(v3_failures), ""),
    ]

    def _format_row(metric, v2_cell, v3_cell, verdict):
        return f"│ {metric:<25} │ {v2_cell:>10} │ {v3_cell:>10} │ {verdict:<5} │"

    header = _format_row("METRIC", "V2", "V3", "")
    rule = "─" * (len(header) - 2)

    print()
    print(f"┌{rule}┐")
    print(header)
    print(f"├{rule}┤")
    for row in rows:
        print(_format_row(*row))
    print(f"└{rule}┘")
    print()

    # Final verdict
    print()
    if v3_avg >= v2_avg + 1.0:
        print("✅ VERDICT: V3 is SIGNIFICANTLY BETTER - Ready for production!")
    elif v3_avg > v2_avg:
        print("✅ VERDICT: V3 is BETTER - Consider shipping!")
    elif v3_avg == v2_avg:
        print("⚠️ VERDICT: V3 is EQUAL to V2 - Need more optimization")
    else:
        print("❌ VERDICT: V3 is WORSE than V2 - Needs more work")

    print()

    # Check for Voyage
    try:
        from services.search_v3.integration import get_search_v3
        v3 = get_search_v3()
        if v3.is_voyage_enabled:
            print("🚀 Using Voyage AI code-specific embeddings")
        else:
            print("⚠️ Voyage AI not enabled - using OpenAI embeddings")
            print(" Set VOYAGE_API_KEY for better code search accuracy!")
    except Exception as e:
        print(f"⚠️ Could not check Voyage status: {e}")
237+
238+
239+
if __name__ == "__main__":
    # Repo to benchmark: BENCHMARK_REPO_ID if set, otherwise the built-in
    # default ID (starlette) - change as needed.
    REPO_ID = os.environ.get(
        "BENCHMARK_REPO_ID",
        "0323a08f-9d21-4c59-b567-e0629a9bbb24",
    )

    print(f"Using repo_id: {REPO_ID}")
    print("Set BENCHMARK_REPO_ID env var to use a different repo")
    print()

    benchmark = run_benchmark(REPO_ID)
    asyncio.run(benchmark)

0 commit comments

Comments
 (0)