Skip to content

Commit 12aaa02

Browse files
committed
feat(api): add /api/v1/search/v2 endpoint (#142)
- New route: POST /api/v1/search/v2
- Uses hybrid search with BM25 + semantic + Cohere reranking
- Returns function-level results with AI summaries
- Feature flag: SEARCH_V2_ENABLED (default: true)
- Consistent API versioning: all endpoints under /api/v1/*
- 4 tests passing
1 parent 2a9ca5a commit 12aaa02

4 files changed

Lines changed: 228 additions & 0 deletions

File tree

.env.example

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,10 @@ REDIS_PORT=6379
3939
# Get DSN from: https://sentry.io → Settings → Projects → Client Keys
4040
SENTRY_DSN=
4141
ENVIRONMENT=development # development, staging, production
42+
43+
# Search V2 Configuration
44+
# Cohere API for reranking (Optional - improves search quality)
45+
# Get from: https://dashboard.cohere.com/api-keys
46+
# Free tier: 10K requests/month
47+
COHERE_API_KEY=
48+
SEARCH_V2_ENABLED=true

backend/main.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from routes.analysis import router as analysis_router
2727
from routes.api_keys import router as api_keys_router
2828
from routes.users import router as users_router
29+
from routes.search_v2 import router as search_v2_router
2930

3031

3132
# Lifespan context manager for startup/shutdown
@@ -88,6 +89,7 @@ async def dispatch(self, request: Request, call_next):
8889
app.include_router(analysis_router, prefix=API_PREFIX)
8990
app.include_router(api_keys_router, prefix=API_PREFIX)
9091
app.include_router(users_router, prefix=API_PREFIX)
92+
app.include_router(search_v2_router, prefix=API_PREFIX)
9193

9294
# WebSocket endpoint (versioned)
9395
app.add_api_websocket_route(f"{API_PREFIX}/ws/index/{{repo_id}}", websocket_index)

backend/routes/search_v2.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
"""Search V2 API - Function-level semantic search with hybrid ranking."""
2+
import os
3+
import time
4+
from fastapi import APIRouter, HTTPException, Depends
5+
from pydantic import BaseModel, Field
6+
from typing import List, Optional
7+
8+
from dependencies import indexer, cache, metrics, verify_repo_access
9+
from services.input_validator import InputValidator
10+
from middleware.auth import require_auth, AuthContext
11+
12+
# Router for the Search V2 endpoints: routes declared on it are registered
# under the "/search" prefix and carry the "Search V2" tag.
router = APIRouter(prefix="/search", tags=["Search V2"])
13+
14+
# Feature flag for the V2 search endpoint, read once at import time.
# Defaults to enabled when the variable is unset. Surrounding whitespace and
# letter case are ignored, and the usual truthy spellings are accepted so that
# e.g. SEARCH_V2_ENABLED=1 does not silently disable the endpoint.
SEARCH_V2_ENABLED = os.getenv("SEARCH_V2_ENABLED", "true").strip().lower() in {
    "1",
    "true",
    "yes",
    "on",
}
15+
16+
17+
class SearchV2Request(BaseModel):
    """Request payload accepted by the Search V2 endpoint."""

    # Search text; required, between 1 and 500 characters.
    query: str = Field(min_length=1, max_length=500)
    # Identifier of the repository to search; required.
    repo_id: str
    # Number of results requested; 1 to 50, 10 when omitted.
    top_k: int = Field(10, ge=1, le=50)
    # Passed through to the indexer's ``use_reranking`` argument.
    use_reranking: bool = Field(default=True)
22+
23+
24+
class SearchResultV2(BaseModel):
    """A single function-level hit returned by Search V2."""

    # Required fields describing the matched function.
    name: str
    qualified_name: str
    file_path: str
    code: str
    signature: str
    language: str
    score: float
    line_start: int
    line_end: int

    # Optional enrichment; each is None when not supplied.
    summary: Optional[str] = Field(default=None)
    class_name: Optional[str] = Field(default=None)
    match_reason: Optional[str] = Field(default=None)
37+
38+
39+
class SearchV2Response(BaseModel):
    """Response envelope produced by the Search V2 endpoint."""

    # Hits for the request.
    results: List[SearchResultV2]
    # The query string echoed back by the endpoint.
    query: str
    # Number of entries in ``results``.
    total: int
    # True when the results were served from the cache.
    cached: bool
    # Fixed marker identifying this response format.
    search_version: str = Field(default="v2")
45+
46+
47+
@router.post("/v2", response_model=SearchV2Response)
async def search_v2(
    request: SearchV2Request,
    auth: AuthContext = Depends(require_auth)
):
    """Function-level semantic search with hybrid BM25 + vector ranking.

    Steps, in order: feature-flag check, repository access check, query
    validation and sanitisation, cache lookup, and (on a cache miss) a call
    to ``indexer.search_v2`` whose results are cached for one hour.

    Args:
        request: Search payload (query, repo_id, top_k, use_reranking).
        auth: Authenticated caller, injected through ``require_auth``.

    Returns:
        SearchV2Response carrying the results, the sanitised query, the
        result count and whether the results came from the cache.

    Raises:
        HTTPException: 503 when the feature flag is off, 400 when the query
            fails validation, 500 when the search itself fails unexpectedly.
            An HTTPException raised during the search is passed through
            unchanged, and anything raised by ``verify_repo_access``
            propagates as-is.
    """
    # Local import: the module's top-level import block does not pull in logging.
    import logging

    if not SEARCH_V2_ENABLED:
        raise HTTPException(status_code=503, detail="Search V2 is not enabled")

    verify_repo_access(request.repo_id, auth.user_id)

    valid_query, query_error = InputValidator.validate_search_query(request.query)
    if not valid_query:
        raise HTTPException(status_code=400, detail=f"Invalid query: {query_error}")

    sanitized_query = InputValidator.sanitize_string(request.query, max_length=500)
    # perf_counter is monotonic, so the measured duration cannot go negative
    # or jump if the wall clock is adjusted mid-request.
    start_time = time.perf_counter()

    try:
        # use_reranking is part of the key: reranked and non-reranked searches
        # can return different results and must not share a cache entry.
        cache_key = (
            f"v2:{sanitized_query}:{request.repo_id}:{request.top_k}"
            f":rerank={int(request.use_reranking)}"
        )
        cached = cache.get_search_results(cache_key, request.repo_id)
        if cached:
            metrics.record_search(time.perf_counter() - start_time, cached=True)
            return SearchV2Response(
                results=cached,
                query=sanitized_query,
                total=len(cached),
                cached=True,
            )

        results = await indexer.search_v2(
            query=sanitized_query,
            repo_id=request.repo_id,
            top_k=request.top_k,
            use_reranking=request.use_reranking,
        )

        cache.set_search_results(cache_key, request.repo_id, results, ttl=3600)
        metrics.record_search(time.perf_counter() - start_time, cached=False)

        return SearchV2Response(
            results=results,
            query=sanitized_query,
            total=len(results),
            cached=False,
        )

    except HTTPException:
        # Keep deliberate HTTP errors (status and detail) intact instead of
        # collapsing them into a generic 500.
        raise
    except Exception as e:
        # Log the full failure server-side; do not echo internal error text
        # (paths, upstream messages, keys) back to the client.
        logging.getLogger(__name__).exception(
            "Search V2 failed for repo %s", request.repo_id
        )
        raise HTTPException(status_code=500, detail="Search failed") from e
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
"""Tests for Search V2 API route."""
2+
import pytest
3+
from unittest.mock import AsyncMock, MagicMock, patch
4+
5+
6+
class TestSearchV2Route:
    """Tests for /api/v1/search/v2 endpoint.

    The route coroutine is called directly with an explicit AuthContext, and
    the module-level collaborators of ``routes.search_v2`` (indexer, cache,
    metrics, verify_repo_access) are patched so no real backend is touched.
    """

    @pytest.fixture
    def mock_auth(self):
        """Patch ``require_auth`` in the route module with a stub user."""
        with patch("routes.search_v2.require_auth") as mock:
            mock.return_value = MagicMock(user_id="test-user")
            yield mock

    @pytest.fixture
    def mock_indexer(self):
        """Patch the indexer so ``search_v2`` returns one canned result."""
        with patch("routes.search_v2.indexer") as mock:
            mock.search_v2 = AsyncMock(return_value=[
                {
                    "name": "authenticate",
                    "qualified_name": "auth.authenticate",
                    "file_path": "src/auth.py",
                    "code": "def authenticate(): pass",
                    "signature": "def authenticate() -> bool",
                    "language": "python",
                    "score": 0.95,
                    "line_start": 10,
                    "line_end": 20,
                    "summary": "Authenticates user",
                    "class_name": None,
                    "match_reason": None,
                }
            ])
            yield mock

    @pytest.fixture
    def mock_cache(self):
        """Patch the cache; lookups miss (return None) unless a test overrides."""
        with patch("routes.search_v2.cache") as mock:
            mock.get_search_results = MagicMock(return_value=None)
            mock.set_search_results = MagicMock()
            yield mock

    @pytest.fixture
    def mock_verify_access(self):
        """Patch the repository access check so it always passes."""
        with patch("routes.search_v2.verify_repo_access") as mock:
            yield mock

    @pytest.fixture
    def mock_metrics(self):
        """Patch the metrics recorder."""
        with patch("routes.search_v2.metrics") as mock:
            mock.record_search = MagicMock()
            yield mock

    @pytest.mark.asyncio
    async def test_search_v2_returns_results(
        self, mock_auth, mock_indexer, mock_cache, mock_verify_access, mock_metrics
    ):
        """A cache miss returns the indexer's results, marked as not cached."""
        from routes.search_v2 import search_v2, SearchV2Request
        from middleware.auth import AuthContext

        request = SearchV2Request(
            query="authentication",
            repo_id="test-repo",
            top_k=10,
        )
        auth = AuthContext(user_id="test-user", email="test@test.com")

        response = await search_v2(request, auth)

        assert response.total == 1
        assert response.search_version == "v2"
        assert response.cached is False
        assert response.results[0].name == "authenticate"

    @pytest.mark.asyncio
    async def test_search_v2_uses_cache(
        self, mock_auth, mock_indexer, mock_cache, mock_verify_access, mock_metrics
    ):
        """A cache hit is returned as-is without calling the indexer."""
        from routes.search_v2 import search_v2, SearchV2Request
        from middleware.auth import AuthContext

        mock_cache.get_search_results.return_value = [
            {"name": "cached_result", "qualified_name": "cached", "file_path": "x.py",
             "code": "", "signature": "", "language": "python", "score": 0.9,
             "line_start": 1, "line_end": 2}
        ]

        request = SearchV2Request(query="test", repo_id="repo", top_k=5)
        auth = AuthContext(user_id="test-user", email="test@test.com")

        response = await search_v2(request, auth)

        assert response.cached is True
        # The cached payload itself must be what comes back.
        assert response.total == 1
        assert response.results[0].name == "cached_result"
        mock_indexer.search_v2.assert_not_called()
        # A hit must not be written back to the cache.
        mock_cache.set_search_results.assert_not_called()

    @pytest.mark.asyncio
    async def test_search_v2_rejects_sql_injection(
        self, mock_auth, mock_indexer, mock_cache, mock_verify_access, mock_metrics
    ):
        """A query that fails validation yields a 400 before any backend call."""
        from routes.search_v2 import search_v2, SearchV2Request
        from middleware.auth import AuthContext
        from fastapi import HTTPException

        request = SearchV2Request(query="DROP TABLE users;--", repo_id="repo", top_k=10)
        auth = AuthContext(user_id="test-user", email="test@test.com")

        with pytest.raises(HTTPException) as exc:
            await search_v2(request, auth)

        assert exc.value.status_code == 400
        # Indexer and cache are mocked so the test stays hermetic, and neither
        # may be reached once validation has rejected the query.
        mock_cache.get_search_results.assert_not_called()
        mock_indexer.search_v2.assert_not_called()

    @pytest.mark.asyncio
    async def test_search_v2_respects_top_k(
        self, mock_auth, mock_indexer, mock_cache, mock_verify_access, mock_metrics
    ):
        """The requested ``top_k`` is forwarded to the indexer unchanged."""
        from routes.search_v2 import search_v2, SearchV2Request
        from middleware.auth import AuthContext

        request = SearchV2Request(query="test query", repo_id="repo", top_k=25)
        auth = AuthContext(user_id="test-user", email="test@test.com")

        await search_v2(request, auth)

        mock_indexer.search_v2.assert_called_once()
        call_args = mock_indexer.search_v2.call_args
        assert call_args.kwargs["top_k"] == 25

0 commit comments

Comments
 (0)