Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,10 @@ REDIS_PORT=6379
# Get DSN from: https://sentry.io → Settings → Projects → Client Keys
SENTRY_DSN=
ENVIRONMENT=development # development, staging, production

# Search V2 Configuration
# Cohere API for reranking (Optional - improves search quality)
# Get from: https://dashboard.cohere.com/api-keys
# Free tier: 10K requests/month
COHERE_API_KEY=
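# Feature flag for the /search/v2 endpoint: "true" (case-insensitive) enables it; any other value makes the endpoint respond with HTTP 503
SEARCH_V2_ENABLED=true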
2 changes: 2 additions & 0 deletions backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from routes.analysis import router as analysis_router
from routes.api_keys import router as api_keys_router
from routes.users import router as users_router
from routes.search_v2 import router as search_v2_router


# Lifespan context manager for startup/shutdown
Expand Down Expand Up @@ -88,6 +89,7 @@ async def dispatch(self, request: Request, call_next):
app.include_router(analysis_router, prefix=API_PREFIX)
app.include_router(api_keys_router, prefix=API_PREFIX)
app.include_router(users_router, prefix=API_PREFIX)
app.include_router(search_v2_router, prefix=API_PREFIX)

# WebSocket endpoint (versioned)
app.add_api_websocket_route(f"{API_PREFIX}/ws/index/{{repo_id}}", websocket_index)
Expand Down
95 changes: 95 additions & 0 deletions backend/routes/search_v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
"""Search V2 API - Function-level semantic search with hybrid ranking."""
import os
import time
from fastapi import APIRouter, HTTPException, Depends
from pydantic import BaseModel, Field
from typing import List, Optional

from dependencies import indexer, cache, metrics, verify_repo_access
from services.input_validator import InputValidator
from middleware.auth import require_auth, AuthContext

# Router for the Search V2 endpoints; every route declared on it is served
# under the "/search" prefix and grouped under the "Search V2" tag.
router = APIRouter(prefix="/search", tags=["Search V2"])

# Feature flag, read once at import time (defaults to enabled when unset).
# Only the value "true" enables the endpoint; letter case and surrounding
# whitespace are ignored so that a padded value such as "true " from a
# hand-edited env file is not silently treated as "disabled".
SEARCH_V2_ENABLED = os.getenv("SEARCH_V2_ENABLED", "true").strip().lower() == "true"


class SearchV2Request(BaseModel):
    """Request body accepted by the Search V2 endpoint."""

    # Free-text search query; must be between 1 and 500 characters long.
    query: str = Field(..., min_length=1, max_length=500)

    # Identifier of the repository to search.
    repo_id: str

    # Maximum number of results to return: 1-50, defaults to 10.
    top_k: int = Field(default=10, ge=1, le=50)

    # Forwarded unchanged to the indexer's `use_reranking` argument.
    use_reranking: bool = True


class SearchResultV2(BaseModel):
    """A single function-level hit returned by Search V2."""

    # --- Required fields -------------------------------------------------
    name: str
    qualified_name: str
    file_path: str
    code: str
    signature: str
    language: str
    score: float
    line_start: int
    line_end: int

    # --- Optional fields: default to None when the backend omits them ----
    summary: Optional[str] = None
    class_name: Optional[str] = None
    match_reason: Optional[str] = None


class SearchV2Response(BaseModel):
    """Response envelope returned by the Search V2 endpoint."""

    # Hits for the query.
    results: List[SearchResultV2]

    # The query as it was actually searched (after sanitization).
    query: str

    # Number of entries in `results`.
    total: int

    # True when the results were served from the cache.
    cached: bool

    # Constant marker that lets clients tell V2 responses apart.
    search_version: str = "v2"


@router.post("/v2", response_model=SearchV2Response)
async def search_v2(
    request: SearchV2Request,
    auth: AuthContext = Depends(require_auth),
):
    """Function-level semantic search with hybrid BM25 + vector ranking.

    Steps: feature-flag check, repository access check, query validation
    and sanitization, cache lookup, and -- on a cache miss -- a call to
    ``indexer.search_v2`` whose results are cached for one hour.

    Args:
        request: Search parameters (query, repo_id, top_k, use_reranking).
        auth: Authenticated caller, injected through ``require_auth``.

    Returns:
        SearchV2Response carrying the results, the sanitized query, the
        number of results and whether they were served from the cache.

    Raises:
        HTTPException: 503 when Search V2 is disabled, 400 when the query
            fails validation, 500 when the cache, indexer or metrics call
            raises a non-HTTP error. An HTTPException raised by one of
            those collaborators is propagated unchanged.
    """
    if not SEARCH_V2_ENABLED:
        raise HTTPException(status_code=503, detail="Search V2 is not enabled")

    # NOTE(review): presumably raises when the user may not access the repo;
    # its return value is not inspected here -- confirm against dependencies.
    verify_repo_access(request.repo_id, auth.user_id)

    valid_query, query_error = InputValidator.validate_search_query(request.query)
    if not valid_query:
        raise HTTPException(status_code=400, detail=f"Invalid query: {query_error}")

    sanitized_query = InputValidator.sanitize_string(request.query, max_length=500)

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()

    try:
        # Every parameter that influences the result set must be part of the
        # key; without use_reranking, reranked and non-reranked results for
        # the same query would be served from the same cache entry.
        cache_key = (
            f"v2:{sanitized_query}:{request.repo_id}"
            f":{request.top_k}:{request.use_reranking}"
        )
        cached = cache.get_search_results(cache_key, request.repo_id)
        # A falsy value (None or an empty list) is treated as a cache miss.
        if cached:
            metrics.record_search(time.perf_counter() - start_time, cached=True)
            return SearchV2Response(
                results=cached,
                query=sanitized_query,
                total=len(cached),
                cached=True,
            )

        results = await indexer.search_v2(
            query=sanitized_query,
            repo_id=request.repo_id,
            top_k=request.top_k,
            use_reranking=request.use_reranking,
        )

        cache.set_search_results(cache_key, request.repo_id, results, ttl=3600)
        metrics.record_search(time.perf_counter() - start_time, cached=False)

        return SearchV2Response(
            results=results,
            query=sanitized_query,
            total=len(results),
            cached=False,
        )

    except HTTPException:
        # Keep the status code chosen by a collaborator instead of
        # rewrapping it as a generic 500.
        raise
    except Exception as e:
        # NOTE(review): str(e) exposes internal error text to the client;
        # consider logging it and returning a generic message instead.
        raise HTTPException(status_code=500, detail=str(e)) from e
124 changes: 124 additions & 0 deletions backend/tests/test_search_v2_route.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
"""Tests for Search V2 API route."""
import pytest
from unittest.mock import AsyncMock, MagicMock, patch


class TestSearchV2Route:
    """Tests for /api/v1/search/v2 endpoint.

    The route coroutine is awaited directly (no HTTP client), so ``auth`` is
    passed by hand and the module-level collaborators of
    ``routes.search_v2`` are replaced with mocks through fixtures.
    """

    @pytest.fixture
    def mock_auth(self):
        """Replace ``require_auth`` in the route module with a stub."""
        with patch("routes.search_v2.require_auth") as mock:
            mock.return_value = MagicMock(user_id="test-user")
            yield mock

    @pytest.fixture
    def mock_indexer(self):
        """Replace the indexer; ``search_v2`` resolves to one canned hit."""
        with patch("routes.search_v2.indexer") as mock:
            mock.search_v2 = AsyncMock(return_value=[
                {
                    "name": "authenticate",
                    "qualified_name": "auth.authenticate",
                    "file_path": "src/auth.py",
                    "code": "def authenticate(): pass",
                    "signature": "def authenticate() -> bool",
                    "language": "python",
                    "score": 0.95,
                    "line_start": 10,
                    "line_end": 20,
                    "summary": "Authenticates user",
                    "class_name": None,
                    "match_reason": None,
                }
            ])
            yield mock

    @pytest.fixture
    def mock_cache(self):
        """Replace the cache; lookups miss (return None) by default."""
        with patch("routes.search_v2.cache") as mock:
            mock.get_search_results = MagicMock(return_value=None)
            mock.set_search_results = MagicMock()
            yield mock

    @pytest.fixture
    def mock_verify_access(self):
        """Replace the repository access check with a no-op mock."""
        with patch("routes.search_v2.verify_repo_access") as mock:
            yield mock

    @pytest.fixture
    def mock_metrics(self):
        """Replace the metrics recorder."""
        with patch("routes.search_v2.metrics") as mock:
            mock.record_search = MagicMock()
            yield mock

    @pytest.mark.asyncio
    async def test_search_v2_returns_results(
        self, mock_auth, mock_indexer, mock_cache, mock_verify_access, mock_metrics
    ):
        """A cache miss queries the indexer, caches and returns its hits."""
        from routes.search_v2 import search_v2, SearchV2Request
        from middleware.auth import AuthContext

        request = SearchV2Request(
            query="authentication",
            repo_id="test-repo",
            top_k=10,
        )
        auth = AuthContext(user_id="test-user", email="test@test.com")

        response = await search_v2(request, auth)

        assert response.total == 1
        assert response.search_version == "v2"
        assert response.cached is False
        assert response.results[0].name == "authenticate"

        # Fresh results must be written back to the cache and recorded as
        # an uncached search.
        mock_cache.set_search_results.assert_called_once()
        mock_metrics.record_search.assert_called_once()
        assert mock_metrics.record_search.call_args.kwargs["cached"] is False

    @pytest.mark.asyncio
    async def test_search_v2_uses_cache(
        self, mock_auth, mock_indexer, mock_cache, mock_verify_access, mock_metrics
    ):
        """A cache hit is returned as-is without touching the indexer."""
        from routes.search_v2 import search_v2, SearchV2Request
        from middleware.auth import AuthContext

        mock_cache.get_search_results.return_value = [
            {"name": "cached_result", "qualified_name": "cached", "file_path": "x.py",
             "code": "", "signature": "", "language": "python", "score": 0.9,
             "line_start": 1, "line_end": 2}
        ]

        request = SearchV2Request(query="test", repo_id="repo", top_k=5)
        auth = AuthContext(user_id="test-user", email="test@test.com")

        response = await search_v2(request, auth)

        assert response.cached is True
        assert response.total == 1
        assert response.results[0].name == "cached_result"
        mock_indexer.search_v2.assert_not_called()
        mock_cache.set_search_results.assert_not_called()
        assert mock_metrics.record_search.call_args.kwargs["cached"] is True

    @pytest.mark.asyncio
    async def test_search_v2_rejects_sql_injection(
        self, mock_auth, mock_indexer, mock_cache, mock_verify_access, mock_metrics
    ):
        """A query that fails validation yields 400 before any search runs."""
        from routes.search_v2 import search_v2, SearchV2Request
        from middleware.auth import AuthContext
        from fastapi import HTTPException

        request = SearchV2Request(query="DROP TABLE users;--", repo_id="repo", top_k=10)
        auth = AuthContext(user_id="test-user", email="test@test.com")

        with pytest.raises(HTTPException) as exc:
            await search_v2(request, auth)

        assert exc.value.status_code == 400
        # The collaborators are mocked so that a validator regression shows
        # up as a clean test failure instead of hitting the real backend.
        mock_indexer.search_v2.assert_not_called()
        mock_cache.get_search_results.assert_not_called()

    @pytest.mark.asyncio
    async def test_search_v2_respects_top_k(
        self, mock_auth, mock_indexer, mock_cache, mock_verify_access, mock_metrics
    ):
        """Request parameters are forwarded to the indexer unchanged."""
        from routes.search_v2 import search_v2, SearchV2Request
        from middleware.auth import AuthContext

        request = SearchV2Request(query="test query", repo_id="repo", top_k=25)
        auth = AuthContext(user_id="test-user", email="test@test.com")

        await search_v2(request, auth)

        mock_indexer.search_v2.assert_called_once()
        call_args = mock_indexer.search_v2.call_args
        assert call_args.kwargs["top_k"] == 25
        assert call_args.kwargs["repo_id"] == "repo"
        assert call_args.kwargs["use_reranking"] is True

    @pytest.mark.asyncio
    async def test_search_v2_disabled_returns_503(
        self, mock_auth, mock_indexer, mock_cache, mock_verify_access, mock_metrics
    ):
        """With the feature flag off the endpoint answers 503 immediately."""
        from routes.search_v2 import search_v2, SearchV2Request
        from middleware.auth import AuthContext
        from fastapi import HTTPException

        request = SearchV2Request(query="test query", repo_id="repo", top_k=10)
        auth = AuthContext(user_id="test-user", email="test@test.com")

        with patch("routes.search_v2.SEARCH_V2_ENABLED", False):
            with pytest.raises(HTTPException) as exc:
                await search_v2(request, auth)

        assert exc.value.status_code == 503
        mock_indexer.search_v2.assert_not_called()

    @pytest.mark.asyncio
    async def test_search_v2_backend_failure_returns_500(
        self, mock_auth, mock_indexer, mock_cache, mock_verify_access, mock_metrics
    ):
        """An unexpected indexer error is reported as HTTP 500."""
        from routes.search_v2 import search_v2, SearchV2Request
        from middleware.auth import AuthContext
        from fastapi import HTTPException

        mock_indexer.search_v2.side_effect = RuntimeError("boom")

        request = SearchV2Request(query="test query", repo_id="repo", top_k=10)
        auth = AuthContext(user_id="test-user", email="test@test.com")

        with pytest.raises(HTTPException) as exc:
            await search_v2(request, auth)

        assert exc.value.status_code == 500
        # Nothing may be cached when the search itself failed.
        mock_cache.set_search_results.assert_not_called()