Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 144 additions & 14 deletions backend/routes/playground.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from services.input_validator import InputValidator
from services.repo_validator import RepoValidator
from services.observability import logger
from services.playground_limiter import PlaygroundLimiter, get_playground_limiter
from services.playground_limiter import PlaygroundLimiter, get_playground_limiter, IndexedRepoData
from services.anonymous_indexer import (
AnonymousIndexingJob,
run_indexing_job,
Expand Down Expand Up @@ -46,7 +46,8 @@

class PlaygroundSearchRequest(BaseModel):
    """Request body for a playground search.

    The target repository is chosen from ``repo_id`` when it is set,
    otherwise from ``demo_repo``. Both are optional; when neither is
    supplied the search endpoint falls back to the "flask" demo repo.
    """

    # Free-text search query.
    query: str
    # Single declaration only: an earlier `demo_repo: str = "flask"` line was
    # shadowed by this one and had no effect, so it has been dropped.
    demo_repo: Optional[str] = None  # Keep for backward compat
    repo_id: Optional[str] = None  # Direct repo_id (user-indexed repos)
    # Upper bound on the number of results requested.
    max_results: int = 10


Expand Down Expand Up @@ -143,6 +144,145 @@ def _get_limiter() -> PlaygroundLimiter:
return get_playground_limiter(redis_client)


def _resolve_repo_id(
    request: PlaygroundSearchRequest,
    limiter: PlaygroundLimiter,
    limit_result,
    req: Request
) -> str:
    """
    Pick the repository a playground search should run against.

    Lookup order:
        1. ``request.repo_id`` - returned directly when it is one of the
           demo repo ids, otherwise handed to
           ``_validate_user_repo_access`` for the ownership/expiry check.
        2. ``request.demo_repo`` (or "flask" when unset) looked up in
           ``DEMO_REPO_IDS``.
        3. The first repo reported by ``repo_manager.list_repos()`` whose
           status is "indexed".

    Args:
        request: Parsed search request body.
        limiter: Forwarded to the ownership check for user-indexed repos.
        limit_result: Forwarded to the ownership check for user-indexed repos.
        req: Incoming HTTP request, forwarded to the ownership check.

    Returns:
        repo_id string

    Raises:
        HTTPException 404: No demo mapping and no indexed repo to fall back on
        HTTPException 403 / 410: Propagated from the user-repo ownership check
    """
    requested_id = request.repo_id

    if requested_id:
        # Anything that is not a known demo id must pass the ownership check.
        if requested_id not in DEMO_REPO_IDS.values():
            return _validate_user_repo_access(requested_id, limiter, limit_result, req)

        logger.debug("Search on demo repo via repo_id", repo_id=requested_id[:16])
        return requested_id

    # No explicit repo_id: resolve by demo name, defaulting to flask.
    demo_name = request.demo_repo if request.demo_repo else "flask"
    mapped_id = DEMO_REPO_IDS.get(demo_name)

    if mapped_id:
        logger.debug("Search on demo repo", demo_name=demo_name)
        return mapped_id

    # Demo name is not in the mapping: fall back to the first indexed repo.
    candidates = [
        entry
        for entry in repo_manager.list_repos()
        if entry.get("status") == "indexed"
    ]

    if not candidates:
        logger.warning("No demo repo available", requested=demo_name)
        raise HTTPException(
            status_code=404,
            detail=f"Demo repo '{demo_name}' not available"
        )

    chosen_id = candidates[0]["id"]
    logger.debug("Using fallback indexed repo", repo_id=chosen_id[:16])
    return chosen_id


def _access_denied_error():
    """
    Build the 403 HTTPException used for every ownership failure.

    Both denial paths (no session token, repo not owned by the session)
    respond with this same payload.
    """
    return HTTPException(
        status_code=403,
        detail={
            "error": "access_denied",
            "message": "You don't have access to this repository"
        }
    )


def _validate_user_repo_access(
    repo_id: str,
    limiter: PlaygroundLimiter,
    limit_result,
    req: Request
) -> str:
    """
    Validate that the session owns the requested user-indexed repo.

    The session token is taken from ``limit_result.session_token`` and,
    when that is empty, from ``_get_session_token(req)``. The session's
    ``indexed_repo`` record must exist, carry the same ``repo_id`` and not
    be expired.

    Args:
        repo_id: Id of the user-indexed repo being searched.
        limiter: Limiter used to load the session data for the token.
        limit_result: Rate-limit result; its ``session_token`` is preferred.
        req: Incoming HTTP request, used as the token fallback source.

    Returns:
        repo_id if valid

    Raises:
        HTTPException 403: No session or not owner
        HTTPException 410: Repo expired
    """
    session_token = limit_result.session_token or _get_session_token(req)

    # No session token at all
    if not session_token:
        logger.warning(
            "Search denied - no session token",
            repo_id=repo_id[:16]
        )
        raise _access_denied_error()

    # Only a short prefix of the token is ever written to the logs.
    token_preview = session_token[:8]

    # Get session data and check ownership.
    # NOTE(review): what get_session_data returns for an unknown or stale
    # token is not visible here. Reading the attribute defensively means a
    # missing session object is denied with 403 instead of surfacing as an
    # AttributeError (500) - confirm against PlaygroundLimiter.
    session_data = limiter.get_session_data(session_token)
    indexed_repo = getattr(session_data, "indexed_repo", None)
    session_repo_id = indexed_repo.get("repo_id") if indexed_repo else None

    if not indexed_repo or session_repo_id != repo_id:
        logger.warning(
            "Search denied - repo not owned by session",
            requested_repo_id=repo_id[:16],
            session_repo_id=session_repo_id[:16] if session_repo_id else "none",
            session_token=token_preview
        )
        raise _access_denied_error()

    # Check expiry
    repo_data = IndexedRepoData.from_dict(indexed_repo)
    if repo_data.is_expired():
        logger.warning(
            "Search denied - repo expired",
            repo_id=repo_id[:16],
            expired_at=indexed_repo.get("expires_at"),
            session_token=token_preview
        )
        raise HTTPException(
            status_code=410,
            detail={
                "error": "repo_expired",
                "message": "Repository index expired. Re-index to continue searching.",
                "can_reindex": True
            }
        )

    # All checks passed
    logger.info(
        "Search on user-indexed repo",
        repo_id=repo_id[:16],
        repo_name=indexed_repo.get("name"),
        session_token=token_preview
    )
    return repo_id


@router.get("/limits")
async def get_playground_limits(req: Request):
"""
Expand Down Expand Up @@ -270,18 +410,8 @@ async def playground_search(
if not valid_query:
raise HTTPException(status_code=400, detail=f"Invalid query: {query_error}")

# Get demo repo ID
repo_id = DEMO_REPO_IDS.get(request.demo_repo)
if not repo_id:
repos = repo_manager.list_repos()
indexed_repos = [r for r in repos if r.get("status") == "indexed"]
if indexed_repos:
repo_id = indexed_repos[0]["id"]
else:
raise HTTPException(
status_code=404,
detail=f"Demo repo '{request.demo_repo}' not available"
)
# Resolve repo_id: priority is repo_id > demo_repo > default "flask"
repo_id = _resolve_repo_id(request, limiter, limit_result, req)

start_time = time.time()

Expand Down
202 changes: 201 additions & 1 deletion backend/tests/test_anonymous_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
Note: These tests rely on conftest.py for Pinecone/OpenAI/Redis mocking.
"""
import pytest
from unittest.mock import patch, MagicMock
from unittest.mock import patch, MagicMock, AsyncMock
from datetime import datetime, timezone, timedelta
import json

Expand Down Expand Up @@ -679,3 +679,203 @@ def test_partial_job_includes_partial_info(self, mock_job_class, client):
data = response.json()
assert data["partial"] is True
assert data["max_files"] == 200



# =============================================================================
# Issue #128: Search User-Indexed Repos Tests
# =============================================================================

class TestSearchUserRepos:
    """Tests for searching user-indexed repositories.

    Every test replaces ``routes.playground._get_limiter`` with a mock whose
    ``check_and_record`` always allows the request, then posts to the
    playground search endpoint through the ``client`` fixture.
    """

    SEARCH_URL = "/api/v1/playground/search"
    USER_REPO_ID = "repo_user_abc123"

    @staticmethod
    def _allowing_limiter(session_token="test_session_123"):
        """Return a mock limiter whose rate-limit check passes.

        ``session_token`` is the token exposed on the rate-limit result;
        pass ``None`` to simulate a request without a session.
        """
        limiter = MagicMock()
        limiter.check_and_record.return_value = MagicMock(
            allowed=True,
            remaining=99,
            limit=100,
            session_token=session_token
        )
        return limiter

    @staticmethod
    def _session_owning(
        repo_id="repo_user_abc123",
        github_url="https://github.com/user/repo",
        name="repo",
        expires_at="2099-01-02T00:00:00Z",  # Far future
    ):
        """Return mock session data whose ``indexed_repo`` describes one repo."""
        return MagicMock(
            indexed_repo={
                "repo_id": repo_id,
                "github_url": github_url,
                "name": name,
                "file_count": 50,
                "indexed_at": "2024-01-01T00:00:00Z",
                "expires_at": expires_at
            }
        )

    @patch('routes.playground._get_limiter')
    @patch('routes.playground.indexer')
    def test_search_with_repo_id_user_owns(self, mock_indexer, mock_get_limiter, client):
        """User can search their own indexed repo via repo_id."""
        limiter = self._allowing_limiter()
        # Session owns this repo
        limiter.get_session_data.return_value = self._session_owning()
        mock_get_limiter.return_value = limiter
        mock_indexer.semantic_search = AsyncMock(return_value=[
            {"file": "test.py", "score": 0.9}
        ])

        response = client.post(
            self.SEARCH_URL,
            json={"query": "test function", "repo_id": self.USER_REPO_ID}
        )

        assert response.status_code == 200
        data = response.json()
        assert data["count"] == 1

    @patch('routes.playground._get_limiter')
    def test_search_repo_id_not_owned_returns_403(self, mock_get_limiter, client):
        """Searching repo_id user doesn't own returns 403."""
        limiter = self._allowing_limiter()
        # Session owns different repo
        limiter.get_session_data.return_value = self._session_owning(
            repo_id="repo_OTHER_xyz",
            github_url="https://github.com/other/repo",
            name="other-repo"
        )
        mock_get_limiter.return_value = limiter

        response = client.post(
            self.SEARCH_URL,
            json={"query": "test", "repo_id": self.USER_REPO_ID}
        )

        assert response.status_code == 403
        data = response.json()
        assert data["detail"]["error"] == "access_denied"

    @patch('routes.playground._get_limiter')
    def test_search_repo_id_no_session_repo_returns_403(self, mock_get_limiter, client):
        """Searching repo_id when session has no indexed repo returns 403."""
        limiter = self._allowing_limiter()
        # Session has no indexed repo
        limiter.get_session_data.return_value = MagicMock(indexed_repo=None)
        mock_get_limiter.return_value = limiter

        response = client.post(
            self.SEARCH_URL,
            json={"query": "test", "repo_id": self.USER_REPO_ID}
        )

        assert response.status_code == 403

    @patch('routes.playground._get_limiter')
    def test_search_expired_repo_returns_410(self, mock_get_limiter, client):
        """Searching expired repo returns 410 with can_reindex hint."""
        limiter = self._allowing_limiter()
        # Session owns repo but it's expired
        limiter.get_session_data.return_value = self._session_owning(
            expires_at="2024-01-01T00:00:01Z"  # Already expired
        )
        mock_get_limiter.return_value = limiter

        response = client.post(
            self.SEARCH_URL,
            json={"query": "test", "repo_id": self.USER_REPO_ID}
        )

        assert response.status_code == 410
        data = response.json()
        assert data["detail"]["error"] == "repo_expired"
        assert data["detail"]["can_reindex"] is True

    @patch('routes.playground._get_limiter')
    @patch('routes.playground.indexer')
    def test_search_demo_repo_via_repo_id_allowed(self, mock_indexer, mock_get_limiter, client):
        """Demo repos can be accessed via repo_id without ownership check."""
        mock_get_limiter.return_value = self._allowing_limiter()
        mock_indexer.semantic_search = AsyncMock(return_value=[])

        # Use the flask demo repo ID
        from routes.playground import DEMO_REPO_IDS
        flask_repo_id = DEMO_REPO_IDS.get("flask")

        # Report a skip rather than passing without asserting anything when
        # the flask demo id is not configured in this environment.
        if not flask_repo_id:
            pytest.skip("flask demo repo is not configured in DEMO_REPO_IDS")

        response = client.post(
            self.SEARCH_URL,
            json={"query": "route handler", "repo_id": flask_repo_id}
        )
        assert response.status_code == 200

    @patch('routes.playground._get_limiter')
    @patch('routes.playground.indexer')
    def test_search_backward_compat_demo_repo(self, mock_indexer, mock_get_limiter, client):
        """Backward compat: demo_repo parameter still works."""
        mock_get_limiter.return_value = self._allowing_limiter(session_token=None)
        mock_indexer.semantic_search = AsyncMock(return_value=[])

        response = client.post(
            self.SEARCH_URL,
            json={"query": "test", "demo_repo": "flask"}
        )

        # Should work (200) or 404 if flask not indexed - but not 4xx auth error
        assert response.status_code in [200, 404]

    @patch('routes.playground._get_limiter')
    @patch('routes.playground.indexer')
    def test_search_default_to_flask_when_no_repo_specified(self, mock_indexer, mock_get_limiter, client):
        """When neither repo_id nor demo_repo provided, defaults to flask."""
        mock_get_limiter.return_value = self._allowing_limiter(session_token=None)
        mock_indexer.semantic_search = AsyncMock(return_value=[])

        response = client.post(
            self.SEARCH_URL,
            json={"query": "test"}  # No repo_id or demo_repo
        )

        # Should default to flask
        assert response.status_code in [200, 404]