def _resolve_repo_id(
    request: PlaygroundSearchRequest,
    limiter: PlaygroundLimiter,
    limit_result,
    req: Request
) -> str:
    """
    Resolve which repository a playground search should run against.

    Priority: repo_id > demo_repo > default "flask"

    A repo_id that matches one of the DEMO_REPO_IDS values is returned
    without any session check. Any other repo_id is treated as a
    user-indexed repo and goes through _validate_user_repo_access
    (ownership and expiry).

    Args:
        request: Search request; only repo_id and demo_repo are read.
        limiter: Limiter used to look up session data for ownership checks.
        limit_result: Rate-limit result; its session_token is preferred
            over the token read from the request.
        req: Incoming HTTP request, used as the fallback token source.

    Returns:
        repo_id string

    Raises:
        HTTPException 403: Access denied (no session or not owner)
        HTTPException 410: Repo expired
        HTTPException 404: Demo repo not found and no indexed repo exists
    """
    # Case 1: Direct repo_id provided
    requested_id = request.repo_id
    if requested_id:
        # Demo repos bypass auth check
        if requested_id in DEMO_REPO_IDS.values():
            logger.debug("Search on demo repo via repo_id", repo_id=requested_id[:16])
            return requested_id

        # User-indexed repo - validate ownership
        return _validate_user_repo_access(requested_id, limiter, limit_result, req)

    # Case 2: Fall back to demo_repo or default
    demo_name = request.demo_repo or "flask"
    demo_id = DEMO_REPO_IDS.get(demo_name)
    if demo_id:
        logger.debug("Search on demo repo", demo_name=demo_name)
        return demo_id

    # Case 3: Demo not in mapping, try first indexed repo.
    # NOTE(review): this legacy fallback returns whatever repo_manager lists
    # first with status "indexed". If that listing can include user-indexed
    # repos, an unknown demo_repo name would search one of them without the
    # ownership check above - confirm what list_repos() returns.
    first_indexed = next(
        (r for r in repo_manager.list_repos() if r.get("status") == "indexed"),
        None,
    )
    if first_indexed is not None:
        fallback_id = first_indexed["id"]
        logger.debug("Using fallback indexed repo", repo_id=fallback_id[:16])
        return fallback_id

    logger.warning("No demo repo available", requested=demo_name)
    raise HTTPException(
        status_code=404,
        detail=f"Demo repo '{demo_name}' not available"
    )


def _access_denied_error() -> HTTPException:
    """Build the 403 error shared by every ownership failure path."""
    return HTTPException(
        status_code=403,
        detail={
            "error": "access_denied",
            "message": "You don't have access to this repository"
        }
    )


def _validate_user_repo_access(
    repo_id: str,
    limiter: PlaygroundLimiter,
    limit_result,
    req: Request
) -> str:
    """
    Validate that the session owns the requested user-indexed repo.

    The session token comes from limit_result.session_token, falling back
    to _get_session_token(req). The session's indexed_repo entry must carry
    the same repo_id and must not be expired.

    Args:
        repo_id: ID of the user-indexed repo being searched.
        limiter: Limiter providing get_session_data(session_token).
        limit_result: Rate-limit result carrying an optional session_token.
        req: Incoming HTTP request, used as the fallback token source.

    Returns:
        repo_id if valid

    Raises:
        HTTPException 403: No session, no session data, or not owner
        HTTPException 410: Repo expired
    """
    session_token = limit_result.session_token or _get_session_token(req)

    # No session token at all
    if not session_token:
        logger.warning(
            "Search denied - no session token",
            repo_id=repo_id[:16]
        )
        raise _access_denied_error()

    # Only a short prefix of the token is ever written to logs.
    token_preview = session_token[:8]

    # Get session data and check ownership.
    # get_session_data's contract is not visible from here; a missing
    # session (None) is treated as "owns nothing" so the caller gets the
    # intended 403 rather than an AttributeError surfacing as a 500.
    session_data = limiter.get_session_data(session_token)
    indexed_repo = getattr(session_data, "indexed_repo", None)
    session_repo_id = indexed_repo.get("repo_id") if indexed_repo else None

    if not indexed_repo or session_repo_id != repo_id:
        logger.warning(
            "Search denied - repo not owned by session",
            requested_repo_id=repo_id[:16],
            session_repo_id=session_repo_id[:16] if session_repo_id else "none",
            session_token=token_preview
        )
        raise _access_denied_error()

    # Check expiry
    if IndexedRepoData.from_dict(indexed_repo).is_expired():
        logger.warning(
            "Search denied - repo expired",
            repo_id=repo_id[:16],
            expired_at=indexed_repo.get("expires_at"),
            session_token=token_preview
        )
        raise HTTPException(
            status_code=410,
            detail={
                "error": "repo_expired",
                "message": "Repository index expired. Re-index to continue searching.",
                "can_reindex": True
            }
        )

    # All checks passed
    logger.info(
        "Search on user-indexed repo",
        repo_id=repo_id[:16],
        repo_name=indexed_repo.get("name"),
        session_token=token_preview
    )
    return repo_id
# =============================================================================
# Issue #128: Search User-Indexed Repos Tests
# =============================================================================

class TestSearchUserRepos:
    """Tests for searching user-indexed repositories."""

    SEARCH_URL = "/api/v1/playground/search"

    @staticmethod
    def _make_limiter(session_token="test_session_123"):
        """Return a limiter mock whose rate-limit check always allows the request."""
        limiter = MagicMock()
        limiter.check_and_record.return_value = MagicMock(
            allowed=True,
            remaining=99,
            limit=100,
            session_token=session_token
        )
        return limiter

    @staticmethod
    def _session_owning(
        repo_id="repo_user_abc123",
        github_url="https://github.com/user/repo",
        name="repo",
        expires_at="2099-01-02T00:00:00Z",  # Far future by default
    ):
        """Return session data whose indexed_repo describes the given repo."""
        return MagicMock(
            indexed_repo={
                "repo_id": repo_id,
                "github_url": github_url,
                "name": name,
                "file_count": 50,
                "indexed_at": "2024-01-01T00:00:00Z",
                "expires_at": expires_at
            }
        )

    @patch('routes.playground._get_limiter')
    @patch('routes.playground.indexer')
    def test_search_with_repo_id_user_owns(self, mock_indexer, mock_get_limiter, client):
        """User can search their own indexed repo via repo_id."""
        mock_limiter = self._make_limiter()
        # Session owns this repo
        mock_limiter.get_session_data.return_value = self._session_owning()
        mock_get_limiter.return_value = mock_limiter
        mock_indexer.semantic_search = AsyncMock(return_value=[
            {"file": "test.py", "score": 0.9}
        ])

        response = client.post(
            self.SEARCH_URL,
            json={"query": "test function", "repo_id": "repo_user_abc123"}
        )

        assert response.status_code == 200
        data = response.json()
        assert data["count"] == 1

    @patch('routes.playground._get_limiter')
    def test_search_repo_id_not_owned_returns_403(self, mock_get_limiter, client):
        """Searching repo_id user doesn't own returns 403."""
        mock_limiter = self._make_limiter()
        # Session owns different repo
        mock_limiter.get_session_data.return_value = self._session_owning(
            repo_id="repo_OTHER_xyz",
            github_url="https://github.com/other/repo",
            name="other-repo",
        )
        mock_get_limiter.return_value = mock_limiter

        response = client.post(
            self.SEARCH_URL,
            json={"query": "test", "repo_id": "repo_user_abc123"}
        )

        assert response.status_code == 403
        data = response.json()
        assert data["detail"]["error"] == "access_denied"

    @patch('routes.playground._get_limiter')
    def test_search_repo_id_no_session_repo_returns_403(self, mock_get_limiter, client):
        """Searching repo_id when session has no indexed repo returns 403."""
        mock_limiter = self._make_limiter()
        # Session has no indexed repo
        mock_limiter.get_session_data.return_value = MagicMock(indexed_repo=None)
        mock_get_limiter.return_value = mock_limiter

        response = client.post(
            self.SEARCH_URL,
            json={"query": "test", "repo_id": "repo_user_abc123"}
        )

        assert response.status_code == 403
        # Same payload as the "owned by someone else" case
        assert response.json()["detail"]["error"] == "access_denied"

    @patch('routes.playground._get_limiter')
    def test_search_expired_repo_returns_410(self, mock_get_limiter, client):
        """Searching expired repo returns 410 with can_reindex hint."""
        mock_limiter = self._make_limiter()
        # Session owns repo but it's expired
        mock_limiter.get_session_data.return_value = self._session_owning(
            expires_at="2024-01-01T00:00:01Z"  # Already expired
        )
        mock_get_limiter.return_value = mock_limiter

        response = client.post(
            self.SEARCH_URL,
            json={"query": "test", "repo_id": "repo_user_abc123"}
        )

        assert response.status_code == 410
        data = response.json()
        assert data["detail"]["error"] == "repo_expired"
        assert data["detail"]["can_reindex"] is True

    @patch('routes.playground._get_limiter')
    @patch('routes.playground.indexer')
    def test_search_demo_repo_via_repo_id_allowed(self, mock_indexer, mock_get_limiter, client):
        """Demo repos can be accessed via repo_id without ownership check."""
        mock_get_limiter.return_value = self._make_limiter()
        mock_indexer.semantic_search = AsyncMock(return_value=[])

        # Use the flask demo repo ID
        from routes.playground import DEMO_REPO_IDS
        flask_repo_id = DEMO_REPO_IDS.get("flask")

        # Skip visibly instead of passing without asserting anything when
        # the flask demo repo is not configured in this environment.
        if not flask_repo_id:
            pytest.skip("flask demo repo is not configured in DEMO_REPO_IDS")

        response = client.post(
            self.SEARCH_URL,
            json={"query": "route handler", "repo_id": flask_repo_id}
        )
        assert response.status_code == 200

    @patch('routes.playground._get_limiter')
    @patch('routes.playground.indexer')
    def test_search_backward_compat_demo_repo(self, mock_indexer, mock_get_limiter, client):
        """Backward compat: demo_repo parameter still works."""
        mock_get_limiter.return_value = self._make_limiter(session_token=None)
        mock_indexer.semantic_search = AsyncMock(return_value=[])

        response = client.post(
            self.SEARCH_URL,
            json={"query": "test", "demo_repo": "flask"}
        )

        # Should work (200), or 404 if flask is not indexed - but never an
        # ownership/expiry error (403/410), since demo repos need no session.
        assert response.status_code in [200, 404]

    @patch('routes.playground._get_limiter')
    @patch('routes.playground.indexer')
    def test_search_default_to_flask_when_no_repo_specified(self, mock_indexer, mock_get_limiter, client):
        """When neither repo_id nor demo_repo provided, defaults to flask."""
        mock_get_limiter.return_value = self._make_limiter(session_token=None)
        mock_indexer.semantic_search = AsyncMock(return_value=[])

        response = client.post(
            self.SEARCH_URL,
            json={"query": "test"}  # No repo_id or demo_repo
        )

        # Should default to flask (200), or 404 if flask is not indexed
        assert response.status_code in [200, 404]