Skip to content

Commit 0749d2a

Browse files
authored
Merge pull request #132 from DevanshuNEU/feat/128-search-user-repos
feat(#128): Extend playground search to support user-indexed repos
2 parents 61c13c6 + 1a3ddb4 commit 0749d2a

2 files changed

Lines changed: 345 additions & 15 deletions

File tree

backend/routes/playground.py

Lines changed: 144 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from services.input_validator import InputValidator
1919
from services.repo_validator import RepoValidator
2020
from services.observability import logger
21-
from services.playground_limiter import PlaygroundLimiter, get_playground_limiter
21+
from services.playground_limiter import PlaygroundLimiter, get_playground_limiter, IndexedRepoData
2222
from services.anonymous_indexer import (
2323
AnonymousIndexingJob,
2424
run_indexing_job,
@@ -46,7 +46,8 @@
4646

4747
class PlaygroundSearchRequest(BaseModel):
4848
query: str
49-
demo_repo: str = "flask"
49+
demo_repo: Optional[str] = None # Keep for backward compat
50+
repo_id: Optional[str] = None # Direct repo_id (user-indexed repos)
5051
max_results: int = 10
5152

5253

@@ -143,6 +144,145 @@ def _get_limiter() -> PlaygroundLimiter:
143144
return get_playground_limiter(redis_client)
144145

145146

147+
def _resolve_repo_id(
    request: PlaygroundSearchRequest,
    limiter: PlaygroundLimiter,
    limit_result,
    req: Request
) -> str:
    """
    Pick the repository a playground search should run against.

    Resolution order:
        1. ``request.repo_id`` when given. IDs present in ``DEMO_REPO_IDS``
           are returned as-is; any other ID is handed to
           ``_validate_user_repo_access`` for the session ownership and
           expiry checks.
        2. ``request.demo_repo`` (or ``"flask"`` when unset), looked up by
           name in ``DEMO_REPO_IDS``.
        3. The first repo from ``repo_manager.list_repos()`` whose status is
           ``"indexed"``, used when the demo name has no mapping.

    Returns:
        The resolved repo_id string.

    Raises:
        HTTPException 403 / 410: propagated from
            ``_validate_user_repo_access`` (not owner / repo expired).
        HTTPException 404: no demo mapping and no indexed repo to fall
            back on.
    """
    explicit_id = request.repo_id
    if explicit_id:
        # Anything that is not a known demo ID must pass the ownership check.
        if explicit_id not in DEMO_REPO_IDS.values():
            return _validate_user_repo_access(explicit_id, limiter, limit_result, req)

        logger.debug("Search on demo repo via repo_id", repo_id=explicit_id[:16])
        return explicit_id

    # No explicit ID: resolve by demo name, defaulting to "flask".
    demo_name = request.demo_repo or "flask"
    mapped_id = DEMO_REPO_IDS.get(demo_name)
    if mapped_id:
        logger.debug("Search on demo repo", demo_name=demo_name)
        return mapped_id

    # Last resort: the first repo the manager reports as indexed.
    # NOTE(review): this path performs no ownership check on the repo it
    # returns - confirm user-indexed repos can never appear in list_repos().
    candidates = [
        entry for entry in repo_manager.list_repos()
        if entry.get("status") == "indexed"
    ]
    if not candidates:
        logger.warning("No demo repo available", requested=demo_name)
        raise HTTPException(
            status_code=404,
            detail=f"Demo repo '{demo_name}' not available"
        )

    fallback_id = candidates[0]["id"]
    logger.debug("Using fallback indexed repo", repo_id=fallback_id[:16])
    return fallback_id
203+
204+
205+
def _validate_user_repo_access(
    repo_id: str,
    limiter: PlaygroundLimiter,
    limit_result,
    req: Request
) -> str:
    """
    Validate that the caller's session owns the requested user-indexed repo.

    The session token is taken from ``limit_result.session_token`` and, when
    that is empty, from ``_get_session_token(req)``. The session's
    ``indexed_repo`` record must carry the same ``repo_id`` as the one
    requested and must not be expired.

    Args:
        repo_id: ID of the user-indexed repo the caller wants to search.
        limiter: Limiter used to look up the session data for the token.
        limit_result: Rate-limit result; only ``session_token`` is read.
        req: Incoming request, used as the fallback source of the token.

    Returns:
        repo_id, unchanged, when every check passes.

    Raises:
        HTTPException 403: no session token, no session record, or the
            session's indexed repo is a different one.
        HTTPException 410: the session owns the repo but it has expired.
    """
    def _access_denied() -> HTTPException:
        # Single source for the 403 payload so both denial paths stay identical.
        return HTTPException(
            status_code=403,
            detail={
                "error": "access_denied",
                "message": "You don't have access to this repository"
            }
        )

    session_token = limit_result.session_token or _get_session_token(req)

    # No session token at all
    if not session_token:
        logger.warning(
            "Search denied - no session token",
            repo_id=repo_id[:16]
        )
        raise _access_denied()

    # Only a short prefix of the token is ever written to the logs.
    token_preview = session_token[:8]

    # Get session data and check ownership.
    # Fail closed: if the lookup yields nothing, treat it as "owns no repo"
    # rather than crashing on the attribute access.
    # NOTE(review): whether get_session_data can return None is not visible
    # from this function - the guard is defensive; confirm against the limiter.
    session_data = limiter.get_session_data(session_token)
    indexed_repo = session_data.indexed_repo if session_data is not None else None
    session_repo_id = indexed_repo.get("repo_id") if indexed_repo else None

    if not indexed_repo or session_repo_id != repo_id:
        logger.warning(
            "Search denied - repo not owned by session",
            requested_repo_id=repo_id[:16],
            session_repo_id=session_repo_id[:16] if session_repo_id else "none",
            session_token=token_preview
        )
        raise _access_denied()

    # Check expiry
    repo_data = IndexedRepoData.from_dict(indexed_repo)
    if repo_data.is_expired():
        logger.warning(
            "Search denied - repo expired",
            repo_id=repo_id[:16],
            expired_at=indexed_repo.get("expires_at"),
            session_token=token_preview
        )
        raise HTTPException(
            status_code=410,
            detail={
                "error": "repo_expired",
                "message": "Repository index expired. Re-index to continue searching.",
                "can_reindex": True
            }
        )

    # All checks passed
    logger.info(
        "Search on user-indexed repo",
        repo_id=repo_id[:16],
        repo_name=indexed_repo.get("name"),
        session_token=token_preview
    )
    return repo_id
284+
285+
146286
@router.get("/limits")
147287
async def get_playground_limits(req: Request):
148288
"""
@@ -270,18 +410,8 @@ async def playground_search(
270410
if not valid_query:
271411
raise HTTPException(status_code=400, detail=f"Invalid query: {query_error}")
272412

273-
# Get demo repo ID
274-
repo_id = DEMO_REPO_IDS.get(request.demo_repo)
275-
if not repo_id:
276-
repos = repo_manager.list_repos()
277-
indexed_repos = [r for r in repos if r.get("status") == "indexed"]
278-
if indexed_repos:
279-
repo_id = indexed_repos[0]["id"]
280-
else:
281-
raise HTTPException(
282-
status_code=404,
283-
detail=f"Demo repo '{request.demo_repo}' not available"
284-
)
413+
# Resolve repo_id: priority is repo_id > demo_repo > default "flask"
414+
repo_id = _resolve_repo_id(request, limiter, limit_result, req)
285415

286416
start_time = time.time()
287417

backend/tests/test_anonymous_indexing.py

Lines changed: 201 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
Note: These tests rely on conftest.py for Pinecone/OpenAI/Redis mocking.
66
"""
77
import pytest
8-
from unittest.mock import patch, MagicMock
8+
from unittest.mock import patch, MagicMock, AsyncMock
99
from datetime import datetime, timezone, timedelta
1010
import json
1111

@@ -679,3 +679,203 @@ def test_partial_job_includes_partial_info(self, mock_job_class, client):
679679
data = response.json()
680680
assert data["partial"] is True
681681
assert data["max_files"] == 200
682+
683+
684+
685+
# =============================================================================
686+
# Issue #128: Search User-Indexed Repos Tests
687+
# =============================================================================
688+
689+
class TestSearchUserRepos:
    """Tests for searching user-indexed repositories.

    Every test patches ``routes.playground._get_limiter`` with a mock limiter
    and posts to the playground search endpoint through the ``client``
    fixture. Tests that expect the search itself to run also patch
    ``routes.playground.indexer`` so ``semantic_search`` is an ``AsyncMock``.
    """

    SEARCH_URL = "/api/v1/playground/search"
    SESSION_TOKEN = "test_session_123"
    USER_REPO_ID = "repo_user_abc123"

    @staticmethod
    def _make_limiter(session_token, **session_attrs):
        """Build a mock limiter whose rate-limit check always allows the call.

        ``session_attrs`` (e.g. ``indexed_repo=...``) become attributes of the
        object returned by ``get_session_data``. When none are given,
        ``get_session_data`` is left as MagicMock's default.
        """
        limiter = MagicMock()
        limiter.check_and_record.return_value = MagicMock(
            allowed=True,
            remaining=99,
            limit=100,
            session_token=session_token
        )
        if session_attrs:
            limiter.get_session_data.return_value = MagicMock(**session_attrs)
        return limiter

    @staticmethod
    def _session_repo(
        repo_id,
        *,
        github_url="https://github.com/user/repo",
        name="repo",
        expires_at="2099-01-02T00:00:00Z"  # Far future unless overridden
    ):
        """Return an ``indexed_repo`` record as stored on the session."""
        return {
            "repo_id": repo_id,
            "github_url": github_url,
            "name": name,
            "file_count": 50,
            "indexed_at": "2024-01-01T00:00:00Z",
            "expires_at": expires_at
        }

    @patch('routes.playground._get_limiter')
    @patch('routes.playground.indexer')
    def test_search_with_repo_id_user_owns(self, mock_indexer, mock_get_limiter, client):
        """User can search their own indexed repo via repo_id."""
        # Session owns this repo
        mock_get_limiter.return_value = self._make_limiter(
            self.SESSION_TOKEN,
            indexed_repo=self._session_repo(self.USER_REPO_ID)
        )
        mock_indexer.semantic_search = AsyncMock(return_value=[
            {"file": "test.py", "score": 0.9}
        ])

        response = client.post(
            self.SEARCH_URL,
            json={"query": "test function", "repo_id": self.USER_REPO_ID}
        )

        assert response.status_code == 200
        data = response.json()
        assert data["count"] == 1

    @patch('routes.playground._get_limiter')
    def test_search_repo_id_not_owned_returns_403(self, mock_get_limiter, client):
        """Searching repo_id user doesn't own returns 403."""
        # Session owns a different repo
        mock_get_limiter.return_value = self._make_limiter(
            self.SESSION_TOKEN,
            indexed_repo=self._session_repo(
                "repo_OTHER_xyz",
                github_url="https://github.com/other/repo",
                name="other-repo"
            )
        )

        response = client.post(
            self.SEARCH_URL,
            json={"query": "test", "repo_id": self.USER_REPO_ID}
        )

        assert response.status_code == 403
        data = response.json()
        assert data["detail"]["error"] == "access_denied"

    @patch('routes.playground._get_limiter')
    def test_search_repo_id_no_session_repo_returns_403(self, mock_get_limiter, client):
        """Searching repo_id when session has no indexed repo returns 403."""
        # Session has no indexed repo
        mock_get_limiter.return_value = self._make_limiter(
            self.SESSION_TOKEN,
            indexed_repo=None
        )

        response = client.post(
            self.SEARCH_URL,
            json={"query": "test", "repo_id": self.USER_REPO_ID}
        )

        assert response.status_code == 403

    @patch('routes.playground._get_limiter')
    def test_search_expired_repo_returns_410(self, mock_get_limiter, client):
        """Searching expired repo returns 410 with can_reindex hint."""
        # Session owns the repo but it's expired
        mock_get_limiter.return_value = self._make_limiter(
            self.SESSION_TOKEN,
            indexed_repo=self._session_repo(
                self.USER_REPO_ID,
                expires_at="2024-01-01T00:00:01Z"  # Already expired
            )
        )

        response = client.post(
            self.SEARCH_URL,
            json={"query": "test", "repo_id": self.USER_REPO_ID}
        )

        assert response.status_code == 410
        data = response.json()
        assert data["detail"]["error"] == "repo_expired"
        assert data["detail"]["can_reindex"] is True

    @patch('routes.playground._get_limiter')
    @patch('routes.playground.indexer')
    def test_search_demo_repo_via_repo_id_allowed(self, mock_indexer, mock_get_limiter, client):
        """Demo repos can be accessed via repo_id without ownership check."""
        mock_get_limiter.return_value = self._make_limiter(self.SESSION_TOKEN)
        mock_indexer.semantic_search = AsyncMock(return_value=[])

        # Use the flask demo repo ID
        from routes.playground import DEMO_REPO_IDS
        flask_repo_id = DEMO_REPO_IDS.get("flask")

        # Report a skip instead of passing with zero assertions when the
        # flask demo is not configured in this environment.
        if not flask_repo_id:
            pytest.skip("flask demo repo is not configured in DEMO_REPO_IDS")

        response = client.post(
            self.SEARCH_URL,
            json={"query": "route handler", "repo_id": flask_repo_id}
        )
        assert response.status_code == 200

    @patch('routes.playground._get_limiter')
    @patch('routes.playground.indexer')
    def test_search_backward_compat_demo_repo(self, mock_indexer, mock_get_limiter, client):
        """Backward compat: demo_repo parameter still works."""
        mock_get_limiter.return_value = self._make_limiter(None)
        mock_indexer.semantic_search = AsyncMock(return_value=[])

        response = client.post(
            self.SEARCH_URL,
            json={"query": "test", "demo_repo": "flask"}
        )

        # 200 when a repo resolves, 404 when no demo/fallback repo is
        # available - but never an ownership error (403/410).
        assert response.status_code in [200, 404]

    @patch('routes.playground._get_limiter')
    @patch('routes.playground.indexer')
    def test_search_default_to_flask_when_no_repo_specified(self, mock_indexer, mock_get_limiter, client):
        """When neither repo_id nor demo_repo provided, defaults to flask."""
        mock_get_limiter.return_value = self._make_limiter(None)
        mock_indexer.semantic_search = AsyncMock(return_value=[])

        response = client.post(
            self.SEARCH_URL,
            json={"query": "test"}  # No repo_id or demo_repo
        )

        # Should default to flask: 200 when it resolves, 404 when no
        # demo/fallback repo is available.
        assert response.status_code in [200, 404]

0 commit comments

Comments
 (0)