Skip to content

Commit fb0a985

Browse files
committed
fix: address PR review round 2 -- type annotations, path validation, docs
1. Add -> dict return type to get_repo_directories 2. Add -> List[dict] return type to _scan_directories 3. Add Pydantic validator on IndexConfig.include_paths: - Rejects empty strings - Rejects path traversal (.. segments) - Normalizes leading/trailing slashes 4. Add docstring to websocket_index explaining it does NOT support include_paths (older pattern, use HTTP async endpoint instead) All 4 findings verified against code before fixing.
1 parent 022c297 commit fb0a985

1 file changed

Lines changed: 21 additions & 4 deletions

File tree

backend/routes/repos.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Repository management routes - CRUD and indexing."""
22
from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect, Depends, BackgroundTasks
3-
from pydantic import BaseModel
3+
from pydantic import BaseModel, validator
44
from typing import List, Optional
55
from pathlib import Path
66
import hashlib
@@ -178,7 +178,7 @@ async def delete_repository(
178178
raise HTTPException(status_code=500, detail="Failed to delete repository")
179179

180180

181-
def _scan_directories(local_path: Path) -> list:
181+
def _scan_directories(local_path: Path) -> List[dict]:
182182
"""Scan top-level directories and count code files in each.
183183
184184
Runs synchronously -- call via asyncio.to_thread() from async handlers
@@ -206,7 +206,7 @@ def _scan_directories(local_path: Path) -> list:
206206
async def get_repo_directories(
207207
repo_id: str,
208208
auth: AuthContext = Depends(require_auth),
209-
):
209+
) -> dict:
210210
"""Return the top-level directory tree of a cloned repo.
211211
212212
Used for monorepo subset selection -- lets the user pick which
@@ -461,6 +461,17 @@ class IndexConfig(BaseModel):
461461
include_paths: Optional[List[str]] = None # e.g. ["packages/effect", "packages/schema"]
462462
incremental: bool = True
463463

464+
@validator("include_paths", each_item=True, pre=True)
465+
@classmethod
466+
def sanitize_path(cls, v: str) -> str:
467+
"""Reject path traversal, empty strings, and normalize slashes."""
468+
v = v.strip().strip("/")
469+
if not v:
470+
raise ValueError("include_paths entries must not be empty")
471+
if ".." in v.split("/"):
472+
raise ValueError(f"Path traversal not allowed: {v}")
473+
return v
474+
464475

465476
@router.post("/{repo_id}/index/async", status_code=202)
466477
async def index_repository_async(
@@ -564,7 +575,13 @@ async def _authenticate_websocket(websocket: WebSocket) -> Optional[dict]:
564575
# Note: WebSocket routes need to be registered on the main app, not router
565576
# This function is exported and called from main.py
566577
async def websocket_index(websocket: WebSocket, repo_id: str):
567-
"""Real-time repository indexing with progress updates."""
578+
"""Real-time repository indexing with progress updates.
579+
580+
NOTE: This WebSocket-direct-indexing path does NOT support include_paths
581+
(monorepo subset selection). Use the HTTP async endpoint instead:
582+
POST /repos/{id}/index/async with IndexConfig body.
583+
This handler is the older pattern -- kept for backward compatibility.
584+
"""
568585
user = await _authenticate_websocket(websocket)
569586
if not user:
570587
return

0 commit comments

Comments
 (0)