diff --git a/backend/routes/analysis.py b/backend/routes/analysis.py index 37a74c2..4ce126c 100644 --- a/backend/routes/analysis.py +++ b/backend/routes/analysis.py @@ -8,7 +8,7 @@ ) from services.input_validator import InputValidator from middleware.auth import require_auth, AuthContext -from services.observability import logger, metrics +from services.observability import logger, metrics, capture_exception router = APIRouter(prefix="/repos", tags=["Analysis"]) @@ -26,21 +26,23 @@ async def get_dependency_graph( """Get dependency graph for repository.""" try: repo = get_repo_or_404(repo_id, auth.user_id) - - # Try cache first + cached_graph = dependency_analyzer.load_from_cache(repo_id) if cached_graph: logger.debug("Using cached dependency graph", repo_id=repo_id) return {**cached_graph, "cached": True} - - # Build fresh + logger.info("Building fresh dependency graph", repo_id=repo_id) graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"]) dependency_analyzer.save_to_cache(repo_id, graph_data) - + return {**graph_data, "cached": False} + except HTTPException: + raise except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) + logger.error("Dependency graph failed", repo_id=repo_id, error=str(e)) + capture_exception(e, operation="dependency_graph", repo_id=repo_id) + raise HTTPException(status_code=500, detail="Failed to build dependency graph") @router.post("/{repo_id}/impact") @@ -52,30 +54,32 @@ async def analyze_impact( """Analyze impact of changing a file.""" try: repo = get_repo_or_404(repo_id, auth.user_id) - - # Validate file path + valid_path, path_error = InputValidator.validate_file_path( request.file_path, repo["local_path"] ) if not valid_path: raise HTTPException(status_code=400, detail=f"Invalid file path: {path_error}") - - # Get or build graph + graph_data = dependency_analyzer.load_from_cache(repo_id) if not graph_data: logger.info("Building dependency graph for impact analysis", repo_id=repo_id) graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"]) dependency_analyzer.save_to_cache(repo_id, graph_data) - + impact = dependency_analyzer.get_file_impact( repo["local_path"], request.file_path, graph_data ) - + return impact + except HTTPException: + raise except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) + logger.error("Impact analysis failed", repo_id=repo_id, file_path=request.file_path, error=str(e)) + capture_exception(e, operation="impact_analysis", repo_id=repo_id, file_path=request.file_path) + raise HTTPException(status_code=500, detail="Failed to analyze impact") @router.get("/{repo_id}/insights") @@ -86,14 +90,13 @@ async def get_repository_insights( """Get comprehensive insights about repository.""" try: repo = get_repo_or_404(repo_id, auth.user_id) - - # Get or build graph + graph_data = dependency_analyzer.load_from_cache(repo_id) if not graph_data: logger.info("Building dependency graph for insights", repo_id=repo_id) graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"]) dependency_analyzer.save_to_cache(repo_id, graph_data) - + return { "repo_id": repo_id, "name": repo["name"], @@ -106,8 +109,12 @@ async def get_repository_insights( "functions_indexed": repo["file_count"], "cached": bool(graph_data) } + except HTTPException: + raise except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) + logger.error("Repository insights failed", repo_id=repo_id, error=str(e)) + capture_exception(e, operation="insights", repo_id=repo_id) + raise HTTPException(status_code=500, detail="Failed to get repository insights") @router.get("/{repo_id}/style-analysis") @@ -118,22 +125,23 @@ async def get_style_analysis( """Analyze code style and team patterns.""" try: repo = get_repo_or_404(repo_id, auth.user_id) - - # Try cache first + cached_style = style_analyzer.load_from_cache(repo_id) if cached_style: logger.debug("Using cached code style", repo_id=repo_id) return {**cached_style, "cached": True} - - # Analyze fresh + logger.info("Analyzing code style", repo_id=repo_id) style_data = style_analyzer.analyze_repository_style(repo["local_path"]) style_analyzer.save_to_cache(repo_id, style_data) - + return {**style_data, "cached": False} + except HTTPException: + raise except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - + logger.error("Style analysis failed", repo_id=repo_id, error=str(e)) + capture_exception(e, operation="style_analysis", repo_id=repo_id) + raise HTTPException(status_code=500, detail="Failed to analyze code style") @router.get("/{repo_id}/dna") @@ -144,39 +152,40 @@ async def get_codebase_dna( ): """ Extract codebase DNA - architectural patterns, conventions, and constraints. - + This endpoint analyzes your codebase and returns a DNA profile that helps AI assistants understand how to write code consistent with your patterns. - + Args: repo_id: Repository identifier format: Output format - 'json' or 'markdown' (default: json) - + Returns: DNA profile with auth patterns, service patterns, database patterns, etc. """ try: repo = get_repo_or_404(repo_id, auth.user_id) - - # Try cache first + cached_dna = dna_extractor.load_from_cache(repo_id) if cached_dna: logger.debug("Using cached DNA", repo_id=repo_id) if format == "markdown": return {"dna": cached_dna.to_markdown(), "cached": True} return {**cached_dna.to_dict(), "cached": True} - - # Extract fresh DNA + logger.info("Extracting codebase DNA", repo_id=repo_id) metrics.increment("dna_extractions") - + dna = dna_extractor.extract_dna(repo["local_path"], repo_id) dna_extractor.save_to_cache(repo_id, dna) - + if format == "markdown": return {"dna": dna.to_markdown(), "cached": False} return {**dna.to_dict(), "cached": False} - + + except HTTPException: + raise except Exception as e: - logger.error("Error extracting DNA", repo_id=repo_id, error=str(e)) - raise HTTPException(status_code=500, detail=str(e)) + logger.error("DNA extraction failed", repo_id=repo_id, error=str(e)) + capture_exception(e, operation="dna_extraction", repo_id=repo_id) + raise HTTPException(status_code=500, detail="Failed to extract codebase DNA") diff --git a/backend/routes/playground.py b/backend/routes/playground.py index df57b23..c3dda66 100644 --- a/backend/routes/playground.py +++ b/backend/routes/playground.py @@ -17,7 +17,7 @@ from dependencies import indexer, cache, repo_manager, redis_client from services.input_validator import InputValidator from services.repo_validator import RepoValidator -from services.observability import logger +from services.observability import logger, capture_exception from services.playground_limiter import PlaygroundLimiter, get_playground_limiter, IndexedRepoData from services.anonymous_indexer import ( AnonymousIndexingJob, @@ -486,8 +486,9 @@ async def playground_search( except HTTPException: raise except Exception as e: + capture_exception(e, operation="playground_search") logger.error("Playground search failed", error=str(e)) - raise HTTPException(status_code=500, detail=str(e)) + raise HTTPException(status_code=500, detail="Search failed") @router.get("/repos") @@ -579,7 +580,7 @@ async def _fetch_repo_metadata(owner: str, repo: str) -> dict: return {"error": "timeout", "message": "GitHub API request timed out"} except Exception as e: logger.error("GitHub API request failed", error=str(e)) - return {"error": "request_failed", "message": str(e)} + return {"error": "request_failed", "message": "Failed to fetch repository metadata"} async def _count_code_files( diff --git a/backend/routes/repos.py b/backend/routes/repos.py index 7b60b05..beca2a4 100644 --- a/backend/routes/repos.py +++ b/backend/routes/repos.py @@ -145,7 +145,7 @@ async def add_repository( except Exception as e: logger.error("Failed to add repository", error=str(e), user_id=user_id) capture_exception(e) - raise HTTPException(status_code=400, detail=str(e)) + raise HTTPException(status_code=500, detail="Failed to add repository") @router.delete("/{repo_id}") @@ -174,7 +174,7 @@ async def delete_repository( except Exception as e: logger.error("Failed to delete repository", repo_id=repo_id, error=str(e)) capture_exception(e) - raise HTTPException(status_code=500, detail=str(e)) + raise HTTPException(status_code=500, detail="Failed to delete repository") @router.post("/{repo_id}/index") @@ -268,7 +268,7 @@ async def index_repository( logger.error("Indexing failed", repo_id=repo_id, error=str(e)) capture_exception(e) repo_manager.update_status(repo_id, "error") - raise HTTPException(status_code=500, detail=str(e)) + raise HTTPException(status_code=500, detail="Indexing failed") async def _run_async_indexing( @@ -395,7 +395,7 @@ async def progress_callback( publisher.publish_error( repo_id, error="indexing_failed", - message=str(e), + message="An error occurred during indexing", recoverable=True ) @@ -478,7 +478,7 @@ async def index_repository_async( except Exception as e: logger.error("Failed to start async indexing", repo_id=repo_id, error=str(e)) capture_exception(e) - raise HTTPException(status_code=500, detail=str(e)) + raise HTTPException(status_code=500, detail="Failed to start indexing") async def _authenticate_websocket(websocket: WebSocket) -> Optional[dict]: @@ -573,7 +573,7 @@ async def progress_callback(files_processed: int, functions_indexed: int, total_ logger.error("WebSocket indexing error", repo_id=repo_id, error=str(e)) capture_exception(e, operation="websocket_indexing", repo_id=repo_id) try: - await websocket.send_json({"type": "error", "message": str(e)}) + await websocket.send_json({"type": "error", "message": "An error occurred during indexing"}) except Exception: pass repo_manager.update_status(repo_id, "error") diff --git a/backend/routes/search.py b/backend/routes/search.py index 15e46ae..1e3def6 100644 --- a/backend/routes/search.py +++ b/backend/routes/search.py @@ -117,7 +117,7 @@ async def search_code( error=str(e) ) capture_exception(e, operation="search", repo_id=request.repo_id, user_id=auth.user_id) - raise HTTPException(status_code=500, detail=str(e)) + raise HTTPException(status_code=500, detail="Search failed") @router.post("/explain") @@ -177,4 +177,4 @@ async def explain_code( user_id=auth.user_id, file_path=request.file_path ) - raise HTTPException(status_code=500, detail=str(e)) + raise HTTPException(status_code=500, detail="Failed to generate explanation") diff --git a/backend/routes/search_v2.py b/backend/routes/search_v2.py index feeff12..6e7d817 100644 --- a/backend/routes/search_v2.py +++ b/backend/routes/search_v2.py @@ -154,4 +154,4 @@ async def search_v2( repo_id=request.repo_id, user_id=auth.user_id ) - raise HTTPException(status_code=500, detail=str(e)) + raise HTTPException(status_code=500, detail="Search failed") diff --git a/backend/tests/test_error_leaking.py b/backend/tests/test_error_leaking.py new file mode 100644 index 0000000..d502aeb --- /dev/null +++ b/backend/tests/test_error_leaking.py @@ -0,0 +1,68 @@ +"""Tests that 500 errors don't leak internal details (OPE-79).""" +import pytest +from unittest.mock import patch, MagicMock + + +class TestErrorResponsesHideInternals: + """Verify 500 responses return generic messages, not str(e).""" + + def test_search_error_hides_details(self, client, valid_headers): + """Search failure should not expose Pinecone/OpenAI error strings.""" + with patch("routes.search.indexer") as mock_indexer, \ + patch("routes.search.cache") as mock_cache: + mock_cache.get_search_results.return_value = None + mock_indexer.semantic_search.side_effect = RuntimeError( + "Pinecone connection refused at pinecone-prod.svc.us-east1.aws:443" + ) + with patch("routes.search.verify_repo_access"): + resp = client.post( + "/api/v1/search", + json={"query": "auth middleware", "repo_id": "test-repo"}, + headers=valid_headers, + ) + + assert resp.status_code == 500 + body = resp.json()["detail"] + assert body == "Search failed" + assert "Pinecone" not in body + assert "pinecone-prod" not in body + + def test_dependency_graph_error_hides_details(self, client, valid_headers): + """Dependency graph failure should not expose file paths.""" + with patch("routes.analysis.get_repo_or_404") as mock_repo: + mock_repo.return_value = {"local_path": "/srv/repos/abc", "name": "test"} + with patch("routes.analysis.dependency_analyzer") as mock_dep: + mock_dep.load_from_cache.return_value = None + mock_dep.build_dependency_graph.side_effect = FileNotFoundError( + "/srv/repos/abc/.git/config not found" + ) + resp = client.get( + "/api/v1/repos/test-repo/dependencies", + headers=valid_headers, + ) + + assert resp.status_code == 500 + body = resp.json()["detail"] + assert body == "Failed to build dependency graph" + assert "/srv/repos" not in body + + def test_repo_add_error_hides_details(self, client, valid_headers): + """Add repo failure should not expose git credentials or paths.""" + with patch("routes.repos.repo_manager") as mock_rm, \ + patch("routes.repos.user_limits") as mock_limits: + limit_check = MagicMock() + limit_check.allowed = True + mock_limits.check_repo_count.return_value = limit_check + mock_rm.add_repo.side_effect = Exception( + "Authentication failed for https://user:ghp_secret@github.com/org/repo.git" + ) + resp = client.post( + "/api/v1/repos", + json={"name": "test", "git_url": "https://github.com/org/repo", "branch": "main"}, + headers=valid_headers, + ) + + assert resp.status_code == 500 + body = resp.json()["detail"] + assert body == "Failed to add repository" + assert "ghp_secret" not in body