fix: stop 13 routes from leaking internal errors via detail=str(e)

DevanshuNEU · DevanshuNEU · commit 66792cb32563 · 2026-02-22T19:02:28.000-05:00
Before: 13 route handlers returned raw exception strings in HTTP
responses. In production this leaked file paths, database connection
details, API key fragments, and third-party service errors.

After: All 13 except blocks return generic error messages. The actual
error is logged via logger.error() and, in most handlers, also reported
via capture_exception() for debugging in Sentry/logs (playground.py
only logs).

Changes by file:
- analysis.py: Fixed all 5 endpoints. Also added missing
  'except HTTPException: raise' (was swallowing 404s as 500s)
  and added logger.error + capture_exception (4 of the 5 handlers
  had no error logging; the DNA handler logged but did not capture).
- repos.py: Fixed 4 endpoints (add, delete, index, async index).
- search.py: Fixed 2 endpoints (search, explain).
- search_v2.py: Fixed 1 endpoint.
- playground.py: Fixed 1 endpoint.

Secondary fix in analysis.py:
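- get_repo_or_404 raises HTTPException(404) but the broad
  'except Exception' was catching it and re-raising as 500.
  The same happened to the HTTPException(400) raised for an
  invalid file path in the impact endpoint.
  Added 'except HTTPException: raise' before 'except Exception'
  on all 5 endpoints.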

Tests: 3 new tests verify that Pinecone connection strings, file
paths, and git credentials are NOT exposed in error responses.

284 tests pass (281 existing + 3 new).

Closes OPE-79
diff --git a/backend/routes/analysis.py b/backend/routes/analysis.py
@@ -8,7 +8,7 @@
 )
 from services.input_validator import InputValidator
 from middleware.auth import require_auth, AuthContext
-from services.observability import logger, metrics
+from services.observability import logger, metrics, capture_exception
 
 router = APIRouter(prefix="/repos", tags=["Analysis"])
 
@@ -26,21 +26,23 @@ async def get_dependency_graph(
     """Get dependency graph for repository."""
     try:
         repo = get_repo_or_404(repo_id, auth.user_id)
-        
-        # Try cache first
+
         cached_graph = dependency_analyzer.load_from_cache(repo_id)
         if cached_graph:
             logger.debug("Using cached dependency graph", repo_id=repo_id)
             return {**cached_graph, "cached": True}
-        
-        # Build fresh
+
         logger.info("Building fresh dependency graph", repo_id=repo_id)
         graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
         dependency_analyzer.save_to_cache(repo_id, graph_data)
-        
+
         return {**graph_data, "cached": False}
+    except HTTPException:
+        raise
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+        logger.error("Dependency graph failed", repo_id=repo_id, error=str(e))
+        capture_exception(e, operation="dependency_graph", repo_id=repo_id)
+        raise HTTPException(status_code=500, detail="Failed to build dependency graph")
 
 
 @router.post("/{repo_id}/impact")
@@ -52,30 +54,32 @@ async def analyze_impact(
     """Analyze impact of changing a file."""
     try:
         repo = get_repo_or_404(repo_id, auth.user_id)
-        
-        # Validate file path
+
         valid_path, path_error = InputValidator.validate_file_path(
             request.file_path, repo["local_path"]
         )
         if not valid_path:
             raise HTTPException(status_code=400, detail=f"Invalid file path: {path_error}")
-        
-        # Get or build graph
+
         graph_data = dependency_analyzer.load_from_cache(repo_id)
         if not graph_data:
             logger.info("Building dependency graph for impact analysis", repo_id=repo_id)
             graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
             dependency_analyzer.save_to_cache(repo_id, graph_data)
-        
+
         impact = dependency_analyzer.get_file_impact(
             repo["local_path"],
             request.file_path,
             graph_data
         )
-        
+
         return impact
+    except HTTPException:
+        raise
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+        logger.error("Impact analysis failed", repo_id=repo_id, file_path=request.file_path, error=str(e))
+        capture_exception(e, operation="impact_analysis", repo_id=repo_id)
+        raise HTTPException(status_code=500, detail="Failed to analyze impact")
 
 
 @router.get("/{repo_id}/insights")
@@ -86,14 +90,13 @@ async def get_repository_insights(
     """Get comprehensive insights about repository."""
     try:
         repo = get_repo_or_404(repo_id, auth.user_id)
-        
-        # Get or build graph
+
         graph_data = dependency_analyzer.load_from_cache(repo_id)
         if not graph_data:
             logger.info("Building dependency graph for insights", repo_id=repo_id)
             graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
             dependency_analyzer.save_to_cache(repo_id, graph_data)
-        
+
         return {
             "repo_id": repo_id,
             "name": repo["name"],
@@ -106,8 +109,12 @@ async def get_repository_insights(
             "functions_indexed": repo["file_count"],
             "cached": bool(graph_data)
         }
+    except HTTPException:
+        raise
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+        logger.error("Repository insights failed", repo_id=repo_id, error=str(e))
+        capture_exception(e, operation="insights", repo_id=repo_id)
+        raise HTTPException(status_code=500, detail="Failed to get repository insights")
 
 
 @router.get("/{repo_id}/style-analysis")
@@ -118,22 +125,23 @@ async def get_style_analysis(
     """Analyze code style and team patterns."""
     try:
         repo = get_repo_or_404(repo_id, auth.user_id)
-        
-        # Try cache first
+
         cached_style = style_analyzer.load_from_cache(repo_id)
         if cached_style:
             logger.debug("Using cached code style", repo_id=repo_id)
             return {**cached_style, "cached": True}
-        
-        # Analyze fresh
+
         logger.info("Analyzing code style", repo_id=repo_id)
         style_data = style_analyzer.analyze_repository_style(repo["local_path"])
         style_analyzer.save_to_cache(repo_id, style_data)
-        
+
         return {**style_data, "cached": False}
+    except HTTPException:
+        raise
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
-
+        logger.error("Style analysis failed", repo_id=repo_id, error=str(e))
+        capture_exception(e, operation="style_analysis", repo_id=repo_id)
+        raise HTTPException(status_code=500, detail="Failed to analyze code style")
 
 
 @router.get("/{repo_id}/dna")
@@ -144,39 +152,40 @@ async def get_codebase_dna(
 ):
     """
     Extract codebase DNA - architectural patterns, conventions, and constraints.
-    
+
     This endpoint analyzes your codebase and returns a DNA profile that helps
     AI assistants understand how to write code consistent with your patterns.
-    
+
     Args:
         repo_id: Repository identifier
         format: Output format - 'json' or 'markdown' (default: json)
-    
+
     Returns:
         DNA profile with auth patterns, service patterns, database patterns, etc.
     """
     try:
         repo = get_repo_or_404(repo_id, auth.user_id)
-        
-        # Try cache first
+
         cached_dna = dna_extractor.load_from_cache(repo_id)
         if cached_dna:
             logger.debug("Using cached DNA", repo_id=repo_id)
             if format == "markdown":
                 return {"dna": cached_dna.to_markdown(), "cached": True}
             return {**cached_dna.to_dict(), "cached": True}
-        
-        # Extract fresh DNA
+
         logger.info("Extracting codebase DNA", repo_id=repo_id)
         metrics.increment("dna_extractions")
-        
+
         dna = dna_extractor.extract_dna(repo["local_path"], repo_id)
         dna_extractor.save_to_cache(repo_id, dna)
-        
+
         if format == "markdown":
             return {"dna": dna.to_markdown(), "cached": False}
         return {**dna.to_dict(), "cached": False}
-        
+
+    except HTTPException:
+        raise
     except Exception as e:
-        logger.error("Error extracting DNA", repo_id=repo_id, error=str(e))
-        raise HTTPException(status_code=500, detail=str(e))
+        logger.error("DNA extraction failed", repo_id=repo_id, error=str(e))
+        capture_exception(e, operation="dna_extraction", repo_id=repo_id)
+        raise HTTPException(status_code=500, detail="Failed to extract codebase DNA")
diff --git a/backend/routes/playground.py b/backend/routes/playground.py
@@ -487,7 +487,7 @@ async def playground_search(
         raise
     except Exception as e:
         logger.error("Playground search failed", error=str(e))
-        raise HTTPException(status_code=500, detail=str(e))
+        raise HTTPException(status_code=500, detail="Search failed")
 
 
 @router.get("/repos")
diff --git a/backend/routes/repos.py b/backend/routes/repos.py
@@ -145,7 +145,7 @@ async def add_repository(
     except Exception as e:
         logger.error("Failed to add repository", error=str(e), user_id=user_id)
         capture_exception(e)
-        raise HTTPException(status_code=400, detail=str(e))
+        raise HTTPException(status_code=400, detail="Failed to add repository")
 
 
 @router.delete("/{repo_id}")
@@ -174,7 +174,7 @@ async def delete_repository(
     except Exception as e:
         logger.error("Failed to delete repository", repo_id=repo_id, error=str(e))
         capture_exception(e)
-        raise HTTPException(status_code=500, detail=str(e))
+        raise HTTPException(status_code=500, detail="Failed to delete repository")
 
 
 @router.post("/{repo_id}/index")
@@ -268,7 +268,7 @@ async def index_repository(
         logger.error("Indexing failed", repo_id=repo_id, error=str(e))
         capture_exception(e)
         repo_manager.update_status(repo_id, "error")
-        raise HTTPException(status_code=500, detail=str(e))
+        raise HTTPException(status_code=500, detail="Indexing failed")
 
 
 async def _run_async_indexing(
@@ -478,7 +478,7 @@ async def index_repository_async(
     except Exception as e:
         logger.error("Failed to start async indexing", repo_id=repo_id, error=str(e))
         capture_exception(e)
-        raise HTTPException(status_code=500, detail=str(e))
+        raise HTTPException(status_code=500, detail="Failed to start indexing")
 
 
 async def _authenticate_websocket(websocket: WebSocket) -> Optional[dict]:
diff --git a/backend/routes/search.py b/backend/routes/search.py
@@ -117,7 +117,7 @@ async def search_code(
             error=str(e)
         )
         capture_exception(e, operation="search", repo_id=request.repo_id, user_id=auth.user_id)
-        raise HTTPException(status_code=500, detail=str(e))
+        raise HTTPException(status_code=500, detail="Search failed")
 
 
 @router.post("/explain")
@@ -177,4 +177,4 @@ async def explain_code(
             user_id=auth.user_id,
             file_path=request.file_path
         )
-        raise HTTPException(status_code=500, detail=str(e))
+        raise HTTPException(status_code=500, detail="Failed to generate explanation")
diff --git a/backend/routes/search_v2.py b/backend/routes/search_v2.py
@@ -154,4 +154,4 @@ async def search_v2(
             repo_id=request.repo_id,
             user_id=auth.user_id
         )
-        raise HTTPException(status_code=500, detail=str(e))
+        raise HTTPException(status_code=500, detail="Search failed")
diff --git a/backend/tests/test_error_leaking.py b/backend/tests/test_error_leaking.py
@@ -0,0 +1,66 @@
+"""Tests that 500 errors don't leak internal details (OPE-79)."""
+import pytest
+from unittest.mock import patch, MagicMock
+
+
+class TestErrorResponsesHideInternals:
+    """Verify 500 responses return generic messages, not str(e)."""
+
+    def test_search_error_hides_details(self, client, valid_headers):
+        """Search failure should not expose Pinecone/OpenAI error strings."""
+        with patch("routes.search.indexer") as mock_indexer:
+            mock_indexer.semantic_search.side_effect = RuntimeError(
+                "Pinecone connection refused at pinecone-prod.svc.us-east1.aws:443"
+            )
+            with patch("routes.search.verify_repo_access"):
+                resp = client.post(
+                    "/api/v1/search",
+                    json={"query": "auth middleware", "repo_id": "test-repo"},
+                    headers=valid_headers,
+                )
+
+        assert resp.status_code == 500
+        body = resp.json()["detail"]
+        assert "Pinecone" not in body
+        assert "pinecone-prod" not in body
+        assert "443" not in body
+
+    def test_dependency_graph_error_hides_details(self, client, valid_headers):
+        """Dependency graph failure should not expose file paths."""
+        with patch("routes.analysis.get_repo_or_404") as mock_repo:
+            mock_repo.return_value = {"local_path": "/srv/repos/abc", "name": "test"}
+            with patch("routes.analysis.dependency_analyzer") as mock_dep:
+                mock_dep.load_from_cache.return_value = None
+                mock_dep.build_dependency_graph.side_effect = FileNotFoundError(
+                    "/srv/repos/abc/.git/config not found"
+                )
+                resp = client.get(
+                    "/api/v1/repos/test-repo/dependencies",
+                    headers=valid_headers,
+                )
+
+        assert resp.status_code == 500
+        body = resp.json()["detail"]
+        assert "/srv/repos" not in body
+        assert ".git/config" not in body
+
+    def test_repo_add_error_hides_details(self, client, valid_headers):
+        """Add repo failure should not expose git credentials or paths."""
+        with patch("routes.repos.repo_manager") as mock_rm, \
+             patch("routes.repos.user_limits") as mock_limits:
+            limit_check = MagicMock()
+            limit_check.allowed = True
+            mock_limits.check_repo_count.return_value = limit_check
+            mock_rm.add_repo.side_effect = Exception(
+                "Authentication failed for https://user:ghp_secret@github.com/org/repo.git"
+            )
+            resp = client.post(
+                "/api/v1/repos",
+                json={"name": "test", "git_url": "https://github.com/org/repo", "branch": "main"},
+                headers=valid_headers,
+            )
+
+        assert resp.status_code == 400
+        body = resp.json()["detail"]
+        assert "ghp_secret" not in body
+        assert "Authentication failed" not in body

Original file line number	Diff line number	Diff line change
`@@ -117,7 +117,7 @@ async def search_code(`
`117`	`117`	`error=str(e)`
`118`	`118`	`)`
`119`	`119`	`capture_exception(e, operation="search", repo_id=request.repo_id, user_id=auth.user_id)`
`120`		`- raise HTTPException(status_code=500, detail=str(e))`
	`120`	`+ raise HTTPException(status_code=500, detail="Search failed")`
`121`	`121`
`122`	`122`
`123`	`123`	`@router.post("/explain")`
`@@ -177,4 +177,4 @@ async def explain_code(`
`177`	`177`	`user_id=auth.user_id,`
`178`	`178`	`file_path=request.file_path`
`179`	`179`	`)`
`180`		`- raise HTTPException(status_code=500, detail=str(e))`
	`180`	`+ raise HTTPException(status_code=500, detail="Failed to generate explanation")`
Original file line number	Diff line number	Diff line change
`@@ -154,4 +154,4 @@ async def search_v2(`
`154`	`154`	`repo_id=request.repo_id,`
`155`	`155`	`user_id=auth.user_id`
`156`	`156`	`)`
`157`		`- raise HTTPException(status_code=500, detail=str(e))`
	`157`	`+ raise HTTPException(status_code=500, detail="Search failed")`