Skip to content

Commit 66792cb

Browse files
committed
fix: stop 13 routes from leaking internal errors via detail=str(e)
Before: 13 route handlers returned raw exception strings in HTTP responses. In production this leaked file paths, database connection details, API key fragments, and third-party service errors. After: All catch blocks return generic error messages. The actual error is logged via logger.error() + capture_exception() for debugging in Sentry/logs. Changes by file: - analysis.py: Fixed all 5 endpoints. Also added missing 'except HTTPException: raise' (was swallowing 404s as 500s) and added logger.error + capture_exception (had zero logging). - repos.py: Fixed 4 endpoints (add, delete, index, async index). - search.py: Fixed 2 endpoints (search, explain). - search_v2.py: Fixed 1 endpoint. - playground.py: Fixed 1 endpoint. Secondary fix in analysis.py: - get_repo_or_404 raises HTTPException(404) but the bare 'except Exception' was catching it and re-raising as 500. Added 'except HTTPException: raise' before 'except Exception' on all 5 endpoints. Tests: 3 new tests verify that Pinecone connection strings, file paths, and git credentials are NOT exposed in error responses. 284 tests pass (281 existing + 3 new). Closes OPE-79
1 parent ee0a7cc commit 66792cb

6 files changed

Lines changed: 120 additions & 45 deletions

File tree

backend/routes/analysis.py

Lines changed: 46 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
)
99
from services.input_validator import InputValidator
1010
from middleware.auth import require_auth, AuthContext
11-
from services.observability import logger, metrics
11+
from services.observability import logger, metrics, capture_exception
1212

1313
router = APIRouter(prefix="/repos", tags=["Analysis"])
1414

@@ -26,21 +26,23 @@ async def get_dependency_graph(
2626
"""Get dependency graph for repository."""
2727
try:
2828
repo = get_repo_or_404(repo_id, auth.user_id)
29-
30-
# Try cache first
29+
3130
cached_graph = dependency_analyzer.load_from_cache(repo_id)
3231
if cached_graph:
3332
logger.debug("Using cached dependency graph", repo_id=repo_id)
3433
return {**cached_graph, "cached": True}
35-
36-
# Build fresh
34+
3735
logger.info("Building fresh dependency graph", repo_id=repo_id)
3836
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
3937
dependency_analyzer.save_to_cache(repo_id, graph_data)
40-
38+
4139
return {**graph_data, "cached": False}
40+
except HTTPException:
41+
raise
4242
except Exception as e:
43-
raise HTTPException(status_code=500, detail=str(e))
43+
logger.error("Dependency graph failed", repo_id=repo_id, error=str(e))
44+
capture_exception(e, operation="dependency_graph", repo_id=repo_id)
45+
raise HTTPException(status_code=500, detail="Failed to build dependency graph")
4446

4547

4648
@router.post("/{repo_id}/impact")
@@ -52,30 +54,32 @@ async def analyze_impact(
5254
"""Analyze impact of changing a file."""
5355
try:
5456
repo = get_repo_or_404(repo_id, auth.user_id)
55-
56-
# Validate file path
57+
5758
valid_path, path_error = InputValidator.validate_file_path(
5859
request.file_path, repo["local_path"]
5960
)
6061
if not valid_path:
6162
raise HTTPException(status_code=400, detail=f"Invalid file path: {path_error}")
62-
63-
# Get or build graph
63+
6464
graph_data = dependency_analyzer.load_from_cache(repo_id)
6565
if not graph_data:
6666
logger.info("Building dependency graph for impact analysis", repo_id=repo_id)
6767
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
6868
dependency_analyzer.save_to_cache(repo_id, graph_data)
69-
69+
7070
impact = dependency_analyzer.get_file_impact(
7171
repo["local_path"],
7272
request.file_path,
7373
graph_data
7474
)
75-
75+
7676
return impact
77+
except HTTPException:
78+
raise
7779
except Exception as e:
78-
raise HTTPException(status_code=500, detail=str(e))
80+
logger.error("Impact analysis failed", repo_id=repo_id, file_path=request.file_path, error=str(e))
81+
capture_exception(e, operation="impact_analysis", repo_id=repo_id)
82+
raise HTTPException(status_code=500, detail="Failed to analyze impact")
7983

8084

8185
@router.get("/{repo_id}/insights")
@@ -86,14 +90,13 @@ async def get_repository_insights(
8690
"""Get comprehensive insights about repository."""
8791
try:
8892
repo = get_repo_or_404(repo_id, auth.user_id)
89-
90-
# Get or build graph
93+
9194
graph_data = dependency_analyzer.load_from_cache(repo_id)
9295
if not graph_data:
9396
logger.info("Building dependency graph for insights", repo_id=repo_id)
9497
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
9598
dependency_analyzer.save_to_cache(repo_id, graph_data)
96-
99+
97100
return {
98101
"repo_id": repo_id,
99102
"name": repo["name"],
@@ -106,8 +109,12 @@ async def get_repository_insights(
106109
"functions_indexed": repo["file_count"],
107110
"cached": bool(graph_data)
108111
}
112+
except HTTPException:
113+
raise
109114
except Exception as e:
110-
raise HTTPException(status_code=500, detail=str(e))
115+
logger.error("Repository insights failed", repo_id=repo_id, error=str(e))
116+
capture_exception(e, operation="insights", repo_id=repo_id)
117+
raise HTTPException(status_code=500, detail="Failed to get repository insights")
111118

112119

113120
@router.get("/{repo_id}/style-analysis")
@@ -118,22 +125,23 @@ async def get_style_analysis(
118125
"""Analyze code style and team patterns."""
119126
try:
120127
repo = get_repo_or_404(repo_id, auth.user_id)
121-
122-
# Try cache first
128+
123129
cached_style = style_analyzer.load_from_cache(repo_id)
124130
if cached_style:
125131
logger.debug("Using cached code style", repo_id=repo_id)
126132
return {**cached_style, "cached": True}
127-
128-
# Analyze fresh
133+
129134
logger.info("Analyzing code style", repo_id=repo_id)
130135
style_data = style_analyzer.analyze_repository_style(repo["local_path"])
131136
style_analyzer.save_to_cache(repo_id, style_data)
132-
137+
133138
return {**style_data, "cached": False}
139+
except HTTPException:
140+
raise
134141
except Exception as e:
135-
raise HTTPException(status_code=500, detail=str(e))
136-
142+
logger.error("Style analysis failed", repo_id=repo_id, error=str(e))
143+
capture_exception(e, operation="style_analysis", repo_id=repo_id)
144+
raise HTTPException(status_code=500, detail="Failed to analyze code style")
137145

138146

139147
@router.get("/{repo_id}/dna")
@@ -144,39 +152,40 @@ async def get_codebase_dna(
144152
):
145153
"""
146154
Extract codebase DNA - architectural patterns, conventions, and constraints.
147-
155+
148156
This endpoint analyzes your codebase and returns a DNA profile that helps
149157
AI assistants understand how to write code consistent with your patterns.
150-
158+
151159
Args:
152160
repo_id: Repository identifier
153161
format: Output format - 'json' or 'markdown' (default: json)
154-
162+
155163
Returns:
156164
DNA profile with auth patterns, service patterns, database patterns, etc.
157165
"""
158166
try:
159167
repo = get_repo_or_404(repo_id, auth.user_id)
160-
161-
# Try cache first
168+
162169
cached_dna = dna_extractor.load_from_cache(repo_id)
163170
if cached_dna:
164171
logger.debug("Using cached DNA", repo_id=repo_id)
165172
if format == "markdown":
166173
return {"dna": cached_dna.to_markdown(), "cached": True}
167174
return {**cached_dna.to_dict(), "cached": True}
168-
169-
# Extract fresh DNA
175+
170176
logger.info("Extracting codebase DNA", repo_id=repo_id)
171177
metrics.increment("dna_extractions")
172-
178+
173179
dna = dna_extractor.extract_dna(repo["local_path"], repo_id)
174180
dna_extractor.save_to_cache(repo_id, dna)
175-
181+
176182
if format == "markdown":
177183
return {"dna": dna.to_markdown(), "cached": False}
178184
return {**dna.to_dict(), "cached": False}
179-
185+
186+
except HTTPException:
187+
raise
180188
except Exception as e:
181-
logger.error("Error extracting DNA", repo_id=repo_id, error=str(e))
182-
raise HTTPException(status_code=500, detail=str(e))
189+
logger.error("DNA extraction failed", repo_id=repo_id, error=str(e))
190+
capture_exception(e, operation="dna_extraction", repo_id=repo_id)
191+
raise HTTPException(status_code=500, detail="Failed to extract codebase DNA")

backend/routes/playground.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,7 @@ async def playground_search(
487487
raise
488488
except Exception as e:
489489
logger.error("Playground search failed", error=str(e))
490-
raise HTTPException(status_code=500, detail=str(e))
490+
raise HTTPException(status_code=500, detail="Search failed")
491491

492492

493493
@router.get("/repos")

backend/routes/repos.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ async def add_repository(
145145
except Exception as e:
146146
logger.error("Failed to add repository", error=str(e), user_id=user_id)
147147
capture_exception(e)
148-
raise HTTPException(status_code=400, detail=str(e))
148+
raise HTTPException(status_code=400, detail="Failed to add repository")
149149

150150

151151
@router.delete("/{repo_id}")
@@ -174,7 +174,7 @@ async def delete_repository(
174174
except Exception as e:
175175
logger.error("Failed to delete repository", repo_id=repo_id, error=str(e))
176176
capture_exception(e)
177-
raise HTTPException(status_code=500, detail=str(e))
177+
raise HTTPException(status_code=500, detail="Failed to delete repository")
178178

179179

180180
@router.post("/{repo_id}/index")
@@ -268,7 +268,7 @@ async def index_repository(
268268
logger.error("Indexing failed", repo_id=repo_id, error=str(e))
269269
capture_exception(e)
270270
repo_manager.update_status(repo_id, "error")
271-
raise HTTPException(status_code=500, detail=str(e))
271+
raise HTTPException(status_code=500, detail="Indexing failed")
272272

273273

274274
async def _run_async_indexing(
@@ -478,7 +478,7 @@ async def index_repository_async(
478478
except Exception as e:
479479
logger.error("Failed to start async indexing", repo_id=repo_id, error=str(e))
480480
capture_exception(e)
481-
raise HTTPException(status_code=500, detail=str(e))
481+
raise HTTPException(status_code=500, detail="Failed to start indexing")
482482

483483

484484
async def _authenticate_websocket(websocket: WebSocket) -> Optional[dict]:

backend/routes/search.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ async def search_code(
117117
error=str(e)
118118
)
119119
capture_exception(e, operation="search", repo_id=request.repo_id, user_id=auth.user_id)
120-
raise HTTPException(status_code=500, detail=str(e))
120+
raise HTTPException(status_code=500, detail="Search failed")
121121

122122

123123
@router.post("/explain")
@@ -177,4 +177,4 @@ async def explain_code(
177177
user_id=auth.user_id,
178178
file_path=request.file_path
179179
)
180-
raise HTTPException(status_code=500, detail=str(e))
180+
raise HTTPException(status_code=500, detail="Failed to generate explanation")

backend/routes/search_v2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,4 +154,4 @@ async def search_v2(
154154
repo_id=request.repo_id,
155155
user_id=auth.user_id
156156
)
157-
raise HTTPException(status_code=500, detail=str(e))
157+
raise HTTPException(status_code=500, detail="Search failed")
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
"""Tests that 500 errors don't leak internal details (OPE-79)."""
2+
import pytest
3+
from unittest.mock import patch, MagicMock
4+
5+
6+
class TestErrorResponsesHideInternals:
7+
"""Verify 500 responses return generic messages, not str(e)."""
8+
9+
def test_search_error_hides_details(self, client, valid_headers):
10+
"""Search failure should not expose Pinecone/OpenAI error strings."""
11+
with patch("routes.search.indexer") as mock_indexer:
12+
mock_indexer.semantic_search.side_effect = RuntimeError(
13+
"Pinecone connection refused at pinecone-prod.svc.us-east1.aws:443"
14+
)
15+
with patch("routes.search.verify_repo_access"):
16+
resp = client.post(
17+
"/api/v1/search",
18+
json={"query": "auth middleware", "repo_id": "test-repo"},
19+
headers=valid_headers,
20+
)
21+
22+
assert resp.status_code == 500
23+
body = resp.json()["detail"]
24+
assert "Pinecone" not in body
25+
assert "pinecone-prod" not in body
26+
assert "443" not in body
27+
28+
def test_dependency_graph_error_hides_details(self, client, valid_headers):
29+
"""Dependency graph failure should not expose file paths."""
30+
with patch("routes.analysis.get_repo_or_404") as mock_repo:
31+
mock_repo.return_value = {"local_path": "/srv/repos/abc", "name": "test"}
32+
with patch("routes.analysis.dependency_analyzer") as mock_dep:
33+
mock_dep.load_from_cache.return_value = None
34+
mock_dep.build_dependency_graph.side_effect = FileNotFoundError(
35+
"/srv/repos/abc/.git/config not found"
36+
)
37+
resp = client.get(
38+
"/api/v1/repos/test-repo/dependencies",
39+
headers=valid_headers,
40+
)
41+
42+
assert resp.status_code == 500
43+
body = resp.json()["detail"]
44+
assert "/srv/repos" not in body
45+
assert ".git/config" not in body
46+
47+
def test_repo_add_error_hides_details(self, client, valid_headers):
48+
"""Add repo failure should not expose git credentials or paths."""
49+
with patch("routes.repos.repo_manager") as mock_rm, \
50+
patch("routes.repos.user_limits") as mock_limits:
51+
limit_check = MagicMock()
52+
limit_check.allowed = True
53+
mock_limits.check_repo_count.return_value = limit_check
54+
mock_rm.add_repo.side_effect = Exception(
55+
"Authentication failed for https://user:ghp_secret@github.com/org/repo.git"
56+
)
57+
resp = client.post(
58+
"/api/v1/repos",
59+
json={"name": "test", "git_url": "https://github.com/org/repo", "branch": "main"},
60+
headers=valid_headers,
61+
)
62+
63+
assert resp.status_code == 400
64+
body = resp.json()["detail"]
65+
assert "ghp_secret" not in body
66+
assert "Authentication failed" not in body

0 commit comments

Comments
 (0)