Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 46 additions & 37 deletions backend/routes/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
)
from services.input_validator import InputValidator
from middleware.auth import require_auth, AuthContext
from services.observability import logger, metrics
from services.observability import logger, metrics, capture_exception

router = APIRouter(prefix="/repos", tags=["Analysis"])

Expand All @@ -26,21 +26,23 @@ async def get_dependency_graph(
"""Get dependency graph for repository."""
try:
repo = get_repo_or_404(repo_id, auth.user_id)

# Try cache first

cached_graph = dependency_analyzer.load_from_cache(repo_id)
if cached_graph:
logger.debug("Using cached dependency graph", repo_id=repo_id)
return {**cached_graph, "cached": True}

# Build fresh

logger.info("Building fresh dependency graph", repo_id=repo_id)
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
dependency_analyzer.save_to_cache(repo_id, graph_data)

return {**graph_data, "cached": False}
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
logger.error("Dependency graph failed", repo_id=repo_id, error=str(e))
capture_exception(e, operation="dependency_graph", repo_id=repo_id)
raise HTTPException(status_code=500, detail="Failed to build dependency graph")


@router.post("/{repo_id}/impact")
Expand All @@ -52,30 +54,32 @@ async def analyze_impact(
"""Analyze impact of changing a file."""
try:
repo = get_repo_or_404(repo_id, auth.user_id)

# Validate file path

valid_path, path_error = InputValidator.validate_file_path(
request.file_path, repo["local_path"]
)
if not valid_path:
raise HTTPException(status_code=400, detail=f"Invalid file path: {path_error}")

# Get or build graph

graph_data = dependency_analyzer.load_from_cache(repo_id)
if not graph_data:
logger.info("Building dependency graph for impact analysis", repo_id=repo_id)
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
dependency_analyzer.save_to_cache(repo_id, graph_data)

impact = dependency_analyzer.get_file_impact(
repo["local_path"],
request.file_path,
graph_data
)

return impact
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
logger.error("Impact analysis failed", repo_id=repo_id, file_path=request.file_path, error=str(e))
capture_exception(e, operation="impact_analysis", repo_id=repo_id, file_path=request.file_path)
raise HTTPException(status_code=500, detail="Failed to analyze impact")


@router.get("/{repo_id}/insights")
Expand All @@ -86,14 +90,13 @@ async def get_repository_insights(
"""Get comprehensive insights about repository."""
try:
repo = get_repo_or_404(repo_id, auth.user_id)

# Get or build graph

graph_data = dependency_analyzer.load_from_cache(repo_id)
if not graph_data:
logger.info("Building dependency graph for insights", repo_id=repo_id)
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
dependency_analyzer.save_to_cache(repo_id, graph_data)

return {
"repo_id": repo_id,
"name": repo["name"],
Expand All @@ -106,8 +109,12 @@ async def get_repository_insights(
"functions_indexed": repo["file_count"],
"cached": bool(graph_data)
}
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
logger.error("Repository insights failed", repo_id=repo_id, error=str(e))
capture_exception(e, operation="insights", repo_id=repo_id)
raise HTTPException(status_code=500, detail="Failed to get repository insights")


@router.get("/{repo_id}/style-analysis")
Expand All @@ -118,22 +125,23 @@ async def get_style_analysis(
"""Analyze code style and team patterns."""
try:
repo = get_repo_or_404(repo_id, auth.user_id)

# Try cache first

cached_style = style_analyzer.load_from_cache(repo_id)
if cached_style:
logger.debug("Using cached code style", repo_id=repo_id)
return {**cached_style, "cached": True}

# Analyze fresh

logger.info("Analyzing code style", repo_id=repo_id)
style_data = style_analyzer.analyze_repository_style(repo["local_path"])
style_analyzer.save_to_cache(repo_id, style_data)

return {**style_data, "cached": False}
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))

logger.error("Style analysis failed", repo_id=repo_id, error=str(e))
capture_exception(e, operation="style_analysis", repo_id=repo_id)
raise HTTPException(status_code=500, detail="Failed to analyze code style")


@router.get("/{repo_id}/dna")
Expand All @@ -144,39 +152,40 @@ async def get_codebase_dna(
):
"""
Extract codebase DNA - architectural patterns, conventions, and constraints.

This endpoint analyzes your codebase and returns a DNA profile that helps
AI assistants understand how to write code consistent with your patterns.

Args:
repo_id: Repository identifier
format: Output format - 'json' or 'markdown' (default: json)

Returns:
DNA profile with auth patterns, service patterns, database patterns, etc.
"""
try:
repo = get_repo_or_404(repo_id, auth.user_id)

# Try cache first

cached_dna = dna_extractor.load_from_cache(repo_id)
if cached_dna:
logger.debug("Using cached DNA", repo_id=repo_id)
if format == "markdown":
return {"dna": cached_dna.to_markdown(), "cached": True}
return {**cached_dna.to_dict(), "cached": True}

# Extract fresh DNA

logger.info("Extracting codebase DNA", repo_id=repo_id)
metrics.increment("dna_extractions")

dna = dna_extractor.extract_dna(repo["local_path"], repo_id)
dna_extractor.save_to_cache(repo_id, dna)

if format == "markdown":
return {"dna": dna.to_markdown(), "cached": False}
return {**dna.to_dict(), "cached": False}


except HTTPException:
raise
except Exception as e:
logger.error("Error extracting DNA", repo_id=repo_id, error=str(e))
raise HTTPException(status_code=500, detail=str(e))
logger.error("DNA extraction failed", repo_id=repo_id, error=str(e))
capture_exception(e, operation="dna_extraction", repo_id=repo_id)
raise HTTPException(status_code=500, detail="Failed to extract codebase DNA")
7 changes: 4 additions & 3 deletions backend/routes/playground.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from dependencies import indexer, cache, repo_manager, redis_client
from services.input_validator import InputValidator
from services.repo_validator import RepoValidator
from services.observability import logger
from services.observability import logger, capture_exception
from services.playground_limiter import PlaygroundLimiter, get_playground_limiter, IndexedRepoData
from services.anonymous_indexer import (
AnonymousIndexingJob,
Expand Down Expand Up @@ -486,8 +486,9 @@ async def playground_search(
except HTTPException:
raise
except Exception as e:
capture_exception(e, operation="playground_search")
logger.error("Playground search failed", error=str(e))
raise HTTPException(status_code=500, detail=str(e))
raise HTTPException(status_code=500, detail="Search failed")


@router.get("/repos")
Expand Down Expand Up @@ -579,7 +580,7 @@ async def _fetch_repo_metadata(owner: str, repo: str) -> dict:
return {"error": "timeout", "message": "GitHub API request timed out"}
except Exception as e:
logger.error("GitHub API request failed", error=str(e))
return {"error": "request_failed", "message": str(e)}
return {"error": "request_failed", "message": "Failed to fetch repository metadata"}


async def _count_code_files(
Expand Down
12 changes: 6 additions & 6 deletions backend/routes/repos.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ async def add_repository(
except Exception as e:
logger.error("Failed to add repository", error=str(e), user_id=user_id)
capture_exception(e)
raise HTTPException(status_code=400, detail=str(e))
raise HTTPException(status_code=500, detail="Failed to add repository")


@router.delete("/{repo_id}")
Expand Down Expand Up @@ -174,7 +174,7 @@ async def delete_repository(
except Exception as e:
logger.error("Failed to delete repository", repo_id=repo_id, error=str(e))
capture_exception(e)
raise HTTPException(status_code=500, detail=str(e))
raise HTTPException(status_code=500, detail="Failed to delete repository")


@router.post("/{repo_id}/index")
Expand Down Expand Up @@ -268,7 +268,7 @@ async def index_repository(
logger.error("Indexing failed", repo_id=repo_id, error=str(e))
capture_exception(e)
repo_manager.update_status(repo_id, "error")
raise HTTPException(status_code=500, detail=str(e))
raise HTTPException(status_code=500, detail="Indexing failed")


async def _run_async_indexing(
Expand Down Expand Up @@ -395,7 +395,7 @@ async def progress_callback(
publisher.publish_error(
repo_id,
error="indexing_failed",
message=str(e),
message="An error occurred during indexing",
recoverable=True
)

Expand Down Expand Up @@ -478,7 +478,7 @@ async def index_repository_async(
except Exception as e:
logger.error("Failed to start async indexing", repo_id=repo_id, error=str(e))
capture_exception(e)
raise HTTPException(status_code=500, detail=str(e))
raise HTTPException(status_code=500, detail="Failed to start indexing")


async def _authenticate_websocket(websocket: WebSocket) -> Optional[dict]:
Expand Down Expand Up @@ -573,7 +573,7 @@ async def progress_callback(files_processed: int, functions_indexed: int, total_
logger.error("WebSocket indexing error", repo_id=repo_id, error=str(e))
capture_exception(e, operation="websocket_indexing", repo_id=repo_id)
try:
await websocket.send_json({"type": "error", "message": str(e)})
await websocket.send_json({"type": "error", "message": "An error occurred during indexing"})
except Exception:
pass
repo_manager.update_status(repo_id, "error")
Expand Down
4 changes: 2 additions & 2 deletions backend/routes/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ async def search_code(
error=str(e)
)
capture_exception(e, operation="search", repo_id=request.repo_id, user_id=auth.user_id)
raise HTTPException(status_code=500, detail=str(e))
raise HTTPException(status_code=500, detail="Search failed")


@router.post("/explain")
Expand Down Expand Up @@ -177,4 +177,4 @@ async def explain_code(
user_id=auth.user_id,
file_path=request.file_path
)
raise HTTPException(status_code=500, detail=str(e))
raise HTTPException(status_code=500, detail="Failed to generate explanation")
2 changes: 1 addition & 1 deletion backend/routes/search_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,4 +154,4 @@ async def search_v2(
repo_id=request.repo_id,
user_id=auth.user_id
)
raise HTTPException(status_code=500, detail=str(e))
raise HTTPException(status_code=500, detail="Search failed")
68 changes: 68 additions & 0 deletions backend/tests/test_error_leaking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""Tests that 500 errors don't leak internal details (OPE-79)."""
import pytest
from unittest.mock import patch, MagicMock


class TestErrorResponsesHideInternals:
    """Verify 500 responses return generic messages, not str(e).

    Each test patches a dependency of a route module so that it raises an
    exception whose message contains sensitive-looking text (a hostname, a
    filesystem path, a credential), calls the endpoint, and then checks
    that the response carries only the generic message.
    """

    @staticmethod
    def _assert_generic_500(resp, expected_detail, *forbidden):
        """Assert *resp* is a 500 with a generic detail and no leaked text.

        Args:
            resp: Response object returned by the test client.
            expected_detail: Exact generic message expected in ``detail``.
            *forbidden: Fragments of the injected exception message that
                must not appear anywhere in the response body.
        """
        assert resp.status_code == 500
        assert resp.json()["detail"] == expected_detail
        # Check the whole raw body rather than only ``detail``: once
        # ``detail`` is known to equal the generic message, a substring
        # check on it can never fail, while a leak through some other
        # field of the payload would go unnoticed.
        raw_body = resp.text
        for fragment in forbidden:
            assert fragment not in raw_body

    def test_search_error_hides_details(self, client, valid_headers):
        """Search failure should not expose Pinecone/OpenAI error strings."""
        with patch("routes.search.indexer") as mock_indexer, \
                patch("routes.search.cache") as mock_cache, \
                patch("routes.search.verify_repo_access"):
            # Force a cache miss so the request reaches the failing search.
            mock_cache.get_search_results.return_value = None
            mock_indexer.semantic_search.side_effect = RuntimeError(
                "Pinecone connection refused at pinecone-prod.svc.us-east1.aws:443"
            )
            resp = client.post(
                "/api/v1/search",
                json={"query": "auth middleware", "repo_id": "test-repo"},
                headers=valid_headers,
            )

        self._assert_generic_500(
            resp, "Search failed", "Pinecone", "pinecone-prod"
        )

    def test_dependency_graph_error_hides_details(self, client, valid_headers):
        """Dependency graph failure should not expose file paths."""
        with patch("routes.analysis.get_repo_or_404") as mock_repo, \
                patch("routes.analysis.dependency_analyzer") as mock_dep:
            mock_repo.return_value = {"local_path": "/srv/repos/abc", "name": "test"}
            # Force a cache miss so the graph build (which raises) is attempted.
            mock_dep.load_from_cache.return_value = None
            mock_dep.build_dependency_graph.side_effect = FileNotFoundError(
                "/srv/repos/abc/.git/config not found"
            )
            resp = client.get(
                "/api/v1/repos/test-repo/dependencies",
                headers=valid_headers,
            )

        self._assert_generic_500(
            resp, "Failed to build dependency graph", "/srv/repos"
        )

    def test_repo_add_error_hides_details(self, client, valid_headers):
        """Add repo failure should not expose git credentials or paths."""
        with patch("routes.repos.repo_manager") as mock_rm, \
                patch("routes.repos.user_limits") as mock_limits:
            # Let the repo-count limit check pass so add_repo is reached.
            limit_check = MagicMock()
            limit_check.allowed = True
            mock_limits.check_repo_count.return_value = limit_check
            mock_rm.add_repo.side_effect = Exception(
                "Authentication failed for https://user:ghp_secret@github.com/org/repo.git"
            )
            resp = client.post(
                "/api/v1/repos",
                json={"name": "test", "git_url": "https://github.com/org/repo", "branch": "main"},
                headers=valid_headers,
            )

        self._assert_generic_500(
            resp, "Failed to add repository", "ghp_secret"
        )