Skip to content
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,7 @@ backend/repos/
# MCP Server
mcp-server/__pycache__/
*.code-workspace

# Validation experiments
validation-experiment/
exp-2/
2 changes: 2 additions & 0 deletions backend/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from services.dependency_analyzer import DependencyAnalyzer
from services.style_analyzer import StyleAnalyzer
from services.performance_metrics import PerformanceMetrics
from services.dna_extractor import DNAExtractor
from services.rate_limiter import RateLimiter, APIKeyManager
from services.supabase_service import get_supabase_service
from services.input_validator import InputValidator, CostController
Expand All @@ -26,6 +27,7 @@
# Module-level service instances, created once at import time.
# Route modules import these by name (e.g. routes/analysis.py pulls in
# dependency_analyzer, style_analyzer and dna_extractor).
repo_manager = RepositoryManager()
dependency_analyzer = DependencyAnalyzer()
style_analyzer = StyleAnalyzer()
dna_extractor = DNAExtractor()
metrics = PerformanceMetrics()

# Rate limiting and API key management
Expand Down
49 changes: 48 additions & 1 deletion backend/routes/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pydantic import BaseModel

from dependencies import (
dependency_analyzer, style_analyzer,
dependency_analyzer, style_analyzer, dna_extractor,
get_repo_or_404
)
from services.input_validator import InputValidator
Expand Down Expand Up @@ -133,3 +133,50 @@ async def get_style_analysis(
return {**style_data, "cached": False}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))



@router.get("/{repo_id}/dna")
async def get_codebase_dna(
    repo_id: str,
    format: str = "json",
    auth: AuthContext = Depends(require_auth)
):
    """
    Extract codebase DNA - architectural patterns, conventions, and constraints.

    This endpoint analyzes your codebase and returns a DNA profile that helps
    AI assistants understand how to write code consistent with your patterns.

    Args:
        repo_id: Repository identifier
        format: Output format - 'json' or 'markdown' (default: json).
            Any value other than 'markdown' is served as JSON.

    Returns:
        DNA profile with auth patterns, service patterns, database patterns, etc.
        For 'markdown' the body is {"dna": <markdown text>, "cached": <bool>};
        otherwise it is the profile's to_dict() fields plus a "cached" flag.

    Raises:
        HTTPException: Any HTTPException raised while resolving the repository
            is passed through with its original status code; every other
            failure is reported as a 500 carrying the error message.
    """
    try:
        # Resolve the repository for this user before touching the cache, so
        # the cache lookup only runs once the repo lookup has succeeded.
        repo = get_repo_or_404(repo_id, auth.user_id)

        # Try cache first
        dna = dna_extractor.load_from_cache(repo_id)
        cached = bool(dna)

        if cached:
            logger.debug("Using cached DNA", repo_id=repo_id)
        else:
            # Extract fresh DNA and store it for subsequent requests
            logger.info("Extracting codebase DNA", repo_id=repo_id)
            metrics.increment("dna_extractions")

            dna = dna_extractor.extract_dna(repo["local_path"], repo_id)
            dna_extractor.save_to_cache(repo_id, dna)

        # Single response path for both cached and freshly extracted profiles
        if format == "markdown":
            return {"dna": dna.to_markdown(), "cached": cached}
        return {**dna.to_dict(), "cached": cached}

    except HTTPException:
        # HTTPException is a subclass of Exception: without this clause a
        # deliberate HTTP error (e.g. from get_repo_or_404) would be caught
        # below and rewritten as a 500.
        raise
    except Exception as e:
        logger.error("Error extracting DNA", repo_id=repo_id, error=str(e))
        raise HTTPException(status_code=500, detail=str(e)) from e
Loading