refactor: restructure MCP server from 1 file to 6 focused modules (OPE-94, OPE-91)

DevanshuNEU · DevanshuNEU · commit 194b9bbafb69 · 2026-02-24T23:11:04.000-05:00
Split 389-line server.py into focused modules:
- config.py (16 lines) -- env config, no decorative headers
- api_client.py (59 lines) -- persistent httpx client, reused across calls
- tools.py (146 lines) -- tool schema definitions
- formatters.py (175 lines) -- response -> markdown, independently testable
- handlers.py (104 lines) -- dispatch + API calls + safe error messages
- server.py (54 lines) -- bootstrap only

Security fixes:
- Remove hardcoded 'dev-secret-key' default (empty string, fails loud)
- Remove 'dev-secret-key' from .env.example
- Add local .gitignore for venv/, __pycache__/, .env
- Safe error messages: no httpx internals leaked, includes tool name + repo_id

CLAUDE.md compliance:
- Remove emoji from cached result indicators (was lightning bolt)
- All files under 200 lines

Note: search endpoint is already correct (/api/v1/search uses query expansion
+ reranking). The 'uses v1 search' finding in OPE-91 is stale -- there is
only one search endpoint and it already uses the latest engine.
diff --git a/mcp-server/.env.example b/mcp-server/.env.example
@@ -1,3 +1,3 @@
 # Backend API Configuration
 BACKEND_API_URL=http://localhost:8000
-API_KEY=dev-secret-key
+API_KEY=your-api-key-here
diff --git a/mcp-server/.gitignore b/mcp-server/.gitignore
@@ -0,0 +1,11 @@
+# Python
+__pycache__/
+*.pyc
+*.pyo
+
+# Virtual environment
+venv/
+
+# Environment (secrets)
+.env
+.env.local
diff --git a/mcp-server/api_client.py b/mcp-server/api_client.py
@@ -0,0 +1,59 @@
+"""Persistent HTTP client for backend API communication.
+
+Uses a module-level client to avoid creating new TCP connections per tool call.
+The client is initialized lazily on first use and reused for all subsequent calls.
+"""
+from typing import Any, Optional
+
+import httpx
+
+from config import BACKEND_API_URL, API_KEY
+
+
+# Persistent client reused across all tool calls.
+# Stays None until get_client() builds it; close_client() resets it to None.
+_client: Optional[httpx.AsyncClient] = None
+
+
+def _get_headers() -> dict[str, str]:
+    """Build the bearer-token Authorization header from API_KEY.
+
+    Returns:
+        A single-entry dict: {"Authorization": "Bearer <API_KEY>"}.
+
+    Raises:
+        ValueError: If API_KEY is empty. This is a hard failure, not a warning.
+    """
+    if not API_KEY:
+        raise ValueError(
+            "No API_KEY configured. Set API_KEY in .env or environment."
+        )
+    return {"Authorization": f"Bearer {API_KEY}"}
+
+
+async def get_client() -> httpx.AsyncClient:
+    """Return the shared HTTP client, creating it on first use.
+
+    A fresh client is also built when the previous one has been closed.
+    Auth headers are computed only when a client is created, so
+    _get_headers() may raise ValueError here if no API key is configured.
+    """
+    global _client
+    if _client is None or _client.is_closed:
+        _client = httpx.AsyncClient(
+            base_url=BACKEND_API_URL,
+            # Default timeout for every request made through this client.
+            timeout=120.0,
+            headers=_get_headers(),
+        )
+    return _client
+
+
+async def api_get(path: str, **kwargs: Any) -> dict:
+    """Send a GET request to the backend and return the decoded JSON body.
+
+    Args:
+        path: Request path handed to the shared client, which is created
+            with BACKEND_API_URL as its base URL.
+        **kwargs: Forwarded unchanged to ``client.get``.
+
+    Raises:
+        Whatever ``raise_for_status()`` raises for an error response, plus
+        any error from get_client() or from decoding the body as JSON.
+    """
+    client = await get_client()
+    response = await client.get(path, **kwargs)
+    response.raise_for_status()
+    return response.json()
+
+
+async def api_post(path: str, json: dict, **kwargs: Any) -> dict:
+    """Send a POST request with a JSON body and return the decoded JSON reply.
+
+    Args:
+        path: Request path handed to the shared client, which is created
+            with BACKEND_API_URL as its base URL.
+        json: Payload passed as the ``json=`` argument of ``client.post``.
+        **kwargs: Forwarded unchanged to ``client.post``.
+
+    Raises:
+        Whatever ``raise_for_status()`` raises for an error response, plus
+        any error from get_client() or from decoding the body as JSON.
+    """
+    client = await get_client()
+    response = await client.post(path, json=json, **kwargs)
+    response.raise_for_status()
+    return response.json()
+
+
+async def close_client() -> None:
+    """Close the persistent client. Call on server shutdown.
+
+    Does nothing when no client exists or it is already closed. After a
+    close, the module-level reference is reset to None so the next
+    get_client() call builds a new client.
+    """
+    global _client
+    if _client and not _client.is_closed:
+        await _client.aclose()
+        _client = None
diff --git a/mcp-server/config.py b/mcp-server/config.py
@@ -1,19 +1,16 @@
-"""
-API Configuration - Single Source of Truth for API Versioning
+"""MCP server configuration from environment variables."""
+import os
 
-Change API_VERSION here to update all API calls across the MCP server.
-Example: "v1" -> "v2" will change /api/v1/* to /api/v2/*
-"""
+from dotenv import load_dotenv
 
-# =============================================================================
-# API VERSION CONFIGURATION
-# =============================================================================
+load_dotenv()
 
 API_VERSION = "v1"
+API_PREFIX = f"/api/{API_VERSION}"
 
-# =============================================================================
-# DERIVED PREFIXES (auto-calculated from version)
-# =============================================================================
+# Strip trailing slashes so "http://host/" does not become "http://host//api/v1".
+BACKEND_BASE_URL = os.getenv("BACKEND_API_URL", "http://localhost:8000").rstrip("/")
+BACKEND_API_URL = f"{BACKEND_BASE_URL}{API_PREFIX}"
+# No fallback key: an unset API_KEY stays an empty string.
+API_KEY = os.getenv("API_KEY", "")
 
-# Current versioned API prefix: /api/v1
-API_PREFIX = f"/api/{API_VERSION}"
+SERVER_NAME = "codeintel-mcp"
+SERVER_VERSION = "0.3.0"
diff --git a/mcp-server/formatters.py b/mcp-server/formatters.py
@@ -0,0 +1,175 @@
+"""Response formatters that convert API responses to markdown.
+
+Each formatter is a pure function: takes API response dict, returns markdown string.
+This makes them independently testable without any HTTP calls.
+"""
+
+
+def format_search_results(result: dict) -> str:
+    """Format semantic search results as markdown.
+
+    Args:
+        result: Search response. Reads "count", "cached" and "results"; each
+            entry of "results" may carry "score", "name", "file_path",
+            "type", "language", "line_start", "line_end" and "code".
+
+    Returns:
+        A markdown document with one numbered section per hit, or a short
+        "No results found." notice when "results" is missing or empty.
+    """
+    count = result.get("count", 0)
+    cached = " (cached)" if result.get("cached") else ""
+    header = f"# Code Search Results\n\nFound {count} results{cached}\n\n"
+
+    hits = result.get("results")
+    if not hits:
+        return header + "No results found.\n"
+
+    sections = [header]
+    for idx, res in enumerate(hits, 1):
+        # A key that is present but null comes back from .get() as None even
+        # with a default, so fall back to 0 instead of failing on None * 100.
+        score = (res.get("score") or 0) * 100
+        name = res.get("name", "unknown")
+        file_path = res.get("file_path", "unknown")
+        file_type = res.get("type", "unknown")
+        lang = res.get("language", "unknown")
+        line_start = res.get("line_start", 0)
+        line_end = res.get("line_end", 0)
+        code = res.get("code", "")
+
+        sections.append(
+            f"## {idx}. {name} ({score:.0f}% match)\n"
+            f"**File:** `{file_path}`\n"
+            f"**Type:** {file_type} | **Language:** {lang}\n"
+            f"**Lines:** {line_start}-{line_end}\n\n"
+            f"```{lang}\n{code}\n```\n\n"
+        )
+
+    # Join once instead of growing the string with += for every hit.
+    return "".join(sections)
+
+
+def format_repositories(result: dict) -> str:
+    """Render the repository listing as a markdown document.
+
+    One "###" section is produced per entry of result["repositories"]; a
+    short notice is returned instead when that list is missing or empty.
+    """
+    title = "# Indexed Repositories\n\n"
+    repos = result.get("repositories")
+    if not repos:
+        return title + "No repositories indexed yet.\n"
+
+    sections = [title]
+    for entry in repos:
+        # NOTE(review): the "Functions" label is fed from "file_count" --
+        # confirm against the backend which quantity that field holds.
+        sections.append(
+            f"### {entry.get('name', 'unknown')}\n"
+            f"- **ID:** `{entry.get('id')}`\n"
+            f"- **Status:** {entry.get('status', 'unknown')}\n"
+            f"- **Functions:** {entry.get('file_count', 0):,}\n"
+            f"- **Branch:** {entry.get('branch', 'main')}\n\n"
+        )
+    return "".join(sections)
+
+
+def format_dependency_graph(result: dict) -> str:
+    """Format dependency graph analysis as markdown.
+
+    Args:
+        result: Graph response. Reads "nodes" (dicts with "id" and
+            "imports"), "edges" (dicts with "target") and "metrics"
+            ("total_edges", "avg_dependencies").
+
+    Returns:
+        A markdown summary followed by up to five most-depended-on files
+        and up to five files that import at least three others.
+    """
+    from collections import Counter
+
+    nodes = result.get("nodes", [])
+    edges = result.get("edges", [])
+    metrics = result.get("metrics", {})
+
+    output = "# Dependency Graph Analysis\n\n"
+    output += f"**Total Files:** {len(nodes)}\n"
+    output += f"**Total Dependencies:** {metrics.get('total_edges', len(edges))}\n"
+    output += f"**Avg Dependencies per File:** {metrics.get('avg_dependencies', 0):.1f}\n\n"
+
+    # Count dependents per target file. Edges without a target are skipped
+    # so they do not show up as a file with an empty name.
+    dependent_count = Counter(
+        edge["target"] for edge in edges if edge.get("target")
+    )
+
+    if dependent_count:
+        output += "## Most Critical Files (High Impact)\n\n"
+        for file, count in dependent_count.most_common(5):
+            output += f"- `{file}` - **{count} dependents**\n"
+        output += "\n"
+
+    high_import = [n for n in nodes if n.get("imports", 0) >= 3]
+    if high_import:
+        output += "## Files with Most Imports\n\n"
+        busiest = sorted(
+            high_import, key=lambda x: x.get("imports", 0), reverse=True
+        )[:5]
+        for node in busiest:
+            # "imports" is guaranteed by the filter above; "id" is not.
+            output += f"- `{node.get('id', 'unknown')}` - imports {node['imports']} files\n"
+
+    return output
+
+
+def format_code_style(result: dict) -> str:
+    """Render a code style report as markdown.
+
+    Emits the summary counters from result["summary"], then optional
+    sections for function naming conventions and the ten most common
+    imports when the response contains them.
+    """
+    stats = result.get("summary", {})
+    lines = [
+        "# Code Style Analysis\n\n",
+        f"**Files Analyzed:** {stats.get('total_files_analyzed', 0)}\n",
+        f"**Functions:** {stats.get('total_functions', 0)}\n",
+        f"**Async Adoption:** {stats.get('async_adoption', '0%')}\n",
+        f"**Type Hints:** {stats.get('type_hints_usage', '0%')}\n\n",
+    ]
+
+    function_naming = result.get("naming_conventions", {}).get("functions")
+    if function_naming:
+        lines.append("## Function Naming Conventions\n\n")
+        lines.extend(
+            f"- **{style}:** {details['percentage']} ({details['count']} functions)\n"
+            for style, details in function_naming.items()
+        )
+        lines.append("\n")
+
+    common_imports = result.get("top_imports")
+    if common_imports:
+        lines.append("## Most Common Imports\n\n")
+        lines.extend(
+            f"- `{entry['module']}` (used {entry['count']}x)\n"
+            for entry in common_imports[:10]
+        )
+
+    return "".join(lines)
+
+
+def _format_file_list(paths: list, limit: int) -> str:
+    """Render up to ``limit`` paths as markdown bullets, noting how many were cut."""
+    listing = "".join(f"- `{path}`\n" for path in paths[:limit])
+    hidden = len(paths) - limit
+    if hidden > 0:
+        # The section header shows the full count, so say the list is partial.
+        listing += f"- ... and {hidden} more\n"
+    return listing
+
+
+def format_impact_analysis(result: dict) -> str:
+    """Format file impact analysis as markdown.
+
+    Args:
+        result: Impact response. Reads "file", "risk_level",
+            "impact_summary", "direct_dependencies", "all_dependents" and
+            "test_files".
+
+    Returns:
+        A markdown report listing at most 10 dependencies and 15
+        dependents (with a "... and N more" line when truncated), plus a
+        "Related Tests" section when test files are present.
+    """
+    # A key that is present but null comes back from .get() as None even
+    # with a default, which would break .upper().
+    risk = str(result.get("risk_level") or "unknown").upper()
+
+    output = f"# Impact Analysis: {result.get('file', 'unknown')}\n\n"
+    output += f"**Risk Level:** {risk}\n"
+    output += f"**Impact Summary:** {result.get('impact_summary', '')}\n\n"
+
+    deps = result.get("direct_dependencies") or []
+    output += f"## Dependencies ({len(deps)})\n"
+    output += "Files this file imports:\n"
+    output += _format_file_list(deps, 10)
+    output += "\n"
+
+    dependents = result.get("all_dependents") or []
+    output += f"## Dependents ({len(dependents)})\n"
+    output += "Files that would be affected by changes:\n"
+    output += _format_file_list(dependents, 15)
+
+    test_files = result.get("test_files")
+    if test_files:
+        output += "\n## Related Tests\n"
+        output += "".join(f"- `{test}`\n" for test in test_files)
+
+    return output
+
+
+def format_repository_insights(result: dict) -> str:
+    """Render repository-level insights as markdown.
+
+    Emits the headline counters and, when
+    result["graph_metrics"]["most_critical_files"] is non-empty, a list of
+    up to five of those files with their dependent counts.
+    """
+    parts = [
+        f"# Repository Insights: {result.get('name', 'unknown')}\n\n",
+        f"**Status:** {result.get('status', 'unknown')}\n",
+        f"**Functions Indexed:** {result.get('functions_indexed', 0):,}\n",
+        f"**Total Files:** {result.get('total_files', 0)}\n",
+        f"**Total Dependencies:** {result.get('total_dependencies', 0)}\n\n",
+    ]
+
+    hot_files = result.get("graph_metrics", {}).get("most_critical_files")
+    if hot_files:
+        parts.append("## Most Critical Files\n")
+        parts.extend(
+            f"- `{entry['file']}` ({entry['dependents']} dependents)\n"
+            for entry in hot_files[:5]
+        )
+
+    return "".join(parts)
+
+
+def format_codebase_dna(result: dict) -> str:
+    """Wrap the codebase DNA markdown in a header and usage instructions.
+
+    The text under result["dna"] is inserted verbatim between a title
+    (tagged "(cached)" when result["cached"] is truthy) and a fixed list
+    of instructions.
+    """
+    suffix = " (cached)" if result.get("cached") else ""
+    body = result.get("dna", "")
+    guidance = (
+        "**Instructions:** When generating code for this codebase:\n"
+        "1. Follow the auth patterns shown above\n"
+        "2. Use the service layer structure (singletons in dependencies.py)\n"
+        "3. Match the database conventions (ID types, timestamps, RLS)\n"
+        "4. Use the logging patterns shown\n"
+        "5. Follow the naming conventions\n"
+    )
+    return (
+        f"# Codebase DNA{suffix}\n\n"
+        "**Use this information to write code that matches existing patterns.**\n\n"
+        + body
+        + "\n---\n"
+        + guidance
+    )
diff --git a/mcp-server/handlers.py b/mcp-server/handlers.py
@@ -0,0 +1,104 @@
+"""Tool handler dispatch.
+
+Maps tool names to their API calls and response formatters.
+Each handler follows the same pattern: call API, format response.
+Error handling is centralized in call_tool() so individual handlers stay clean.
+"""
+from typing import Any
+
+import httpx
+import mcp.types as types
+
+from api_client import api_get, api_post
+from formatters import (
+    format_codebase_dna,
+    format_code_style,
+    format_dependency_graph,
+    format_impact_analysis,
+    format_repositories,
+    format_repository_insights,
+    format_search_results,
+)
+
+
+async def _handle_search(args: dict[str, Any]) -> str:
+    """POST the tool arguments unchanged to /search and format the hits as markdown."""
+    result = await api_post("/search", json=args)
+    return format_search_results(result)
+
+
+async def _handle_list_repositories(args: dict[str, Any]) -> str:
+    """GET /repos and format the listing as markdown.
+
+    ``args`` is not read; the parameter is there because call_tool()
+    invokes every handler with the argument dict.
+    """
+    result = await api_get("/repos")
+    return format_repositories(result)
+
+
+async def _handle_dependency_graph(args: dict[str, Any]) -> str:
+    """GET /repos/{repo_id}/dependencies and format the graph as markdown.
+
+    Raises KeyError when args has no "repo_id".
+    """
+    result = await api_get(f"/repos/{args['repo_id']}/dependencies")
+    return format_dependency_graph(result)
+
+
+async def _handle_code_style(args: dict[str, Any]) -> str:
+    """GET /repos/{repo_id}/style-analysis and format the report as markdown.
+
+    Raises KeyError when args has no "repo_id".
+    """
+    result = await api_get(f"/repos/{args['repo_id']}/style-analysis")
+    return format_code_style(result)
+
+
+async def _handle_impact(args: dict[str, Any]) -> str:
+    """POST to /repos/{repo_id}/impact and format the impact report as markdown.
+
+    Reads args["repo_id"] and args["file_path"]; a KeyError is raised when
+    either is absent. repo_id is sent both in the path and in the JSON body.
+    """
+    result = await api_post(
+        f"/repos/{args['repo_id']}/impact",
+        json={"repo_id": args["repo_id"], "file_path": args["file_path"]},
+    )
+    return format_impact_analysis(result)
+
+
+async def _handle_insights(args: dict[str, Any]) -> str:
+    """GET /repos/{repo_id}/insights and format the insights as markdown.
+
+    Raises KeyError when args has no "repo_id".
+    """
+    result = await api_get(f"/repos/{args['repo_id']}/insights")
+    return format_repository_insights(result)
+
+
+async def _handle_dna(args: dict[str, Any]) -> str:
+    """GET /repos/{repo_id}/dna with format=markdown and wrap the result.
+
+    Raises KeyError when args has no "repo_id".
+    """
+    result = await api_get(f"/repos/{args['repo_id']}/dna?format=markdown")
+    return format_codebase_dna(result)
+
+
+# Tool name -> handler mapping.
+# call_tool() looks the tool name up here and awaits the handler with the
+# argument dict; each handler returns the markdown text to send back.
+_HANDLERS: dict[str, Any] = {
+    "search_code": _handle_search,
+    "list_repositories": _handle_list_repositories,
+    "get_dependency_graph": _handle_dependency_graph,
+    "analyze_code_style": _handle_code_style,
+    "analyze_impact": _handle_impact,
+    "get_repository_insights": _handle_insights,
+    "get_codebase_dna": _handle_dna,
+}
+
+
+def _safe_error_message(tool_name: str, args: dict[str, Any], error: Exception) -> str:
+    """Build error message with context but without leaking internal details.
+
+    Args:
+        tool_name: Name of the tool that failed.
+        args: Tool arguments; only "repo_id" is read, for context.
+        error: The exception raised while handling the tool call.
+
+    Returns:
+        A short message naming the tool (and repo where relevant). Only a
+        plain ValueError, such as the missing-API-key error, has its own
+        text passed through.
+    """
+    import json
+
+    repo_id = args.get("repo_id", "unknown")
+    if isinstance(error, json.JSONDecodeError):
+        # JSONDecodeError subclasses ValueError. Handle it before the
+        # ValueError branch so parser text is never echoed to the caller.
+        return f"Backend returned an invalid response for tool '{tool_name}' (repo: {repo_id})"
+    if isinstance(error, httpx.HTTPStatusError):
+        status = error.response.status_code
+        return f"Backend returned {status} for tool '{tool_name}' (repo: {repo_id})"
+    if isinstance(error, httpx.TimeoutException):
+        return f"Request timed out for tool '{tool_name}' (repo: {repo_id})"
+    if isinstance(error, httpx.ConnectError):
+        return f"Cannot connect to backend for tool '{tool_name}'. Is the server running?"
+    if isinstance(error, ValueError):
+        return str(error)
+    return f"Unexpected error in tool '{tool_name}' (repo: {repo_id})"
+
+
+async def call_tool(
+    name: str, arguments: dict[str, Any] | None
+) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
+    """Dispatch a tool call to the appropriate handler.
+
+    Args:
+        name: Tool name; must be a key of _HANDLERS.
+        arguments: Tool arguments, or None, which is treated as an empty dict.
+
+    Returns:
+        A single-item list holding the handler's markdown, an
+        "Unknown tool" notice, or a sanitized error message. Handler
+        exceptions are never propagated to the caller.
+    """
+    import logging
+
+    args = arguments or {}
+
+    handler = _HANDLERS.get(name)
+    if handler is None:
+        return [types.TextContent(type="text", text=f"Unknown tool: {name}")]
+
+    try:
+        text = await handler(args)
+        return [types.TextContent(type="text", text=text)]
+    except Exception as e:
+        # The caller only gets the sanitized text below, so record the
+        # original exception and traceback in the server log first.
+        logging.getLogger(__name__).exception("Tool %r failed", name)
+        msg = _safe_error_message(name, args, e)
+        return [types.TextContent(type="text", text=msg)]
diff --git a/mcp-server/server.py b/mcp-server/server.py
diff --git a/mcp-server/tools.py b/mcp-server/tools.py