Skip to content

Commit d6563d1

Browse files
authored
Merge pull request #263 from OpenCodeIntel/refactor/mcp-server-restructure
refactor: restructure MCP server, upgrade to v2 search, add 37 tests (OPE-94, OPE-91)
2 parents 1de8893 + d1e3931 commit d6563d1

16 files changed

Lines changed: 1032 additions & 378 deletions

mcp-server/.env.example

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
# Backend API Configuration
2+
API_KEY=your-api-key-here
23
BACKEND_API_URL=http://localhost:8000
3-
API_KEY=dev-secret-key

mcp-server/.gitignore

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Python
2+
__pycache__/
3+
*.pyc
4+
*.pyo
5+
6+
# Virtual environment
7+
venv/
8+
9+
# Environment (secrets)
10+
.env
11+
.env.local

mcp-server/api_client.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
"""Persistent HTTP client for backend API communication.
2+
3+
Uses a module-level client to avoid creating new TCP connections per tool call.
4+
The client is initialized lazily on first use and reused for all subsequent calls.
5+
Concurrent access is serialized via asyncio.Lock to prevent duplicate clients.
6+
"""
7+
import asyncio
8+
from typing import Any, Optional
9+
10+
import httpx
11+
12+
from config import BACKEND_API_URL, API_KEY
13+
14+
15+
# Persistent client reused across all tool calls
16+
_client: Optional[httpx.AsyncClient] = None
17+
# Held by get_client() and close_client() while they read or rebind _client,
# so concurrent tool calls cannot each construct their own client.
_client_lock: asyncio.Lock = asyncio.Lock()
18+
19+
20+
def _get_headers() -> dict[str, str]:
    """Build the default request headers carrying the bearer token.

    Returns:
        A one-entry mapping whose ``Authorization`` value is
        ``Bearer <API_KEY>``.

    Raises:
        ValueError: If ``API_KEY`` is falsy (empty or unset).
    """
    if API_KEY:
        return {"Authorization": f"Bearer {API_KEY}"}
    raise ValueError(
        "No API_KEY configured. Set API_KEY in .env or environment."
    )
30+
31+
32+
async def get_client() -> httpx.AsyncClient:
    """Return the shared ``httpx.AsyncClient``, building it when needed.

    A fresh client is constructed when none exists yet or when the existing
    one reports ``is_closed``. The check-and-create step runs while holding
    ``_client_lock``.

    Raises:
        ValueError: Propagated from ``_get_headers()`` when no API key is set.
    """
    global _client
    async with _client_lock:
        needs_new = _client is None or _client.is_closed
        if needs_new:
            _client = httpx.AsyncClient(
                headers=_get_headers(),
                base_url=BACKEND_API_URL,
                timeout=120.0,
            )
    return _client
43+
44+
45+
async def api_get(path: str, **kwargs: Any) -> dict:
    """Issue a GET against the backend and return the decoded JSON body.

    Args:
        path: Request path handed to the shared client's ``get``.
        **kwargs: Extra keyword arguments forwarded unchanged to ``get``.

    Returns:
        Whatever ``response.json()`` yields for the response.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error response.
    """
    http = await get_client()
    resp = await http.get(path, **kwargs)
    resp.raise_for_status()
    return resp.json()
51+
52+
53+
async def api_post(path: str, json: dict, **kwargs: Any) -> dict:
    """Issue a POST with a JSON body and return the decoded JSON response.

    Args:
        path: Request path handed to the shared client's ``post``.
        json: Payload passed as the ``json=`` argument of ``post``.
        **kwargs: Extra keyword arguments forwarded unchanged to ``post``.

    Returns:
        Whatever ``response.json()`` yields for the response.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error response.
    """
    http = await get_client()
    resp = await http.post(path, json=json, **kwargs)
    resp.raise_for_status()
    return resp.json()
59+
60+
61+
async def close_client() -> None:
    """Dispose of the shared client; intended for server shutdown.

    The module-level reference is detached under ``_client_lock`` and reset
    to ``None``; the detached client is then closed if it is still open.
    """
    global _client
    async with _client_lock:
        # Swap the global out first so no caller can pick up the dying client.
        stale, _client = _client, None
    if stale and not stale.is_closed:
        await stale.aclose()

mcp-server/config.py

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,16 @@
1-
"""
2-
API Configuration - Single Source of Truth for API Versioning
1+
"""MCP server configuration from environment variables."""
2+
import os
33

4-
Change API_VERSION here to update all API calls across the MCP server.
5-
Example: "v1" -> "v2" will change /api/v1/* to /api/v2/*
6-
"""
4+
from dotenv import load_dotenv
75

8-
# =============================================================================
9-
# API VERSION CONFIGURATION
10-
# =============================================================================
6+
load_dotenv()
117

128
API_VERSION = "v1"
9+
API_PREFIX = f"/api/{API_VERSION}"
1310

14-
# =============================================================================
15-
# DERIVED PREFIXES (auto-calculated from version)
16-
# =============================================================================
11+
# Host root of the backend, taken from the BACKEND_API_URL environment variable.
# Trailing slashes are stripped so a value such as "http://host:8000/" does not
# produce "http://host:8000//api/v1" once the prefix is appended.
BACKEND_BASE_URL = os.getenv("BACKEND_API_URL", "http://localhost:8000").rstrip("/")
# Fully versioned base URL (host root + API_PREFIX).
BACKEND_API_URL = f"{BACKEND_BASE_URL}{API_PREFIX}"
13+
API_KEY = os.getenv("API_KEY", "")
1714

18-
# Current versioned API prefix: /api/v1
19-
API_PREFIX = f"/api/{API_VERSION}"
15+
SERVER_NAME = "codeintel-mcp"
16+
SERVER_VERSION = "0.4.0"

mcp-server/formatters.py

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
"""Response formatters that convert API responses to markdown.
2+
3+
Each formatter is a pure function: takes API response dict, returns markdown string.
4+
This makes them independently testable without any HTTP calls.
5+
"""
6+
7+
8+
def format_search_results(result: dict) -> str:
    """Format semantic search results as markdown.

    Supports both v1 (count/results) and v2 (total/results) response shapes
    so the formatter stays resilient across API versions.

    Args:
        result: Search response. Keys read: ``total`` (or ``count``),
            ``cached``, ``search_version`` and ``results``. Each entry of
            ``results`` may carry ``score``, ``name``, ``file_path``,
            ``language``, ``line_start``, ``line_end``, ``code``,
            ``qualified_name``, ``signature`` and ``match_reason``.

    Returns:
        A markdown document with one section per result, or a short
        "No results found." document when ``results`` is empty or missing.
    """
    # Only fall back to the v1 "count" key when "total" is really absent;
    # a plain `or` would discard a legitimate total of 0.
    total = result.get("total")
    if total is None:
        total = result.get("count") or 0
    cached = " (cached)" if result.get("cached") else ""
    version = result.get("search_version", "v1")
    parts = [f"# Code Search Results ({version})\n\nFound {total} results{cached}\n\n"]

    results = result.get("results")
    if not results:
        parts.append("No results found.\n")
        return "".join(parts)

    for idx, res in enumerate(results, 1):
        try:
            score = float(res.get("score")) * 100
        except (TypeError, ValueError):
            # Missing or non-numeric score is rendered as 0%.
            score = 0
        # `or` (rather than a .get default) also covers explicit nulls.
        name = res.get("name") or "unknown"
        file_path = res.get("file_path") or "unknown"
        lang = res.get("language") or "unknown"
        line_start = res.get("line_start") or 0
        line_end = res.get("line_end") or 0
        code = res.get("code") or ""

        parts.append(f"## {idx}. {name} ({score:.0f}% match)\n")
        parts.append(f"**File:** `{file_path}`\n")

        # v2 adds qualified_name and signature
        qualified = res.get("qualified_name")
        if qualified and qualified != name:
            parts.append(f"**Qualified:** `{qualified}`\n")
        signature = res.get("signature")
        if signature:
            parts.append(f"**Signature:** `{signature}`\n")

        parts.append(f"**Language:** {lang} | **Lines:** {line_start}-{line_end}\n")

        reason = res.get("match_reason")
        if reason:
            parts.append(f"**Why:** {reason}\n")

        # Grow the fence until it no longer occurs in the snippet, so code
        # that itself contains ``` cannot close the block early.
        fence = "```"
        while fence in code:
            fence += "`"
        parts.append(f"\n{fence}{lang}\n{code}\n{fence}\n\n")

    return "".join(parts)
55+
56+
57+
def format_repositories(result: dict) -> str:
    """Format repository listing as markdown.

    Args:
        result: Listing response; ``repositories`` is read as a list of
            dicts with optional ``name``, ``id``, ``status``, ``file_count``
            and ``branch`` keys.

    Returns:
        A markdown document with one ``###`` section per repository, or a
        "No repositories indexed yet." document when the list is empty or
        missing.
    """
    parts = ["# Indexed Repositories\n\n"]

    repos = result.get("repositories")
    if not repos:
        parts.append("No repositories indexed yet.\n")
        return "".join(parts)

    for repo in repos:
        # NOTE(review): the label says "Functions" but the value comes from
        # "file_count" - confirm which quantity the backend stores there.
        count = repo.get("file_count") or 0
        # The thousands-separator spec only works on numbers; a null or
        # string value would otherwise raise while formatting.
        count_text = f"{count:,}" if isinstance(count, (int, float)) else str(count)

        parts.append(f"### {repo.get('name', 'unknown')}\n")
        parts.append(f"- **ID:** `{repo.get('id')}`\n")
        parts.append(f"- **Status:** {repo.get('status', 'unknown')}\n")
        parts.append(f"- **Functions:** {count_text}\n")
        parts.append(f"- **Branch:** {repo.get('branch', 'main')}\n\n")

    return "".join(parts)
72+
73+
74+
def format_dependency_graph(result: dict) -> str:
    """Format dependency graph analysis as markdown.

    Args:
        result: Graph response. ``nodes`` is read as a list of dicts with
            ``id`` and ``imports``; ``edges`` as a list of dicts with
            ``target``; ``metrics`` as a dict with optional ``total_edges``
            and ``avg_dependencies``.

    Returns:
        A markdown summary: totals, up to five files with the most incoming
        edges, and up to five files whose ``imports`` value is at least 3.
    """
    # `or` also covers keys that are present but null.
    nodes = result.get("nodes") or []
    edges = result.get("edges") or []
    metrics = result.get("metrics") or {}

    total_edges = metrics.get("total_edges")
    if total_edges is None:
        total_edges = len(edges)
    try:
        avg_deps = float(metrics.get("avg_dependencies") or 0)
    except (TypeError, ValueError):
        # A non-numeric average must not crash the ".1f" formatting below.
        avg_deps = 0.0

    parts = [
        "# Dependency Graph Analysis\n\n",
        f"**Total Files:** {len(nodes)}\n",
        f"**Total Dependencies:** {total_edges}\n",
        f"**Avg Dependencies per File:** {avg_deps:.1f}\n\n",
    ]

    # Most-imported files (highest number of dependents)
    dependent_count: dict[str, int] = {}
    for edge in edges:
        target = edge.get("target")
        if not target:
            # Skip edges without a target instead of counting them under
            # an empty file name.
            continue
        dependent_count[target] = dependent_count.get(target, 0) + 1

    if dependent_count:
        top_targets = sorted(
            dependent_count.items(), key=lambda item: item[1], reverse=True
        )[:5]
        parts.append("## Most Critical Files (High Impact)\n\n")
        for file, count in top_targets:
            parts.append(f"- `{file}` - **{count} dependents**\n")
        parts.append("\n")

    def _imports(node: dict) -> int:
        # Treat a missing or null import count as zero.
        return node.get("imports") or 0

    high_import = [n for n in nodes if _imports(n) >= 3]
    if high_import:
        parts.append("## Files with Most Imports\n\n")
        for node in sorted(high_import, key=_imports, reverse=True)[:5]:
            parts.append(
                f"- `{node.get('id', '<unknown>')}` - imports {_imports(node)} files\n"
            )

    return "".join(parts)
107+
108+
109+
def format_code_style(result: dict) -> str:
    """Format code style analysis as markdown.

    Args:
        result: Style-analysis response. ``summary`` is read as a dict with
            ``total_files_analyzed``, ``total_functions``, ``async_adoption``
            and ``type_hints_usage``; ``naming_conventions["functions"]`` as
            a mapping of convention name to ``{"percentage", "count"}``;
            ``top_imports`` as a list of ``{"module", "count"}`` dicts.

    Returns:
        A markdown summary, followed by naming-convention and top-import
        sections when that data is present (imports capped at ten entries).
    """
    # `or {}` also covers keys that are present but null, which would
    # otherwise fail on the chained .get() calls.
    summary = result.get("summary") or {}
    parts = [
        "# Code Style Analysis\n\n",
        f"**Files Analyzed:** {summary.get('total_files_analyzed', 0)}\n",
        f"**Functions:** {summary.get('total_functions', 0)}\n",
        f"**Async Adoption:** {summary.get('async_adoption', '0%')}\n",
        f"**Type Hints:** {summary.get('type_hints_usage', '0%')}\n\n",
    ]

    naming = (result.get("naming_conventions") or {}).get("functions")
    if naming:
        parts.append("## Function Naming Conventions\n\n")
        for conv, info in naming.items():
            parts.append(
                f"- **{conv}:** {info.get('percentage', '?')} ({info.get('count', 0)} functions)\n"
            )
        parts.append("\n")

    top_imports = result.get("top_imports")
    if top_imports:
        parts.append("## Most Common Imports\n\n")
        for item in top_imports[:10]:
            parts.append(
                f"- `{item.get('module', '<unknown>')}` (used {item.get('count', 0)}x)\n"
            )

    return "".join(parts)
132+
133+
134+
def format_impact_analysis(result: dict) -> str:
    """Format file impact analysis as markdown.

    Args:
        result: Impact response. Keys read: ``file``, ``risk_level``,
            ``impact_summary``, ``direct_dependencies`` (list),
            ``all_dependents`` (list) and ``test_files`` (list).

    Returns:
        A markdown report. At most 10 dependencies and 15 dependents are
        listed; when a list is cut short, a final "... and N more" bullet
        states how many entries were omitted.
    """
    # str(... or "unknown") keeps .upper() safe when risk_level is null.
    risk = str(result.get("risk_level") or "unknown").upper()
    parts = [
        f"# Impact Analysis: {result.get('file', 'unknown')}\n\n",
        f"**Risk Level:** {risk}\n",
        f"**Impact Summary:** {result.get('impact_summary', '')}\n\n",
    ]

    deps = result.get("direct_dependencies") or []
    parts.append(f"## Dependencies ({len(deps)})\n")
    parts.append("Files this file imports:\n")
    for dep in deps[:10]:
        parts.append(f"- `{dep}`\n")
    if len(deps) > 10:
        # The heading shows the full count, so make the truncation explicit.
        parts.append(f"- ... and {len(deps) - 10} more\n")
    parts.append("\n")

    dependents = result.get("all_dependents") or []
    parts.append(f"## Dependents ({len(dependents)})\n")
    parts.append("Files that would be affected by changes:\n")
    for dep in dependents[:15]:
        parts.append(f"- `{dep}`\n")
    if len(dependents) > 15:
        parts.append(f"- ... and {len(dependents) - 15} more\n")

    test_files = result.get("test_files")
    if test_files:
        parts.append("\n## Related Tests\n")
        for test in test_files:
            parts.append(f"- `{test}`\n")

    return "".join(parts)
160+
161+
162+
def format_repository_insights(result: dict) -> str:
    """Format repository insights as markdown.

    Args:
        result: Insights response. Keys read: ``name``, ``status``,
            ``functions_indexed``, ``total_files``, ``total_dependencies``
            and ``graph_metrics["most_critical_files"]`` (a list of
            ``{"file", "dependents"}`` dicts).

    Returns:
        A markdown summary, followed by up to five critical files when the
        graph metrics provide them.
    """
    indexed = result.get("functions_indexed") or 0
    # The thousands-separator spec only works on numbers; a null or string
    # value would otherwise raise while formatting.
    indexed_text = f"{indexed:,}" if isinstance(indexed, (int, float)) else str(indexed)

    parts = [
        f"# Repository Insights: {result.get('name', 'unknown')}\n\n",
        f"**Status:** {result.get('status', 'unknown')}\n",
        f"**Functions Indexed:** {indexed_text}\n",
        f"**Total Files:** {result.get('total_files', 0)}\n",
        f"**Total Dependencies:** {result.get('total_dependencies', 0)}\n\n",
    ]

    # `or {}` also covers a graph_metrics key that is present but null.
    metrics = result.get("graph_metrics") or {}
    critical = metrics.get("most_critical_files")
    if critical:
        parts.append("## Most Critical Files\n")
        for item in critical[:5]:
            parts.append(
                f"- `{item.get('file', '<unknown>')}` ({item.get('dependents', 0)} dependents)\n"
            )

    return "".join(parts)
178+
179+
180+
def format_codebase_dna(result: dict) -> str:
    """Format codebase DNA extraction as markdown.

    Args:
        result: DNA response. ``dna`` is read as a markdown string (a missing
            or null value is treated as empty) and ``cached`` as a flag that
            adds a "(cached)" suffix to the title.

    Returns:
        The DNA markdown wrapped in a title, a usage hint, a horizontal rule
        and a fixed list of instructions.
    """
    # A null "dna" would make the concatenation below raise TypeError.
    dna_markdown = result.get("dna") or ""
    cached = " (cached)" if result.get("cached") else ""

    parts = [
        f"# Codebase DNA{cached}\n\n",
        "**Use this information to write code that matches existing patterns.**\n\n",
    ]

    body = dna_markdown.rstrip("\n")
    if body:
        # Always end the body with a newline so a blank line precedes the
        # rule; "text\n---" would render the last text line as a heading.
        parts.append(body + "\n")

    parts.extend([
        "\n---\n",
        "**Instructions:** When generating code for this codebase:\n",
        "1. Follow the auth patterns shown above\n",
        "2. Use the service layer structure (singletons in dependencies.py)\n",
        "3. Match the database conventions (ID types, timestamps, RLS)\n",
        "4. Use the logging patterns shown\n",
        "5. Follow the naming conventions\n",
    ])

    return "".join(parts)

0 commit comments

Comments
 (0)