From fc281f7ebcd1ceac0f8950f2fc56d65d46d7c480 Mon Sep 17 00:00:00 2001 From: Devanshu Rajesh Chicholikar Date: Sat, 7 Mar 2026 15:49:26 -0500 Subject: [PATCH 1/3] feat: MCP write tools -- add_repository, index, directories, delete (OPE-165) 4 new MCP tools so we can manage repos without leaving the conversation: - add_repository: POST /repos (name, git_url, branch) - get_repo_directories: GET /repos/{repo_id}/directories - index_repository: POST /repos/{repo_id}/index (with optional include_paths) - delete_repository: DELETE /repos/{repo_id} Also adds api_delete to the HTTP client. All 45 tests pass, lint clean. --- mcp-server/api_client.py | 8 +++ mcp-server/handlers.py | 90 ++++++++++++++++++++++++++++++++- mcp-server/tests/test_tools.py | 4 ++ mcp-server/tools.py | 92 ++++++++++++++++++++++++++++++++++ 4 files changed, 193 insertions(+), 1 deletion(-) diff --git a/mcp-server/api_client.py b/mcp-server/api_client.py index a2fcfc3..31b77f6 100644 --- a/mcp-server/api_client.py +++ b/mcp-server/api_client.py @@ -58,6 +58,14 @@ async def api_post(path: str, json: dict, **kwargs: Any) -> dict: return response.json() +async def api_delete(path: str, **kwargs: Any) -> dict: + """Make a DELETE request to the backend API.""" + client = await get_client() + response = await client.delete(path, **kwargs) + response.raise_for_status() + return response.json() + + async def close_client() -> None: """Close the persistent client. Call on server shutdown.""" global _client diff --git a/mcp-server/handlers.py b/mcp-server/handlers.py index 5154bb8..998c9a8 100644 --- a/mcp-server/handlers.py +++ b/mcp-server/handlers.py @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) -from api_client import api_get, api_post +from api_client import api_get, api_post, api_delete from formatters import ( format_codebase_dna, format_code_style, @@ -79,6 +79,90 @@ async def _handle_dna(args: dict[str, Any]) -> str: return format_codebase_dna(result) +# --- Write tool handlers --- + +async def _handle_add_repository(args: dict[str, Any]) -> str: + payload = { + "name": args["name"], + "git_url": args["git_url"], + "branch": args.get("branch", "main"), + } + result = await api_post("/repos", json=payload) + repo_id = result.get("id", "unknown") + name = result.get("name", args["name"]) + status = result.get("status", "added") + needs_selection = result.get("needs_directory_selection", False) + lines = [ + f"Repository '{name}' added successfully.", + f"ID: `{repo_id}`", + f"Status: {status}", + ] + if needs_selection: + lines.append( + "\nThis repo may benefit from subset indexing. " + "Use get_repo_directories to see available directories, " + "then index_repository with include_paths." + ) + else: + lines.append( + f"\nReady to index. Run: index_repository(repo_id='{repo_id}')" + ) + return "\n".join(lines) + + +async def _handle_get_repo_directories(args: dict[str, Any]) -> str: + result = await api_get(f"/repos/{args['repo_id']}/directories") + dirs = result.get("directories", []) + if not dirs: + return "No directories found (repo may be flat or not yet cloned)." + lines = ["# Repository Directories\n"] + for d in dirs: + name = d.get("name", d.get("path", "unknown")) + count = d.get("file_count", 0) + lines.append(f"- **{name}/** -- {count} code files") + lines.append( + "\nTo index specific directories, use index_repository " + "with include_paths=['dir1', 'dir2']." + ) + return "\n".join(lines) + + +async def _handle_index_repository(args: dict[str, Any]) -> str: + repo_id = args["repo_id"] + include_paths = args.get("include_paths") + # Build query params + params = {} + if include_paths: + params["include_paths"] = include_paths + result = await api_post( + f"/repos/{repo_id}/index", + json=params if params else {}, + ) + status = result.get("status", "unknown") + fn_count = result.get("function_count", result.get("functions_indexed", 0)) + file_count = result.get("file_count", result.get("files_indexed", 0)) + lines = [ + "Indexing complete.", + f"Status: {status}", + f"Files indexed: {file_count}", + f"Functions extracted: {fn_count}", + ] + if include_paths: + lines.append(f"Subset: {', '.join(include_paths)}") + lines.append( + f"\nYou can now use search_code(repo_id='{repo_id}') " + "to search this codebase." + ) + return "\n".join(lines) + + +async def _handle_delete_repository(args: dict[str, Any]) -> str: + repo_id = args["repo_id"] + result = await api_delete(f"/repos/{repo_id}") + msg = result.get("message", "Repository deleted.") + return f"{msg}\nRepo ID `{repo_id}` has been removed." + + # Tool name -> handler mapping _HANDLERS: dict[str, Any] = { "search_code": _handle_search, @@ -88,6 +172,10 @@ async def _handle_dna(args: dict[str, Any]) -> str: "analyze_impact": _handle_impact, "get_repository_insights": _handle_insights, "get_codebase_dna": _handle_dna, + "add_repository": _handle_add_repository, + "get_repo_directories": _handle_get_repo_directories, + "index_repository": _handle_index_repository, + "delete_repository": _handle_delete_repository, } diff --git a/mcp-server/tests/test_tools.py b/mcp-server/tests/test_tools.py index 3be62c1..7e4ed4f 100644 --- a/mcp-server/tests/test_tools.py +++ b/mcp-server/tests/test_tools.py @@ -15,6 +15,10 @@ "analyze_impact", "get_repository_insights", "get_codebase_dna", + "add_repository", + "get_repo_directories", + "index_repository", + "delete_repository", } diff --git a/mcp-server/tools.py b/mcp-server/tools.py index 20091f1..fb13e87 100644 --- a/mcp-server/tools.py +++ b/mcp-server/tools.py @@ -145,4 +145,96 @@ def get_tool_schemas() -> list[types.Tool]: "required": ["repo_id"], }, ), + # --- Write tools --- + types.Tool( + name="add_repository", + description=( + "Add a new repository for indexing. Clones the repo and analyzes " + "its structure. After adding, use get_repo_directories to see " + "available directories, then index_repository to start indexing." + ), + inputSchema={ + "type": "object", + "properties": { + "git_url": { + "type": "string", + "description": ( + "Git clone URL. " + "Example: https://github.com/owner/repo.git" + ), + }, + "name": { + "type": "string", + "description": "Short name for the repository", + }, + "branch": { + "type": "string", + "description": "Branch to clone (default: main)", + "default": "main", + }, + }, + "required": ["git_url", "name"], + }, + ), + types.Tool( + name="get_repo_directories", + description=( + "List top-level directories in a cloned repository with file " + "counts. Use this after add_repository to decide which " + "directories to index (useful for monorepos)." + ), + inputSchema={ + "type": "object", + "properties": { + "repo_id": { + "type": "string", + "description": "Repository identifier", + } + }, + "required": ["repo_id"], + }, + ), + types.Tool( + name="index_repository", + description=( + "Trigger indexing for a repository. Extracts functions, builds " + "embeddings, and enables semantic search. For monorepos, pass " + "include_paths to index only specific directories." + ), + inputSchema={ + "type": "object", + "properties": { + "repo_id": { + "type": "string", + "description": "Repository identifier", + }, + "include_paths": { + "type": "array", + "items": {"type": "string"}, + "description": ( + "Optional list of directories to index " + "(e.g. ['src', 'lib']). Omit to index everything." + ), + }, + }, + "required": ["repo_id"], + }, + ), + types.Tool( + name="delete_repository", + description=( + "Delete a repository and all its indexed data. This is " + "irreversible -- the repo must be re-added and re-indexed." + ), + inputSchema={ + "type": "object", + "properties": { + "repo_id": { + "type": "string", + "description": "Repository identifier", + } + }, + "required": ["repo_id"], + }, + ), ] From 2e48561f989325cc4e42b3afdd61cdc3f440c6a8 Mon Sep 17 00:00:00 2001 From: Devanshu Rajesh Chicholikar Date: Sat, 7 Mar 2026 15:56:52 -0500 Subject: [PATCH 2/3] fix: review findings -- correct response keys, async index endpoint, defensive delete - handlers.py: add_repository reads result.get('repo_id') not 'id' (backend returns 'repo_id') - handlers.py: index_repository uses /index/async when include_paths set (sync endpoint doesn't accept include_paths), reads 'functions' not 'function_count' - api_client.py: api_delete handles 204/empty body gracefully - test_tools.py: repo_tools list includes new write tools --- mcp-server/api_client.py | 2 ++ mcp-server/handlers.py | 32 +++++++++++++++++++------------- mcp-server/tests/test_tools.py | 1 + 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/mcp-server/api_client.py b/mcp-server/api_client.py index 31b77f6..95b74da 100644 --- a/mcp-server/api_client.py +++ b/mcp-server/api_client.py @@ -63,6 +63,8 @@ async def api_delete(path: str, **kwargs: Any) -> dict: client = await get_client() response = await client.delete(path, **kwargs) response.raise_for_status() + if response.status_code == 204 or not response.content: + return {} return response.json() diff --git a/mcp-server/handlers.py b/mcp-server/handlers.py index 998c9a8..e09fc96 100644 --- a/mcp-server/handlers.py +++ b/mcp-server/handlers.py @@ -88,7 +88,7 @@ async def _handle_add_repository(args: dict[str, Any]) -> str: "branch": args.get("branch", "main"), } result = await api_post("/repos", json=payload) - repo_id = result.get("id", "unknown") + repo_id = result.get("repo_id", "unknown") name = result.get("name", args["name"]) status = result.get("status", "added") needs_selection = result.get("needs_directory_selection", False) @@ -130,25 +130,31 @@ async def _handle_get_repo_directories(args: dict[str, Any]) -> str: async def _handle_index_repository(args: dict[str, Any]) -> str: repo_id = args["repo_id"] include_paths = args.get("include_paths") - # Build query params - params = {} + if include_paths: - params["include_paths"] = include_paths - result = await api_post( - f"/repos/{repo_id}/index", - json=params if params else {}, - ) + # Async endpoint supports include_paths for monorepo subset indexing + result = await api_post( + f"/repos/{repo_id}/index/async", + json={"include_paths": include_paths}, + ) + status = result.get("status", "accepted") + return ( + f"Async indexing started for subset: {', '.join(include_paths)}\n" + f"Status: {status}\n" + f"Repo ID: `{repo_id}`\n" + "\nIndexing runs in the background. Use list_repositories " + "to check when status changes to 'indexed'." + ) + + # Sync endpoint for full-repo indexing + result = await api_post(f"/repos/{repo_id}/index", json={}) status = result.get("status", "unknown") - fn_count = result.get("function_count", result.get("functions_indexed", 0)) - file_count = result.get("file_count", result.get("files_indexed", 0)) + fn_count = result.get("functions", 0) lines = [ "Indexing complete.", f"Status: {status}", - f"Files indexed: {file_count}", f"Functions extracted: {fn_count}", ] - if include_paths: - lines.append(f"Subset: {', '.join(include_paths)}") lines.append( f"\nYou can now use search_code(repo_id='{repo_id}') " "to search this codebase." diff --git a/mcp-server/tests/test_tools.py b/mcp-server/tests/test_tools.py index 7e4ed4f..bc239fb 100644 --- a/mcp-server/tests/test_tools.py +++ b/mcp-server/tests/test_tools.py @@ -57,6 +57,7 @@ def test_repo_tools_require_repo_id(self): repo_tools = [ "get_dependency_graph", "analyze_code_style", "analyze_impact", "get_repository_insights", "get_codebase_dna", + "get_repo_directories", "index_repository", "delete_repository", ] for name in repo_tools: required = schemas[name].inputSchema.get("required", []) From 429a7ba7a23c3d641783459876b0c1a28ee7dcc9 Mon Sep 17 00:00:00 2001 From: Devanshu Rajesh Chicholikar Date: Sat, 7 Mar 2026 16:04:52 -0500 Subject: [PATCH 3/3] fix: reject empty include_paths instead of silently indexing full repo [] is falsy in Python, so 'if include_paths:' treated an explicit empty list the same as None (omitted). Now returns a clear error for empty list, preserving the distinction between 'index everything' (omit include_paths) and 'index nothing' (empty list). --- mcp-server/handlers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mcp-server/handlers.py b/mcp-server/handlers.py index e09fc96..da4142b 100644 --- a/mcp-server/handlers.py +++ b/mcp-server/handlers.py @@ -131,6 +131,9 @@ async def _handle_index_repository(args: dict[str, Any]) -> str: repo_id = args["repo_id"] include_paths = args.get("include_paths") + if include_paths is not None and len(include_paths) == 0: + return "Error: include_paths cannot be empty. Omit it to index the full repo, or provide directory names." + if include_paths: # Async endpoint supports include_paths for monorepo subset indexing result = await api_post(