From 84cec0a9d654ea10f6224d4c3ab911fdbd16cea2 Mon Sep 17 00:00:00 2001 From: CocoRoF Date: Tue, 19 May 2026 19:54:35 +0900 Subject: [PATCH] feat(claude_code): APIRequest.mcp_config + auto-disable CLI built-ins (2.0.5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase-I foundation for MCP-wrapped tools on claude_code_cli sessions. Stage 6 with provider claude_code_cli was the lone outlier in the otherwise provider-symmetric surface: every SDK client (anthropic / openai / google / vllm) accepts the canonical APIRequest.tools and passes schemas natively to the LLM. The CLI client dropped them — the LLM saw only the CLI's built-in palette (Bash / Read / Write / Glob / ToolSearch / …) and hallucinated against it whenever the host's intent referenced a Geny custom tool. Symptom: "Tool execution complete: 32 calls, 29 errors" on prod VTuber sessions. This PR ships the executor-side wire so a companion Geny PR can synthesize a per-session MCP config that bridges the host's tool registry to the CLI. ### Added - APIRequest.mcp_config: Optional[Dict[str, Any]] — per-request MCP server configuration. ### Changed - claude_code_argv reads request.mcp_config with precedence over the per-client kwarg. When ANY MCP config is supplied, the argv builder also emits: * --tools "" — disable CLI's built-in tool palette so the LLM cannot hallucinate against tools the host has no executor for. Skipped when caller explicitly passed allow_tools (mixed surface support). * --strict-mcp-config — ignore user-level / project-level MCP config sources so the per-session bridge is the sole surface. - Legacy callers without any MCP config keep today's behaviour exactly (no flags, CLI built-ins available). ### Stage interface preservation The Stage 6 → Stage 10 → Stage 16 interface is preserved. 
When the CLI uses MCP to call a host tool, the call is dispatched inside the CLI's agentic loop (via the bridge → host HTTP endpoint) and the final APIResponse carries only the assistant message — no tool_use blocks for Stage 10 to dispatch. Stage 10 sees no tool_use → naturally no-ops. Stage 16 sees no pending state → naturally finishes. Memory / persona / persistence stages run identically because the canonical APIResponse shape is the same. Anthropic API path keeps the per-iteration tool-dispatch loop; the CLI path collapses it inside one CLI invocation. Both produce identical canonical outputs. ### Tests - test_argv_request_mcp_config_overrides_kwarg - test_argv_host_mcp_disables_cli_builtins_and_strict - test_argv_host_mcp_with_explicit_allow_tools_keeps_builtins - test_argv_no_mcp_no_tools_flag (legacy back-compat) Full tests/llm_client/ 193/193 pass. --- CHANGELOG.md | 66 +++++++++++++++++++ pyproject.toml | 2 +- src/geny_executor/__init__.py | 2 +- .../llm_client/translators/_cli.py | 38 +++++++++-- src/geny_executor/llm_client/types.py | 12 ++++ .../unit/test_translators_cli_claude_code.py | 51 ++++++++++++++ 6 files changed, 164 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 36f4a21..7186735 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,72 @@ All notable changes to `geny-executor` are recorded here. The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and this project adheres to [Semantic Versioning](https://semver.org/). +## [2.0.5] — 2026-05-19 + +Phase-I foundation for **MCP-wrapped tools on ``claude_code_cli`` +sessions** — surfaces the host's tool registry to the CLI's LLM +without breaking the Stage 6 → Stage 10 → Stage 16 pipeline +interface. Companion Geny PR ships the actual MCP bridge + tool +endpoint that consume this wire. + +### Added + +- ``APIRequest.mcp_config: Optional[Dict[str, Any]]`` — per-request + MCP server configuration. 
CLI-based backends serialize this to + ``--mcp-config <json>``; SDK-based backends ignore it. Hosts use + this to expose their tool registry to the CLI's LLM without + going through the per-client static ``mcp_config_path``. + +### Changed + +- ``claude_code_argv`` now reads ``request.mcp_config`` with + precedence over the per-client kwarg. When *any* MCP config is + supplied (per-request or per-client) the argv builder also + emits: + * ``--tools ""`` — disable the CLI's built-in tool palette so + the LLM cannot hallucinate ``Bash`` / ``Read`` / + ``ToolSearch`` / etc. that the host has no executor for. + Skipped when the caller explicitly passed ``allow_tools`` so + "MCP + curated CLI built-ins" hybrid surfaces still work. + * ``--strict-mcp-config`` — ignore user-level and + project-level MCP configurations so the per-session bridge is + the sole surface. Prevents accidental leakage from a host's + ``~/.claude/...`` config files. +- Legacy callers without any MCP config keep today's behaviour + exactly: no ``--tools ""`` disable, no ``--strict-mcp-config``, + CLI built-ins available. + +### Why + +Stage 6 with provider ``claude_code_cli`` was the lone outlier in +the otherwise provider-symmetric surface: every SDK client +(anthropic / openai / google / vllm) accepts the canonical +``APIRequest.tools`` and passes the schemas natively to the LLM. +The CLI client dropped them on the floor — the LLM saw only the +CLI's built-in palette and hallucinated against it whenever the +host's intent referenced a Geny custom tool. + +The Stage 6 → Stage 10 interface is preserved. When the CLI uses +MCP to call a host tool, the call is dispatched inside the CLI's +agentic loop (via the bridge ↔ host HTTP endpoint) and the final +``APIResponse`` carries only the assistant message — no +``tool_use`` blocks for Stage 10 to dispatch. Stage 10 sees no +``tool_use`` → naturally no-ops. Stage 16 sees no pending state → +naturally finishes. 
Memory / persona / persistence stages run +identically because the canonical ``APIResponse`` shape is the +same. Anthropic API path keeps the per-iteration tool-dispatch +loop; the CLI path collapses it inside one CLI invocation. Both +produce identical canonical outputs. + +### Tests + +- ``test_argv_request_mcp_config_overrides_kwarg`` +- ``test_argv_host_mcp_disables_cli_builtins_and_strict`` +- ``test_argv_host_mcp_with_explicit_allow_tools_keeps_builtins`` +- ``test_argv_no_mcp_no_tools_flag`` (legacy back-compat) + +Full ``tests/llm_client/`` 193/193 pass. + ## [2.0.4] — 2026-05-19 Patch release. Fixes Claude Code (CLI) sessions failing on the second diff --git a/pyproject.toml b/pyproject.toml index 585fdc3..512f55e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "geny-executor" -version = "2.0.4" +version = "2.0.5" description = "Harness-engineered agent pipeline library with 21-stage dual-abstraction architecture, built on the Anthropic API" readme = "README.md" license = "MIT" diff --git a/src/geny_executor/__init__.py b/src/geny_executor/__init__.py index bf42449..2bb9eb8 100644 --- a/src/geny_executor/__init__.py +++ b/src/geny_executor/__init__.py @@ -95,7 +95,7 @@ ProviderDrivenStrategy, ) -__version__ = "2.0.4" +__version__ = "2.0.5" __all__ = [ # Core diff --git a/src/geny_executor/llm_client/translators/_cli.py b/src/geny_executor/llm_client/translators/_cli.py index e99565d..bf43965 100644 --- a/src/geny_executor/llm_client/translators/_cli.py +++ b/src/geny_executor/llm_client/translators/_cli.py @@ -139,12 +139,40 @@ def claude_code_argv( if settings_path: argv += ["--settings", settings_path] - # MCP config: accept dict (inline JSON), str path, or pre-serialized JSON. - if mcp_config is not None: - if isinstance(mcp_config, str): - argv += ["--mcp-config", mcp_config] + # MCP config — precedence: + # 1. 
``request.mcp_config`` (per-request, set by host for + # session-scoped MCP wraps). Phase I: Geny synthesizes a + # per-session MCP config that bridges its tool registry to + # the CLI so the LLM can call host tools via MCP. + # 2. ``mcp_config`` constructor kwarg (legacy per-client static + # config from the LLM-backends settings card). + # Both flow to ``--mcp-config <json>``. + effective_mcp_config: Any = ( + request.mcp_config if request.mcp_config is not None else mcp_config + ) + has_host_mcp = bool(effective_mcp_config) + if has_host_mcp: + if isinstance(effective_mcp_config, str): + argv += ["--mcp-config", effective_mcp_config] else: - argv += ["--mcp-config", json.dumps(mcp_config)] + argv += [ + "--mcp-config", + json.dumps(effective_mcp_config, ensure_ascii=False), + ] + # When the host exposes its own tool surface via MCP, disable + # the CLI's built-in tool palette so the LLM cannot hallucinate + # against ``Bash`` / ``Read`` / ``ToolSearch`` / etc. The CLI's + # ``--tools ""`` literal disables the entire built-in set per + # ``claude --help``. Caller-supplied ``allow_tools`` / + # ``disallow_tools`` (legacy CLI-built-in filters) are also + # forwarded earlier so a host that wants a mixed surface — MCP + # tools + a curated subset of CLI built-ins — can opt back in. + # ``--strict-mcp-config`` ignores any other MCP config sources + # (user-level / project-level) so the per-session bridge is + # the sole MCP surface the CLI sees. + if not allow_tools: + argv += ["--tools", ""] + argv += ["--strict-mcp-config"] # JSON schema (structured output). if request.response_format: diff --git a/src/geny_executor/llm_client/types.py b/src/geny_executor/llm_client/types.py index 282a4d7..a3ba70d 100644 --- a/src/geny_executor/llm_client/types.py +++ b/src/geny_executor/llm_client/types.py @@ -42,6 +42,18 @@ class APIRequest: #: {"session_id": "...", "resume": bool} session_hint: Optional[Dict[str, Any]] = None + #: Per-request MCP server configuration. 
CLI-based backends + #: (claude_code_cli) serialize this to ``--mcp-config <json>``; + #: SDK-based backends ignore it. Hosts use this to surface their + #: tool registry to the CLI's LLM without going through the + #: cumbersome per-client static ``mcp_config_path``. Shape:: + #: + #: {"mcpServers": {"<name>": {"type": "stdio", + #: "command": "...", + #: "args": [...], + #: "env": {...}}}} + mcp_config: Optional[Dict[str, Any]] = None + metadata: Optional[Dict[str, Any]] = None diff --git a/tests/llm_client/unit/test_translators_cli_claude_code.py b/tests/llm_client/unit/test_translators_cli_claude_code.py index 08f1e3e..8472010 100644 --- a/tests/llm_client/unit/test_translators_cli_claude_code.py +++ b/tests/llm_client/unit/test_translators_cli_claude_code.py @@ -137,6 +137,57 @@ def test_argv_mcp_config_path_passed_through() -> None: assert blob == "/tmp/mcp.json" +def test_argv_request_mcp_config_overrides_kwarg() -> None: + """``APIRequest.mcp_config`` (per-request) wins over the + constructor kwarg (per-client static). Phase I uses this to + inject the per-session Geny tools bridge in place of any + settings-card-configured MCP servers.""" + per_request = {"mcpServers": {"geny": {"type": "stdio", "command": "py"}}} + per_client = {"mcpServers": {"legacy": {"command": "x"}}} + argv = claude_code_argv(_req(mcp_config=per_request), mcp_config=per_client) + blob = argv[argv.index("--mcp-config") + 1] + assert json.loads(blob) == per_request # per-request wins + + +def test_argv_host_mcp_disables_cli_builtins_and_strict() -> None: + """When the host registers MCP servers, the CLI's built-in tool + palette is disabled (``--tools ""``) so the LLM only ever sees + MCP-advertised tools. ``--strict-mcp-config`` ignores any other + MCP configuration sources so the per-session bridge is the sole + surface. Together these eliminate the hallucination path where + the LLM tries to use ``Bash``/``ToolSearch``/etc. 
that the host + has no executor for.""" + cfg = {"mcpServers": {"geny": {"type": "stdio", "command": "py"}}} + argv = claude_code_argv(_req(mcp_config=cfg)) + # Disable built-ins. + idx = argv.index("--tools") + assert argv[idx + 1] == "" + # Strict mode. + assert "--strict-mcp-config" in argv + + +def test_argv_host_mcp_with_explicit_allow_tools_keeps_builtins() -> None: + """``--allowedTools`` is the legacy whitelist of CLI built-ins. + If a caller explicitly supplies one alongside an MCP config they + want a mixed surface (custom MCP tools + a curated subset of CLI + built-ins). Don't override their choice.""" + cfg = {"mcpServers": {"geny": {"type": "stdio", "command": "py"}}} + argv = claude_code_argv(_req(mcp_config=cfg), allow_tools=["Read"]) + # No --tools "" disabler — caller picked allowedTools explicitly. + assert "--tools" not in argv + assert "--allowedTools" in argv + + +def test_argv_no_mcp_no_tools_flag() -> None: + """Legacy callers without any MCP config keep today's behaviour: + CLI built-ins available, no ``--tools ""`` disable, no + ``--strict-mcp-config``.""" + argv = claude_code_argv(_req()) + assert "--tools" not in argv + assert "--strict-mcp-config" not in argv + assert "--mcp-config" not in argv + + def test_argv_response_format_json_schema_emits_flag() -> None: schema = {"type": "object", "properties": {"x": {"type": "string"}}} argv = claude_code_argv(