diff --git a/pyproject.toml b/pyproject.toml index 512f55e..b271c92 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "geny-executor" -version = "2.0.5" +version = "2.0.6" description = "Harness-engineered agent pipeline library with 21-stage dual-abstraction architecture, built on the Anthropic API" readme = "README.md" license = "MIT" diff --git a/src/geny_executor/__init__.py b/src/geny_executor/__init__.py index 2bb9eb8..e843ee9 100644 --- a/src/geny_executor/__init__.py +++ b/src/geny_executor/__init__.py @@ -95,7 +95,7 @@ ProviderDrivenStrategy, ) -__version__ = "2.0.5" +__version__ = "2.0.6" __all__ = [ # Core diff --git a/src/geny_executor/core/pipeline.py b/src/geny_executor/core/pipeline.py index 17aa119..788dda3 100644 --- a/src/geny_executor/core/pipeline.py +++ b/src/geny_executor/core/pipeline.py @@ -97,16 +97,6 @@ def _creds_to_client_kwargs(provider: str, creds: ProviderCredentials) -> Dict[s kwargs[key] = extras[key] return kwargs - if provider == "copilot_cli": - extras = dict(creds.extras or {}) - kwargs = {} - if creds.binary_path: - kwargs["gh_binary_path"] = creds.binary_path - for key in ("allow_tools", "cwd", "extra_args", "timeout_s"): - if key in extras: - kwargs[key] = extras[key] - return kwargs - # API providers (anthropic / openai / google) kwargs = {"api_key": creds.api_key} if creds.base_url is not None: diff --git a/src/geny_executor/llm_client/__init__.py b/src/geny_executor/llm_client/__init__.py index 940ce1b..73bba92 100644 --- a/src/geny_executor/llm_client/__init__.py +++ b/src/geny_executor/llm_client/__init__.py @@ -9,7 +9,6 @@ from geny_executor.llm_client.anthropic import AnthropicClient from geny_executor.llm_client.base import BaseClient, ClientCapabilities from geny_executor.llm_client.claude_code import ClaudeCodeCLIClient -from geny_executor.llm_client.copilot import CopilotCLIClient from geny_executor.llm_client.credentials import ( ConfigError, CredentialBundle, @@ -28,7 +27,6 @@ "ClientRegistry", "ConfigError", "ContentBlock", - "CopilotCLIClient", "CredentialBundle", "ProviderCredentials", ] diff --git a/src/geny_executor/llm_client/_cli_runtime.py b/src/geny_executor/llm_client/_cli_runtime.py index 6bbdeed..e57503b 100644 --- a/src/geny_executor/llm_client/_cli_runtime.py +++ b/src/geny_executor/llm_client/_cli_runtime.py @@ -1,8 +1,10 @@ """Async subprocess primitives shared by CLI-backed LLM clients. This module is the *only* place where ``asyncio.create_subprocess_exec`` is -called inside ``llm_client/``. Both ``ClaudeCodeCLIClient`` (Phase B) and -``CopilotCLIClient`` (Phase C) drive their work through these helpers. +called inside ``llm_client/``. ``ClaudeCodeCLIClient`` (Phase B) drives its +work through these helpers. (The Phase-C ``CopilotCLIClient`` was removed +in 2.0.6 — see the commit message for the structural-incompatibility +rationale.) Design rules ------------ diff --git a/src/geny_executor/llm_client/copilot.py b/src/geny_executor/llm_client/copilot.py deleted file mode 100644 index 47250c6..0000000 --- a/src/geny_executor/llm_client/copilot.py +++ /dev/null @@ -1,185 +0,0 @@ -"""GitHub Copilot CLI backend. - -Wraps ``gh copilot`` (the Copilot CLI extension to the ``gh`` binary) as a -:class:`BaseClient`. Plain-text stdout only — no streaming, no structured -output, no tool round-trip (only ``--allow-tool`` allowlist gating). The -client advertises this honestly via ``ClientCapabilities``. - -Authentication --------------- -``gh copilot`` reads its credentials from the host's ``gh auth`` state -(typically ``~/.config/gh/hosts.yml``). This client makes no attempt to -manage that — operators run ``gh auth login`` and ``gh extension install -github/gh-copilot`` themselves. The client surfaces auth failures via -``ErrorCategory.CLI_AUTH_FAILED`` based on stderr heuristics. - -Streaming ---------- -``supports_streaming=False``. ``create_message_stream`` falls back to -``BaseClient``'s default (one ``message_complete`` event after the full -response). -""" - -from __future__ import annotations - -import os -from typing import Any, Callable, Dict, Optional, Sequence - -from geny_executor.core.errors import APIError, ErrorCategory -from geny_executor.llm_client._cli_runtime import ( - CLIAuthFailed, - CLIBinaryNotFound, - CLIProcessRunner, - CLIProtocolError, - CLIResult, - CLITimeout, - detect_binary, -) -from geny_executor.llm_client.base import BaseClient, ClientCapabilities -from geny_executor.llm_client.translators._cli import ( - compose_copilot_prompt, - copilot_argv, - parse_plain_text_to_response, -) -from geny_executor.llm_client.types import APIRequest, APIResponse - - -__all__ = ["CopilotCLIClient"] - - -def _classify_cli_result(result: CLIResult) -> APIError: - """Map gh copilot exit-codes / stderr → APIError category.""" - stderr = result.stderr.decode("utf-8", errors="replace").lower() - if "not logged in" in stderr or "authentication" in stderr or "unauthorized" in stderr or "auth required" in stderr: - return APIError( - f"Copilot CLI auth failed (exit {result.returncode}): {stderr[:300]}", - category=ErrorCategory.CLI_AUTH_FAILED, - ) - if "not installed" in stderr or "extension not found" in stderr: - return APIError( - f"gh copilot extension not installed: {stderr[:300]}", - category=ErrorCategory.CLI_NOT_FOUND, - ) - if "permission" in stderr and ("denied" in stderr or "deny" in stderr or "blocked" in stderr): - return APIError( - f"Copilot CLI permission denied: {stderr[:300]}", - category=ErrorCategory.CLI_PERMISSION_DENIED, - ) - return APIError( - f"Copilot CLI exited with code {result.returncode}: {stderr[:300]}", - category=ErrorCategory.CLI_PROTOCOL_ERROR, - ) - - -class CopilotCLIClient(BaseClient): - """Subprocess-backed GitHub Copilot CLI client.""" - - provider = "copilot_cli" - capabilities = ClientCapabilities( - supports_thinking=False, - supports_tools=False, - supports_streaming=False, - supports_tool_choice=False, - supports_stop_sequences=False, - supports_top_k=False, - supports_system_prompt=True, # via prompt prepend - supports_structured_output=False, - supports_session_continuity=False, - supports_mcp_passthrough=False, - supports_budget_limit=False, - supports_token_usage=False, - supports_cost_usage=False, - is_subprocess=True, - requires_workspace=False, - streaming_granularity="none", - drops=( - "tools", - "tool_choice", - "thinking_enabled", - "stop_sequences", - "top_k", - "temperature", - "top_p", - "max_tokens", - "response_format", - "session_hint", - ), - ) - - def __init__( - self, - *, - gh_binary_path: Optional[str] = None, - allow_tools: Sequence[str] = (), - cwd: Optional[str] = None, - extra_args: Sequence[str] = (), - timeout_s: float = 180.0, - env_extras: Optional[Dict[str, str]] = None, - event_sink: Optional[Callable[[Dict[str, Any]], None]] = None, - ) -> None: - super().__init__( - api_key="", - base_url=None, - default_headers=None, - event_sink=event_sink, - ) - # Binary resolution: explicit > GH_BINARY env > which("gh"). - if gh_binary_path: - self._gh = detect_binary("gh", gh_binary_path) or "" - else: - env_override = os.environ.get("GH_BINARY", "") - self._gh = ( - detect_binary("gh", env_override) if env_override else None - ) or detect_binary("gh", None) or "" - self._allow_tools = tuple(allow_tools) - self._cwd = cwd - self._extra_args = tuple(extra_args) - self._timeout_s = timeout_s - self._extra_env: Dict[str, str] = dict(env_extras) if env_extras else {} - - def _env_extras(self) -> Dict[str, str]: - return dict(self._extra_env) - - def _make_runner(self) -> CLIProcessRunner: - if not self._gh: - raise CLIBinaryNotFound( - "gh binary not found. Install GitHub CLI and set gh_binary_path= " - "or ensure 'gh' is on PATH (then install the copilot extension " - "via `gh extension install github/gh-copilot`)." - ) - return CLIProcessRunner( - binary=self._gh, - cwd=self._cwd, - env_extras=self._env_extras(), - timeout_s=self._timeout_s, - ) - - # ─────────────────────────────────────────────────────── _send ─ - - async def _send(self, request: APIRequest, *, purpose: str = "") -> APIResponse: - try: - runner = self._make_runner() - except CLIBinaryNotFound as e: - raise APIError(str(e), category=ErrorCategory.CLI_NOT_FOUND) from e - - prompt = compose_copilot_prompt(request.system, request.messages) - argv = copilot_argv( - allow_tools=self._allow_tools, - extra_args=self._extra_args, - ) - argv += ["-p", prompt] - - try: - result = await runner.run_oneshot(argv) - if result.returncode != 0: - raise _classify_cli_result(result) - text = result.stdout.decode("utf-8", errors="replace") - return parse_plain_text_to_response(text, model=request.model) - except CLIBinaryNotFound as e: - raise APIError(str(e), category=ErrorCategory.CLI_NOT_FOUND) from e - except CLITimeout as e: - raise APIError(str(e), category=ErrorCategory.CLI_TIMEOUT) from e - except CLIAuthFailed as e: - raise APIError(str(e), category=ErrorCategory.CLI_AUTH_FAILED) from e - except CLIProtocolError as e: - raise APIError(str(e), category=ErrorCategory.CLI_PROTOCOL_ERROR) from e diff --git a/src/geny_executor/llm_client/registry.py b/src/geny_executor/llm_client/registry.py index dcc9745..39b9543 100644 --- a/src/geny_executor/llm_client/registry.py +++ b/src/geny_executor/llm_client/registry.py @@ -74,15 +74,8 @@ def _claude_code_cli_factory() -> Type[BaseClient]: return ClaudeCodeCLIClient -def _copilot_cli_factory() -> Type[BaseClient]: - from geny_executor.llm_client.copilot import CopilotCLIClient - - return CopilotCLIClient - - ClientRegistry.register("anthropic", _anthropic_factory) ClientRegistry.register("openai", _openai_factory) ClientRegistry.register("google", _google_factory) ClientRegistry.register("vllm", _vllm_factory) ClientRegistry.register("claude_code_cli", _claude_code_cli_factory) -ClientRegistry.register("copilot_cli", _copilot_cli_factory) diff --git a/src/geny_executor/llm_client/translators/__init__.py b/src/geny_executor/llm_client/translators/__init__.py index 5a16548..8740234 100644 --- a/src/geny_executor/llm_client/translators/__init__.py +++ b/src/geny_executor/llm_client/translators/__init__.py @@ -33,10 +33,7 @@ assemble_response_from_stream_json, build_stream_json_stdin, claude_code_argv, - compose_copilot_prompt, - copilot_argv, parse_json_output_to_response, - parse_plain_text_to_response, stream_json_line_to_canonical_event, thinking_to_effort, ) @@ -55,11 +52,8 @@ "canonical_tools_to_google", "canonical_tools_to_openai", "claude_code_argv", - "compose_copilot_prompt", - "copilot_argv", "normalize_stop_reason", "parse_json_output_to_response", - "parse_plain_text_to_response", "split_tool_results", "split_tool_uses", "stream_json_line_to_canonical_event", diff --git a/src/geny_executor/llm_client/translators/_cli.py b/src/geny_executor/llm_client/translators/_cli.py index bf43965..63b12f5 100644 --- a/src/geny_executor/llm_client/translators/_cli.py +++ b/src/geny_executor/llm_client/translators/_cli.py @@ -1,20 +1,22 @@ """Canonical ↔ CLI translation helpers. -Used by ``ClaudeCodeCLIClient`` (Phase B) and ``CopilotCLIClient`` (Phase C) -to: +Used by ``ClaudeCodeCLIClient`` (Phase B) to: - Build vendor-specific argv lists from a canonical :class:`APIRequest`. - Assemble a canonical :class:`APIResponse` from CLI output. - Map streaming stream-json line types to canonical event dicts. -Claude Code helpers landed in Phase B1; ``gh copilot`` helpers -(``compose_copilot_prompt``, ``copilot_argv``, ``parse_plain_text_to_response``) -land here in Phase C1. +The Phase-C ``gh copilot`` helpers (``compose_copilot_prompt``, +``copilot_argv``, ``parse_plain_text_to_response``) were removed in +2.0.6 along with the ``CopilotCLIClient`` itself — ``gh copilot`` +does not support streaming, tools, or MCP, so it could not host the +pipeline's Stage-10 dispatch loop. """ from __future__ import annotations import json +import os from typing import Any, AsyncIterator, Dict, List, Optional, Sequence from geny_executor.core.state import TokenUsage @@ -83,8 +85,19 @@ def claude_code_argv( # Output / input formats: always stream-json for streaming requests, # else json so we can parse a single object. + # + # ``--verbose`` is required by Claude Code CLI ≥ 2.1.x whenever + # ``--print`` is combined with ``--output-format=stream-json``; + # without it the CLI exits 1 with: + # + # Error: When using --print, --output-format=stream-json + # requires --verbose + # + # 2.0.6 emits it automatically alongside the stream-json switch so + # hosts don't have to thread an opt-in flag through their settings. if request.stream: argv += [ + "--verbose", "--input-format", "stream-json", "--output-format", "stream-json", "--include-partial-messages", @@ -92,8 +105,21 @@ def claude_code_argv( else: argv += ["--output-format", "json"] + # ``--bare`` skips OAuth + keychain reads (per ``claude --help``: + # "Anthropic auth is strictly ANTHROPIC_API_KEY or apiKeyHelper via + # --settings (OAuth and keychain are never read)"). That's correct + # for the API-key auth path but **wrong** for the subscription + # OAuth path — passing ``--bare`` without an API key crashes every + # subscription user with "Not logged in · Please run /login". 2.0.6 + # auto-strips ``--bare`` when no API key is reachable in the + # spawning process's environment, so the same ``bare_mode=True`` + # default works for both auth paths transparently. Callers that + # explicitly want OAuth even with an API key present can still + # pass ``bare_mode=False``. if bare_mode: - argv.append("--bare") + has_api_key = bool(os.environ.get("ANTHROPIC_API_KEY", "").strip()) + if has_api_key: + argv.append("--bare") # Model: alias or pinned id. if request.model: @@ -159,19 +185,18 @@ def claude_code_argv( "--mcp-config", json.dumps(effective_mcp_config, ensure_ascii=False), ] - # When the host exposes its own tool surface via MCP, disable - # the CLI's built-in tool palette so the LLM cannot hallucinate - # against ``Bash`` / ``Read`` / ``ToolSearch`` / etc. The CLI's - # ``--tools ""`` literal disables the entire built-in set per - # ``claude --help``. Caller-supplied ``allow_tools`` / - # ``disallow_tools`` (legacy CLI-built-in filters) are also - # forwarded earlier so a host that wants a mixed surface — MCP - # tools + a curated subset of CLI built-ins — can opt back in. # ``--strict-mcp-config`` ignores any other MCP config sources # (user-level / project-level) so the per-session bridge is - # the sole MCP surface the CLI sees. - if not allow_tools: - argv += ["--tools", ""] + # the sole MCP surface the CLI sees. The CLI's *built-in* tool + # palette (``Bash`` / ``Read`` / ``Write`` / ``Edit`` / …) + # stays available alongside the MCP surface — earlier + # executor versions auto-emitted ``--tools ""`` here to + # disable it (a defensive measure against the LLM + # hallucinating against unknown built-ins), but in practice + # most hosts *want* both surfaces (e.g. a Sub-Worker writing + # files via ``Write`` while delegating to MCP-wrapped host + # tools). Hosts that prefer the old MCP-only behaviour can + # pass ``extra_args=("--tools", "")`` explicitly. argv += ["--strict-mcp-config"] # JSON schema (structured output). @@ -454,6 +479,11 @@ def parse_json_output_to_response(stdout: bytes, *, model: str) -> APIResponse: if not isinstance(obj, dict): raise ValueError("Claude Code json output is not an object") + # ``tool_use`` blocks in the json output are intentionally dropped + # for the same reason as ``StreamJsonAccumulator.finalize`` — the + # CLI handles tool dispatch internally and host pipelines should + # see only the final assistant text. See ``finalize``'s docstring + # for the full rationale. blocks: List[ContentBlock] = [] for block in obj.get("content", []) or []: if not isinstance(block, dict): @@ -465,15 +495,6 @@ def parse_json_output_to_response(stdout: bytes, *, model: str) -> APIResponse: blocks.append( ContentBlock(type="thinking", thinking_text=block.get("text", "")) ) - elif btype == "tool_use": - blocks.append( - ContentBlock( - type="tool_use", - tool_use_id=block.get("id"), - tool_name=block.get("name"), - tool_input=block.get("input") or {}, - ) - ) usage_in = obj.get("usage", {}) or {} usage = TokenUsage( @@ -580,9 +601,39 @@ def feed(self, line: Dict[str, Any]) -> List[Dict[str, Any]]: return [{"type": "cli_unknown", "raw": line}] def finalize(self) -> APIResponse: - """Build the canonical :class:`APIResponse` from accumulated state.""" + """Build the canonical :class:`APIResponse` from accumulated state. + + ``tool_use`` blocks observed during streaming are intentionally + **dropped** from the assembled response. Claude Code CLI 2.1.x + runs its agentic loop *internally* (LLM → tool → LLM → tool → + …); each intermediate turn arrives as its own ``"assistant"`` + envelope and the accumulator collects every block from every + envelope into the shared buffers below. The CLI has already + dispatched those tool calls (via its own built-ins or via the + host's MCP bridge) and emitted the matching ``"user"`` + ``tool_result`` envelopes in the same stream — so including the + ``tool_use`` blocks in the terminal :class:`APIResponse` would + push host pipelines (Geny's Stage 10, the canonical reference + consumer) into trying to re-dispatch tools they have no + registration for, producing instant ``ERROR (0 ms) — No + output`` ghost failures for every CLI tool call. Per the Phase + I design contract: + + Stage 10 receives that assistant message, sees no + ``tool_use`` blocks (they were executed inside the CLI), + and naturally no-ops. + + Hosts that *do* want the raw tool_use record can still recover + it from the per-line stream events the accumulator yields + through ``feed()`` (each ``tool_use`` block produces a + ``{"type": "tool_use", "id": ..., "name": ..., "input": ...}`` + event). + """ # Flush any unclosed tool — the message form often skips - # ``content_block_stop`` entirely. + # ``content_block_stop`` entirely. We still call this so the + # accumulator's internal state is consistent for callers that + # rely on ``_tool_uses`` directly; only the *response* blocks + # below skip them. self._close_current_tool() blocks: List[ContentBlock] = [] @@ -592,15 +643,6 @@ def finalize(self) -> APIResponse: ) if self._text_buf: blocks.append(ContentBlock(type="text", text="".join(self._text_buf))) - for tu in self._tool_uses: - blocks.append( - ContentBlock( - type="tool_use", - tool_use_id=tu.get("id"), - tool_name=tu.get("name"), - tool_input=tu.get("input") or {}, - ) - ) usage_in: Dict[str, Any] = (self._final_obj or {}).get("usage", {}) or {} usage = TokenUsage( @@ -773,104 +815,9 @@ async def assemble_response_from_stream_json( return accum.finalize() -# --------------------------------------------------------------------------- -# Copilot CLI: prompt composition -# --------------------------------------------------------------------------- - - -def compose_copilot_prompt(system: Any, messages: List[Dict[str, Any]]) -> str: - """Flatten a canonical (system + messages) into one ``-p`` argument. - - The Copilot CLI accepts a single prompt string. Conversation history - is encoded as Markdown-style turns so the model can still see prior - turns. The system prompt is prepended as a ``## System`` section - when present. - """ - parts: List[str] = [] - if system: - if isinstance(system, str): - sys_text = system - elif isinstance(system, list): - sys_text = "\n".join( - str(b.get("text", "")) for b in system if isinstance(b, dict) and b.get("type") == "text" - ) - else: - sys_text = str(system) - if sys_text: - parts.append(f"## System\n{sys_text}") - - for m in messages: - role = str(m.get("role", "user")).capitalize() - content = m.get("content", "") - if isinstance(content, list): - chunks: List[str] = [] - for block in content: - if not isinstance(block, dict): - continue - btype = block.get("type", "") - if btype == "text": - chunks.append(str(block.get("text", ""))) - elif btype == "tool_result": - chunks.append(f"[tool_result]\n{block.get('content', '')}") - content_text = "\n".join(chunks) - else: - content_text = str(content) - if content_text: - parts.append(f"## {role}\n{content_text}") - - return "\n\n".join(parts) - - -# --------------------------------------------------------------------------- -# Copilot CLI: argv builder -# --------------------------------------------------------------------------- - - -def copilot_argv( - *, - allow_tools: Sequence[str] = (), - extra_args: Sequence[str] = (), -) -> List[str]: - """Build the argv list for ``gh copilot`` (excluding the binary). - - The caller is expected to invoke the result as ``gh copilot ...`` — - i.e. ``argv[0]`` is *not* prepended here. ``-p `` is appended - by the client after computing the prompt via - :func:`compose_copilot_prompt`. - - Only the flags the CLI actually accepts are emitted: - - ``-p``: single prompt (added by the client, not here) - - ``--allow-tool ''``: repeated, one flag per scope - - any ``extra_args`` for escape-hatch use. - """ - argv: List[str] = ["copilot"] - for scope in allow_tools: - if scope: - argv += ["--allow-tool", str(scope)] - if extra_args: - argv += list(extra_args) - return argv - - -# --------------------------------------------------------------------------- -# Copilot CLI: stdout → APIResponse -# --------------------------------------------------------------------------- - - -def parse_plain_text_to_response(text: str, *, model: str = "default") -> APIResponse: - """Wrap plain stdout text into a canonical :class:`APIResponse`. - - Copilot CLI does not return JSON in print mode, so we cannot recover - structured usage / cost. The response carries the text in a single - block, ``stop_reason="end_turn"``, and an empty TokenUsage with - ``supports_token_usage=False`` advertised at the client level. - """ - content_text = text.strip("\n") - return APIResponse( - content=[ContentBlock(type="text", text=content_text)], - stop_reason="end_turn", - usage=TokenUsage(), - model=model, - message_id="", - raw=text, - ) +# Copilot CLI helpers (compose_copilot_prompt / copilot_argv / +# parse_plain_text_to_response) were removed in 2.0.6. ``gh copilot`` +# is one-shot text-in / text-out with no streaming, no tool round-trip, +# and no MCP support, so it could not host the pipeline's Stage-10 +# dispatch loop. The ``CopilotCLIClient`` and its registry entry are +# also gone — see the matching commit message. diff --git a/tests/_fixtures/__init__.py b/tests/_fixtures/__init__.py index 6a28f1e..96bfabc 100644 --- a/tests/_fixtures/__init__.py +++ b/tests/_fixtures/__init__.py @@ -1,2 +1,2 @@ -"""Test fixtures: fake CLI binaries used by _cli_runtime / claude_code / -copilot suites. Each binary is a standalone Python script with a shebang.""" +"""Test fixtures: fake CLI binaries used by _cli_runtime / claude_code +suites. Each binary is a standalone Python script with a shebang.""" diff --git a/tests/_fixtures/fake_gh.py b/tests/_fixtures/fake_gh.py deleted file mode 100755 index 21ed15f..0000000 --- a/tests/_fixtures/fake_gh.py +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env python3 -"""Tiny fake ``gh`` binary used by Copilot CLI tests. - -Only the ``copilot`` subcommand is handled. Scenarios driven by -``FAKE_GH_SCENARIO`` env var: - - - ``ok`` — emit ``FAKE_GH_TEXT`` (default greeting) to stdout - - ``auth_fail`` — exit 4 with "not logged in" stderr - - ``not_installed`` — exit 1 with "extension not found" stderr - - ``permission_fail`` — exit 1 with "permission denied" stderr - - ``crash`` — exit 2 with generic stderr - - ``hang`` — sleep forever - - ``echo_argv`` — print argv as JSON -""" - -from __future__ import annotations - -import json -import os -import sys -import time -from typing import List - - -def _ok(argv: List[str]) -> int: - text = os.environ.get("FAKE_GH_TEXT", "Hello from fake gh copilot.") - sys.stdout.write(text) - return 0 - - -def _auth_fail(argv: List[str]) -> int: - sys.stderr.write("gh: not logged in. Run `gh auth login`.\n") - return 4 - - -def _not_installed(argv: List[str]) -> int: - sys.stderr.write("gh: extension not found: github/gh-copilot\n") - return 1 - - -def _permission_fail(argv: List[str]) -> int: - sys.stderr.write("permission denied: tool shell(rm) blocked\n") - return 1 - - -def _crash(argv: List[str]) -> int: - sys.stderr.write("gh: copilot service unreachable\n") - return 2 - - -def _hang(argv: List[str]) -> int: - time.sleep(60) - return 0 - - -def _echo_argv(argv: List[str]) -> int: - sys.stdout.write(json.dumps(argv)) - return 0 - - -SCENARIOS = { - "ok": _ok, - "auth_fail": _auth_fail, - "not_installed": _not_installed, - "permission_fail": _permission_fail, - "crash": _crash, - "hang": _hang, - "echo_argv": _echo_argv, -} - - -def main() -> int: - argv = sys.argv[1:] - if not argv or argv[0] != "copilot": - sys.stderr.write(f"fake_gh: only the 'copilot' subcommand is supported (got {argv!r})\n") - return 99 - scenario = os.environ.get("FAKE_GH_SCENARIO", "ok") - fn = SCENARIOS.get(scenario) - if fn is None: - sys.stderr.write(f"fake_gh: unknown scenario {scenario!r}\n") - return 99 - return fn(argv[1:]) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/tests/llm_client/conformance/test_claude_code_cli.py b/tests/llm_client/conformance/test_claude_code_cli.py index 30cfd93..f91fd32 100644 --- a/tests/llm_client/conformance/test_claude_code_cli.py +++ b/tests/llm_client/conformance/test_claude_code_cli.py @@ -110,16 +110,21 @@ async def test_token_usage_and_cost_populated(self) -> None: assert resp.usage.duration_ms is not None @pytest.mark.asyncio - async def test_tool_use_round_trip(self) -> None: + async def test_tool_use_blocks_dropped(self) -> None: + """The CLI handles tool dispatch internally — ``tool_use`` + blocks observed in its output are intentionally dropped from + the assembled response so host pipelines (Geny's Stage 10, + the canonical reference consumer) don't try to re-dispatch + them and ghost-error. ``stop_reason`` is preserved so callers + can still see the CLI ended in a tool turn. See + ``StreamJsonAccumulator.finalize`` for the full rationale.""" client = self.make_client(scenario="ok_tool_use") resp = await client.create_message( model_config=ModelConfig(model="sonnet"), messages=[{"role": "user", "content": "read /tmp/x"}], ) - assert resp.has_tool_calls is True - tu = resp.tool_calls[0] - assert tu.tool_name == "Read" - assert tu.tool_input == {"path": "/tmp/x"} + assert resp.tool_calls == [] + assert resp.stop_reason == "tool_use" @pytest.mark.asyncio async def test_thinking_blocks_returned(self) -> None: diff --git a/tests/llm_client/conformance/test_copilot_cli.py b/tests/llm_client/conformance/test_copilot_cli.py deleted file mode 100644 index ce96b4f..0000000 --- a/tests/llm_client/conformance/test_copilot_cli.py +++ /dev/null @@ -1,111 +0,0 @@ -"""Copilot CLI provider conformance (Phase C2).""" - -from __future__ import annotations - -import os -import sys -from pathlib import Path - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "..", "src")) - -import pytest - -from geny_executor.core.config import ModelConfig -from geny_executor.core.errors import APIError, ErrorCategory -from geny_executor.llm_client.copilot import CopilotCLIClient -from geny_executor.llm_client.base import BaseClient - -from tests.llm_client.conformance.harness import ConformanceTestSuite - - -FAKE_GH = str( - (Path(__file__).resolve().parents[2] / "_fixtures" / "fake_gh.py") -) - - -class TestCopilotCLIConformance(ConformanceTestSuite): - provider_name = "copilot_cli" - - def make_client( - self, - *, - mode="mocked", - scenario: str = "ok", - text: str | None = None, - ) -> BaseClient: - env_extras = {"FAKE_GH_SCENARIO": scenario} - if text is not None: - env_extras["FAKE_GH_TEXT"] = text - return CopilotCLIClient( - gh_binary_path=FAKE_GH, - timeout_s=5.0, - env_extras=env_extras, - ) - - # ---------------------------------------------------------------- shape - def test_is_subprocess(self) -> None: - c = self.make_client() - assert c.capabilities.is_subprocess is True - assert c.capabilities.requires_workspace is False - assert c.capabilities.streaming_granularity == "none" - - def test_lacks_streaming_and_tools(self) -> None: - c = self.make_client() - assert c.supports("streaming") is False - assert c.supports("tools") is False - assert c.supports("structured_output") is False - assert c.supports("token_usage") is False - - # ---------------------------------------------------------------- e2e - @pytest.mark.asyncio - async def test_basic_text_completion(self) -> None: - c = self.make_client(text="Hello!") - resp = await c.create_message( - model_config=ModelConfig(model="default"), - messages=[{"role": "user", "content": "say hi"}], - ) - assert resp.text == "Hello!" - assert resp.stop_reason == "end_turn" - - @pytest.mark.asyncio - async def test_translates_auth_error(self) -> None: - c = self.make_client(scenario="auth_fail") - with pytest.raises(APIError) as ei: - await c.create_message( - model_config=ModelConfig(model="default"), - messages=[{"role": "user", "content": "x"}], - ) - assert ei.value.category is ErrorCategory.CLI_AUTH_FAILED - - @pytest.mark.asyncio - async def test_translates_not_installed(self) -> None: - c = self.make_client(scenario="not_installed") - with pytest.raises(APIError) as ei: - await c.create_message( - model_config=ModelConfig(model="default"), - messages=[{"role": "user", "content": "x"}], - ) - assert ei.value.category is ErrorCategory.CLI_NOT_FOUND - - @pytest.mark.asyncio - async def test_streaming_falls_back(self) -> None: - """copilot_cli's supports_streaming=False → BaseClient default emits - one message_complete event.""" - c = self.make_client(text="streamed") - events = [] - async for evt in c.create_message_stream( - model_config=ModelConfig(model="default"), - messages=[{"role": "user", "content": "go"}], - ): - events.append(evt) - assert any(e.get("type") == "message_complete" for e in events) - - @pytest.mark.asyncio - async def test_binary_not_found_raises_cli_not_found(self) -> None: - c = CopilotCLIClient(gh_binary_path="/totally/missing/gh", timeout_s=2.0) - with pytest.raises(APIError) as ei: - await c.create_message( - model_config=ModelConfig(model="default"), - messages=[{"role": "user", "content": "x"}], - ) - assert ei.value.category is ErrorCategory.CLI_NOT_FOUND diff --git a/tests/llm_client/unit/test_claude_code.py b/tests/llm_client/unit/test_claude_code.py index 856fcdc..a385661 100644 --- a/tests/llm_client/unit/test_claude_code.py +++ b/tests/llm_client/unit/test_claude_code.py @@ -151,13 +151,15 @@ async def test_send_oneshot_ok_text() -> None: @pytest.mark.asyncio -async def test_send_oneshot_tool_use() -> None: +async def test_send_oneshot_tool_use_blocks_dropped() -> None: + """``tool_use`` blocks are dropped from the response — the CLI + dispatched them internally. ``stop_reason`` is preserved + verbatim so callers can still tell the CLI ended in a tool turn + (e.g. CLI hit max-iter mid-loop with pending tool calls). See + ``StreamJsonAccumulator.finalize`` for the full rationale.""" c = _client(scenario="ok_tool_use") resp = await c._send(_make_request()) - assert resp.has_tool_calls is True - tools = resp.tool_calls - assert tools[0].tool_name == "Read" - assert tools[0].tool_input == {"path": "/tmp/x"} + assert resp.tool_calls == [] assert resp.stop_reason == "tool_use" @@ -323,7 +325,11 @@ async def test_send_streaming_message_form_text() -> None: @pytest.mark.asyncio -async def test_argv_carries_bare_and_workspace() -> None: +async def test_argv_carries_bare_and_workspace(monkeypatch) -> None: + # ``--bare`` is auto-stripped on the OAuth path (no + # ANTHROPIC_API_KEY in env). Pin the API-key env so this argv + # surface test exercises the API-key path. + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-test") c = _client(scenario="echo_argv") resp = await c._send(_make_request(model="opus", system="rule X")) import json diff --git a/tests/llm_client/unit/test_copilot.py b/tests/llm_client/unit/test_copilot.py deleted file mode 100644 index 91913e5..0000000 --- a/tests/llm_client/unit/test_copilot.py +++ /dev/null @@ -1,286 +0,0 @@ -"""Tests for :class:`CopilotCLIClient` (Phase C1).""" - -from __future__ import annotations - -import json -import os -import sys -from pathlib import Path - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "..", "src")) - -import pytest - -from geny_executor.core.config import ModelConfig -from geny_executor.core.errors import APIError, ErrorCategory -from geny_executor.llm_client.copilot import CopilotCLIClient -from geny_executor.llm_client.registry import ClientRegistry -from geny_executor.llm_client.translators._cli import ( - compose_copilot_prompt, - copilot_argv, - parse_plain_text_to_response, -) - - -FAKE_GH = str( - (Path(__file__).resolve().parents[2] / "_fixtures" / "fake_gh.py") -) - - -def _client(scenario: str = "ok", text: str | None = None, **kwargs) -> CopilotCLIClient: - env_extras = kwargs.pop("env_extras", None) or {} - env_extras = dict(env_extras) - env_extras.setdefault("FAKE_GH_SCENARIO", scenario) - if text is not None: - env_extras["FAKE_GH_TEXT"] = text - defaults = dict( - gh_binary_path=FAKE_GH, - timeout_s=5.0, - env_extras=env_extras, - ) - defaults.update(kwargs) - return CopilotCLIClient(**defaults) - - -def _make_request(**kwargs): - from geny_executor.llm_client.types import APIRequest - base = dict( - model="default", - messages=[{"role": "user", "content": "hi"}], - system="be brief.", - stream=False, - ) - base.update(kwargs) - return APIRequest(**base) - - -# --------------------------------------------------------------------------- -# Registry + capability shape -# --------------------------------------------------------------------------- - - -def test_registry_has_copilot_cli() -> None: - assert "copilot_cli" in ClientRegistry.available() - cls = ClientRegistry.get("copilot_cli") - assert cls is CopilotCLIClient - - -def test_capabilities_shape() -> None: - caps = CopilotCLIClient.capabilities - assert caps.is_subprocess is True - assert caps.supports_streaming is False - assert caps.streaming_granularity == "none" - assert caps.supports_tools is False - assert caps.supports_thinking is False - assert caps.supports_structured_output is False - assert caps.supports_token_usage is False - assert caps.supports_cost_usage is False - assert caps.requires_workspace is False - for f in ("tools", "tool_choice", "thinking_enabled", "stop_sequences", "top_k", "temperature", "top_p", "max_tokens", "response_format", "session_hint"): - assert f in caps.drops, f - - -def test_provider_attr() -> None: - c = _client() - assert c.provider == "copilot_cli" - - -# --------------------------------------------------------------------------- -# Prompt composition -# --------------------------------------------------------------------------- - - -def test_compose_copilot_prompt_single_user_message() -> None: - out = compose_copilot_prompt("", [{"role": "user", "content": "hi"}]) - assert out == "## User\nhi" - - -def test_compose_copilot_prompt_with_system() -> None: - out = compose_copilot_prompt("be terse.", [{"role": "user", "content": "yo"}]) - assert out.startswith("## System\nbe terse.") - assert "## User\nyo" in out - - -def test_compose_copilot_prompt_multi_turn() -> None: - msgs = [ - {"role": "user", "content": "first"}, - {"role": "assistant", "content": "ack"}, - {"role": "user", "content": "second"}, - ] - out = compose_copilot_prompt("", msgs) - assert "## User\nfirst" in out - assert "## Assistant\nack" in out - assert out.endswith("## User\nsecond") - - -def test_compose_copilot_prompt_tool_result_in_content() -> None: - msgs = [ - {"role": "user", "content": [ - {"type": "text", "text": "look at this"}, - {"type": "tool_result", "content": "OK\n"}, - ]}, - ] - out = compose_copilot_prompt("", msgs) - assert "look at this" in out - assert "[tool_result]\nOK" in out - - -def test_compose_copilot_prompt_system_block_list() -> None: - sys_blocks = [ - {"type": "text", "text": "rule 1"}, - {"type": "text", "text": "rule 2"}, - {"type": "image"}, # ignored - ] - out = compose_copilot_prompt(sys_blocks, [{"role": "user", "content": "ok"}]) - assert "rule 1\nrule 2" in out - - -# --------------------------------------------------------------------------- -# argv builder -# --------------------------------------------------------------------------- - - -def test_copilot_argv_minimal() -> None: - assert copilot_argv() == ["copilot"] - - -def test_copilot_argv_allow_tools() -> None: - argv = copilot_argv(allow_tools=["shell(git)", "fs(read)"]) - assert argv == ["copilot", "--allow-tool", "shell(git)", "--allow-tool", "fs(read)"] - - -def test_copilot_argv_extra_args() -> None: - argv = copilot_argv(extra_args=["--verbose"]) - assert argv[-1] == "--verbose" - - -# --------------------------------------------------------------------------- -# parse_plain_text_to_response -# --------------------------------------------------------------------------- - - -def test_parse_plain_text_to_response_simple() -> None: - resp = parse_plain_text_to_response("hello world\n", model="default") - assert resp.text == "hello world" - assert resp.stop_reason == "end_turn" - assert resp.usage.input_tokens == 0 - assert resp.usage.output_tokens == 0 - - -# --------------------------------------------------------------------------- -# End-to-end via fake gh -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_send_oneshot_ok() -> None: - c = _client(text="Greetings!") - resp = await c._send(_make_request()) - assert resp.text == "Greetings!" - assert resp.stop_reason == "end_turn" - - -@pytest.mark.asyncio -async def test_send_oneshot_auth_failure() -> None: - c = _client(scenario="auth_fail") - with pytest.raises(APIError) as ei: - await c._send(_make_request()) - assert ei.value.category is ErrorCategory.CLI_AUTH_FAILED - - -@pytest.mark.asyncio -async def test_send_oneshot_not_installed() -> None: - c = _client(scenario="not_installed") - with pytest.raises(APIError) as ei: - await c._send(_make_request()) - assert ei.value.category is ErrorCategory.CLI_NOT_FOUND - - -@pytest.mark.asyncio -async def test_send_oneshot_permission_failure() -> None: - c = _client(scenario="permission_fail") - with pytest.raises(APIError) as ei: - await c._send(_make_request()) - assert ei.value.category is ErrorCategory.CLI_PERMISSION_DENIED - - -@pytest.mark.asyncio -async def test_send_oneshot_crash() -> None: - c = _client(scenario="crash") - with pytest.raises(APIError) as ei: - await c._send(_make_request()) - assert ei.value.category is ErrorCategory.CLI_PROTOCOL_ERROR - - -@pytest.mark.asyncio -async def test_send_oneshot_timeout() -> None: - c = _client(scenario="hang") - c._timeout_s = 0.4 - with pytest.raises(APIError) as ei: - await c._send(_make_request()) - assert ei.value.category is ErrorCategory.CLI_TIMEOUT - - -@pytest.mark.asyncio -async def test_argv_carries_allow_tools_and_prompt() -> None: - """fake_gh.echo_argv strips the leading 'copilot' subcommand before - echoing; we only assert on the trailing flags.""" - c = _client(scenario="echo_argv", allow_tools=["shell(git)"]) - resp = await c._send(_make_request(system="rules", messages=[{"role": "user", "content": "task"}])) - argv = json.loads(resp.text) - assert "--allow-tool" in argv and "shell(git)" in argv - assert "-p" in argv - prompt = argv[argv.index("-p") + 1] - assert "## System\nrules" in prompt - assert "## User\ntask" in prompt - - -@pytest.mark.asyncio -async def test_missing_binary_raises_cli_not_found() -> None: - c = CopilotCLIClient(gh_binary_path="/totally/missing/gh", timeout_s=2.0) - with pytest.raises(APIError) as ei: - await c._send(_make_request()) - assert ei.value.category is ErrorCategory.CLI_NOT_FOUND - - -# --------------------------------------------------------------------------- -# Streaming fallback (BaseClient default → one message_complete event) -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_create_message_stream_falls_back_to_message_complete() -> None: - c = _client(text="streamed") - events = [] - async for evt in c.create_message_stream( - model_config=ModelConfig(model="default"), - messages=[{"role": "user", "content": "go"}], - ): - events.append(evt) - # Default fallback emits a single message_complete with the response - assert any(e.get("type") == "message_complete" for e in events) - - -# --------------------------------------------------------------------------- -# CredentialBundle mapping (already added in Phase B2 for the copilot branch) -# --------------------------------------------------------------------------- - - -def test_pipeline_credentials_kwargs_mapping() -> None: - from geny_executor.core.pipeline import _creds_to_client_kwargs - from geny_executor.llm_client.credentials import ProviderCredentials - - creds = ProviderCredentials( - binary_path=FAKE_GH, - extras={ - "allow_tools": ("shell(git)",), - "cwd": "/tmp/wd", - "extra_args": ("--verbose",), - "timeout_s": 30.0, - }, - ) - kwargs = _creds_to_client_kwargs("copilot_cli", creds) - assert kwargs["gh_binary_path"] == FAKE_GH - assert kwargs["allow_tools"] == ("shell(git)",) - assert kwargs["cwd"] == "/tmp/wd" - assert kwargs["timeout_s"] == 30.0 diff --git a/tests/llm_client/unit/test_translators_cli_claude_code.py b/tests/llm_client/unit/test_translators_cli_claude_code.py index 8472010..ca98eb3 100644 --- a/tests/llm_client/unit/test_translators_cli_claude_code.py +++ b/tests/llm_client/unit/test_translators_cli_claude_code.py @@ -55,7 +55,11 @@ def _req(**kwargs) -> APIRequest: return APIRequest(**base) -def test_argv_non_stream_uses_json_output() -> None: +def test_argv_non_stream_uses_json_output(monkeypatch) -> None: + # ``--bare`` is auto-stripped on the OAuth path (no ANTHROPIC_API_KEY + # in env). Pin the env so the test exercises the API-key path + # where ``--bare`` is expected. + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-test") argv = claude_code_argv(_req()) assert "--print" in argv assert "--output-format" in argv @@ -64,12 +68,27 @@ def test_argv_non_stream_uses_json_output() -> None: assert "--bare" in argv -def test_argv_stream_uses_stream_json_io() -> None: +def test_argv_stream_uses_stream_json_io_with_verbose() -> None: + # ``--verbose`` is required by Claude Code CLI ≥ 2.1.x whenever + # ``--print`` is combined with ``--output-format=stream-json``; + # the argv builder emits it automatically alongside the stream-json + # switch so hosts don't have to thread an opt-in flag. argv = claude_code_argv(_req(stream=True)) assert "--input-format" in argv assert "--output-format" in argv assert "stream-json" in argv assert "--include-partial-messages" in argv + assert "--verbose" in argv + + +def test_argv_bare_stripped_on_oauth_path(monkeypatch) -> None: + """When no ``ANTHROPIC_API_KEY`` is in the spawning process's env, + ``--bare`` is auto-stripped because the CLI's bare mode explicitly + disables OAuth ('OAuth and keychain are never read'), which crashes + every subscription user with 'Not logged in · Please run /login'.""" + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + argv = claude_code_argv(_req(), bare_mode=True) + assert "--bare" not in argv def test_argv_includes_model_and_system_prompt() -> None: @@ -149,33 +168,33 @@ def test_argv_request_mcp_config_overrides_kwarg() -> None: assert json.loads(blob) == per_request # per-request wins -def test_argv_host_mcp_disables_cli_builtins_and_strict() -> None: - """When the host registers MCP servers, the CLI's built-in tool - palette is disabled (``--tools ""``) so the LLM only ever sees - MCP-advertised tools. ``--strict-mcp-config`` ignores any other - MCP configuration sources so the per-session bridge is the sole - surface. Together these eliminate the hallucination path where - the LLM tries to use ``Bash``/``ToolSearch``/etc. that the host - has no executor for.""" +def test_argv_host_mcp_emits_strict_and_keeps_builtins() -> None: + """When the host registers MCP servers we emit + ``--strict-mcp-config`` so the per-session bridge is the only MCP + surface (no user-level or project-level MCP servers leak in). The + CLI's *built-in* tool palette (``Bash`` / ``Read`` / ``Write`` / + ``Edit`` / …) stays available alongside the MCP surface — most + hosts (e.g. Geny's Sub-Worker) want both: file/shell built-ins for + real work, MCP for host-delegated tools. + + Earlier executor versions auto-emitted ``--tools ""`` here to + disable the built-in palette; 2.0.6 dropped that default. Hosts + that want the old MCP-only behaviour can pass + ``extra_args=("--tools", "")`` explicitly.""" cfg = {"mcpServers": {"geny": {"type": "stdio", "command": "py"}}} argv = claude_code_argv(_req(mcp_config=cfg)) - # Disable built-ins. - idx = argv.index("--tools") - assert argv[idx + 1] == "" - # Strict mode. + assert "--tools" not in argv assert "--strict-mcp-config" in argv -def test_argv_host_mcp_with_explicit_allow_tools_keeps_builtins() -> None: - """``--allowedTools`` is the legacy whitelist of CLI built-ins. - If a caller explicitly supplies one alongside an MCP config they - want a mixed surface (custom MCP tools + a curated subset of CLI - built-ins). Don't override their choice.""" +def test_argv_host_mcp_with_explicit_allow_tools_emits_allowedtools() -> None: + """``--allowedTools`` is the permission-pattern allowlist for CLI + built-ins (e.g. ``Bash(git *)``). Pass it through verbatim when + the caller supplies one.""" cfg = {"mcpServers": {"geny": {"type": "stdio", "command": "py"}}} argv = claude_code_argv(_req(mcp_config=cfg), allow_tools=["Read"]) - # No --tools "" disabler — caller picked allowedTools explicitly. - assert "--tools" not in argv assert "--allowedTools" in argv + assert "--tools" not in argv def test_argv_no_mcp_no_tools_flag() -> None: @@ -430,7 +449,16 @@ def test_parse_json_output_text_only() -> None: assert resp.usage.duration_ms == 800 -def test_parse_json_output_tool_use_round_trip() -> None: +def test_parse_json_output_drops_tool_use_blocks() -> None: + """``tool_use`` blocks in the CLI's json output are intentionally + dropped from the assembled :class:`APIResponse` because the CLI + already dispatched them internally. Host pipelines should see + only the final assistant text — see ``finalize``'s docstring for + the full rationale. The stop_reason is preserved verbatim so + callers can still distinguish ``end_turn`` from ``tool_use`` for + telemetry / retry decisions; ``response.tool_calls`` (the actual + block list, which is what Stage 9 reads to populate + ``state.pending_tool_calls``) is empty so Stage 10 no-ops.""" blob = json.dumps({ "type": "result", "content": [ @@ -441,11 +469,9 @@ def test_parse_json_output_tool_use_round_trip() -> None: "usage": {"input_tokens": 5, "output_tokens": 0}, }).encode("utf-8") resp = parse_json_output_to_response(blob, model="m") - assert resp.has_tool_calls is True - tools = resp.tool_calls - assert len(tools) == 1 - assert tools[0].tool_name == "Read" - assert tools[0].tool_input == {"path": "/x"} + assert resp.tool_calls == [] + assert resp.text == "checking..." + assert resp.stop_reason == "tool_use" def test_parse_json_output_malformed_raises() -> None: @@ -483,7 +509,13 @@ async def gen(): @pytest.mark.asyncio -async def test_assemble_tool_use_with_partial_json() -> None: +async def test_assemble_drops_tool_use_blocks() -> None: + """Tool calls observed in the CLI's stream-json output are + intentionally dropped from the assembled :class:`APIResponse` — + the CLI dispatched them internally and host pipelines (e.g. + Geny's Stage 10) must NOT re-dispatch. See ``finalize``'s + docstring for the full rationale. The stop_reason is preserved + so callers can still see the CLI ended in a tool turn.""" lines = [ b'{"type": "system", "model": "claude-sonnet-4-6"}\n', b'{"type": "assistant", "content_block": {"type": "tool_use", "id": "t1", "name": "Read"}}\n', @@ -498,10 +530,7 @@ async def gen(): yield l resp = await assemble_response_from_stream_json(gen(), model="default") - assert resp.has_tool_calls is True - tu = resp.tool_calls[0] - assert tu.tool_name == "Read" - assert tu.tool_input == {"path": "/x"} + assert resp.tool_calls == [] assert resp.stop_reason == "tool_use"