From 61da4adf64c71c17557ba9c8e6102c1ebac4e54b Mon Sep 17 00:00:00 2001 From: CocoRoF Date: Tue, 19 May 2026 17:12:45 +0900 Subject: [PATCH] fix(claude_code): flatten multi-turn history into a single user envelope (2.0.4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Claude Code (CLI) Stage-6 sessions failed at the second turn with:: Error: CLI '/usr/bin/claude' exited with code 1: Error: Expected message role 'user', got 'assistant' Geny's s06_api stage accumulates an Anthropic-style multi-turn message list across loop iterations: ``[user, assistant_w_tool_use, user_w_tool_result, ...]``. The previous ``build_stream_json_stdin`` enveloped each canonical message as ``{"type":"user","message":{"role": <role>, ...}}`` and wrote one line per message. Claude Code 2.x's stream-json input parser strictly requires every envelope's ``message.role`` to be ``"user"`` — embedded ``assistant`` / ``tool`` roles are rejected. ### Fix ``build_stream_json_stdin`` now collapses the entire canonical history into a **single synthetic ``type:user`` envelope** whose ``content`` is a markdown preamble + the current input: ## Conversation so far ### User find the README ### Assistant Let me check. [Tool call: Read({"path": "/repo/README.md"})] ### Tool result [Tool result] # Hello ## Current input summarize it The LLM reconstructs the conversation from the structure; the CLI sees one cohesive single-turn prompt. Single-turn callers (one user message only) skip the preamble and emit the canonical envelope unchanged so simple invocations stay byte-for-byte identical to the legacy path. Thinking blocks from a prior provider are dropped (CLI does its own ``--effort`` thinking on the new turn). Tool errors render under a separate ``[Tool error]`` tag so the LLM sees the failure semantics. ### Why Provider-neutral OUTPUT contract was already restored in 2.0.3 (``StreamJsonAccumulator``). 
The remaining asymmetry was on the INPUT side: every provider — anthropic / openai / google / vllm / claude_code_cli / copilot_cli — must accept the same canonical message-list shape and translate internally to whatever the underlying surface wants. The CLI's stream-json input grammar is strict user-only; the executor owns the translation so hosts never see the difference. ### Tests - ``test_stdin_envelope_multi_turn_always_user_role`` — collapses user/assistant/user-with-tool_result into one ``message.role:user`` envelope. - ``test_stdin_envelope_multi_turn_preserves_history_in_content`` — preamble carries text, tool calls (name + input json), and tool results under markdown headers; the final user turn is the "Current input" block. - ``test_stdin_envelope_drops_thinking_and_handles_tool_errors`` — thinking blocks dropped; ``is_error: True`` tool_results tagged ``[Tool error]``. - Single-turn fast path test unchanged (back-compat verified). Full ``tests/llm_client/`` 189/189 pass. --- CHANGELOG.md | 42 +++++ pyproject.toml | 2 +- src/geny_executor/__init__.py | 2 +- .../llm_client/translators/_cli.py | 152 ++++++++++++++++-- .../unit/test_translators_cli_claude_code.py | 88 +++++++++- 5 files changed, 263 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2c1fa1..36f4a21 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,48 @@ All notable changes to `geny-executor` are recorded here. The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and this project adheres to [Semantic Versioning](https://semver.org/). +## [2.0.4] — 2026-05-19 + +Patch release. 
Fixes Claude Code (CLI) sessions failing on the second +turn with:: + + Error: CLI '/usr/bin/claude' exited with code 1: + Error: Expected message role 'user', got 'assistant' + +### Fixed + +- ``build_stream_json_stdin`` now flattens canonical Anthropic-style + multi-turn message history into a **single synthetic ``type:user`` + envelope** with a markdown preamble. Claude Code's + ``--input-format stream-json`` strictly requires every envelope's + ``message.role`` to be ``"user"``; the previous builder forwarded + the canonical role through (assistant / tool turns embedded with + their original role kept) which the CLI rejected. +- The collapsed envelope preserves enough fidelity for the LLM to + reconstruct the conversation: + * ``### User`` / ``### Assistant`` markdown headers for text + turns, + * ``[Tool call: name(input_json)]`` for assistant tool_use + blocks, + * ``[Tool result] ...`` / ``[Tool error] ...`` for user + tool_result blocks, + * thinking blocks dropped (CLI does its own ``--effort`` thinking + on the new turn). +- Single-turn fast path (one user message only) emits the canonical + envelope unchanged so simple invocations stay byte-for-byte + identical to the legacy path. + +### Why + +Provider-neutral output contract was already restored in 2.0.3 +(StreamJsonAccumulator). The remaining asymmetry was on the +**input** side: every provider (anthropic / openai / google / vllm / +claude_code_cli / copilot_cli) must accept the same canonical +message list shape and translate internally to whatever the +underlying surface wants. The CLI's stream-json input grammar is +strict user-only; the executor owns the translation so hosts never +see the difference. + ## [2.0.3] — 2026-05-19 Patch release. 
Fixes empty assistant output (`output_len=0`) when diff --git a/pyproject.toml b/pyproject.toml index 09fa21f..585fdc3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "geny-executor" -version = "2.0.3" +version = "2.0.4" description = "Harness-engineered agent pipeline library with 21-stage dual-abstraction architecture, built on the Anthropic API" readme = "README.md" license = "MIT" diff --git a/src/geny_executor/__init__.py b/src/geny_executor/__init__.py index d2ca80e..bf42449 100644 --- a/src/geny_executor/__init__.py +++ b/src/geny_executor/__init__.py @@ -95,7 +95,7 @@ ProviderDrivenStrategy, ) -__version__ = "2.0.3" +__version__ = "2.0.4" __all__ = [ # Core diff --git a/src/geny_executor/llm_client/translators/_cli.py b/src/geny_executor/llm_client/translators/_cli.py index 1c5702c..e99565d 100644 --- a/src/geny_executor/llm_client/translators/_cli.py +++ b/src/geny_executor/llm_client/translators/_cli.py @@ -173,29 +173,147 @@ def claude_code_argv( # --------------------------------------------------------------------------- -def build_stream_json_stdin(messages: List[Dict[str, Any]]) -> bytes: - """Render canonical messages as Claude Code stream-json stdin. - - Claude Code's ``--input-format stream-json`` expects newline-delimited - JSON envelopes of the shape:: +def _render_block_for_history(block: Any) -> str: + """Render one Anthropic-style content block as readable text. + + Used by ``build_stream_json_stdin`` when collapsing multi-turn + history into a single synthetic user envelope. 
Preserves enough + fidelity (tool name + input, tool result text) for the LLM to + reconstruct the conversation, while dropping shapes the CLI + cannot ingest (thinking blocks, images→placeholder).""" + if isinstance(block, str): + return block + if not isinstance(block, dict): + return str(block) + btype = str(block.get("type", "")) + if btype == "text": + return str(block.get("text", "")) + if btype == "thinking": + # Thinking traces from a prior provider don't replay on the + # CLI — drop them. The CLI does its own ``--effort`` thinking + # on the new turn. + return "" + if btype == "tool_use": + name = block.get("name", "tool") + try: + input_json = json.dumps( + block.get("input") or {}, ensure_ascii=False, + ) + except (TypeError, ValueError): + input_json = str(block.get("input")) + return f"[Tool call: {name}({input_json})]" + if btype == "tool_result": + body = block.get("content") + if isinstance(body, list): + body = "\n".join( + _render_block_for_history(b) for b in body + ).strip() + elif body is None: + body = "" + is_error = bool(block.get("is_error")) + tag = "Tool error" if is_error else "Tool result" + return f"[{tag}] {body}" + if btype == "image": + return "[image attachment]" + return "" + + +def _render_content_for_history(content: Any) -> str: + """Flatten a canonical ``content`` field (string or block list) + into one display-ready text run for history-preamble use.""" + if isinstance(content, str): + return content + if isinstance(content, list): + rendered = [ + _render_block_for_history(b) for b in content + ] + return "\n".join(s for s in rendered if s).strip() + return str(content) - {"type": "user", "message": {"role": "user", "content": [...]}} - Tool-results / assistant turns from prior multi-turn history flow as - additional ``user``-typed entries with their original role embedded — - the CLI reconstructs the conversation from the envelopes. 
+def build_stream_json_stdin(messages: List[Dict[str, Any]]) -> bytes: + """Render canonical Anthropic-style messages as Claude Code + stream-json stdin — **always as a single ``type:user`` envelope**. + + Claude Code CLI's ``--input-format stream-json`` strictly requires + each envelope's ``message.role`` to be ``"user"``. The previous + implementation forwarded the canonical role through (assistant / + tool turns embedded in ``type:user`` envelopes with their original + role kept) which the CLI rejects with:: + + Error: Expected message role 'user', got 'assistant' + + For multi-turn pipelines (Geny's s06_api accumulates conversation + history across loop iterations) we collapse the whole history into + a single synthetic user envelope: + + - The latest user message becomes the bulk of the prompt. + - All prior turns are rendered as a markdown preamble + (``### User`` / ``### Assistant`` / tool calls + results). + - The CLI receives one cohesive single-turn prompt with all + relevant context — same input contract whether the host is + running Geny's iterative loop or sending a one-shot query. + + The single-turn fast-path (one user message only) emits the + canonical envelope unchanged so simple invocations stay byte-for- + byte identical to the legacy path. """ - out_lines: List[str] = [] - for m in messages: - role = str(m.get("role", "user")) - content = m.get("content", "") + if not messages: + return b"" + + # Single-turn fast path — preserve the canonical envelope shape. + if len(messages) == 1 and str(messages[0].get("role", "")) == "user": envelope = { "type": "user", - "message": {"role": role, "content": content}, + "message": {"role": "user", "content": messages[0].get("content", "")}, } - out_lines.append(json.dumps(envelope)) - blob = "\n".join(out_lines) - return (blob + "\n").encode("utf-8") if blob else b"" + return (json.dumps(envelope, ensure_ascii=False) + "\n").encode("utf-8") + + # Multi-turn: flatten into a single synthetic user message. 
The + # CLI's ``--bare`` mode treats this as a regular prompt; the LLM + # reconstructs the conversation from the markdown structure. + parts: List[str] = [] + last_user_idx = -1 + for i, m in enumerate(messages): + if str(m.get("role", "")) == "user": + last_user_idx = i + + for i, m in enumerate(messages): + role = str(m.get("role", "user")) + text = _render_content_for_history(m.get("content", "")) + if not text and role != "assistant": + continue + if role == "user": + # The final user turn is the "current input" — render it + # without a header so it reads as the actual question. + if i == last_user_idx: + parts.append(text) + else: + parts.append(f"### User\n{text}") + elif role == "assistant": + if text: + parts.append(f"### Assistant\n{text}") + elif role == "tool": + parts.append(f"### Tool result\n{text}") + else: + parts.append(f"### {role.capitalize()}\n{text}") + + preamble = "" + current_input = parts[-1] if parts else "" + if len(parts) > 1: + preamble_parts = parts[:-1] + preamble = ( + "## Conversation so far\n\n" + + "\n\n".join(preamble_parts) + + "\n\n## Current input\n" + ) + + flat = (preamble + current_input).strip() + envelope = { + "type": "user", + "message": {"role": "user", "content": flat}, + } + return (json.dumps(envelope, ensure_ascii=False) + "\n").encode("utf-8") # --------------------------------------------------------------------------- diff --git a/tests/llm_client/unit/test_translators_cli_claude_code.py b/tests/llm_client/unit/test_translators_cli_claude_code.py index a8426a6..08f1e3e 100644 --- a/tests/llm_client/unit/test_translators_cli_claude_code.py +++ b/tests/llm_client/unit/test_translators_cli_claude_code.py @@ -189,17 +189,97 @@ def test_stdin_envelope_one_user_message() -> None: ] -def test_stdin_envelope_multi_turn() -> None: +def test_stdin_envelope_multi_turn_always_user_role() -> None: + """Regression: every envelope's ``message.role`` MUST be ``"user"``. 
+ + Claude Code CLI 2.x rejects ``type:user`` envelopes that carry an + embedded ``message.role: assistant`` with:: + + Error: Expected message role 'user', got 'assistant' + + The pre-fix builder forwarded canonical roles through and broke + every multi-turn iteration of an env that pinned ``claude_code_cli`` + as the Stage 6 provider. + """ out = build_stream_json_stdin([ {"role": "user", "content": "q1"}, {"role": "assistant", "content": "a1"}, {"role": "user", "content": [{"type": "tool_result", "content": "ok"}]}, ]) envs = [json.loads(l) for l in out.strip().split(b"\n")] + # ONE synthetic envelope — multi-turn collapses to a single user + # message; the CLI reconstructs the conversation from its content. + assert len(envs) == 1 + assert envs[0]["type"] == "user" assert envs[0]["message"]["role"] == "user" - assert envs[1]["message"]["role"] == "assistant" - assert envs[2]["message"]["role"] == "user" - assert envs[2]["message"]["content"][0]["type"] == "tool_result" + + +def test_stdin_envelope_multi_turn_preserves_history_in_content() -> None: + """The collapsed envelope must carry enough fidelity that the LLM + can reconstruct the prior conversation: text turns, tool calls + (name + input), and tool results all show up in the flattened + content under markdown headers.""" + out = build_stream_json_stdin([ + {"role": "user", "content": "find the README"}, + { + "role": "assistant", + "content": [ + {"type": "text", "text": "Let me check."}, + { + "type": "tool_use", + "id": "tu_1", + "name": "Read", + "input": {"path": "/repo/README.md"}, + }, + ], + }, + { + "role": "user", + "content": [ + {"type": "tool_result", "tool_use_id": "tu_1", "content": "# Hello"}, + ], + }, + {"role": "user", "content": "summarize it"}, + ]) + env = json.loads(out.strip()) + text = env["message"]["content"] + assert "## Conversation so far" in text + assert "find the README" in text + assert "[Tool call: Read({" in text + assert "/repo/README.md" in text + assert "[Tool result] 
# Hello" in text + # The final user turn ("summarize it") is the "current input" and + # appears under "## Current input" without the per-turn header. + assert "## Current input" in text + assert text.rstrip().endswith("summarize it") + + +def test_stdin_envelope_drops_thinking_and_handles_tool_errors() -> None: + """Thinking blocks from a prior provider don't replay on the CLI + — drop them. ``is_error: True`` tool_results render under a + "Tool error" tag so the LLM sees the failure semantics.""" + out = build_stream_json_stdin([ + {"role": "user", "content": "do X"}, + { + "role": "assistant", + "content": [ + {"type": "thinking", "thinking": "secret reasoning"}, + {"type": "text", "text": "trying X..."}, + {"type": "tool_use", "id": "t1", "name": "Bash", "input": {"cmd": "x"}}, + ], + }, + { + "role": "user", + "content": [ + {"type": "tool_result", "tool_use_id": "t1", "is_error": True, "content": "command failed"}, + ], + }, + ]) + env = json.loads(out.strip()) + text = env["message"]["content"] + assert "secret reasoning" not in text # thinking dropped + assert "trying X..." in text + assert "[Tool error] command failed" in text def test_stdin_empty_messages_returns_empty_bytes() -> None: