Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,48 @@ All notable changes to `geny-executor` are recorded here. The format
follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and
this project adheres to [Semantic Versioning](https://semver.org/).

## [2.0.4] — 2026-05-19

Patch release. Fixes Claude Code (CLI) sessions failing on the second
turn with::

Error: CLI '/usr/bin/claude' exited with code 1:
Error: Expected message role 'user', got 'assistant'

### Fixed

- ``build_stream_json_stdin`` now flattens canonical Anthropic-style
multi-turn message history into a **single synthetic ``type:user``
envelope** with a markdown preamble. Claude Code's
``--input-format stream-json`` strictly requires every envelope's
``message.role`` to be ``"user"``; the previous builder forwarded
the canonical role through (assistant / tool turns embedded with
their original role kept) which the CLI rejected.
- The collapsed envelope preserves enough fidelity for the LLM to
reconstruct the conversation:
* ``### User`` / ``### Assistant`` markdown headers for text
turns,
* ``[Tool call: name(input_json)]`` for assistant tool_use
blocks,
* ``[Tool result] ...`` / ``[Tool error] ...`` for user
tool_result blocks,
* thinking blocks dropped (CLI does its own ``--effort`` thinking
on the new turn).
- Single-turn fast path (one user message only) emits the canonical
envelope unchanged so simple invocations stay byte-for-byte
identical to the legacy path.

### Why

The provider-neutral output contract was already restored in 2.0.3
(StreamJsonAccumulator). The remaining asymmetry was on the
**input** side: every provider (anthropic / openai / google / vllm /
claude_code_cli / copilot_cli) must accept the same canonical
message list shape and translate internally to whatever the
underlying surface wants. The CLI's stream-json input grammar is
strictly user-only; the executor owns the translation so hosts never
see the difference.

## [2.0.3] — 2026-05-19

Patch release. Fixes empty assistant output (`output_len=0`) when
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "geny-executor"
version = "2.0.3"
version = "2.0.4"
description = "Harness-engineered agent pipeline library with 21-stage dual-abstraction architecture, built on the Anthropic API"
readme = "README.md"
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion src/geny_executor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@
ProviderDrivenStrategy,
)

__version__ = "2.0.3"
__version__ = "2.0.4"

__all__ = [
# Core
Expand Down
152 changes: 135 additions & 17 deletions src/geny_executor/llm_client/translators/_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,29 +173,147 @@ def claude_code_argv(
# ---------------------------------------------------------------------------


def build_stream_json_stdin(messages: List[Dict[str, Any]]) -> bytes:
"""Render canonical messages as Claude Code stream-json stdin.

Claude Code's ``--input-format stream-json`` expects newline-delimited
JSON envelopes of the shape::
def _render_block_for_history(block: Any) -> str:
    """Render one Anthropic-style content block as readable text.

    Used (via ``_render_content_for_history``) by
    ``build_stream_json_stdin`` when collapsing multi-turn history into
    a single synthetic user envelope. Preserves enough fidelity (tool
    name + input, tool result text) for the LLM to reconstruct the
    conversation, while dropping shapes the CLI cannot ingest
    (thinking blocks, images→placeholder).

    Args:
        block: A plain string, a content-block dict carrying a ``type``
            key, or any other value.

    Returns:
        The text rendering of the block:

        * strings are returned unchanged; non-dict values go through
          ``str()``;
        * ``text`` → the block's text;
        * ``thinking`` → ``""`` (dropped);
        * ``tool_use`` → ``[Tool call: name(input_json)]``;
        * ``tool_result`` → ``[Tool result] body`` or
          ``[Tool error] body`` when ``is_error`` is truthy;
        * ``image`` → ``[image attachment]``;
        * any other block type → ``""``.
    """
    if isinstance(block, str):
        return block
    if not isinstance(block, dict):
        return str(block)

    btype = str(block.get("type", ""))
    if btype == "text":
        return str(block.get("text", ""))
    if btype == "thinking":
        # Thinking traces from a prior provider don't replay on the
        # CLI — drop them. The CLI does its own ``--effort`` thinking
        # on the new turn.
        return ""
    if btype == "tool_use":
        name = block.get("name", "tool")
        try:
            input_json = json.dumps(block.get("input") or {}, ensure_ascii=False)
        except (TypeError, ValueError):
            # Input holds something JSON cannot encode — fall back to
            # its plain string form rather than failing the whole turn.
            input_json = str(block.get("input"))
        return f"[Tool call: {name}({input_json})]"
    if btype == "tool_result":
        body = block.get("content")
        if isinstance(body, list):
            # Skip nested blocks that render to nothing (thinking,
            # unknown types) so they don't leave blank lines behind —
            # same filtering ``_render_content_for_history`` applies.
            pieces = (_render_block_for_history(b) for b in body)
            body = "\n".join(p for p in pieces if p).strip()
        elif isinstance(body, dict):
            # A single un-wrapped block: render it like a list element
            # instead of leaking a Python ``repr`` into the prompt.
            body = _render_block_for_history(body)
        elif body is None:
            body = ""
        tag = "Tool error" if block.get("is_error") else "Tool result"
        return f"[{tag}] {body}"
    if btype == "image":
        return "[image attachment]"
    return ""


def _render_content_for_history(content: Any) -> str:
    """Flatten a canonical ``content`` field into one display-ready
    text run for history-preamble use.

    Args:
        content: The message ``content`` value — a string, a list of
            content blocks, a single content-block dict, ``None``, or
            any other value.

    Returns:
        * ``None`` → ``""`` (so a missing body never shows up as the
          literal text ``"None"`` in the prompt);
        * a string → returned unchanged;
        * a list → each element rendered with
          ``_render_block_for_history``, empty renderings skipped, the
          rest joined with newlines and stripped;
        * a dict → rendered as a single block and stripped;
        * anything else → ``str(content)``.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        # A lone block that was not wrapped in a list — treat it the
        # same as a one-element list.
        return _render_block_for_history(content).strip()
    if isinstance(content, list):
        rendered = (_render_block_for_history(b) for b in content)
        return "\n".join(s for s in rendered if s).strip()
    return str(content)

{"type": "user", "message": {"role": "user", "content": [...]}}

Tool-results / assistant turns from prior multi-turn history flow as
additional ``user``-typed entries with their original role embedded —
the CLI reconstructs the conversation from the envelopes.
def build_stream_json_stdin(messages: List[Dict[str, Any]]) -> bytes:
    """Render canonical Anthropic-style messages as Claude Code
    stream-json stdin — **always as a single ``type:user`` envelope**.

    Claude Code CLI's ``--input-format stream-json`` strictly requires
    each envelope's ``message.role`` to be ``"user"``. Forwarding the
    canonical role through (assistant / tool turns embedded in
    ``type:user`` envelopes with their original role kept) is rejected
    by the CLI with::

        Error: Expected message role 'user', got 'assistant'

    For multi-turn pipelines the whole history is therefore collapsed
    into a single synthetic user envelope:

    - Every turn is flattened to text with
      ``_render_content_for_history``; turns that render to nothing
      are skipped.
    - If the last remaining turn is a user turn, it is the "current
      input" and is emitted without a per-turn header under
      ``## Current input``.
    - All other turns are rendered under ``## Conversation so far``
      with ``### User`` / ``### Assistant`` / ``### Tool result``
      headers (any other role uses its capitalized name).
    - If the history does not end with a user turn, every turn keeps
      its header and no ``## Current input`` section is emitted, so an
      assistant or tool turn is never mislabelled as the user's input.

    The single-turn fast path (exactly one message whose role is
    ``"user"``) emits the canonical envelope with the content passed
    through untouched.

    Args:
        messages: Canonical message dicts with ``role`` and ``content``
            keys. A missing ``role`` is treated as ``"user"`` when
            flattening.

    Returns:
        One newline-terminated, UTF-8 encoded JSON envelope, or ``b""``
        when ``messages`` is empty.
    """
    if not messages:
        return b""

    # Single-turn fast path — preserve the canonical envelope shape.
    if len(messages) == 1 and str(messages[0].get("role", "")) == "user":
        envelope = {
            "type": "user",
            "message": {"role": "user", "content": messages[0].get("content", "")},
        }
        return (json.dumps(envelope, ensure_ascii=False) + "\n").encode("utf-8")

    # Multi-turn: flatten into a single synthetic user message. The LLM
    # reconstructs the conversation from the markdown structure.
    turns = []
    for m in messages:
        text = _render_content_for_history(m.get("content", ""))
        if text:
            turns.append((str(m.get("role", "user")), text))

    # Only a user turn that actually closes the history is the current
    # input. Picking "the last part" blindly would label a trailing
    # assistant/tool turn as the user's question.
    current_input = ""
    if turns and turns[-1][0] == "user":
        current_input = turns.pop()[1]

    history: List[str] = []
    for role, text in turns:
        header = "Tool result" if role == "tool" else role.capitalize()
        history.append(f"### {header}\n{text}")

    if history and current_input:
        flat = (
            "## Conversation so far\n\n"
            + "\n\n".join(history)
            + "\n\n## Current input\n"
            + current_input
        )
    elif history:
        flat = "## Conversation so far\n\n" + "\n\n".join(history)
    else:
        flat = current_input

    envelope = {
        "type": "user",
        "message": {"role": "user", "content": flat.strip()},
    }
    return (json.dumps(envelope, ensure_ascii=False) + "\n").encode("utf-8")


# ---------------------------------------------------------------------------
Expand Down
88 changes: 84 additions & 4 deletions tests/llm_client/unit/test_translators_cli_claude_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,17 +189,97 @@ def test_stdin_envelope_one_user_message() -> None:
]


def test_stdin_envelope_multi_turn() -> None:
def test_stdin_envelope_multi_turn_always_user_role() -> None:
    """Regression: every envelope's ``message.role`` MUST be ``"user"``.

    Claude Code CLI 2.x rejects ``type:user`` envelopes that carry an
    embedded ``message.role: assistant`` with::

        Error: Expected message role 'user', got 'assistant'

    The pre-fix builder forwarded canonical roles through and broke
    every multi-turn iteration of an env that pinned ``claude_code_cli``
    as the Stage 6 provider.
    """
    out = build_stream_json_stdin([
        {"role": "user", "content": "q1"},
        {"role": "assistant", "content": "a1"},
        {"role": "user", "content": [{"type": "tool_result", "content": "ok"}]},
    ])
    envs = [json.loads(line) for line in out.strip().split(b"\n")]
    # ONE synthetic envelope — multi-turn collapses to a single user
    # message; the CLI reconstructs the conversation from its content.
    # (No per-turn envelopes exist any more, so nothing beyond index 0
    # may be inspected.)
    assert len(envs) == 1
    assert envs[0]["type"] == "user"
    assert envs[0]["message"]["role"] == "user"


def test_stdin_envelope_multi_turn_preserves_history_in_content() -> None:
    """A collapsed multi-turn envelope keeps the prior conversation
    readable: plain text turns, tool calls (name + input) and tool
    results must all be present in the flattened content, with the
    final user turn placed last under ``## Current input``."""
    read_call = {
        "type": "tool_use",
        "id": "tu_1",
        "name": "Read",
        "input": {"path": "/repo/README.md"},
    }
    read_result = {"type": "tool_result", "tool_use_id": "tu_1", "content": "# Hello"}
    conversation = [
        {"role": "user", "content": "find the README"},
        {
            "role": "assistant",
            "content": [{"type": "text", "text": "Let me check."}, read_call],
        },
        {"role": "user", "content": [read_result]},
        {"role": "user", "content": "summarize it"},
    ]

    payload = json.loads(build_stream_json_stdin(conversation).strip())
    flattened = payload["message"]["content"]

    # History section: earlier turns, the tool call and its result.
    for expected in (
        "## Conversation so far",
        "find the README",
        "[Tool call: Read({",
        "/repo/README.md",
        "[Tool result] # Hello",
    ):
        assert expected in flattened

    # The last user turn ("summarize it") is the current input: it sits
    # under "## Current input" with no per-turn header after it.
    assert "## Current input" in flattened
    assert flattened.rstrip().endswith("summarize it")


def test_stdin_envelope_drops_thinking_and_handles_tool_errors() -> None:
    """Thinking blocks are omitted from the flattened history, and a
    ``tool_result`` flagged ``is_error: True`` is rendered with the
    "Tool error" tag so the failure is visible to the LLM."""
    assistant_turn = {
        "role": "assistant",
        "content": [
            {"type": "thinking", "thinking": "secret reasoning"},
            {"type": "text", "text": "trying X..."},
            {"type": "tool_use", "id": "t1", "name": "Bash", "input": {"cmd": "x"}},
        ],
    }
    failed_result_turn = {
        "role": "user",
        "content": [
            {
                "type": "tool_result",
                "tool_use_id": "t1",
                "is_error": True,
                "content": "command failed",
            },
        ],
    }
    raw = build_stream_json_stdin(
        [{"role": "user", "content": "do X"}, assistant_turn, failed_result_turn]
    )
    flattened = json.loads(raw.strip())["message"]["content"]

    assert "secret reasoning" not in flattened  # thinking dropped
    assert "trying X..." in flattened
    assert "[Tool error] command failed" in flattened


def test_stdin_empty_messages_returns_empty_bytes() -> None:
Expand Down
Loading