Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,38 @@ All notable changes to `geny-executor` are recorded here. The format
follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and
this project adheres to [Semantic Versioning](https://semver.org/).

## [2.0.3] — 2026-05-19

Patch release. Fixes empty assistant output (`output_len=0`) when
Claude Code (CLI) 2.x is the Stage 6 provider, and surfaces
authentication failures as ``APIError`` instead of silently
returning a "Not logged in" placeholder.

### Fixed

- ``ClaudeCodeCLIClient.create_message_stream`` /
``assemble_response_from_stream_json`` now accumulate text from the
**full-message** stream-json shape Claude Code 2.x emits by
default (``{"type":"assistant","message":{"content":[...]}}``) in
addition to the **delta** shape (``--include-partial-messages``
on). The 2.0.2 fix unblocked the streaming control flow but only
parsed delta-form text, so every session came back with
``output_len=0`` even though the CLI did real work for ~6s.
- When no credential is available, the CLI's ``assistant`` envelope
carries ``error="authentication_failed"`` with a placeholder
``"Not logged in"`` text block. The streaming path now raises
``APIError(category=ErrorCategory.CLI_AUTH_FAILED)`` so the host
surfaces the problem instead of returning the placeholder as the
assistant's reply.
- Both parser paths now share one ``StreamJsonAccumulator`` so the
streaming and non-streaming consumers never drift apart again.

### Added

- ``StreamJsonAccumulator`` exported from
``geny_executor.llm_client.translators`` for hosts that want to
pipe a custom stream-json source into the canonical response shape.

## [2.0.2] — 2026-05-19

Patch release. Fixes streaming Stage 6 calls failing with
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "geny-executor"
version = "2.0.2"
version = "2.0.3"
description = "Harness-engineered agent pipeline library with 21-stage dual-abstraction architecture, built on the Anthropic API"
readme = "README.md"
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion src/geny_executor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@
ProviderDrivenStrategy,
)

__version__ = "2.0.2"
__version__ = "2.0.3"

__all__ = [
# Core
Expand Down
151 changes: 35 additions & 116 deletions src/geny_executor/llm_client/claude_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,13 @@
)
from geny_executor.llm_client.base import BaseClient, ClientCapabilities
from geny_executor.llm_client.translators._cli import (
StreamJsonAccumulator,
assemble_response_from_stream_json,
build_stream_json_stdin,
claude_code_argv,
parse_json_output_to_response,
)
from geny_executor.llm_client.types import (
APIRequest,
APIResponse,
ContentBlock,
TokenUsage,
)
from geny_executor.llm_client.types import APIRequest, APIResponse


__all__ = ["ClaudeCodeCLIClient"]
Expand Down Expand Up @@ -272,127 +268,50 @@ async def create_message_stream(
stdin = build_stream_json_stdin(messages)

from geny_executor.llm_client._cli_runtime import parse_stream_json_line
from geny_executor.llm_client.translators._cli import (
stream_json_line_to_canonical_event,
)

# Accumulator state — mirrors ``assemble_response_from_stream_json``
# so the final message_complete envelope carries the same
# APIResponse the non-streaming path produces.
import json as _json

text_buf: List[str] = []
thinking_buf: List[str] = []
tool_uses: List[Dict[str, Any]] = []
current_tool: Optional[Dict[str, Any]] = None
final_obj: Optional[Dict[str, Any]] = None
message_id = ""
stop_reason = "end_turn"
resolved_model = model_config.model
# Shared accumulator handles both stream-json shapes:
# - delta form (``--include-partial-messages`` on, true streaming)
# - full-message form (Claude Code 2.x default — content[]
# arrives in one ``assistant`` envelope).
# Without the message-form branch, every assistant frame yielded
# zero text and the terminal APIResponse came back empty —
# exactly the symptom the user reported (``output_len=0``).
accum = StreamJsonAccumulator(model=model_config.model)

try:
async for raw in runner.stream(argv, stdin_iter=aiter_bytes(stdin)):
line_obj = parse_stream_json_line(raw)
if line_obj is None:
continue

# ── Accumulate for the terminal APIResponse ──
ltype = str(line_obj.get("type", ""))
if ltype == "system":
message_id = str(
line_obj.get("session_id")
or line_obj.get("message_id")
or message_id
)
resolved_model = str(line_obj.get("model") or resolved_model)
elif ltype == "assistant":
delta = line_obj.get("delta") or {}
dtype = str(delta.get("type", ""))
if dtype == "text_delta":
text_buf.append(str(delta.get("text", "")))
elif dtype == "thinking_delta":
thinking_buf.append(str(delta.get("text", "")))
elif dtype == "input_json_delta":
if current_tool is not None:
current_tool.setdefault("_partial_json", "")
current_tool["_partial_json"] += str(
delta.get("partial_json", "")
)
else:
cb = line_obj.get("content_block")
if isinstance(cb, dict) and cb.get("type") == "tool_use":
current_tool = {
"id": cb.get("id"),
"name": cb.get("name"),
"input": cb.get("input") or {},
}
elif ltype == "content_block_stop":
if current_tool is not None:
partial = current_tool.pop("_partial_json", "")
if partial and not current_tool.get("input"):
try:
current_tool["input"] = _json.loads(partial)
except _json.JSONDecodeError:
current_tool["input"] = {"_raw": partial}
tool_uses.append(current_tool)
current_tool = None
elif ltype == "result":
final_obj = line_obj
stop_reason = str(line_obj.get("stop_reason", stop_reason))

# ── Yield the per-line canonical event ──
# Suppress the translator's bare ``message_complete``
# (it carries no response field) — we emit the
# populated version after the loop. Everything else
# passes through unchanged.
event = stream_json_line_to_canonical_event(line_obj)
if event is None:
continue
if event.get("type") == "message_complete":
if "__malformed__" in line_obj:
continue
yield event

# ── Assemble + emit the terminal message_complete ──
blocks: List[ContentBlock] = []
if thinking_buf:
blocks.append(
ContentBlock(type="thinking", thinking_text="".join(thinking_buf))
)
if text_buf:
blocks.append(ContentBlock(type="text", text="".join(text_buf)))
for tu in tool_uses:
blocks.append(
ContentBlock(
type="tool_use",
tool_use_id=tu.get("id"),
tool_name=tu.get("name"),
tool_input=tu.get("input") or {},
# Surface CLI-side errors as APIError so the stage's
# retry/escalate path runs instead of silently producing
# an empty response.
if str(line_obj.get("type", "")) == "error":
raise APIError(
f"Claude Code CLI reported error: "
f"{line_obj.get('message') or line_obj!r}",
category=ErrorCategory.CLI_PROTOCOL_ERROR,
)
# Surface the authentication_failed annotation that the
# CLI emits on the assistant frame when no credential
# is available — without this we'd swallow the
# "Not logged in" placeholder text as the assistant's
# answer and call the session "successful".
if str(line_obj.get("error", "")) == "authentication_failed":
raise APIError(
"Claude Code CLI is not authenticated (claude --print "
"returned error=authentication_failed). Sign in via "
"Settings → LLM Backends → Claude Code (CLI).",
category=ErrorCategory.CLI_AUTH_FAILED,
)
)

usage_in: Dict[str, Any] = (final_obj or {}).get("usage", {}) or {}
usage = TokenUsage(
input_tokens=int(usage_in.get("input_tokens", 0) or 0),
output_tokens=int(usage_in.get("output_tokens", 0) or 0),
cache_creation_input_tokens=int(
usage_in.get("cache_creation_input_tokens", 0) or 0
),
cache_read_input_tokens=int(
usage_in.get("cache_read_input_tokens", 0) or 0
),
cost_usd=usage_in.get("cost_usd"),
duration_ms=(final_obj or {}).get("duration_ms"),
)
# Feed accumulator + stream canonical events to consumer.
for event in accum.feed(line_obj):
yield event

response = APIResponse(
content=blocks,
stop_reason=stop_reason,
usage=usage,
model=resolved_model,
message_id=message_id,
raw=final_obj or {},
)
yield {"type": "message_complete", "response": response}
yield {"type": "message_complete", "response": accum.finalize()}
except CLIBinaryNotFound as e:
raise APIError(str(e), category=ErrorCategory.CLI_NOT_FOUND) from e
except CLITimeout as e:
Expand Down
2 changes: 2 additions & 0 deletions src/geny_executor/llm_client/translators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
split_tool_uses,
)
from geny_executor.llm_client.translators._cli import (
StreamJsonAccumulator,
assemble_response_from_stream_json,
build_stream_json_stdin,
claude_code_argv,
Expand Down Expand Up @@ -62,5 +63,6 @@
"split_tool_results",
"split_tool_uses",
"stream_json_line_to_canonical_event",
"StreamJsonAccumulator",
"thinking_to_effort",
]
Loading
Loading