From 89c03354ef835b9cb0b56a62d7610d8f27226b51 Mon Sep 17 00:00:00 2001 From: CocoRoF Date: Sun, 17 May 2026 22:34:41 +0900 Subject: [PATCH] refactor(skills/fork): multi-provider via CredentialBundle + CHANGELOG addendum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase D4 of the LLM backend upgrade. Closes the executor side of the multi-provider sub-agent / fork-mode plan by introducing a credential- bundle-aware fork runner that routes through any of the 6 providers (four API + two CLI) — the existing Anthropic-only ``make_default_fork_runner`` stays around for hosts that don't yet wire a bundle. skills/types.py - ``SkillMetadata.provider: Optional[str]`` — fork skills can declare their preferred provider; the runner picks the right client class. skills/fork.py - New ``make_credential_bundle_fork_runner(credentials, *, fallback_provider, fallback_model, max_tokens)``. Always returns a runner; credential failures surface as ``ForkResult(is_error=True)`` so the parent loop keeps running. - Provider routing: ``skill.metadata.provider`` → fallback_provider. - Client construction via ClientRegistry + ``_creds_to_client_kwargs`` shared with the pipeline path. - Records provider + model + token usage + cost_usd (when populated) in ``ForkResult.metadata``. tests/unit/test_fork_multi_provider.py (NEW) - 6 cases: runner returns unconditionally, skill provider routing, fallback_provider routing, missing-credentials structured error, unknown-provider structured error, model_override propagation. CHANGELOG.md — 2.0.0 entry gains a "Multi-provider sub-agent system" section + the fork runner / state slots / attach_runtime kwarg documented. Full suite: 3235 passed, 8 skipped, 0 failed. Phase D done. The executor is feature-complete for the LLM backend upgrade cycle: 6 providers in the registry, multi-provider sub-agent dispatch with parallel fan-out, credential propagation end-to-end, and a fork-mode runner that doesn't care about provider count. Next step: PyPI release. Plan reference: docs/llm-backend-upgrade-plan/07_rollout_phases.md (Phase D4). Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 27 ++++ src/geny_executor/skills/fork.py | 121 ++++++++++++++ src/geny_executor/skills/types.py | 4 + tests/unit/test_fork_multi_provider.py | 212 +++++++++++++++++++++++++ 4 files changed, 364 insertions(+) create mode 100644 tests/unit/test_fork_multi_provider.py diff --git a/CHANGELOG.md b/CHANGELOG.md index aac2ebb..c785b1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,33 @@ unified behind a single `CredentialBundle` channel. ### Added +- **Multi-provider sub-agent system** (`stages.s12_agent.subagent_type`). + `SubagentTypeDescriptor` gains `provider`, `provider_credentials_extras`, + `parallel`, and `max_concurrent` fields. `SubAgentBuildContext` (frozen + dataclass) is now handed to every factory carrying the parent's + `CredentialBundle` + descriptor + session ids + workspace snapshot. + `PipelineFactory` signature changes from `Callable[[], Any]` to + `Callable[[SubAgentBuildContext], Pipeline | Awaitable[Pipeline]]` + (zero-arg legacy factories still work via TypeError fallback). + `SubagentTypeOrchestrator` now does mixed serial + parallel dispatch, + bounded by `asyncio.Semaphore(min(max_concurrent))` of each parallel + group. +- **`Pipeline.attach_runtime(subagent_registry=...)`** + matching + kwarg on `from_manifest{,_async}`. Pipeline stores the registry; on + call it rebuilds the agent stage's orchestrator as + `SubagentTypeOrchestrator(registry)`. `PipelineState.subagent_registry` + mirrors the slot so sub-agent factories can reach it. +- **`PipelineState.credentials`** + **`PipelineState.subagent_registry`** + — populated by `_init_state`. Sub-pipelines see the same bundle the + parent received. +- **`SkillMetadata.provider`** — fork-mode skills can declare their + preferred provider so the new fork runner picks the right client. +- **`make_credential_bundle_fork_runner(credentials, ...)`** in + `skills.fork` — multi-provider fork-mode runner. Routes via + `skill.metadata.provider` (falls back to `fallback_provider`), + builds the client via `ClientRegistry.get(...)` with credentials + from the bundle. Missing credentials surface as a structured + `ForkResult(is_error=True)` rather than crashing. - **`ClaudeCodeCLIClient`** (`llm_client.claude_code`) — subprocess-backed client driving Anthropic's `claude` CLI. Streams via stream-json, drops the fields the CLI doesn't accept, and propagates token usage / cost. diff --git a/src/geny_executor/skills/fork.py b/src/geny_executor/skills/fork.py index d591ad8..7ba4211 100644 --- a/src/geny_executor/skills/fork.py +++ b/src/geny_executor/skills/fork.py @@ -184,6 +184,127 @@ async def _runner( return _runner +def make_credential_bundle_fork_runner( + credentials: Any, + *, + fallback_provider: str = "anthropic", + fallback_model: str = _DEFAULT_FORK_MODEL, + max_tokens: int = _DEFAULT_FORK_MAX_TOKENS, +) -> SkillForkRunner: + """Multi-provider fork runner backed by a :class:`CredentialBundle`. + + The runner chooses its client based on ``skill.metadata.provider`` + (falling back to ``fallback_provider``). Authentication flows through + the host-supplied :class:`CredentialBundle`, so a fork-mode skill + can run on any of the 6 registered providers — including the CLI + backends — without separately wiring API keys. + + Returns a runner unconditionally; missing credentials surface at + invocation time as a :class:`ForkResult` with ``is_error=True`` + rather than crashing at construction. + """ + from geny_executor.core.config import ModelConfig + from geny_executor.core.pipeline import _creds_to_client_kwargs + from geny_executor.llm_client.credentials import ConfigError, CredentialBundle + from geny_executor.llm_client.registry import ClientRegistry + + if not isinstance(credentials, CredentialBundle): + # Defensive: hosts wiring this runner pass a CredentialBundle. + credentials = CredentialBundle() + + async def _runner( + *, + skill: "Skill", + rendered_body: str, + invoke_args: Dict[str, Any], + parent_context: ToolContext, + ) -> ForkResult: + provider = ( + getattr(skill.metadata, "provider", None) or fallback_provider + ) + + try: + creds = credentials.require(provider) + except ConfigError as e: + return ForkResult( + content=( + f"fork runner: provider {provider!r} has no configured " + f"credentials in this session ({e})" + ), + metadata={"skill_id": skill.id, "provider": provider}, + is_error=True, + ) + + try: + client_cls = ClientRegistry.get(provider) + except (KeyError, ValueError) as e: + return ForkResult( + content=f"fork runner: unknown provider {provider!r} ({e})", + metadata={"skill_id": skill.id, "provider": provider}, + is_error=True, + ) + + client = client_cls(**_creds_to_client_kwargs(provider, creds)) + model = skill.metadata.model_override or fallback_model + model_config = ModelConfig(model=model, max_tokens=max_tokens) + + user_content = "Execute the skill following the system prompt." + if invoke_args: + import json as _json + + user_content = ( + user_content + "\n\nArguments:\n" + _json.dumps(invoke_args, indent=2) + ) + + try: + response = await client.create_message( + model_config=model_config, + messages=[{"role": "user", "content": user_content}], + system=rendered_body, + purpose=f"skill_fork:{skill.id}", + ) + except Exception as exc: # noqa: BLE001 + logger.warning( + "credential-bundle fork runner: skill=%s provider=%s API call failed: %s", + skill.id, + provider, + exc, + ) + return ForkResult( + content=f"fork API call failed: {exc}", + metadata={"model": model, "provider": provider, "skill_id": skill.id}, + is_error=True, + ) + + text_parts = [] + for block in getattr(response, "content", None) or []: + if isinstance(block, dict): + if block.get("type") == "text": + text_parts.append(block.get("text", "")) + else: + if getattr(block, "type", None) == "text": + text_parts.append(getattr(block, "text", "")) + + text = "\n".join(p for p in text_parts if p) or "(no text in fork response)" + + usage = getattr(response, "usage", None) + meta: Dict[str, Any] = { + "model": model, + "provider": provider, + "skill_id": skill.id, + } + if usage is not None: + meta["input_tokens"] = getattr(usage, "input_tokens", 0) + meta["output_tokens"] = getattr(usage, "output_tokens", 0) + cost = getattr(usage, "cost_usd", None) + if cost is not None: + meta["cost_usd"] = cost + + return ForkResult(content=text, metadata=meta) + + return _runner + + __all__ = [ "ForkResult", "SkillForkRunner", diff --git a/src/geny_executor/skills/types.py b/src/geny_executor/skills/types.py index 8def115..98e1491 100644 --- a/src/geny_executor/skills/types.py +++ b/src/geny_executor/skills/types.py @@ -95,6 +95,10 @@ class SkillMetadata: version: Optional[str] = None allowed_tools: Tuple[str, ...] = () model_override: Optional[str] = None + # Phase D4 — fork-mode skills can name their preferred provider + # so the runner picks the right client class via ClientRegistry. + # ``None`` means "inherit from parent" (handled by the runner). + provider: Optional[str] = None execution_mode: str = "inline" # PR-B.4.1 — richer schema. All optional with defaults so existing # SKILL.md files load unchanged. ``category`` slots the skill into diff --git a/tests/unit/test_fork_multi_provider.py b/tests/unit/test_fork_multi_provider.py new file mode 100644 index 0000000..11b633f --- /dev/null +++ b/tests/unit/test_fork_multi_provider.py @@ -0,0 +1,212 @@ +"""Tests for the credential-bundle fork runner (Phase D4). + +Validates that a fork-mode skill can route through any of the 6 +providers via a single :class:`CredentialBundle`, replacing the +Anthropic-only default runner. +""" + +from __future__ import annotations + +import asyncio +import sys +import os +from pathlib import Path + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src")) + +import pytest + +from geny_executor.llm_client.base import BaseClient, ClientCapabilities +from geny_executor.llm_client.credentials import ( + CredentialBundle, + ProviderCredentials, +) +from geny_executor.llm_client.types import APIResponse, ContentBlock +from geny_executor.skills.fork import ( + ForkResult, + make_credential_bundle_fork_runner, +) +from geny_executor.skills.types import Skill, SkillMetadata + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +class _RecordingClient(BaseClient): + """Fake BaseClient that records what it received and returns canned text.""" + + provider = "fake" + capabilities = ClientCapabilities() + + last_call: dict = {} + + def __init__(self, *, api_key: str = "", **kwargs): + super().__init__(api_key=api_key) + type(self).last_call = {"api_key": api_key, "kwargs": kwargs} + + async def _send(self, request, *, purpose: str = ""): + type(self).last_call["request"] = request + type(self).last_call["purpose"] = purpose + return APIResponse( + content=[ContentBlock(type="text", text="forked-text")], + stop_reason="end_turn", + ) + + +def _skill( + name: str = "test", + *, + provider: str | None = None, + model_override: str | None = None, +) -> Skill: + meta = SkillMetadata( + name=name, + description="test skill", + provider=provider, + model_override=model_override, + execution_mode="fork", + ) + return Skill( + id=f"skill::{name}", + metadata=meta, + body="be terse and helpful", + ) + + +def _ctx_stub(): + """A minimal stand-in for ToolContext (only attributes the runner reads).""" + + class _C: + pass + + return _C() + + +# --------------------------------------------------------------------------- +# Provider routing +# --------------------------------------------------------------------------- + + +def test_runner_returned_unconditionally() -> None: + """Unlike make_default_fork_runner (returns None on missing key), the + bundle variant always returns a runner — credential issues surface + at invocation time.""" + runner = make_credential_bundle_fork_runner(CredentialBundle()) + assert runner is not None + + +@pytest.mark.asyncio +async def test_runner_uses_skill_provider(monkeypatch: pytest.MonkeyPatch) -> None: + """``skill.metadata.provider="openai"`` → runner builds an OpenAI client.""" + bundle = CredentialBundle(by_provider={ + "openai": ProviderCredentials(api_key="sk-oai"), + }) + runner = make_credential_bundle_fork_runner(bundle) + + # Hijack ClientRegistry.get to return our recorder. + from geny_executor.llm_client.registry import ClientRegistry + + monkeypatch.setattr( + ClientRegistry, "get", classmethod(lambda cls, p: _RecordingClient) + ) + + skill = _skill(provider="openai") + result = await runner( + skill=skill, rendered_body="be helpful", invoke_args={}, parent_context=_ctx_stub(), + ) + assert isinstance(result, ForkResult) + assert result.is_error is False + assert result.content == "forked-text" + # The recorder saw the openai api_key from the bundle. + assert _RecordingClient.last_call["api_key"] == "sk-oai" + assert result.metadata["provider"] == "openai" + + +@pytest.mark.asyncio +async def test_runner_falls_back_to_fallback_provider(monkeypatch: pytest.MonkeyPatch) -> None: + """When ``skill.metadata.provider`` is None, the runner uses the + fallback_provider parameter.""" + bundle = CredentialBundle(by_provider={ + "anthropic": ProviderCredentials(api_key="sk-anth"), + }) + runner = make_credential_bundle_fork_runner( + bundle, fallback_provider="anthropic", + ) + from geny_executor.llm_client.registry import ClientRegistry + + monkeypatch.setattr( + ClientRegistry, "get", classmethod(lambda cls, p: _RecordingClient) + ) + + skill = _skill(provider=None) + result = await runner( + skill=skill, rendered_body="rules", invoke_args={}, parent_context=_ctx_stub(), + ) + assert result.is_error is False + assert result.metadata["provider"] == "anthropic" + assert _RecordingClient.last_call["api_key"] == "sk-anth" + + +# --------------------------------------------------------------------------- +# Missing credentials → structured error +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_runner_returns_error_when_credentials_missing() -> None: + """The runner does not crash on missing credentials — it returns a + ForkResult with is_error=True and an informative message.""" + bundle = CredentialBundle() # empty + runner = make_credential_bundle_fork_runner(bundle) + skill = _skill(provider="openai") + result = await runner( + skill=skill, rendered_body="x", invoke_args={}, parent_context=_ctx_stub(), + ) + assert result.is_error is True + assert "openai" in result.content + assert "credentials" in result.content.lower() + + +@pytest.mark.asyncio +async def test_runner_returns_error_when_provider_unknown() -> None: + bundle = CredentialBundle(by_provider={ + "imaginary": ProviderCredentials(api_key="sk-x"), + }) + runner = make_credential_bundle_fork_runner(bundle) + skill = _skill(provider="imaginary") + result = await runner( + skill=skill, rendered_body="x", invoke_args={}, parent_context=_ctx_stub(), + ) + assert result.is_error is True + assert "imaginary" in result.content + assert "unknown" in result.content.lower() + + +# --------------------------------------------------------------------------- +# Model override + arguments +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_runner_uses_skill_model_override(monkeypatch: pytest.MonkeyPatch) -> None: + bundle = CredentialBundle(by_provider={ + "anthropic": ProviderCredentials(api_key="sk-a"), + }) + runner = make_credential_bundle_fork_runner(bundle) + from geny_executor.llm_client.registry import ClientRegistry + + monkeypatch.setattr( + ClientRegistry, "get", classmethod(lambda cls, p: _RecordingClient) + ) + + skill = _skill(provider="anthropic", model_override="claude-opus-4-7") + result = await runner( + skill=skill, rendered_body="x", invoke_args={"q": "test"}, parent_context=_ctx_stub(), + ) + assert result.metadata["model"] == "claude-opus-4-7" + # The argument payload should be in the user message. + req = _RecordingClient.last_call["request"] + user_text = req.messages[0]["content"] + assert "test" in user_text