From 80abbf8d0f84f266aa18bc7a5fa042f00f7b63cf Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Fri, 5 Jun 2026 22:38:39 +0200 Subject: [PATCH 1/7] release: prepare v0.21.4 --- CHANGELOG.md | 5 +- Cargo.lock | 62 +++++++++---------- Cargo.toml | 62 +++++++++---------- ...idgets__splash__tests__splash_default.snap | 3 +- 4 files changed, 68 insertions(+), 64 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93db6f0fd..2351631fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] +## [0.21.4] - 2026-06-05 + ### Added - `test(memory)`: add unit tests for `deep_reasoning_query_conditioned` flag in `retrieve_tier` — @@ -7400,7 +7402,8 @@ let agent = Agent::new(provider, channel, &skills_prompt, executor); - Agent::run() uses tokio::select! to race channel messages against shutdown signal [0.16.0]: https://github.com/bug-ops/zeph/compare/v0.15.3...v0.16.0 -[Unreleased]: https://github.com/bug-ops/zeph/compare/v0.21.3...HEAD +[Unreleased]: https://github.com/bug-ops/zeph/compare/v0.21.4...HEAD +[0.21.4]: https://github.com/bug-ops/zeph/compare/v0.21.3...v0.21.4 [0.21.3]: https://github.com/bug-ops/zeph/compare/v0.21.2...v0.21.3 [0.21.2]: https://github.com/bug-ops/zeph/compare/v0.21.1...v0.21.2 [0.21.1]: https://github.com/bug-ops/zeph/compare/v0.21.0...v0.21.1 diff --git a/Cargo.lock b/Cargo.lock index f8d8f363c..7dc398e7b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10482,7 +10482,7 @@ dependencies = [ [[package]] name = "zeph" -version = "0.21.3" +version = "0.21.4" dependencies = [ "anyhow", "async-trait", @@ -10552,7 +10552,7 @@ dependencies = [ [[package]] name = "zeph-a2a" -version = "0.21.3" +version = "0.21.4" dependencies = [ "axum 0.8.9", "base64 0.22.1", @@ -10580,7 +10580,7 @@ dependencies = [ [[package]] name = "zeph-acp" -version = "0.21.3" +version = "0.21.4" dependencies = [ "agent-client-protocol 0.12.1", "agent-client-protocol-schema 
0.13.2", @@ -10623,7 +10623,7 @@ dependencies = [ [[package]] name = "zeph-agent-context" -version = "0.21.3" +version = "0.21.4" dependencies = [ "chrono", "futures", @@ -10646,7 +10646,7 @@ dependencies = [ [[package]] name = "zeph-agent-feedback" -version = "0.21.3" +version = "0.21.4" dependencies = [ "regex", "schemars 1.2.1", @@ -10661,7 +10661,7 @@ dependencies = [ [[package]] name = "zeph-agent-persistence" -version = "0.21.3" +version = "0.21.4" dependencies = [ "serde", "serde_json", @@ -10677,7 +10677,7 @@ dependencies = [ [[package]] name = "zeph-agent-tools" -version = "0.21.3" +version = "0.21.4" dependencies = [ "futures", "serde", @@ -10699,7 +10699,7 @@ dependencies = [ [[package]] name = "zeph-bench" -version = "0.21.3" +version = "0.21.4" dependencies = [ "clap", "schemars 1.2.1", @@ -10721,7 +10721,7 @@ dependencies = [ [[package]] name = "zeph-channels" -version = "0.21.3" +version = "0.21.4" dependencies = [ "axum 0.8.9", "criterion", @@ -10750,7 +10750,7 @@ dependencies = [ [[package]] name = "zeph-commands" -version = "0.21.3" +version = "0.21.4" dependencies = [ "serde", "thiserror 2.0.18", @@ -10761,7 +10761,7 @@ dependencies = [ [[package]] name = "zeph-common" -version = "0.21.3" +version = "0.21.4" dependencies = [ "axum 0.8.9", "blake3", @@ -10791,7 +10791,7 @@ dependencies = [ [[package]] name = "zeph-config" -version = "0.21.3" +version = "0.21.4" dependencies = [ "dirs", "insta", @@ -10811,7 +10811,7 @@ dependencies = [ [[package]] name = "zeph-context" -version = "0.21.3" +version = "0.21.4" dependencies = [ "blake3", "criterion", @@ -10832,7 +10832,7 @@ dependencies = [ [[package]] name = "zeph-core" -version = "0.21.3" +version = "0.21.4" dependencies = [ "age", "base64 0.22.1", @@ -10899,7 +10899,7 @@ dependencies = [ [[package]] name = "zeph-db" -version = "0.21.3" +version = "0.21.4" dependencies = [ "regex", "sqlx", @@ -10914,7 +10914,7 @@ dependencies = [ [[package]] name = "zeph-experiments" -version = "0.21.3" +version = 
"0.21.4" dependencies = [ "futures", "ordered-float 5.3.0", @@ -10938,7 +10938,7 @@ dependencies = [ [[package]] name = "zeph-gateway" -version = "0.21.3" +version = "0.21.4" dependencies = [ "axum 0.8.9", "http-body-util", @@ -10955,7 +10955,7 @@ dependencies = [ [[package]] name = "zeph-index" -version = "0.21.3" +version = "0.21.4" dependencies = [ "futures", "ignore", @@ -10990,7 +10990,7 @@ dependencies = [ [[package]] name = "zeph-llm" -version = "0.21.3" +version = "0.21.4" dependencies = [ "async-stream", "audioadapter-buffers", @@ -11039,7 +11039,7 @@ dependencies = [ [[package]] name = "zeph-mcp" -version = "0.21.3" +version = "0.21.4" dependencies = [ "async-trait", "blake3", @@ -11073,7 +11073,7 @@ dependencies = [ [[package]] name = "zeph-memory" -version = "0.21.3" +version = "0.21.4" dependencies = [ "arc-swap", "blake3", @@ -11114,7 +11114,7 @@ dependencies = [ [[package]] name = "zeph-orchestration" -version = "0.21.3" +version = "0.21.4" dependencies = [ "blake3", "dirs", @@ -11141,7 +11141,7 @@ dependencies = [ [[package]] name = "zeph-plugins" -version = "0.21.3" +version = "0.21.4" dependencies = [ "dirs", "flate2", @@ -11167,7 +11167,7 @@ dependencies = [ [[package]] name = "zeph-sanitizer" -version = "0.21.3" +version = "0.21.4" dependencies = [ "parking_lot", "proptest", @@ -11189,7 +11189,7 @@ dependencies = [ [[package]] name = "zeph-scheduler" -version = "0.21.3" +version = "0.21.4" dependencies = [ "chrono", "cron", @@ -11209,7 +11209,7 @@ dependencies = [ [[package]] name = "zeph-skills" -version = "0.21.3" +version = "0.21.4" dependencies = [ "anyhow", "blake3", @@ -11245,7 +11245,7 @@ dependencies = [ [[package]] name = "zeph-subagent" -version = "0.21.3" +version = "0.21.4" dependencies = [ "dirs", "indoc", @@ -11274,7 +11274,7 @@ dependencies = [ [[package]] name = "zeph-tools" -version = "0.21.3" +version = "0.21.4" dependencies = [ "arc-swap", "dashmap", @@ -11322,7 +11322,7 @@ dependencies = [ [[package]] name = "zeph-tui" 
-version = "0.21.3" +version = "0.21.4" dependencies = [ "arboard", "base64 0.22.1", @@ -11363,7 +11363,7 @@ dependencies = [ [[package]] name = "zeph-vault" -version = "0.21.3" +version = "0.21.4" dependencies = [ "age", "proptest", @@ -11379,7 +11379,7 @@ dependencies = [ [[package]] name = "zeph-worktree" -version = "0.21.3" +version = "0.21.4" dependencies = [ "serde", "tempfile", diff --git a/Cargo.toml b/Cargo.toml index 7a93d16b3..e06d03a35 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ resolver = "3" [workspace.package] edition = "2024" rust-version = "1.95" -version = "0.21.3" +version = "0.21.4" authors = ["bug-ops"] license = "MIT" repository = "https://github.com/bug-ops/zeph" @@ -148,36 +148,36 @@ url = "2.5.8" uuid = "1.23.2" walkdir = "2.5" wiremock = "0.6.5" -zeph-a2a = { path = "crates/zeph-a2a", version = "0.21.3" } -zeph-acp = { path = "crates/zeph-acp", version = "0.21.3" } -zeph-agent-context = { path = "crates/zeph-agent-context", version = "0.21.3" } -zeph-agent-feedback = { path = "crates/zeph-agent-feedback", version = "0.21.3" } -zeph-agent-persistence = { path = "crates/zeph-agent-persistence", version = "0.21.3" } -zeph-agent-tools = { path = "crates/zeph-agent-tools", version = "0.21.3" } -zeph-bench = { path = "crates/zeph-bench", version = "0.21.3" } -zeph-channels = { path = "crates/zeph-channels", version = "0.21.3" } -zeph-commands = { path = "crates/zeph-commands", version = "0.21.3" } -zeph-common = { path = "crates/zeph-common", version = "0.21.3" } -zeph-config = { path = "crates/zeph-config", version = "0.21.3" } -zeph-context = { path = "crates/zeph-context", version = "0.21.3" } -zeph-core = { path = "crates/zeph-core", version = "0.21.3" } -zeph-db = { path = "crates/zeph-db", default-features = false, version = "0.21.3" } -zeph-experiments = { path = "crates/zeph-experiments", version = "0.21.3" } -zeph-gateway = { path = "crates/zeph-gateway", version = "0.21.3" } -zeph-index = { path = "crates/zeph-index", 
version = "0.21.3" } -zeph-llm = { path = "crates/zeph-llm", version = "0.21.3" } -zeph-mcp = { path = "crates/zeph-mcp", version = "0.21.3" } -zeph-memory = { path = "crates/zeph-memory", default-features = false, version = "0.21.3" } -zeph-orchestration = { path = "crates/zeph-orchestration", version = "0.21.3" } -zeph-plugins = { path = "crates/zeph-plugins", version = "0.21.3" } -zeph-sanitizer = { path = "crates/zeph-sanitizer", version = "0.21.3" } -zeph-scheduler = { path = "crates/zeph-scheduler", version = "0.21.3" } -zeph-skills = { path = "crates/zeph-skills", version = "0.21.3" } -zeph-subagent = { path = "crates/zeph-subagent", version = "0.21.3" } -zeph-tools = { path = "crates/zeph-tools", version = "0.21.3" } -zeph-tui = { path = "crates/zeph-tui", version = "0.21.3" } -zeph-worktree = { path = "crates/zeph-worktree", version = "0.21.3" } -zeph-vault = { path = "crates/zeph-vault", version = "0.21.3" } +zeph-a2a = { path = "crates/zeph-a2a", version = "0.21.4" } +zeph-acp = { path = "crates/zeph-acp", version = "0.21.4" } +zeph-agent-context = { path = "crates/zeph-agent-context", version = "0.21.4" } +zeph-agent-feedback = { path = "crates/zeph-agent-feedback", version = "0.21.4" } +zeph-agent-persistence = { path = "crates/zeph-agent-persistence", version = "0.21.4" } +zeph-agent-tools = { path = "crates/zeph-agent-tools", version = "0.21.4" } +zeph-bench = { path = "crates/zeph-bench", version = "0.21.4" } +zeph-channels = { path = "crates/zeph-channels", version = "0.21.4" } +zeph-commands = { path = "crates/zeph-commands", version = "0.21.4" } +zeph-common = { path = "crates/zeph-common", version = "0.21.4" } +zeph-config = { path = "crates/zeph-config", version = "0.21.4" } +zeph-context = { path = "crates/zeph-context", version = "0.21.4" } +zeph-core = { path = "crates/zeph-core", version = "0.21.4" } +zeph-db = { path = "crates/zeph-db", default-features = false, version = "0.21.4" } +zeph-experiments = { path = "crates/zeph-experiments", 
version = "0.21.4" } +zeph-gateway = { path = "crates/zeph-gateway", version = "0.21.4" } +zeph-index = { path = "crates/zeph-index", version = "0.21.4" } +zeph-llm = { path = "crates/zeph-llm", version = "0.21.4" } +zeph-mcp = { path = "crates/zeph-mcp", version = "0.21.4" } +zeph-memory = { path = "crates/zeph-memory", default-features = false, version = "0.21.4" } +zeph-orchestration = { path = "crates/zeph-orchestration", version = "0.21.4" } +zeph-plugins = { path = "crates/zeph-plugins", version = "0.21.4" } +zeph-sanitizer = { path = "crates/zeph-sanitizer", version = "0.21.4" } +zeph-scheduler = { path = "crates/zeph-scheduler", version = "0.21.4" } +zeph-skills = { path = "crates/zeph-skills", version = "0.21.4" } +zeph-subagent = { path = "crates/zeph-subagent", version = "0.21.4" } +zeph-tools = { path = "crates/zeph-tools", version = "0.21.4" } +zeph-tui = { path = "crates/zeph-tui", version = "0.21.4" } +zeph-worktree = { path = "crates/zeph-worktree", version = "0.21.4" } +zeph-vault = { path = "crates/zeph-vault", version = "0.21.4" } zeroize = { version = "1.8.2", default-features = false } [workspace.lints.rust] diff --git a/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__splash__tests__splash_default.snap b/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__splash__tests__splash_default.snap index 4ac979e80..7ff6d6d22 100644 --- a/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__splash__tests__splash_default.snap +++ b/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__splash__tests__splash_default.snap @@ -1,5 +1,6 @@ --- source: crates/zeph-tui/src/widgets/splash.rs +assertion_line: 79 expression: output --- ┌──────────────────────────────────────────────────────────┐ @@ -14,7 +15,7 @@ expression: output │ ███████╗███████╗██║ ██║ ██║ │ │ ╚══════╝╚══════╝╚═╝ ╚═╝ ╚═╝ │ │ │ -│ v0.21.3 │ +│ v0.21.4 │ │ │ │ Type a message to start. 
│ │ │ From 599e42ab8a33e1906c6c8a5cae2e45d35f19f9e5 Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Fri, 5 Jun 2026 22:39:32 +0200 Subject: [PATCH 2/7] docs: update book, specs, and README for v0.21.4 --- README.md | 2 +- book/src/getting-started/wizard.md | 26 +++++++--- book/src/reference/cli.md | 40 +++++++++++++++ book/src/reference/configuration.md | 27 ++++++++++ specs/003-llm-providers/spec.md | 41 ++++++++++++++++ specs/005-skills/spec.md | 73 +++++++++++++++++++++++++++- specs/063-worktree-subsystem/spec.md | 1 + 7 files changed, 201 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 6d20acefe..32e6f860a 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ [![CI](https://img.shields.io/github/actions/workflow/status/bug-ops/zeph/ci.yml?branch=main&label=CI)](https://github.com/bug-ops/zeph/actions) [![codecov](https://codecov.io/gh/bug-ops/zeph/graph/badge.svg?token=S5O0GR9U6G)](https://codecov.io/gh/bug-ops/zeph) [![MSRV](https://img.shields.io/badge/MSRV-1.95-blue)](https://www.rust-lang.org) - [![Tests](https://img.shields.io/badge/tests-10294-brightgreen)](https://github.com/bug-ops/zeph/actions) + [![Tests](https://img.shields.io/badge/tests-10511-brightgreen)](https://github.com/bug-ops/zeph/actions) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE) diff --git a/book/src/getting-started/wizard.md b/book/src/getting-started/wizard.md index d3badffb7..20e921fc9 100644 --- a/book/src/getting-started/wizard.md +++ b/book/src/getting-started/wizard.md @@ -108,20 +108,32 @@ references-on-rename configuration. 
## Step 11: Sub-Agents - Configure the sub-agent system: - **Enable sub-agents** — toggle parallel sub-agent execution - **Max concurrent** — maximum sub-agents running at the same time (default: 1) -## Step 12: Router +## Step 12: Worktree Isolation + +Configure native git worktree isolation for background sub-agents: + +- **Enable worktree isolation** — create isolated git worktrees for each spawned sub-agent (default: disabled) + - When disabled, sub-agents operate in the same directory as the main agent + - When enabled, each sub-agent gets its own linked working tree via `git worktree` +- **Isolation mode** — how to isolate sub-agents: `none` (shared directory) or `worktree` (isolated checkout) (default: `none`, only available when worktree isolation is enabled) +- **Base ref** — which reference to use for worktree creation: `head` (current HEAD) or `fresh` (clone from main/master) (default: `head`) +- **Git timeout** — timeout in seconds for git operations when creating/removing worktrees (default: `30`) + +Skip this step if you do not plan to use sub-agents or prefer not to isolate their file system access. + +## Step 13: Router Configure the Thompson Sampling model router (requires `router` feature): - **Enable router** — toggle router on/off - **State file path** — where to persist alpha/beta statistics (default: `~/.zeph/router_thompson_state.json`) -## Step 13: Experiments +## Step 14: Experiments Configure autonomous self-experimentation: @@ -132,7 +144,7 @@ Configure autonomous self-experimentation: When enabled, the agent can autonomously tune its own inference parameters by running A/B trials against a benchmark dataset. See [Experiments](../concepts/experiments.md) for details. 
-## Step 14: Self-Learning +## Step 15: Self-Learning Configure the self-learning feedback detector: @@ -141,7 +153,7 @@ Configure the self-learning feedback detector: - **judge** — LLM-backed classifier for borderline cases; you can specify a dedicated model - **Correction confidence threshold** — Jaccard overlap threshold (default: 0.7) -## Step 15: Compaction Probe +## Step 16: Compaction Probe Configure post-compression context integrity validation: @@ -153,7 +165,7 @@ Configure post-compression context integrity validation: When enabled, each hard compaction is followed by a quality check. If the summary fails to preserve critical facts (HardFail), compaction is blocked and original messages are preserved. See [Context Engineering — Compaction Probe](../advanced/context.md#post-compression-validation-compaction-probe) for tuning guidance. -## Step 16: Debug Dump +## Step 17: Debug Dump Enable debug dump at startup: @@ -161,7 +173,7 @@ Enable debug dump at startup: Debug dump is intended for context debugging — use it when you need to inspect exactly what is sent to the LLM and what comes back. See [Debug Dump](../advanced/debug-dump.md) for details. 
-## Step 17: Security +## Step 18: Security Configure security features: diff --git a/book/src/reference/cli.md b/book/src/reference/cli.md index 952bdbb21..68a0fea20 100644 --- a/book/src/reference/cli.md +++ b/book/src/reference/cli.md @@ -25,6 +25,7 @@ zeph [OPTIONS] [COMMAND] | `schedule` | Manage cron-based scheduled jobs — list, add, remove, show (requires `scheduler` feature; see [Scheduler](../concepts/scheduler.md)) | | `db` | Database management — run migrations, check status (see [Database Abstraction](../concepts/database.md)) | | `migrate-config` | Add missing config parameters as commented-out blocks and reformat the file (see [Migrate Config](../guides/migrate-config.md)) | +| `worktree` | Manage background sub-agent git worktrees — list active, remove stale (requires `[worktree] enabled = true`; see [Worktree Isolation](../guides/worktree.md)) | When no subcommand is given, Zeph starts the agent loop. @@ -132,6 +133,24 @@ zeph plugin disable my-plugin --force **Integrity checks:** When you install a plugin, Zeph records a sha256 digest of its `.plugin.toml`. At startup and hot-reload, the digest is verified. If it doesn't match, the plugin is skipped and the mismatch is visible in `plugin list --overlay`. See [Plugin Manifest Integrity](security.md#plugin-manifest-integrity) for details. 
+**Ephemeral plugins (session-scoped):** Use the global `--plugin-url` flag to load plugins for a single session without permanent installation: + +```bash +# Load a plugin from a remote URL (HTTPS only) +zeph --plugin-url https://example.com/my-plugin.tar.gz + +# Multiple plugins +zeph --plugin-url https://example.com/plugin1.tar.gz --plugin-url https://example.com/plugin2.tar.gz + +# Pin a plugin version using url@sha256 syntax +zeph --plugin-url https://example.com/plugin.tar.gz@abc123def456789 + +# Ephemeral plugins load alongside permanently installed plugins +zeph --plugin-url https://example.com/plugin.tar.gz +``` + +Ephemeral plugins are scanned for security issues before loading and removed when the session ends. They cannot be disabled or permanently installed; use `zeph plugin add` for persistent plugins. + ### `zeph memory` Manage conversation history and advanced memory subsystems. @@ -295,6 +314,25 @@ zeph migrate-config --config config.toml See [Migrate Config](../guides/migrate-config.md) for a full walkthrough. +### `zeph worktree` + +Manage background sub-agent git worktrees for isolation. Requires `[worktree] enabled = true` in the config. See [Worktree Isolation](../guides/worktree.md) for details. + +| Subcommand | Description | +|------------|-------------| +| `worktree list` | List all active worktrees managed by Zeph with paths and creation timestamps | +| `worktree clean` | Remove stale worktrees (those no longer tracked by active sub-agents) | + +```bash +# List all active worktrees +zeph worktree list + +# Clean up unused worktrees (safe operation, only removes worktrees no longer tracked by an active sub-agent) +zeph worktree clean +``` + +Each sub-agent spawned with background isolation gets its own git worktree created from your repository. The `list` command shows which worktrees are active; `clean` removes ones that are no longer in use. + ### `zeph router` Inspect or reset the Thompson Sampling router state file. 
@@ -630,6 +668,8 @@ Configuration: Set `[session.recap]` in your config to control which LLM provide | `--log-file <PATH>` | Override the log file path for this session. Set to empty string (`""`) to disable file logging. See [Logging](../concepts/logging.md) | | `--tafc` | Enable Think-Augmented Function Calling for this session, overriding `tools.tafc.enabled`. See [Tools — TAFC](../concepts/tools.md#think-augmented-function-calling-tafc) | | `--debug-dump [PATH]` | Write LLM requests/responses and raw tool output to files. Omit `PATH` to use `debug.output_dir` from config (default: `.zeph/debug`). See [Debug Dump](../advanced/debug-dump.md) | +| `--plugin-url <URL>` | Load a plugin from a remote URL for this session only (ephemeral). Accepts multiple values. Use `url@sha256` syntax to pin a version, e.g., `--plugin-url https://example.com/plugin.tar.gz@abc123def456`. Requires HTTPS. See [Plugins](../guides/plugins.md) | +| `--worktree-base-ref <REF>` | Override the base ref for worktree creation: `head` (current HEAD) or `fresh` (clone main). Requires `[worktree] enabled = true`. See [Worktree Isolation](../guides/worktree.md) | | `--version` | Print version and exit | | `--help` | Print help and exit | diff --git a/book/src/reference/configuration.md b/book/src/reference/configuration.md index b74abbfb2..fd3aefa77 100644 --- a/book/src/reference/configuration.md +++ b/book/src/reference/configuration.md @@ -238,6 +238,12 @@ model = "whisper-1" # Requires `stt` feature. When base_url is set, targets a local server (no API key needed). # When omitted, uses the OpenAI API key from the openai [[llm.providers]] entry or ZEPH_OPENAI_API_KEY. +[llm.stream_limits] +# Configurable SSE buffer caps for streaming LLM responses. 
+# max_tool_json_bytes = 4194304 # Max bytes for tool call JSON (default: 4 MiB) +# max_thinking_bytes = 1048576 # Max bytes for Claude thinking content (default: 1 MiB) +# max_compaction_bytes = 32768 # Max bytes for context compaction responses (default: 32 KiB) + [skills] # Defaults to the user config dir when omitted # (for example ~/.config/zeph/skills on Linux, @@ -359,6 +365,10 @@ importance_weight = 0.15 # Blend weight for importance in ranking, [memory.routing] strategy = "heuristic" # Routing strategy for memory backend selection (default: "heuristic") +[memory.retrieval] +# MemORAI adaptive retrieval settings for graph-based memory +# deep_reasoning_query_conditioned = false # Use query-adaptive SYNAPSE weighting when deep reasoning active (default: false) + # [memory.admission] # enabled = false # Enable A-MAC adaptive memory admission control (default: false) # threshold = 0.40 # Composite score threshold; messages below this are rejected (default: 0.40) @@ -433,6 +443,10 @@ recall_limit = 10 # Max graph facts injected into context ( temporal_decay_rate = 0.0 # Recency boost for graph recall; 0.0 = disabled (default: 0.0) # Range: [0.0, 10.0]. 
Formula: 1/(1 + age_days * rate) edge_history_limit = 100 # Max historical edge versions per source+predicate pair (default: 100) +# Benna-Fusi multi-timescale synaptic variables for edge confidence +# bennad_alpha = 0.5 # Weight between fast and slow timescales, [0.0, 1.0] (default: 0.5) +# bennad_eta_fast = 0.6 # Learning rate for fast variable, [0.0, 1.0] (default: 0.6) +# bennad_eta_slow = 0.1 # Learning rate for slow variable, [0.0, 1.0] (default: 0.1) [memory.graph.spreading_activation] # enabled = false # Replace BFS with spreading activation (default: false) @@ -455,6 +469,10 @@ on_resume = true # Auto-generate recap when resuming a stored conve max_tokens = 500 # Max tokens for the recap summary (default: 500) max_input_messages = 50 # Max messages included in recap context (default: 50) +[session.provider_persistence] +enabled = true # Persist channel-level provider overrides across restarts (default: true) +# persist_provider_overrides = true # Store reasoning_effort per session (default: true) + [tools] enabled = true summarize_output = false # LLM-based summarization for long tool outputs @@ -624,6 +642,9 @@ endpoint = "http://localhost:4317" enabled = false max_daily_cents = 500 # Daily budget in cents (USD), UTC midnight reset +[cocoon] +# show_balance = true # Display TON balance in TUI sidebar (default: true). 
Set to false to show "*** TON" instead + [a2a] enabled = false host = "0.0.0.0" @@ -665,6 +686,12 @@ tool_timeout_secs = 30 # Default timeout for tool execution via MC # trust_level = "untrusted" # trusted, untrusted (default), or sandboxed # tool_allowlist = [] # Tools to expose from this server; empty = all (untrusted) or none (sandboxed) +[worktree] +enabled = false # Enable native worktree isolation for background sub-agents (default: false) +# bg_isolation = "none" # Isolation mode: "none", "worktree" (default: "none", requires `enabled = true`) +# base_ref = "head" # Worktree base ref: "head" (current HEAD) or "fresh" (clone from main) (default: "head") +# git_timeout_secs = 30 # Git operation timeout in seconds (default: 30) + [agents] enabled = false # Enable sub-agent system (default: false) max_concurrent = 1 # Max concurrent sub-agents (default: 1) diff --git a/specs/003-llm-providers/spec.md b/specs/003-llm-providers/spec.md index 2b3d88bc6..c90f7c5f7 100644 --- a/specs/003-llm-providers/spec.md +++ b/specs/003-llm-providers/spec.md @@ -189,11 +189,52 @@ correct field name automatically (commit #4591). This applies to: - `chat` / `chat_stream` requests (uses `max_completion_tokens` when model is o-series) - `chat_with_tools` requests +The o-series path also uses a separate `context_window` branch for context-length calculation +(#4811): when the model is o-series, context window is looked up from an internal table keyed +by model prefix rather than from the default `max_tokens` field. 
+ ### Key Invariants - o-series detection uses model name prefix match — NEVER hardcode specific model names - `max_tokens` must NOT be sent for o-series models — use `max_completion_tokens` only - Non-o-series models continue to use `max_tokens` +- o-series `context_window` lookup uses the prefix table — NEVER fall back to the generic default + +## Configurable SSE Buffer Caps (`StreamLimits`) (#4750, #4808, #4790) + +`StreamLimits` in `LlmConfig` caps the size of SSE streaming buffers to prevent runaway +memory growth from malformed or oversized provider responses: + +```toml +[llm.stream_limits] +max_tool_json_bytes = 4194304 # 4 MiB — tool call JSON accumulator +max_thinking_bytes = 1048576 # 1 MiB — thinking/reasoning block accumulator +max_compaction_bytes = 32768 # 32 KiB — compaction response accumulator +``` + +`ClaudeProvider` accepts `StreamLimits` via `.with_stream_limits()` builder. The fields are +wired through `LlmConfig`, `provider_factory`, and `src/init`. Migration step 56 injects a +commented `[llm.stream_limits]` hint into existing configs. + +### Key Invariants + +- `max_tool_json_bytes` default is 4 MiB — NEVER lower the default below 512 KiB (real tool schemas can be large) +- When a streaming buffer exceeds its cap, the stream is terminated with an error — NEVER silently truncate +- `StreamLimits` applies only to Claude streaming paths — other providers use equivalent internal caps + +## Embedding Helpers in `LlmConfig` (#4850, #4840) + +`effective_embedding_model` and `stable_skill_embedding_model` are moved from +`zeph_core::provider_factory` to `LlmConfig` in `zeph_config`, consistent with the +`stt_provider_entry` precedent. `impl Default for LlmConfig` is derived via serde +deserialization of an empty TOML string, so all `#[serde(default)]` fields produce a +well-typed default. All call sites in `src/bootstrap/` and `src/runner.rs` are updated +accordingly. 
+ +### Key Invariants + +- `effective_embedding_model` and `stable_skill_embedding_model` live in `LlmConfig` — NEVER re-add them to `provider_factory` +- `LlmConfig::default()` must round-trip through empty TOML — NEVER implement `Default` by hand ## Key Invariants diff --git a/specs/005-skills/spec.md b/specs/005-skills/spec.md index 78b579e47..b87cd8bb3 100644 --- a/specs/005-skills/spec.md +++ b/specs/005-skills/spec.md @@ -568,7 +568,77 @@ blocks invocation. requires_trust_check = true ``` -### Stage-1 Advisory SKILL.md Scan (#4132) +### Recursive Nested Skill Discovery (#4682, #4684) + +`WalkDir`-based discovery replaces the flat `read_dir` loop in the skill scanner. The traversal uses +pre-order DFS with lexicographic sibling ordering (max depth 16, no symlink follow). The first skill +with a given name wins; deeper duplicates are silently skipped. The existing `RecursiveMode::Recursive` +hot-reload watcher already covers subdirectories, so no watcher change is needed. + +### Key Invariants + +- Max walkdir depth is 16 — NEVER recurse further (prevents cycles on unusual filesystems) +- First-name-wins rule applies across depths — NEVER accept a deeper skill that duplicates a shallower name +- Symlinks are NOT followed — `follow_links(false)` is non-negotiable + +--- + +## Skill Extension Manifest (`SkillExtensions`) (#4705, #4683) + +`crates/zeph-skills/src/extensions.rs` adds an optional `extensions:` block in SKILL.md +frontmatter. Fields: + +``` +SkillExtensions { + ui: Vec, // hotkey/button declarations + keybindings: Vec, + monitors: Vec, // background watch expressions +} +``` + +`SkillMeta.extensions: Option` is populated by `parse_extensions()` with an +8 KiB byte cap. Parse failure returns `None` — existing SKILL.md files without an `extensions:` +block load unchanged. `serde_norway` is used for runtime YAML parsing within the cap. 
+ +### Key Invariants + +- Extensions block is optional — absent `extensions:` never fails skill load +- 8 KiB cap is enforced before `serde_norway::from_str` — NEVER pass uncapped bytes to the deserializer +- Parse errors are silently ignored (return `None`) — NEVER propagate extension parse failure as a skill load error + +--- + +## Concurrent Semantic Scan (#4705, #4683) + +`semantic_scan_plugin_add` replaces sequential scanning with `buffer_unordered(4)` and a 300s +aggregate `tokio::time::timeout`. Each future carries its own `(skill_name, verdict)` tuple so +rejection messages always name the correct skill regardless of completion order. + +### Key Invariants + +- Scan concurrency cap is 4 — NEVER set above 4 without benchmarking under load +- Aggregate timeout is 300s — NEVER lower it below the per-skill LLM call p99 latency +- Rejection messages MUST include the specific skill name — never a positional index + +--- + +## Skill Egress Attribution (#4682, #4684) + +`ToolCall`, `AuditEntry`, and `EgressEvent` gain `skill_name: Option<Vec<String>>` carrying the +names of all skills injected into the system prompt for the current turn. Attribution is +turn-scoped, not per-call. All decorator executors (`ScopedToolExecutor`, policy gate, adversarial +gate) and all `scrape.rs` egress sites propagate the field. `ToolCall` derives `Default` to avoid +breaking existing struct literals. 
+ +### Key Invariants + +- Attribution is turn-scoped — NEVER per-call attribution (that would require per-tool injection tracking) +- All executor decorators must propagate `skill_name` — single-path propagation is incomplete +- NEVER emit a non-`None` `skill_name` for turns with no injected skills + +--- + +## Stage-1 Advisory SKILL.md Scan (#4132) Before executing a skill, the system runs a lightweight static scan over the SKILL.md body to detect high-risk patterns (e.g., `eval`, `exec`, `import os`, network exfil keywords) @@ -583,6 +653,7 @@ and emits an advisory `SecurityEvent::SkillAdvisory` with severity and matched p - Blake3 re-hash only applies to skills with `requires_trust_check = true`; normal skills use load-time trust only - Advisory scan result MUST NOT block skill invocation in v1 — advisory only - NEVER store the raw unsanitized description in the system prompt +- NEVER proceed when `semantic_scan = true` but `semantic_scan_provider` is empty — return a config error (fail-closed, #4706, #4709) --- diff --git a/specs/063-worktree-subsystem/spec.md b/specs/063-worktree-subsystem/spec.md index 1f003cb83..7087862a7 100644 --- a/specs/063-worktree-subsystem/spec.md +++ b/specs/063-worktree-subsystem/spec.md @@ -104,6 +104,7 @@ root = ".claude/worktrees" # relative to repo root; canonicalised at boo branch_prefix = "agent/" # branch = "{prefix}{subagent_id}" prune_branch_on_remove = false # delete the branch after removing the worktree cleanup_on_completion = true # remove worktree when agent completes or is cancelled +git_timeout_secs = 30 # per-git-invocation timeout; clamped to ≥ 1 (#4784) ``` Per-agent opt-in in subagent definition frontmatter: From 08851a1128aed0a1e207eebd9f9779f376938022 Mon Sep 17 00:00:00 2001 From: "Andrei G." 
Date: Fri, 5 Jun 2026 22:39:56 +0200 Subject: [PATCH 3/7] docs: fix wizard step numbering and add worktree init spec --- book/src/getting-started/wizard.md | 4 ++-- specs/063-worktree-subsystem/spec.md | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/book/src/getting-started/wizard.md b/book/src/getting-started/wizard.md index 20e921fc9..ff9536c47 100644 --- a/book/src/getting-started/wizard.md +++ b/book/src/getting-started/wizard.md @@ -173,7 +173,7 @@ Enable debug dump at startup: Debug dump is intended for context debugging — use it when you need to inspect exactly what is sent to the LLM and what comes back. See [Debug Dump](../advanced/debug-dump.md) for details. -## Step 18: Security +## Step 19: Security Configure security features: @@ -185,7 +185,7 @@ Configure security features: - Shell tools checked by default: `bash`, `shell`, `terminal` (configurable in `config.toml` via `security.pre_execution_verify.destructive_commands.shell_tools`) - **Guardrail** (requires `guardrail` feature) — LLM-based prompt injection pre-screening via a dedicated safety model (e.g. `llama-guard-3:1b`) -## Step 18: Review and Save +## Step 20: Review and Save Inspect the generated TOML, confirm the output path, and save. If the file already exists, the wizard asks before overwriting. diff --git a/specs/063-worktree-subsystem/spec.md b/specs/063-worktree-subsystem/spec.md index 7087862a7..6de9191b1 100644 --- a/specs/063-worktree-subsystem/spec.md +++ b/specs/063-worktree-subsystem/spec.md @@ -90,6 +90,7 @@ agent in the wrong repository tree. - **NEVER** allow `base_ref = fresh` to silently fall back to HEAD when a fetch fails; fail with a clear error. - **NEVER** add the `set_working_directory` tool to the allowed list for a worktree-opted agent, even if the caller explicitly requests it. - **NEVER** skip the capability probe when `worktree.enabled = true`; a missing `git` must be caught at bootstrap, not at first spawn. 
+- **NEVER** allow `git_timeout_secs = 0`; the value is clamped to `max(1, configured_value)` in `DefaultGitRunner`. --- @@ -296,6 +297,17 @@ New file `crates/zeph-config/src/worktree.rs`: - Add `WorktreeCommand { List, Clean }` under `zeph worktree` (or extend `zeph agents`) - `--worktree-base-ref <ref>` session override flag +### `--init` Wizard (#4656, #4847) + +`step_worktree()` is added to the interactive configuration wizard. The step asks the user: +1. Whether to enable worktree isolation (`worktree.enabled`) +2. Which background isolation mode to use (`bg_isolation: None | Worktree`) — deferred child-process isolation knob +3. Which base ref to use (`base_ref: head | fresh`) + +`build_config()` maps the wizard state to `WorktreeConfig`. The `[worktree]` section is emitted +only when the user opts in (`enabled = true`). Two unit tests cover the disabled-default path +and the enabled+None+Fresh path. + ### TUI - Command palette: `/worktree list`, `/worktree clean` From 3eb94a964eef966ab4ce984c6213ae5a7ffc9a17 Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Fri, 5 Jun 2026 22:42:43 +0200 Subject: [PATCH 4/7] docs: compact changelog duplicate headers; add worktree, tui, cocoon, skills docs; update specs --- CHANGELOG.md | 2448 +++++++++++++------------- book/src/SUMMARY.md | 1 + book/src/advanced/tui.md | 16 + book/src/guides/cocoon.md | 11 + book/src/guides/custom-skills.md | 42 + book/src/guides/worktree.md | 178 ++ specs/004-memory/spec.md | 59 + specs/009-orchestration/spec.md | 34 + specs/035-profiling/spec.md | 44 +- specs/041-experiments/spec.md | 27 +- specs/044-subagent-lifecycle/spec.md | 42 +- specs/058-plugins/spec.md | 47 +- 12 files changed, 1694 insertions(+), 1255 deletions(-) create mode 100644 book/src/guides/worktree.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 2351631fe..426cd5707 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
## [Unreleased] + ## [0.21.4] - 2026-06-05 ### Added @@ -15,6 +16,154 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - `--init` wizard now includes a worktree isolation step: prompts for `worktree.enabled`, `bg_isolation` (`none`/`worktree`), and `base_ref` (`head`/`fresh`) (#4656) + +- `feat(llm): make SSE buffer caps configurable via LlmConfig.stream_limits (#4750)` — adds + `StreamLimits` struct to `zeph-config` with `max_tool_json_bytes` (4 MiB), `max_thinking_bytes` + (1 MiB), and `max_compaction_bytes` (32 KiB) defaults. All three were previously hardcoded + constants in `sse.rs`. `ClaudeProvider` gains a `with_stream_limits()` builder method and is + wired from `provider_factory.rs`. Migration step 56 adds a commented `[llm.stream_limits]` hint + to existing configs. Backward-compatible: configs without the section parse with defaults. + +- `feat(tracing): add profiling instrument to LlmProvider trait methods (#4808, #4790)` — adds + `#[cfg_attr(feature = "profiling", tracing::instrument(...))]` to `chat_with_tools` on + `ClaudeProvider`, `OpenAiProvider`, `GeminiProvider`, and `CompatibleProvider`; to `embed_batch` + on `CompatibleProvider`; and to all five `LlmProvider` trait methods plus `bandit_chat` and + `cascade_chat` on `RouterProvider`. Span names follow `llm.<provider>.<method>` convention. + +- `feat(worktree): make DefaultGitRunner timeout configurable via WorktreeConfig (#4704)` — adds + `git_timeout_secs: u64` (default `30`) to `WorktreeConfig`; both production construction sites + now use `DefaultGitRunner::with_timeout(Duration::from_secs(...))`. Configs without the field + parse as before (backward compatible). Migration step 55 injects a commented-out + `# git_timeout_secs = 30` hint into existing `[worktree]` sections.
+ +- `zeph-skills`: add `SkillExtensions` manifest parsing for UI/keybinding/monitor declarations in + `SKILL.md` `extensions:` frontmatter block (`SkillExtensions`, `SkillUiElement`, + `SkillKeybinding`, `SkillMonitor`). Parse failures log a warning and fall back to `None` so + existing skills are unaffected. Closes #4683. + + +- `zeph-skills`: recursive `SKILL.md` discovery now uses depth-first pre-order traversal + (siblings sorted lexicographically, `max_depth = 16`) via `walkdir`, replacing the previous + flat single-level `read_dir` scan. Skills at any nesting depth are discovered and registered; + symlinked directories are not followed; per-file path validation is unchanged. The first + encountered skill wins on duplicate names (DFS pre-order within each base directory, base + directories ordered by caller priority). Closes #4682. +- `zeph-tools`: `ToolCall` now carries `skill_name: Option>` (turn-level attribution + of which skills were active in the system prompt when the tool call was issued). The field is + propagated to `AuditEntry` and `EgressEvent` (both with `#[serde(skip_serializing_if = + "Option::is_none")]` to preserve existing JSONL consumers). Attribution is set in the main agent + loop (`tier_loop.rs`) and threads through all decorator executors (`ScopedToolExecutor`, + `PolicyGateExecutor`, `AdversarialPolicyGateExecutor`) and the scrape executor's egress path. + `ToolCall` now derives `Default` to reduce churn at struct literal sites. Closes #4684. + + +- `zeph-skills`: new `SkillSemanticScanner` module (`semantic_scanner.rs`) implementing LLM-based + Stage-2 semantic compliance check for third-party SKILL.md files. Detects Semantic Compliance + Hijacking (SCH) attacks (arXiv:2605.14460) where malicious skills encode behavior through natural + language instructions. 
The scanner uses `chat_typed_erased` with a fast/cheap provider, enforces + an 8 KiB content limit with head+tail sampling, neutralizes `` delimiter-escape + injection, and returns `ScanVerdict::{Allow, Warn, Block}`. Opt-in via `[skill] + semantic_scan = true` and `semantic_scan_provider` in config. +- `zeph-plugins`: `scan_targets()` method on `PluginManager` extracts SKILL.md candidates and + parsed metadata from an archive for pre-installation semantic scanning, keeping the plugins crate + LLM-free. Includes a path-traversal guard (`canonicalize + starts_with`) mirroring `add()`. +- `zeph-core`: `semantic_scan_plugin_add` async function performs Stage-2 scan before plugin + installation when `semantic_scan = true`. Fail-closed on `Block` verdict; logs `Warn` and + continues on `Warn` verdict; returns a config error if `semantic_scan_provider` is empty. Closes #3947. +- `zeph-memory`, `zeph-config`: SYNAPSE multi-timescale synaptic variables (#3709). Adds + `confidence_fast` and `confidence_slow` fields to every graph edge (migration 096). On each + confidence merge (`insert_edge_typed` max-merge branch), the Benna-Fusi rule updates both + variables: `fast' = fast + η_f*(c − fast)`, `slow' = slow + η_s*(fast' − slow)`. Spreading + activation uses a blended confidence `α*fast + (1−α)*slow` instead of raw confidence so that + recently reinforced edges receive higher activation than edges with only historical support. + Config keys: `[memory.graph.spreading_activation] alpha`, `benna_fast_rate`, `benna_slow_rate`. +- `zeph-memory`, `zeph-config`: Turn-level provenance for graph edges (#3710). New `turn_index` + column (migration 096) stores the position within the episode at which an edge was first + committed. Threaded via `GraphExtractionConfig.turn_index`; `None` for pre-migration rows. +- `zeph-memory`, `zeph-config`: `MemORAI` write-gate prefilter (#3710). 
When + `[memory.graph.write_gate] enabled = true`, edges below `min_edge_relevance` whose relation + matches a low-signal vocabulary (e.g. "related_to", "is") are dropped before graph write, + reducing noise accumulation. Opt-in, default disabled. +- `zeph-config`: `ConflictRecencyConfig` — `[memory.graph.conflict] recency_slow_threshold` + controls when the recency conflict-resolution strategy is permitted to override `valid_from` + comparison (#3709). Default: 0.2. +- `zeph-memory`, `zeph-config`: DeepReasoning tier query-conditioned routing (#3994). When + `[memory.tiered_retrieval] deep_reasoning_query_conditioned = true`, the `DeepReasoning` + tier routes through HELA spreading activation (`recall_graph_hela`) instead of static-weight + BFS, producing query-aligned graph recall results. Opt-in, default disabled. +- `zeph-memory`: `GraphStore::with_benna_rates` builder for passing Benna-Fusi η rates from + config into the store at bootstrap time. +- `zeph-memory`: `GraphStore::insert_or_supersede_with_turn_index_and_metrics` — APEX INSERT + path now records `turn_index` on new head edges. + +- `zeph-config`: new `[cocoon]` section with `show_balance: bool` (default `true`). When set to + `false`, the TON balance in the TUI status bar is rendered as `*** TON` instead of the real + value. Implements the redaction option from spec §15.2. Default `true` preserves current + behaviour. Config migration step 53 appends a commented `[cocoon]` advisory notice to existing + configs; the field is optional so existing configs load without modification. +- `--init` wizard: prompts "Show Cocoon TON balance in the TUI status bar?" in the Cocoon provider + step (default `true`). +- TUI: Ctrl+R reverse-search over current-session prompt history. Opens a floating overlay above + the input area; typing filters by substring match (newest-first); Ctrl+R again cycles to the + next older match; Enter copies the selection into the input (no auto-submit); Esc cancels. 
+ History is current-session only (populated on each submit, empty at startup). +- `zeph-subagent`: transitive constraint propagation for sub-agent spawns (#3993). + `SpawnContext` gains `max_trust_level` and `inherited_tool_allowlist` fields. + When set by the orchestration layer, `apply_constraint_propagation` narrows the + spawned agent's tool policy and `PolicyGateExecutor::set_effective_trust` clamps + trust via `min(own, cap)` semantics — privilege can only narrow, never escalate. + `DenyList` agents under an inherited allowlist are converted to an explicit + `AllowList(parent_set \ deny_entries)` (fail-closed). `resume()` constraint propagation + support added in #4690. +- Native worktree isolation for background sub-agents (closes #4679). New `zeph-worktree` crate + (`DefaultWorktreeManager`) manages git worktrees via subprocess calls with full path sanitization + and capability probing. `zeph-subagent` integrates the worktree lifecycle into `SubAgentManager`: + each background agent with `permissions.worktree = true` gets a dedicated worktree and a + `CwdLock` guard that serializes cwd changes across concurrent agents (INV-1). `set_working_directory` + is automatically added to `disallowed_tools` for worktree-opted agents (INV-3). `bg_isolation` + determines whether a worktree is created (`Worktree`) or only the lock is held (`None`). The + worktree is removed after the agent completes when `cleanup_on_completion = true`. +- `zeph-config`: new `WorktreeConfig` type with `BgIsolation` enum and `WorktreeBaseRef` enum, + added to `Config` and `SubAgentConfig` structs. +- CLI: `zeph worktree list|clean` subcommands; `--worktree-base-ref` flag for `run`. +- TUI command palette: `WorktreeList` and `WorktreeClean` entries. +- Config migration step 54: appends a commented-out `[worktree]` section with defaults to + configs that lack it. 
+- `zeph-config`: `ProviderOverrides` struct — per-session LLM generation override parameters + persisted across restarts (Phase 1: `reasoning_effort` only). Serialized as JSON and stored + in the `channel_preferences` table under `pref_key = "provider_overrides"`. Uses + `#[serde(default)]` for forward-compatible deserialization (unknown fields from future phases + are silently ignored). **Note**: the issue acceptance criteria specified + `#[serde(deny_unknown_fields)]`; this was intentionally relaxed to `#[serde(default)]` so + older binaries can read blobs written by newer ones without losing the known params. +- `zeph-config`: `SessionConfig` gains `persist_provider_overrides: bool` (default `true`). Only + takes effect when `provider_persistence` is also `true`. +- `zeph-core`: `persist_channel_provider` now persists a `ProviderOverrides` blob alongside the + provider name. A `restoring_provider` guard prevents clobbering the stored blob during the + restore path (F1 fix). Fire-and-forget under `TaskClass::Telemetry`; write-side 1 KB size cap. +- `zeph-core`: `restore_channel_provider` now loads and validates the persisted overrides blob on + successful provider name restore (M3 guard: skipped when name restore fails). Read-side 1 KB + size cap; deserialization errors and inapplicable params are logged as warnings. +- `--init` wizard: prompts for `persist_provider_overrides`. +- Config migration step 52: splices `persist_provider_overrides = true` (commented, discoverability + only) into existing `[session]` blocks. `SessionConfig` already defaults the field to `true` + so existing configs load fine without the key. +- `--plugin-url <URL> [--plugin-sha256 <HEX>]` CLI flag for ephemeral session-scoped plugin + loading (closes #4653). The plugin archive is downloaded over HTTPS, verified against the + optional SHA-256 digest, scanned for injection patterns (blocking), and loaded into a temporary + directory that is cleaned up on process exit.
The plugin is never written to the permanent + plugins store and its config overlays are never applied. +- `--plugin-url` now accepts multiple values (pass the flag once per URL; closes #4675). Each + value can be a plain `https://…` URL or an inline `https://…@sha256hex` pair for integrity + pinning. The separate `--plugin-sha256` flag has been removed; use the `url@sha256` syntax + instead. +- `zeph-plugins`: `PluginName` newtype (closes #4674). `AddResult::name`, + `InstalledPlugin::name`, and `AutoUpdateResult::name` now carry a `PluginName` instead of a + plain `String`. Construct via `PluginName::try_from("name")` — the conversion delegates to + `validate_plugin_name` and returns `PluginError::InvalidName` on failure. `PluginName` + implements `Display`, `AsRef<str>`, `Ord`, `serde::Serialize` (transparent), and + `serde::Deserialize` (validating — rejects names that fail `validate_plugin_name`). + ### Changed - `docs(config)`: fix `judge_model` doc comment in `LearningConfig` — field accepts a provider @@ -91,6 +240,23 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). helper into `zeph-skills::merge_prompts` module; remove duplicate code from `miner.rs` and `trace_extractor.rs`. Closes #4857. + +- refactor(llm): extract `openai_post()` private helper in `OpenAiProvider` to eliminate 7 + repetitions of `Authorization`/`Content-Type` header setup (#4800) + + +- refactor(context): make embed and compress timeouts in `FidelityScorer` configurable via + `embed_timeout_secs` / `compress_timeout_secs` in `[memory.fidelity]` config (closes #4645, #4651). + Both fields default to 30 seconds to preserve existing behaviour; config migration step 51 adds + commented-out hints for existing configs that contain a `[memory.fidelity]` section. +- `zeph-config`: `FidelityConfig.compress_provider` and `semantic_scoring_provider` now use + `Option<ProviderName>` instead of `Option<String>`, consistent with all other provider-reference + fields in the config layer.
Existing TOML configs are unaffected (deserialization is transparent). +- `zeph-sanitizer`: `GuardrailVerdict` and `ResponseVerificationResult` are now `#[non_exhaustive]`, + allowing new verdict variants to be added without breaking downstream exhaustive matches. +- `zeph-context`: `CompressionMethod` is now `#[non_exhaustive]`, allowing new compression methods + to be added without breaking downstream exhaustive matches. + ### Fixed - `fix(orchestration)`: `check_graph_completion` deadlock→Failed branch now sets `graph_dirty` flag, @@ -138,33 +304,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). `PlanCacheError::Database`, and `PatternError::Db` display strings to `"sqlx error: {0}"`, consistent with `zeph-memory` and `zeph-scheduler`. Closes #4798. -### Added - -- `feat(llm): make SSE buffer caps configurable via LlmConfig.stream_limits (#4750)` — adds - `StreamLimits` struct to `zeph-config` with `max_tool_json_bytes` (4 MiB), `max_thinking_bytes` - (1 MiB), and `max_compaction_bytes` (32 KiB) defaults. All three were previously hardcoded - constants in `sse.rs`. `ClaudeProvider` gains a `with_stream_limits()` builder method and is - wired from `provider_factory.rs`. Migration step 56 adds a commented `[llm.stream_limits]` hint - to existing configs. Backward-compatible: configs without the section parse with defaults. - -- `feat(tracing): add profiling instrument to LlmProvider trait methods (#4808, #4790)` — adds - `#[cfg_attr(feature = "profiling", tracing::instrument(...))]` to `chat_with_tools` on - `ClaudeProvider`, `OpenAiProvider`, `GeminiProvider`, and `CompatibleProvider`; to `embed_batch` - on `CompatibleProvider`; and to all five `LlmProvider` trait methods plus `bandit_chat` and - `cascade_chat` on `RouterProvider`. Span names follow `llm..` convention. 
- -- `feat(worktree): make DefaultGitRunner timeout configurable via WorktreeConfig (#4704)` — adds - `git_timeout_secs: u64` (default `30`) to `WorktreeConfig`; both production construction sites - now use `DefaultGitRunner::with_timeout(Duration::from_secs(...))`. Configs without the field - parse as before (backward compatible). Migration step 55 injects a commented-out - `# git_timeout_secs = 30` hint into existing `[worktree]` sections. - -- `zeph-skills`: add `SkillExtensions` manifest parsing for UI/keybinding/monitor declarations in - `SKILL.md` `extensions:` frontmatter block (`SkillExtensions`, `SkillUiElement`, - `SkillKeybinding`, `SkillMonitor`). Parse failures log a warning and fall back to `None` so - existing skills are unaffected. Closes #4683. - -### Fixed - `zeph-core`, `zeph-context`, `zeph-skills`, `zeph-tools`: replace six `span.enter()` guards held across `.await` boundaries with `.instrument(span).await` (and `.instrument(span)` on @@ -271,47 +410,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - `zeph-vault`: add `#[non_exhaustive]` to `AgeVaultError`. Closes #4736. - `zeph-bench`: add `#[non_exhaustive]` to `BenchError` and `Role`. Closes #4736. -### Changed - -- refactor(llm): extract `openai_post()` private helper in `OpenAiProvider` to eliminate 7 - repetitions of `Authorization`/`Content-Type` header setup (#4800) - -### Performance - -- `zeph-memory`: HELA spreading-activation BFS hop loop now uses a `HashSet` for O(1) - frontier membership checks instead of `Vec::contains()` (O(F) per edge, O(F²) overall). - The `Vec` is preserved for BFS traversal order. Closes #4698. -- `zeph-memory`: `is_low_signal_relation` replaced `to_lowercase() == "..."` comparisons with - `eq_ignore_ascii_case("...")`, eliminating ~30 short-string heap allocations per extraction - pass. Closes #4699. 
- -### Added - -- `zeph-skills`: recursive `SKILL.md` discovery now uses depth-first pre-order traversal - (siblings sorted lexicographically, `max_depth = 16`) via `walkdir`, replacing the previous - flat single-level `read_dir` scan. Skills at any nesting depth are discovered and registered; - symlinked directories are not followed; per-file path validation is unchanged. The first - encountered skill wins on duplicate names (DFS pre-order within each base directory, base - directories ordered by caller priority). Closes #4682. -- `zeph-tools`: `ToolCall` now carries `skill_name: Option>` (turn-level attribution - of which skills were active in the system prompt when the tool call was issued). The field is - propagated to `AuditEntry` and `EgressEvent` (both with `#[serde(skip_serializing_if = - "Option::is_none")]` to preserve existing JSONL consumers). Attribution is set in the main agent - loop (`tier_loop.rs`) and threads through all decorator executors (`ScopedToolExecutor`, - `PolicyGateExecutor`, `AdversarialPolicyGateExecutor`) and the scrape executor's egress path. - `ToolCall` now derives `Default` to reduce churn at struct literal sites. Closes #4684. - -### Research - -- Competitive parity scan: assessed Goose v1.34.0–v1.35.0 features against Zeph. Three features - confirmed as already covered (vault chmod 600 enforcement, plugin auto-update, compaction - guards). Filed parity issues for recursive nested skill discovery (#4682), skills platform - extension manifest (#4683), egress logging with skill attribution (#4684), and deep link scheme - (#4687). Two issues (#4685, #4686) closed as duplicates of #4023. Updated - `.local/testing/playbooks/competitive-parity.md` with full assessment log. - Closes #3917, #4023, #4059. 
- -### Fixed - `zeph-subagent`: `SubAgentManager::resume()` now accepts an optional `SpawnContext` and applies the same constraint propagation as `spawn()`: `apply_constraint_propagation` narrows the tool @@ -336,116 +434,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). remote plugin archive containing `.bundled` marker files would cause ephemeral skills to be misclassified as compile-time bundled by the `SkillRegistry` (wrong trust model). Closes #4673. -### Added - -- `zeph-skills`: new `SkillSemanticScanner` module (`semantic_scanner.rs`) implementing LLM-based - Stage-2 semantic compliance check for third-party SKILL.md files. Detects Semantic Compliance - Hijacking (SCH) attacks (arXiv:2605.14460) where malicious skills encode behavior through natural - language instructions. The scanner uses `chat_typed_erased` with a fast/cheap provider, enforces - an 8 KiB content limit with head+tail sampling, neutralizes `` delimiter-escape - injection, and returns `ScanVerdict::{Allow, Warn, Block}`. Opt-in via `[skill] - semantic_scan = true` and `semantic_scan_provider` in config. -- `zeph-plugins`: `scan_targets()` method on `PluginManager` extracts SKILL.md candidates and - parsed metadata from an archive for pre-installation semantic scanning, keeping the plugins crate - LLM-free. Includes a path-traversal guard (`canonicalize + starts_with`) mirroring `add()`. -- `zeph-core`: `semantic_scan_plugin_add` async function performs Stage-2 scan before plugin - installation when `semantic_scan = true`. Fail-closed on `Block` verdict; logs `Warn` and - continues on `Warn` verdict; returns a config error if `semantic_scan_provider` is empty. Closes #3947. -- `zeph-memory`, `zeph-config`: SYNAPSE multi-timescale synaptic variables (#3709). Adds - `confidence_fast` and `confidence_slow` fields to every graph edge (migration 096). 
On each - confidence merge (`insert_edge_typed` max-merge branch), the Benna-Fusi rule updates both - variables: `fast' = fast + η_f*(c − fast)`, `slow' = slow + η_s*(fast' − slow)`. Spreading - activation uses a blended confidence `α*fast + (1−α)*slow` instead of raw confidence so that - recently reinforced edges receive higher activation than edges with only historical support. - Config keys: `[memory.graph.spreading_activation] alpha`, `benna_fast_rate`, `benna_slow_rate`. -- `zeph-memory`, `zeph-config`: Turn-level provenance for graph edges (#3710). New `turn_index` - column (migration 096) stores the position within the episode at which an edge was first - committed. Threaded via `GraphExtractionConfig.turn_index`; `None` for pre-migration rows. -- `zeph-memory`, `zeph-config`: `MemORAI` write-gate prefilter (#3710). When - `[memory.graph.write_gate] enabled = true`, edges below `min_edge_relevance` whose relation - matches a low-signal vocabulary (e.g. "related_to", "is") are dropped before graph write, - reducing noise accumulation. Opt-in, default disabled. -- `zeph-config`: `ConflictRecencyConfig` — `[memory.graph.conflict] recency_slow_threshold` - controls when the recency conflict-resolution strategy is permitted to override `valid_from` - comparison (#3709). Default: 0.2. -- `zeph-memory`, `zeph-config`: DeepReasoning tier query-conditioned routing (#3994). When - `[memory.tiered_retrieval] deep_reasoning_query_conditioned = true`, the `DeepReasoning` - tier routes through HELA spreading activation (`recall_graph_hela`) instead of static-weight - BFS, producing query-aligned graph recall results. Opt-in, default disabled. -- `zeph-memory`: `GraphStore::with_benna_rates` builder for passing Benna-Fusi η rates from - config into the store at bootstrap time. -- `zeph-memory`: `GraphStore::insert_or_supersede_with_turn_index_and_metrics` — APEX INSERT - path now records `turn_index` on new head edges. 
- -- `zeph-config`: new `[cocoon]` section with `show_balance: bool` (default `true`). When set to - `false`, the TON balance in the TUI status bar is rendered as `*** TON` instead of the real - value. Implements the redaction option from spec §15.2. Default `true` preserves current - behaviour. Config migration step 53 appends a commented `[cocoon]` advisory notice to existing - configs; the field is optional so existing configs load without modification. -- `--init` wizard: prompts "Show Cocoon TON balance in the TUI status bar?" in the Cocoon provider - step (default `true`). -- TUI: Ctrl+R reverse-search over current-session prompt history. Opens a floating overlay above - the input area; typing filters by substring match (newest-first); Ctrl+R again cycles to the - next older match; Enter copies the selection into the input (no auto-submit); Esc cancels. - History is current-session only (populated on each submit, empty at startup). -- `zeph-subagent`: transitive constraint propagation for sub-agent spawns (#3993). - `SpawnContext` gains `max_trust_level` and `inherited_tool_allowlist` fields. - When set by the orchestration layer, `apply_constraint_propagation` narrows the - spawned agent's tool policy and `PolicyGateExecutor::set_effective_trust` clamps - trust via `min(own, cap)` semantics — privilege can only narrow, never escalate. - `DenyList` agents under an inherited allowlist are converted to an explicit - `AllowList(parent_set \ deny_entries)` (fail-closed). `resume()` constraint propagation - support added in #4690. -- Native worktree isolation for background sub-agents (closes #4679). New `zeph-worktree` crate - (`DefaultWorktreeManager`) manages git worktrees via subprocess calls with full path sanitization - and capability probing. 
`zeph-subagent` integrates the worktree lifecycle into `SubAgentManager`: - each background agent with `permissions.worktree = true` gets a dedicated worktree and a - `CwdLock` guard that serializes cwd changes across concurrent agents (INV-1). `set_working_directory` - is automatically added to `disallowed_tools` for worktree-opted agents (INV-3). `bg_isolation` - determines whether a worktree is created (`Worktree`) or only the lock is held (`None`). The - worktree is removed after the agent completes when `cleanup_on_completion = true`. -- `zeph-config`: new `WorktreeConfig` type with `BgIsolation` enum and `WorktreeBaseRef` enum, - added to `Config` and `SubAgentConfig` structs. -- CLI: `zeph worktree list|clean` subcommands; `--worktree-base-ref` flag for `run`. -- TUI command palette: `WorktreeList` and `WorktreeClean` entries. -- Config migration step 54: appends a commented-out `[worktree]` section with defaults to - configs that lack it. -- `zeph-config`: `ProviderOverrides` struct — per-session LLM generation override parameters - persisted across restarts (Phase 1: `reasoning_effort` only). Serialized as JSON and stored - in the `channel_preferences` table under `pref_key = "provider_overrides"`. Uses - `#[serde(default)]` for forward-compatible deserialization (unknown fields from future phases - are silently ignored). **Note**: the issue acceptance criteria specified - `#[serde(deny_unknown_fields)]`; this was intentionally relaxed to `#[serde(default)]` so - older binaries can read blobs written by newer ones without losing the known params. -- `zeph-config`: `SessionConfig` gains `persist_provider_overrides: bool` (default `true`). Only - takes effect when `provider_persistence` is also `true`. -- `zeph-core`: `persist_channel_provider` now persists a `ProviderOverrides` blob alongside the - provider name. A `restoring_provider` guard prevents clobbering the stored blob during the - restore path (F1 fix). 
Fire-and-forget under `TaskClass::Telemetry`; write-side 1 KB size cap. -- `zeph-core`: `restore_channel_provider` now loads and validates the persisted overrides blob on - successful provider name restore (M3 guard: skipped when name restore fails). Read-side 1 KB - size cap; deserialization errors and inapplicable params are logged as warnings. -- `--init` wizard: prompts for `persist_provider_overrides`. -- Config migration step 52: splices `persist_provider_overrides = true` (commented, discoverability - only) into existing `[session]` blocks. `SessionConfig` already defaults the field to `true` - so existing configs load fine without the key. -- `--plugin-url [--plugin-sha256 ]` CLI flag for ephemeral session-scoped plugin - loading (closes #4653). The plugin archive is downloaded over HTTPS, verified against the - optional SHA-256 digest, scanned for injection patterns (blocking), and loaded into a temporary - directory that is cleaned up on process exit. The plugin is never written to the permanent - plugins store and its config overlays are never applied. -- `--plugin-url` now accepts multiple values (pass the flag once per URL; closes #4675). Each - value can be a plain `https://…` URL or an inline `https://…@sha256hex` pair for integrity - pinning. The separate `--plugin-sha256` flag has been removed; use the `url@sha256` syntax - instead. -- `zeph-plugins`: `PluginName` newtype (closes #4674). `AddResult::name`, - `InstalledPlugin::name`, and `AutoUpdateResult::name` now carry a `PluginName` instead of a - plain `String`. Construct via `PluginName::try_from("name")` — the conversion delegates to - `validate_plugin_name` and returns `PluginError::InvalidName` on failure. `PluginName` - implements `Display`, `AsRef`, `Ord`, `serde::Serialize` (transparent), and - `serde::Deserialize` (validating — rejects names that fail `validate_plugin_name`). 
- -### Fixed - `zeph-config`: `FidelityConfig::validate()` now rejects `embed_timeout_secs = 0` and `compress_timeout_secs = 0` with a descriptive error referencing `[memory.fidelity]`; a zero @@ -455,19 +443,24 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). references to `[memory.fidelity]` in module doc comment, warn log, and field doc comment (closes #4667). -### Changed +### Performance -- refactor(context): make embed and compress timeouts in `FidelityScorer` configurable via - `embed_timeout_secs` / `compress_timeout_secs` in `[memory.fidelity]` config (closes #4645, #4651). - Both fields default to 30 seconds to preserve existing behaviour; config migration step 51 adds - commented-out hints for existing configs that contain a `[memory.fidelity]` section. -- `zeph-config`: `FidelityConfig.compress_provider` and `semantic_scoring_provider` now use - `Option` instead of `Option`, consistent with all other provider-reference - fields in the config layer. Existing TOML configs are unaffected (deserialization is transparent). -- `zeph-sanitizer`: `GuardrailVerdict` and `ResponseVerificationResult` are now `#[non_exhaustive]`, - allowing new verdict variants to be added without breaking downstream exhaustive matches. -- `zeph-context`: `CompressionMethod` is now `#[non_exhaustive]`, allowing new compression methods - to be added without breaking downstream exhaustive matches. +- `zeph-memory`: HELA spreading-activation BFS hop loop now uses a `HashSet` for O(1) + frontier membership checks instead of `Vec::contains()` (O(F) per edge, O(F²) overall). + The `Vec` is preserved for BFS traversal order. Closes #4698. +- `zeph-memory`: `is_low_signal_relation` replaced `to_lowercase() == "..."` comparisons with + `eq_ignore_ascii_case("...")`, eliminating ~30 short-string heap allocations per extraction + pass. Closes #4699. + +### Research + +- Competitive parity scan: assessed Goose v1.34.0–v1.35.0 features against Zeph. 
Three features + confirmed as already covered (vault chmod 600 enforcement, plugin auto-update, compaction + guards). Filed parity issues for recursive nested skill discovery (#4682), skills platform + extension manifest (#4683), egress logging with skill attribution (#4684), and deep link scheme + (#4687). Two issues (#4685, #4686) closed as duplicates of #4023. Updated + `.local/testing/playbooks/competitive-parity.md` with full assessment log. + Closes #3917, #4023, #4059. ## [0.21.3] - 2026-05-29 @@ -942,6 +935,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). enumerating `o1`/`o3`/`o4`. This covers all current and future o-series models (`o2`, `o5`, …) without requiring per-model additions (closes #4600, #4602). + ## [0.21.2] - 2026-05-18 ### Added @@ -1568,6 +1562,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Updated `metrics` 0.24.5 → 0.24.6 and `metrics-util` 0.20.3 → 0.20.4 (closes #3895). + ## [0.21.1] - 2026-05-12 ### Added @@ -1629,6 +1624,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). New `fire_permission_denied_hooks` helper ensures hooks fire at every denial path with `ZEPH_DENIED_TOOL` and `ZEPH_DENY_REASON` populated. Closes #3774. + ## [0.21.0] - 2026-05-11 ### Added @@ -1779,6 +1775,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). (was documenting `default = ["scheduler", "sqlite"]` since v0.18 while reality had 5 features since v0.20) + ## [0.20.2] - 2026-05-06 ### Added @@ -1800,31 +1797,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). `CompositeExecutor`, `PolicyGateExecutor`, and `TrustGateExecutor`. `PartialJsonParser::push` rejects inputs exceeding 512 KB. 
(#3641, #3642) -### Fixed - -- fix(core): `commit_speculative_tier` now has a `#[cfg(debug_assertions)]` guard that emits - `tracing::error!` when a committed speculative result carries `ToolError::ConfirmationRequired`, - making the invariant machine-checkable in debug builds at zero release cost. (#3653) - -- fix(core): `ErasedToolExecutor::requires_confirmation_erased` default inverted from `false` to - `true`, making speculative dispatch safe-by-default. Added `ToolExecutor::requires_confirmation` - (default `false`) with a blanket impl that delegates to it; `TrustGateExecutor` overrides this to - mirror its trust-check policy. All existing direct `ErasedToolExecutor` implementors updated to - explicitly return `false` where appropriate. (#3644) - -- fix(core): `SpeculationEngine` sweeper task was never aborted on `Drop` in the no-supervisor - branch due to `std::mem::forget` discarding the `JoinHandle` and the `AbortHandle` being - immediately dropped. Replaced the dummy `TaskHandle` approach with a `SweepHandle` enum that - stores either a `TaskHandle` (supervisor path) or a raw `JoinHandle<()>` (no-supervisor path), - with `abort(self)` called in `Drop`. (#3645) - -- fix(memory): `insert_or_supersede_with_metrics` no longer violates `uq_graph_edges_active_head`. - SQLite enforces unique indexes at statement level; the prior fix inserted the new row before - deactivating the old head. Split `invalidate_prior_head` into `expire_prior_head` (sets - `valid_to`/`expired_at` before INSERT) and `set_superseded_by` (back-fills `superseded_by` - after INSERT), both within the same transaction. (#3635) - -### Added - test(core): four focused unit tests for `Agent::commit_speculative_tier` covering all result branches: engine absent (fast path), cache miss, `Ok` result with `tool_started_ats` @@ -1884,42 +1856,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- Gonka AI integration guide covering GonkaGate and native inference paths (#3603) -### Changed - -- perf(tracing): rename `llm_call` span to `llm.turn_call` with `provider` field for main-turn - LLM call visibility; avoids naming collision with existing `llm.chat_stream` spans at the - provider layer (#3619). - -- docs(readme): reposition the project README around memory-first operation, low-resource - deployment, multi-provider routing, and the current Gonka.ai integration path. - -### Fixed - -- fix(memory.graph): add `extract_provider` to `[memory.graph]` config. Graph extraction tasks - produce low prompt/response similarity, causing systematic quality gate false positives. Setting - `extract_provider` to a named provider from `[[llm.providers]]` routes extraction through a - provider without the quality gate. **Action required**: set `extract_provider = ""` in - `[memory.graph]` to activate the fix; the default (empty) preserves existing behavior (#3601). - -- fix(cli): `zeph project purge` now returns a clear error message when stdin is not a terminal - and neither `--dry-run` nor `-y` was provided: `Aborted: stdin is not a terminal. Use --dry-run - to preview or -y to confirm non-interactively.` Previously the raw `IO error: not a terminal` - from `dialoguer` was propagated with no guidance. (#3599) - -- fix(lsp): LSP diagnostics context injection now works with mcpls v0.3.6+. Previously - `fetch_diagnostics` silently returned `None` on every call because it expected a bare JSON array - from `get_diagnostics`, but mcpls changed the response shape to `{"diagnostics": [...]}`. - A new `parse_diagnostics_json` helper handles both the legacy bare-array format and the - object-wrapper format, restoring diagnostics injection into the prompt after `write` tool calls. - -- fix(mcp): HTTP 4xx responses (401, 403, 404, 410, 422) from remote MCP endpoints are now mapped - to the new `McpError::HttpAuth` variant, which is non-retryable (`McpErrorCode::AuthFailure`). 
- Previously these were wrapped into `McpError::Connection` (retryable), causing up to 47 s of - exponential-backoff delay on startup when credentials were misconfigured. Statuses 408, 425, and - 429 remain retryable. Clear `warn!` log messages are emitted with `server_id` and `status` fields. - Follow-up: OAuth handshake paths tracked in #3586. - -### Added - feat(zeph-llm): `RequestSigner` for Gonka-compatible signed-request headers (#3609). Produces ECDSA/secp256k1 signatures over `sha256(sha256(body) || timestamp_ns || transfer_address)` @@ -2093,6 +2029,64 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). `[security.capability_scopes]` sections is automatic — `ConfigMigrator` picks up the new defaults from `default.toml`. +### Fixed + +- fix(core): `commit_speculative_tier` now has a `#[cfg(debug_assertions)]` guard that emits + `tracing::error!` when a committed speculative result carries `ToolError::ConfirmationRequired`, + making the invariant machine-checkable in debug builds at zero release cost. (#3653) + +- fix(core): `ErasedToolExecutor::requires_confirmation_erased` default inverted from `false` to + `true`, making speculative dispatch safe-by-default. Added `ToolExecutor::requires_confirmation` + (default `false`) with a blanket impl that delegates to it; `TrustGateExecutor` overrides this to + mirror its trust-check policy. All existing direct `ErasedToolExecutor` implementors updated to + explicitly return `false` where appropriate. (#3644) + +- fix(core): `SpeculationEngine` sweeper task was never aborted on `Drop` in the no-supervisor + branch due to `std::mem::forget` discarding the `JoinHandle` and the `AbortHandle` being + immediately dropped. Replaced the dummy `TaskHandle` approach with a `SweepHandle` enum that + stores either a `TaskHandle` (supervisor path) or a raw `JoinHandle<()>` (no-supervisor path), + with `abort(self)` called in `Drop`. 
(#3645) + +- fix(memory): `insert_or_supersede_with_metrics` no longer violates `uq_graph_edges_active_head`. + SQLite enforces unique indexes at statement level; the prior fix inserted the new row before + deactivating the old head. Split `invalidate_prior_head` into `expire_prior_head` (sets + `valid_to`/`expired_at` before INSERT) and `set_superseded_by` (back-fills `superseded_by` + after INSERT), both within the same transaction. (#3635) + + +- fix(memory.graph): add `extract_provider` to `[memory.graph]` config. Graph extraction tasks + produce low prompt/response similarity, causing systematic quality gate false positives. Setting + `extract_provider` to a named provider from `[[llm.providers]]` routes extraction through a + provider without the quality gate. **Action required**: set `extract_provider = ""` in + `[memory.graph]` to activate the fix; the default (empty) preserves existing behavior (#3601). + +- fix(cli): `zeph project purge` now returns a clear error message when stdin is not a terminal + and neither `--dry-run` nor `-y` was provided: `Aborted: stdin is not a terminal. Use --dry-run + to preview or -y to confirm non-interactively.` Previously the raw `IO error: not a terminal` + from `dialoguer` was propagated with no guidance. (#3599) + +- fix(lsp): LSP diagnostics context injection now works with mcpls v0.3.6+. Previously + `fetch_diagnostics` silently returned `None` on every call because it expected a bare JSON array + from `get_diagnostics`, but mcpls changed the response shape to `{"diagnostics": [...]}`. + A new `parse_diagnostics_json` helper handles both the legacy bare-array format and the + object-wrapper format, restoring diagnostics injection into the prompt after `write` tool calls. + +- fix(mcp): HTTP 4xx responses (401, 403, 404, 410, 422) from remote MCP endpoints are now mapped + to the new `McpError::HttpAuth` variant, which is non-retryable (`McpErrorCode::AuthFailure`). 
+ Previously these were wrapped into `McpError::Connection` (retryable), causing up to 47 s of + exponential-backoff delay on startup when credentials were misconfigured. Statuses 408, 425, and + 429 remain retryable. Clear `warn!` log messages are emitted with `server_id` and `status` fields. + Follow-up: OAuth handshake paths tracked in #3586. + +### Changed + +- perf(tracing): rename `llm_call` span to `llm.turn_call` with `provider` field for main-turn + LLM call visibility; avoids naming collision with existing `llm.chat_stream` spans at the + provider layer (#3619). + +- docs(readme): reposition the project README around memory-first operation, low-resource + deployment, multi-provider routing, and the current Gonka.ai integration path. + ### Documentation - research: synthesize MemCoT, OmniMem, and OCR-Memory memory architecture papers; document integration roadmap and file follow-up issues (#3564, #3566, #3571); add Research Backlog to APEX-MEM spec (see specs §14) @@ -2123,16 +2117,136 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). `zeph-subagent/manager.rs` with a tracking comment naming the three existing enforcement sites (`PlanModeExecutor`, `apply_def_config_defaults`, `FilteredToolExecutor`) — no behaviour change. -### Added -- feat(agent-context): add `MetricsCallback` trait and `ToolOutputArchive`, `CompactionProbeCallback`, - `CompactionPersistence` callback traits to `zeph-agent-context` (#3527). These four traits define - the cross-crate interface for compaction side-effects; `zeph-agent-context` declares them and - `zeph-core` provides concrete implementations, preserving the crate isolation invariant. - `MetricsCallback` exposes six recording methods (hard-compaction, tool-output-prune, probe - pass/soft-fail/hard-fail/error) consumed by `ContextService` internals. +- fix(acp): restore session/update notification routing in run-agent client (#3520). 
Removed + no-op `on_receive_notification` handler from `spawn_subagent_inner` that was silently + discarding all `agent_message_chunk` notifications before they could reach `drain_until_stop`, + causing `zeph acp run-agent` to always produce empty output. +- fix(acp,bench): box `agent.run()` future to suppress `clippy::large_futures` with + `--features full` (#3521). Changed `agent.run().await` to `Box::pin(agent.run()).await` + at `src/acp.rs:901` and `crates/zeph-bench/src/runner.rs:532` to keep the 17 400-byte + future on the heap rather than the stack. -### Changed + +- fix(scheduler): `Scheduler::init()` now hydrates CLI-added periodic jobs from the DB into + `self.tasks` on startup; jobs written via `zeph schedule add` are visible to `tick()` and + fire on their cron schedule without requiring a daemon restart (#3499). +- fix(gateway): `spawn_gateway_server` now forwards webhook payloads to the agent loop via + `GatewayChannel` instead of silently draining them; the wrapper merges webhook and + interactive input with interactive-first bias so live sessions stay responsive (#3500). + +- fix(config): `LlmConfig::effective_model()` now skips embed-only providers (`embed = true`) + when resolving the display model, so `/status` shows the first chat-capable provider's model + instead of the embed model name (#3488). +- fix(llm): `AnyProvider::provider_kind_str()` for `Router` now delegates to the last-selected + child provider via `RouterProvider::last_selected_provider_kind()`, enabling cost tracking to + correctly attribute API costs when Thompson or other routing strategies are active (#3489). +- fix(ci): add `--all-targets` to the clippy step so test-target violations are caught in CI (#3490). +- fix(tui): `/compact` slash command now updates `compaction_last_*` metrics, making the TUI + compaction badge visible after manual compaction (#3493). `emit_compaction_status_signal` is + called from `compact_context()` after `finalize_compacted_messages` completes. 
+ +- fix(core): remove `test` arm from `EnvVaultProvider` re-export cfg gate in `zeph-core/src/lib.rs` + so that `cargo nextest run -p zeph-core` (no explicit `--features env-vault`) compiles cleanly + (#3485, regressed by #3479). +- fix(bootstrap): `build_single_provider_from_pool` now skips embed-only entries when selecting + the fallback provider index, preventing an embed model from being used for chat when no provider + has `default = true` (#3484). + +- fix(focus): `complete_focus` called in a batch with other tools no longer orphans the + current-turn tool results (#3476). The truncation in `complete_focus_tool` now preserves + the current turn's assistant `tool_calls` message so that the subsequent `User(tool_results)` + message has a valid parent, preventing a 422 from the OpenAI API on the next LLM call. + +- fix(config): standardize provider reference fields to `Option` (#3482). + Changed `DigestConfig.provider`, `SemanticConfig.embed_provider`, + `HebbianConfig.consolidate_provider`, `GraphConfig.strategy_classifier_provider`, + and `IndexConfig.embed_provider` from raw `String`/`Option` to + `Option`. All fields retain `#[serde(default)]` for TOML backward + compatibility. Callers updated to use `.as_ref()?` / `.as_deref()` patterns. + + +- `zeph-bench`: `BookReservationParams`, `UpdateReservationFlightsParams`, and + `UpdateReservationPassengersParams` used `Vec` for `flights` and + `passengers` fields. `schemars` emits an `array` schema without `items` for `Value`, + which OpenAI rejects with HTTP 400. Replaced with typed structs `FlightSegment` and + `Passenger`; the tau2-bench-airline dataset now produces valid tool schemas. (#3426) +- `zeph-bench`: `bench run --scenario ` silently exited 0 with "Benchmark complete: + 0/0 exact" when the ID matched no scenarios. Now returns an error with a descriptive + message and non-zero exit code. 
(#3427) +- `zeph-llm`: `RouterProvider::model_identifier()` fell through to the default empty-string + implementation, causing `results.json` to record `model: ""` when the router was used as + the bench provider. Overridden to return `"router"`. (#3430) + + +- `zeph-memory`: `EmbeddingRegistry::search_raw` now validates query vector dimension against the + stored collection dimension before issuing a gRPC search. Qdrant gRPC silently returned + near-zero cosine scores (~0.022) on dimension mismatch, causing all skill candidates to be + dropped below the `min_injection_score` threshold and making skill injection non-functional. + A cached dimension (populated at sync time) is checked first; on cache miss the collection is + probed once and the result cached for subsequent calls. (#3418) +- `zeph-core`: shutdown summary LLM calls no longer time out on multi-turn sessions. Increased + `shutdown_summary_timeout_secs` default from 10 to 30 (matching `extraction_timeout_secs`) and + added a cooperative yield loop between `abort_all()` and `maybe_store_shutdown_summary()` so + cancelled enrichment tasks can release their HTTP connections before the summary LLM call + competes for the API rate-limit budget. (#3431) + + +- `zeph-core`, `zeph-llm`: tool schema filter now receives the embedding provider instead of the + main chat provider in `runner.rs` and `daemon.rs`; eliminates 1536-vs-768-dim mismatch that + caused all tool similarity scores to be 0.0 (fixes #3413). +- `zeph-llm`: `OllamaProvider::embed()` tracing span now records the actual embedding model name + (`self.embedding_model`) instead of the chat model name (`self.model_identifier()`), making + `llm.embed` spans in Perfetto/Jaeger unambiguous (fixes #3414). + +### Added + +- feat(agent-context): add `MetricsCallback` trait and `ToolOutputArchive`, `CompactionProbeCallback`, + `CompactionPersistence` callback traits to `zeph-agent-context` (#3527).
These four traits define + the cross-crate interface for compaction side-effects; `zeph-agent-context` declares them and + `zeph-core` provides concrete implementations, preserving the crate isolation invariant. + `MetricsCallback` exposes six recording methods (hard-compaction, tool-output-prune, probe + pass/soft-fail/hard-fail/error) consumed by `ContextService` internals. + + +- context: plumb compression-spectrum `active_levels` through `ContextAssemblyInput` (#3455). + The `RetrievalPolicy`-selected tier set is now forwarded into the context assembler, which + skips tier-excluded fetchers (Episodic / Procedural / Declarative) before scheduling them. + Corrections and code RAG remain always-on. Resolves the `TODO(#3455)` in `assembly.rs`. + + +- A2A: `AgentCard` served at `/.well-known/agent.json` now accurately reflects the agent's + modality capabilities. `capabilities.images` is set from `provider.supports_vision()`; + `capabilities.audio` is set when an STT provider is configured (`[llm.stt]`); + `capabilities.files` is controlled by the new `[a2a] advertise_files = false` config field + (opt-in, default `false` — set `true` only when skills or MCP tools can ingest file parts). + Card construction is extracted into a private `build_default_card` helper in `daemon.rs` + for testability. (#3378) + + +- `zeph-bench`: full tau2-bench environment support — in-memory retail and airline domain executors + (`RetailEnv`, `AirlineEnv`) implementing `ToolExecutor`; `TauBenchEvaluator` scoring tool-call + traces via `Action.compare_with_tool_call` semantics; `Tau2BenchLoader` replacing the old + string-match `tau-bench` loader; `run_dataset_with_env_factory` runner method for tool-driven + datasets; `ResponseMode::ToolUse` with a customer-service system prompt; `bench download + --dataset tau2-bench` git-clone download; `bench run --dataset tau2-bench-retail/airline` + dispatch (closes #3417, Phase 1: ACTION-only scoring). 
+- `zeph-bench`: aggregate `median_score`, `stddev` (population), and `error_count` statistics on + `BenchRun.aggregate`; all three fields are persisted in `results.json` and included in + `summary.md`. +- `zeph-bench`: multi-turn `Scenario` via `Vec` with `Role::{User,Assistant}`; all built-in + loaders construct via the new `Scenario::single` constructor; `primary_prompt()` returns + `Result<&str, BenchError>` so malformed scenarios surface immediately. +- `zeph-bench`: `--baseline` dual-run for memory-relevant datasets (`longmemeval`, `locomo`); + runs memory-off and memory-on passes sequentially, writes + `baseline/memory-{off,on}/results.json` and `baseline/comparison.json` reusing the existing + `baseline::BaselineComparison`; non-memory datasets (`gaia`, `frames`, `tau-bench`) reject + `--baseline` with a clear error. +- `zeph-bench`: `MemoryMode::{Off,On}` and `BenchMemoryParams` on `BenchRunner`; memory-on pass + wires a `SQLite`-only `SemanticMemory` per scenario (no Qdrant), with + `summarization_threshold = 100_000` to preserve run determinism (FR-003). + +### Changed - refactor(context): relocate `RetrievedContext` and `collect_retrieved_context` from `zeph-core` to `zeph-agent-context::retrieved` (feature `self-check`); `zeph-core::quality::pipeline` @@ -2154,18 +2268,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). `#[cfg(test)]` delegations. `PartialEq` (variant-only, ignores `qdrant_future`) added to `CompactionOutcome` for test assertions. -### Fixed - -- fix(acp): restore session/update notification routing in run-agent client (#3520). Removed - no-op `on_receive_notification` handler from `spawn_subagent_inner` that was silently - discarding all `agent_message_chunk` notifications before they could reach `drain_until_stop`, - causing `zeph acp run-agent` to always produce empty output. -- fix(acp,bench): box `agent.run()` future to suppress `clippy::large_futures` with - `--features full` (#3521). 
Changed `agent.run().await` to `Box::pin(agent.run()).await` - at `src/acp.rs:901` and `crates/zeph-bench/src/runner.rs:532` to keep the 17 400-byte - future on the heap rather than the stack. - -### Changed - refactor(context): fix workspace compilation errors introduced during context migration (#3523 fixup). Resolved 7 errors in `zeph-core`: unresolved prefix-constant imports in @@ -2221,46 +2323,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - `agent/tests.rs` (6,166 LoC) → `agent/tests/` (1 mod.rs + 9 sub-files inside `agent_tests/` + 8 sibling modules). Move-only: no logic changes, test count preserved. -### Fixed - -- fix(scheduler): `Scheduler::init()` now hydrates CLI-added periodic jobs from the DB into - `self.tasks` on startup; jobs written via `zeph schedule add` are visible to `tick()` and - fire on their cron schedule without requiring a daemon restart (#3499). -- fix(gateway): `spawn_gateway_server` now forwards webhook payloads to the agent loop via - `GatewayChannel` instead of silently draining them; the wrapper merges webhook and - interactive input with interactive-first bias so live sessions stay responsive (#3500). - -- fix(config): `LlmConfig::effective_model()` now skips embed-only providers (`embed = true`) - when resolving the display model, so `/status` shows the first chat-capable provider's model - instead of the embed model name (#3488). -- fix(llm): `AnyProvider::provider_kind_str()` for `Router` now delegates to the last-selected - child provider via `RouterProvider::last_selected_provider_kind()`, enabling cost tracking to - correctly attribute API costs when Thompson or other routing strategies are active (#3489). -- fix(ci): add `--all-targets` to the clippy step so test-target violations are caught in CI (#3490). -- fix(tui): `/compact` slash command now updates `compaction_last_*` metrics, making the TUI - compaction badge visible after manual compaction (#3493). 
`emit_compaction_status_signal` is - called from `compact_context()` after `finalize_compacted_messages` completes. - -- fix(core): remove `test` arm from `EnvVaultProvider` re-export cfg gate in `zeph-core/src/lib.rs` - so that `cargo nextest run -p zeph-core` (no explicit `--features env-vault`) compiles cleanly - (#3485, regressed by #3479). -- fix(bootstrap): `build_single_provider_from_pool` now skips embed-only entries when selecting - the fallback provider index, preventing an embed model from being used for chat when no provider - has `default = true` (#3484). - -- fix(focus): `complete_focus` called in a batch with other tools no longer orphans the - current-turn tool results (#3476). The truncation in `complete_focus_tool` now preserves - the current turn's assistant `tool_calls` message so that the subsequent `User(tool_results)` - message has a valid parent, preventing a 422 from the OpenAI API on the next LLM call. - -- fix(config): standardize provider reference fields to `Option` (#3482). - Changed `DigestConfig.provider`, `SemanticConfig.embed_provider`, - `HebbianConfig.consolidate_provider`, `GraphConfig.strategy_classifier_provider`, - and `IndexConfig.embed_provider` from raw `String`/`Option` to - `Option`. All fields retain `#[serde(default)]` for TOML backward - compatibility. Callers updated to use `.as_ref()?` / `.as_deref()` patterns. - -### Changed - refactor(config): invert `zeph-config` dependency arrow (#3481). Moved pure-data config types (`McpTrustLevel`, `ToolSecurityMeta`, `CacheTtl`, @@ -2271,14 +2333,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). `zeph-config` and re-export the moved types for backward compatibility. `zeph-config` is now a pure-data leaf crate with no dependency on feature crates. -### Added - -- context: plumb compression-spectrum `active_levels` through `ContextAssemblyInput` (#3455). 
- The `RetrievalPolicy`-selected tier set is now forwarded into the context assembler, which - skips tier-excluded fetchers (Episodic / Procedural / Declarative) before scheduling them. - Corrections and code RAG remain always-on. Resolves the `TODO(#3455)` in `assembly.rs`. - -### Changed - refactor(zeph-core): decompose long functions in `agent/tool_execution/` (#3457). All `#[allow(clippy::too_many_lines)]` suppressions removed from `native.rs`, @@ -2291,17 +2345,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). `fire_vigil_audit_entry`, `record_tool_experience`, `process_one_tool_result`. Pure structural refactoring — no behavioral changes. -### Added - -- A2A: `AgentCard` served at `/.well-known/agent.json` now accurately reflects the agent's - modality capabilities. `capabilities.images` is set from `provider.supports_vision()`; - `capabilities.audio` is set when an STT provider is configured (`[llm.stt]`); - `capabilities.files` is controlled by the new `[a2a] advertise_files = false` config field - (opt-in, default `false` — set `true` only when skills or MCP tools can ingest file parts). - Card construction is extracted into a private `build_default_card` helper in `daemon.rs` - for testability. (#3378) - -### Changed - `zeph-tui`: removed all `#[allow(clippy::too_many_lines)]` suppressions in `command.rs`, `app/keys.rs`, `widgets/security.rs`, `widgets/status.rs`, and `widgets/resources.rs` by @@ -2331,30 +2374,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). `cross_session_rollout_ok_for_demote`, `try_auto_promote`, `try_auto_demote`), and `agent/state/security.rs` (`run_ner_classifier`). No behavioral changes. (#3457) -### Refactored - -- `zeph-tools`: decomposed long functions in shell and scrape executors to satisfy - `clippy::too_many_lines`. 
`execute_block` was split into `capture_snapshot_for`, - `maybe_rollback`, `classify_and_audit`, and `apply_output_filter`; `execute_bash` was - split into `build_bash_command`, `apply_sandbox`, `spawn_output_readers`, - `run_bash_stream`, and `finalize_envelope`; `execute_tool_call` in `WebScrapeExecutor` - was deduplicated via `run_with_audit`. (#3450) - -### Fixed - -- `zeph-bench`: `BookReservationParams`, `UpdateReservationFlightsParams`, and - `UpdateReservationPassengersParams` used `Vec` for `flights` and - `passengers` fields. `schemars` emits an `array` schema without `items` for `Value`, - which OpenAI rejects with HTTP 400. Replaced with typed structs `FlightSegment` and - `Passenger`; the tau2-bench-airline dataset now produces valid tool schemas. (#3426) -- `zeph-bench`: `bench run --scenario ` silently exited 0 with "Benchmark complete: - 0/0 exact" when the ID matched no scenarios. Now returns an error with a descriptive - message and non-zero exit code. (#3427) -- `zeph-llm`: `RouterProvider::model_identifier()` fell through to the default empty-string - implementation, causing `results.json` to record `model: ""` when the router was used as - the bench provider. Overridden to return `"router"`. (#3430) - -### Changed - `zeph-core`: decomposed all `#[allow(clippy::too_many_lines)]` functions in `crates/zeph-core/src/agent/` into focused private helpers (A.1–A.13, issue #3453). Affected modules: `mod.rs` (tool-call batch @@ -2376,60 +2395,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). `migrate_memory_retrieval_query_bias`, `migrate_memory_persona_config`) so existing user configs receive the new defaults on `zeph --migrate-config`. -### Fixed - -- `zeph-memory`: `EmbeddingRegistry::search_raw` now validates query vector dimension against the - stored collection dimension before issuing a gRPC search. 
Qdrant gRPC silently returned - near-zero cosine scores (~0.022) on dimension mismatch, causing all skill candidates to be - dropped below the `min_injection_score` threshold and making skill injection non-functional. - A cached dimension (populated at sync time) is checked first; on cache miss the collection is - probed once and the result cached for subsequent calls. (#3418) -- `zeph-core`: shutdown summary LLM calls no longer timeout on multi-turn sessions. Increased - `shutdown_summary_timeout_secs` default from 10 to 30 (matching `extraction_timeout_secs`) and - added a cooperative yield loop between `abort_all()` and `maybe_store_shutdown_summary()` so - cancelled enrichment tasks can release their HTTP connections before the summary LLM call - competes for the API rate-limit budget. (#3431) - -### Added -- `zeph-bench`: full tau2-bench environment support — in-memory retail and airline domain executors - (`RetailEnv`, `AirlineEnv`) implementing `ToolExecutor`; `TauBenchEvaluator` scoring tool-call - traces via `Action.compare_with_tool_call` semantics; `Tau2BenchLoader` replacing the old - string-match `tau-bench` loader; `run_dataset_with_env_factory` runner method for tool-driven - datasets; `ResponseMode::ToolUse` with a customer-service system prompt; `bench download - --dataset tau2-bench` git-clone download; `bench run --dataset tau2-bench-retail/airline` - dispatch (closes #3417, Phase 1: ACTION-only scoring). -- `zeph-bench`: aggregate `median_score`, `stddev` (population), and `error_count` statistics on - `BenchRun.aggregate`; all three fields are persisted in `results.json` and included in - `summary.md`. -- `zeph-bench`: multi-turn `Scenario` via `Vec` with `Role::{User,Assistant}`; all built-in - loaders construct via the new `Scenario::single` constructor; `primary_prompt()` returns - `Result<&str, BenchError>` so malformed scenarios surface immediately. 
-- `zeph-bench`: `--baseline` dual-run for memory-relevant datasets (`longmemeval`, `locomo`); - runs memory-off and memory-on passes sequentially, writes - `baseline/memory-{off,on}/results.json` and `baseline/comparison.json` reusing the existing - `baseline::BaselineComparison`; non-memory datasets (`gaia`, `frames`, `tau-bench`) reject - `--baseline` with a clear error. -- `zeph-bench`: `MemoryMode::{Off,On}` and `BenchMemoryParams` on `BenchRunner`; memory-on pass - wires a `SQLite`-only `SemanticMemory` per scenario (no Qdrant), with - `summarization_threshold = 100_000` to preserve run determinism (FR-003). +- `zeph-bench`: `Scenario.prompt: String` replaced by `turns: Vec`; loaders construct via + `Scenario::single`; `runner::RunOptions` gains `memory_mode: MemoryMode` field (default `Off`). +- `zeph-bench`: `ResultWriter::new` uses `create_dir_all` for nested output directories. -### Fixed - -- `zeph-core`, `zeph-llm`: tool schema filter now receives the embedding provider instead of the - main chat provider in `runner.rs` and `daemon.rs`; eliminates 1536-vs-768-dim mismatch that - caused all tool similarity scores to be 0.0 (fixes #3413). -- `zeph-llm`: `OllamaProvider::embed()` tracing span now records the actual embedding model name - (`self.embedding_model`) instead of the chat model name (`self.model_identifier()`), making - `llm.embed` spans in Perfetto/Jaeger unambiguous (fixes #3414). - -### Changed - -- `zeph-bench`: `Scenario.prompt: String` replaced by `turns: Vec`; loaders construct via - `Scenario::single`; `runner::RunOptions` gains `memory_mode: MemoryMode` field (default `Off`). -- `zeph-bench`: `ResultWriter::new` uses `create_dir_all` for nested output directories. 
- -### Changed - `zeph-vault`: split 1302-line `lib.rs` into four module files (`age.rs`, `arc.rs`, `env.rs`, `mock.rs`); `lib.rs` now contains only the `VaultProvider` trait, `default_vault_dir`, and @@ -2452,6 +2422,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - `zeph-scheduler`: added `#[derive(Debug, Clone)]` to `ScheduledTaskInfo` and `#[derive(Debug)]` to `JobStore` to satisfy the workspace `Debug`-on-all-public-types convention. +### Refactored + +- `zeph-tools`: decomposed long functions in shell and scrape executors to satisfy + `clippy::too_many_lines`. `execute_block` was split into `capture_snapshot_for`, + `maybe_rollback`, `classify_and_audit`, and `apply_output_filter`; `execute_bash` was + split into `build_bash_command`, `apply_sandbox`, `spawn_output_readers`, + `run_bash_stream`, and `finalize_envelope`; `execute_tool_call` in `WebScrapeExecutor` + was deduplicated via `run_with_audit`. (#3450) + ## [0.20.0] - 2026-04-25 ### Added @@ -2783,6 +2762,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Test-only context/assembler.rs helpers moved into `assembler_helpers` test module in `zeph-core` (#3254). + ## [0.19.3] - 2026-04-19 ### Added @@ -2810,44 +2790,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). llm strategies) resolves contradictory edges in SYNAPSE recall. `insert_or_supersede` and `check_supersede_depth` (recursive CTE, depth-capped) added to `GraphStore`. -### Fixed - -- **fix(index): wire `CodeRetriever` into `IndexState` for automatic code RAG injection** (#3236) — - `IndexState.retriever` was always `None` because no call path ever set it; added - `AgentBuilder::with_code_retriever` and `apply_code_rag_retriever` helper wired from `runner.rs`. - Code RAG context injection is now active when `index.enabled = true`, `budget_ratio > 0`, a - Qdrant backend is available, and `mcp_enabled = false`. 
- -- **fix(channels): Telegram elicitation response map uses raw field names** (#3217) — the - `sanitize_field_key` transformation was incorrectly applied to MCP elicitation response map keys - in the Telegram channel, causing field names with spaces or dashes to be remapped (e.g. - `pass word` → `pass_word`), which broke round-trip fidelity with the MCP server and could cause - key collisions. Keys now use the raw `field.name` as specified by the MCP schema; sanitization - is retained only for Telegram display labels. Parity restored with the CLI channel. - -- **fix(quality): MARCH `flag_marker` no longer emits extra `response_chunk`/`response_end` after `response_end` in `--json` mode** (#3231) — `quality_hook.rs` now calls `channel.send_chunk()` instead of `channel.send()` at the flag-marker injection sites, and `agent/mod.rs` calls `flush_chunks()` after `run_self_check_for_turn()` returns. This ensures the `[verify]` marker is emitted as part of the primary response stream with a single `response_end`. - -- **fix(commands): `/plugins` slash command now routed through `CommandRegistry`** (#3215) — added - `PluginsCommand` handler in `zeph-commands` that delegates to `Agent::handle_plugins` via - `AgentAccess`. `/plugins overlay|list|add|remove` are now dispatched correctly instead of falling - through to the LLM. Also extracted the two near-identical slash-command dispatch blocks in - `zeph-core/agent/mod.rs` into a single `apply_dispatch_result` helper with a `DispatchFlow` enum - (#3214), eliminating divergence risk between the session-registry and agent-registry paths. - -- **fix(json-cli): spurious `response_end` events in `--json` mode** (#3212) — `JsonCliChannel` - now tracks whether any `ResponseChunk` has been emitted since the last `ResponseEnd` and only - emits `ResponseEnd` when there is at least one preceding chunk. Lifecycle strings (e.g. 
- `"Shutting down..."`) are now routed through `send_status` (emitting `{"event":"status",...}`) - instead of `send`, eliminating a spurious `response_chunk`/`response_end` pair at shutdown. - `JsonEventSink` gained a `with_writer` constructor for testability; behavioural unit tests added - for all emission sequences. - -- **fix(agent): slash command errors no longer terminate the agent loop** (#3211) — `CommandError` - returned by registry-handled slash commands (e.g. `/loop` with an invalid interval) is now - surfaced to the user channel and logged at WARN level, then the loop continues. Previously the - agent exited with `AgentError::Other`, producing no visible output on stdout. - -### Added - **feat(mcp): server-driven elicitation support** (#3141) — MCP servers speaking protocol version 2025-06-18 can now pause a tool call and request structured user input (credentials, file paths, @@ -2895,6 +2837,46 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - **feat(config): `[cli]` TOML section** (#2790, #3083) — `bare`, `json`, `auto` booleans and `[cli.loop]` subsection (`min_interval_secs`, `max_iterations`). +### Fixed + +- **fix(index): wire `CodeRetriever` into `IndexState` for automatic code RAG injection** (#3236) — + `IndexState.retriever` was always `None` because no call path ever set it; added + `AgentBuilder::with_code_retriever` and `apply_code_rag_retriever` helper wired from `runner.rs`. + Code RAG context injection is now active when `index.enabled = true`, `budget_ratio > 0`, a + Qdrant backend is available, and `mcp_enabled = false`. + +- **fix(channels): Telegram elicitation response map uses raw field names** (#3217) — the + `sanitize_field_key` transformation was incorrectly applied to MCP elicitation response map keys + in the Telegram channel, causing field names with spaces or dashes to be remapped (e.g. 
+ `pass word` → `pass_word`), which broke round-trip fidelity with the MCP server and could cause + key collisions. Keys now use the raw `field.name` as specified by the MCP schema; sanitization + is retained only for Telegram display labels. Parity restored with the CLI channel. + +- **fix(quality): MARCH `flag_marker` no longer emits extra `response_chunk`/`response_end` after `response_end` in `--json` mode** (#3231) — `quality_hook.rs` now calls `channel.send_chunk()` instead of `channel.send()` at the flag-marker injection sites, and `agent/mod.rs` calls `flush_chunks()` after `run_self_check_for_turn()` returns. This ensures the `[verify]` marker is emitted as part of the primary response stream with a single `response_end`. + +- **fix(commands): `/plugins` slash command now routed through `CommandRegistry`** (#3215) — added + `PluginsCommand` handler in `zeph-commands` that delegates to `Agent::handle_plugins` via + `AgentAccess`. `/plugins overlay|list|add|remove` are now dispatched correctly instead of falling + through to the LLM. Also extracted the two near-identical slash-command dispatch blocks in + `zeph-core/agent/mod.rs` into a single `apply_dispatch_result` helper with a `DispatchFlow` enum + (#3214), eliminating divergence risk between the session-registry and agent-registry paths. + +- **fix(json-cli): spurious `response_end` events in `--json` mode** (#3212) — `JsonCliChannel` + now tracks whether any `ResponseChunk` has been emitted since the last `ResponseEnd` and only + emits `ResponseEnd` when there is at least one preceding chunk. Lifecycle strings (e.g. + `"Shutting down..."`) are now routed through `send_status` (emitting `{"event":"status",...}`) + instead of `send`, eliminating a spurious `response_chunk`/`response_end` pair at shutdown. + `JsonEventSink` gained a `with_writer` constructor for testability; behavioural unit tests added + for all emission sequences. 
+ +- **fix(agent): slash command errors no longer terminate the agent loop** (#3211) — `CommandError` + returned by registry-handled slash commands (e.g. `/loop` with an invalid interval) is now + surfaced to the user channel and logged at WARN level, then the loop continues. Previously the + agent exited with `AgentError::Other`, producing no visible output on stdout. + + +- fix(cli): skip scheduler, code indexer, and mem-eviction task in `--bare` mode (#3209) + ### Changed - **chore(ci): overhaul Renovate configuration** — `.github/renovate.json` migrated from @@ -2916,10 +2898,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). database, independent of GitHub's vulnerability-alerts API) and the `vulnerabilityAlerts` block in `.github/renovate.json`. Secret scanning remains enabled (separate system). -### Fixed - -- fix(cli): skip scheduler, code indexer, and mem-eviction task in `--bare` mode (#3209) - ## [0.19.2] - 2026-04-18 ### Added @@ -2965,6 +2943,145 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). note in the output clarifies this. Closes [#3147](https://github.com/bug-ops/zeph/issues/3147). + +- **feat(plugins): plugin config overlay merge** — `zeph-plugins` now exposes + `apply_plugin_config_overlays(&mut Config, &Path) -> Result`. + At bootstrap (`AppBuilder::new`) and on hot-reload (`reload_config`) the helper scans every + installed `/.plugin.toml` and applies tighten-only merges for the three safelisted keys: + `tools.blocked_commands` (union), `tools.allowed_commands` (intersection, base-gated — empty + base stays empty so plugins cannot widen the allowlist), and `skills.disambiguation_threshold` + (max). Iteration is sorted by directory name for deterministic log output (M1). The safelist + validation from install time is re-run at load time as defence-in-depth against post-install + tampering (M2). 
Integer-literal threshold values (`0`, `1`) are accepted alongside float + literals (M3). Symlinked plugin subdirectories are rejected (E8). + `ResolvedOverlay` (returned from the helper and stored on `AppBuilder`) carries `source_plugins`, + `skipped_plugins`, and all merged values for diagnostic surfacing. + **Hot-reload shell behaviour** (updated by #3146): `blocked_commands` is now rebuilt live via + `ShellPolicyHandle::rebuild` — no restart required when a plugin overlay changes the blocklist. + `allowed_commands` changes still require a restart (feeds sandbox path intersection at construction + time); a `tracing::warn!` and a status-channel banner are emitted in that case so the user is + never silently misled. `skills.disambiguation_threshold` applies live. + Closes [#3128](https://github.com/bug-ops/zeph/issues/3128). + +- **feat(security): OS-level file permission hardening for sensitive files** — adds + `zeph-common::fs_secure` module with `open_private_truncate`, `append_private`, + `write_private`, and `atomic_write_private` helpers. All sensitive files created by + Zeph (vault secrets, SQLite databases, debug dumps, audit JSONL, router state, ACP + permission files, init config) are now created with mode 0600 (owner read/write only), + independent of process umask. `atomic_write_private` uses `O_EXCL` on the temp file + and fsyncs before rename for crash safety. `zeph doctor` now reports a warning when + the vault file has group/world-readable bits set and correctly fails on unexpected + metadata errors. Closes [#3121](https://github.com/bug-ops/zeph/issues/3121). + +- **feat(session): session recap on resume (#3064)** — adds `/recap` command and + `[session.recap]` config section. When a session has a persisted digest, a brief recap is shown + before the first user message on resume. The cached digest is now always loaded on startup + regardless of `memory.digest.enabled` (C3-bis fix). 
Input is sanitized with credential redaction + and injection-pattern stripping before reaching the LLM. Provider is configurable via + `session.recap.provider`; falls back to the primary provider when unset. + +- **feat(llm): configurable prompt cache TTL with 1-hour Claude variant** — adds `CacheTtl` enum + (`Ephemeral` | `OneHour`) to `zeph-llm`. Setting `prompt_cache_ttl = "1h"` in a Claude provider + block enables the `extended-cache-ttl-2025-04-25` beta and extends cached prefix lifetime to 1 + hour at approximately 2× write cost. Default behaviour (omit or set `"ephemeral"`) is byte-identical + to the previous wire format — no rollout risk for existing deployments. Closes + [#3096](https://github.com/bug-ops/zeph/issues/3096). + +- **feat(core): `invoke_skill` tool (`SkillInvokeExecutor`)** — new `invoke_skill` tool that + returns a skill body as tool output with trust-aware sanitization. Blocked skills are refused + before any body read; non-Trusted bodies pass through `sanitize_skill_text`; Quarantined bodies + are additionally wrapped with `wrap_quarantined`. A per-turn trust snapshot + (`Arc>`) is written by `prepare_context` and read by the executor without + re-querying SQLite on every tool call. `invoke_skill` and `load_skill` are added to + `QUARANTINE_DENIED` and `READONLY_TOOLS`. CLI subcommand `zeph skill invoke [args]` + with the same trust-aware pipeline. Closes [#3105](https://github.com/bug-ops/zeph/issues/3105). + +- **feat(plugins): `zeph-plugins` crate + `zeph plugin` CLI** — new `zeph-plugins` crate with + `PluginManager` for install/remove/list of plugin packages. Enforces tighten-only config overlay + validation at install time (union on `blocked_commands`, intersection on `allowed_commands`, max + on `disambiguation_threshold`; keys outside this safelist are rejected with `UnsafeOverlay`). 
+ **Note:** runtime application of overlay values to the live config is validated and stored in + `plugin.toml` but not yet merged into the running `Config` struct — scheduled for a follow-up PR. + MCP allowlist validation, skill name conflict detection (managed, bundled, other-plugin), path + traversal defense (canonicalize + `starts_with(root)`), and recursive `.bundled` marker stripping + via `walkdir`. CLI subcommand `zeph plugin list|add|remove`. TUI slash command + `/plugins list|add|remove` wired through `AgentAccess::handle_plugins` and TUI command palette. + Implements [#2806](https://github.com/bug-ops/zeph/issues/2806). + + +- **feat(orchestration): persist task graph state to `SQLite` across scheduler ticks; `/plan resume <id>` hydrates from disk** — `GraphPersistence` is now wired into `OrchestrationState` and saved once per scheduler tick plus two defensive saves around plan completion. `/plan resume <id>` supports a full status×action matrix: `Paused` (hydrate), `Running` (crash-recovery: reset in-flight tasks to `Ready`), `Failed` (hydrate for retry), `Completed`/`Canceled` (refuse). New config key `orchestration.persistence_enabled` (default `true`). Follow-up `/plan gc` for TTL-based pruning tracked as P3/enhancement. Closes #3107. + +- **feat(orchestration): AdaptOrch topology advisor** — 16-arm Thompson Beta-bandit that classifies + goals into `TaskClass` variants and samples `TopologyHint` (Sequential / Parallel / Cascade / + Adaptive) to inject into the planner prompt. Outcomes are recorded synchronously; state persists + to disk on graceful shutdown. Enabled via `[orchestration.adaptorch]` config block. + Closes #2434. +- **feat(llm): Collaborative Entropy (`CoE`) routing** — per-call `ChatExtras { entropy }` returned + by `chat_with_extras()` for all providers (OpenAI, Compatible, Ollama, Mock). Router uses + intra-entropy threshold and inter-divergence `(1-cosine)/2` to escalate uncertain primary + responses to a configured secondary provider.
Gated to `Ema` and `Thompson` routing strategies. + Configured via `[llm.coe]` block. Closes #2505. +- **feat(orchestration): VeriMAP per-subtask predicate gate (#2269).** + Adds a verification predicate gate to `DagScheduler`. Each `TaskNode` may carry a + `verify_predicate: Option` (natural-language criterion). After task + completion, the scheduler emits `SchedulerAction::VerifyPredicate`; the agent loop + calls `PredicateEvaluator::evaluate()` via the configured LLM provider and records the + outcome via `DagScheduler::record_predicate_outcome()`. Downstream tasks are blocked + until `predicate_outcome.is_some()`. Failed predicates inject a remediation task (re-run + with prior failure context). New config fields: `verify_predicate_enabled`, + `predicate_provider`, `max_predicate_replans`. New types: `VerifyPredicate`, + `PredicateOutcome`, `PredicateEvaluator`. New error variant: + `OrchestrationError::PredicateNotSupported`. Planner emits `verify_criteria` field in + plan JSON when enabled. +- **feat(orchestration): error lineage and cascade abort defense (#2407).** + Introduces `ErrorLineage` side-table on `DagScheduler` that tracks consecutive failure + chains across `depends_on` paths. When N consecutive nodes fail (configurable via + `cascade_chain_threshold`, default 3), or a region's fan-out failure rate reaches + `cascade_failure_rate_abort_threshold` (default 0.0 = opt-in), the DAG is aborted + immediately with `OrchestrationError::CascadeAborted`. New config fields: + `cascade_chain_threshold`, `cascade_failure_rate_abort_threshold`, `lineage_ttl_secs`. + New types: `lineage::ErrorLineage`, `lineage::LineageEntry`, `lineage::LineageKind`, + `cascade::AbortDecision`. Audit log emits one structured `tracing::error!` per abort + with full lineage path and cause discriminator. 
+ + +- **feat(sandbox): `--init` wizard has a dedicated OS sandbox step, `migrate-config` inserts a + commented-out `[tools.sandbox]` block, `config/default.toml` and `docker-compose.yml` carry the + section so legacy configs can upgrade without manual editing.** + ([#3070](https://github.com/bug-ops/zeph/issues/3070)) + + +- **mcp: forward MCP tool `outputSchema` to LLM tool declarations** (`#2931`): When + `mcp.forward_output_schema = true` (default: `false`), Zeph appends a bounded + "Expected output schema" hint to the tool description sent to the LLM, enabling + more accurate tool-result parsing and typed tool chaining. Schema content is sanitized + through the existing injection pipeline (drop-on-injection policy); the hint is capped + at `mcp.output_schema_hint_bytes` (default: 512). The tool-cache key now covers + `description` and `output_schema` to prevent stale cache hits on server reconnects. + +- **cli: `zeph doctor` startup diagnostic subcommand** (`#2930`): `zeph doctor + [--config <path>] [--json]` runs 15 preflight checks — config parse, vault + accessibility and key permissions, LLM provider reachability (read-only HTTP probe), + SQLite availability, Qdrant health (when configured), skills registry, filesystem + writability, and MCP server connectivity — and prints `[OK]` / `[WARN]` / `[FAIL]` + per check. Exits 0 if no failures, 1 if any `FAIL`. All output is redacted through + `scrub_content` (secrets, paths, URL-embedded credentials). + +- **core: URL-credential redaction in `scrub_content`**: `redact.rs::scrub_content` now + strips `scheme://user:pass@host` patterns before secret and path redaction, preventing + basic-auth credentials from leaking through error messages. + +- **skills: hub skill install pipeline** (`#2806`, `#3040`): `SkillManager` now exposes + `install_from_path` and `install_from_url` that copy a skill package into `managed_dir` + with `TrustLevel::Quarantined` as the default.
During install, `.bundled` marker files are + recursively stripped from the package to prevent trust escalation (a forged `.bundled` + would otherwise suppress the injection-pattern scanner for the installed skill). Symlinks + in the source package are skipped rather than copied. If stripping fails, the partially + installed directory is cleaned up before the error is propagated. `SkillRegistry` gains a + `with_hub_dirs` builder method and a `reload`-safe `hub_dirs` field: hub-managed skills + ignore any `.bundled` marker even if one reappears post-install (defense-in-depth). Hub + dirs are preserved across hot-reloads via `std::mem::take`. + ### Breaking (pre-1.0) - `zeph-tui`: `App::render_cache`, `App::view_target`, and `App::transcript_cache` @@ -3054,82 +3171,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). Qdrant WARN rate-limiter. Closes [#3129](https://github.com/bug-ops/zeph/issues/3129). -### Security - -- **sha256 integrity registry for plugin manifests (#3148)** — the integrity registry at - `/.plugin-integrity.toml` defends against accidental corruption and naive in-tree - tampering (a skill's bash block editing its own `.plugin.toml` without knowing the sibling - data-root location). Known limits: not cryptographically signed (vault-signed digests are a - future P3); concurrent `plugin add` races are not locked (last writer wins — see module doc); - plugins installed before this release load without verification until reinstalled. - -### Added - -- **feat(plugins): plugin config overlay merge** — `zeph-plugins` now exposes - `apply_plugin_config_overlays(&mut Config, &Path) -> Result`. 
- At bootstrap (`AppBuilder::new`) and on hot-reload (`reload_config`) the helper scans every - installed `/.plugin.toml` and applies tighten-only merges for the three safelisted keys: - `tools.blocked_commands` (union), `tools.allowed_commands` (intersection, base-gated — empty - base stays empty so plugins cannot widen the allowlist), and `skills.disambiguation_threshold` - (max). Iteration is sorted by directory name for deterministic log output (M1). The safelist - validation from install time is re-run at load time as defence-in-depth against post-install - tampering (M2). Integer-literal threshold values (`0`, `1`) are accepted alongside float - literals (M3). Symlinked plugin subdirectories are rejected (E8). - `ResolvedOverlay` (returned from the helper and stored on `AppBuilder`) carries `source_plugins`, - `skipped_plugins`, and all merged values for diagnostic surfacing. - **Hot-reload shell behaviour** (updated by #3146): `blocked_commands` is now rebuilt live via - `ShellPolicyHandle::rebuild` — no restart required when a plugin overlay changes the blocklist. - `allowed_commands` changes still require a restart (feeds sandbox path intersection at construction - time); a `tracing::warn!` and a status-channel banner are emitted in that case so the user is - never silently misled. `skills.disambiguation_threshold` applies live. - Closes [#3128](https://github.com/bug-ops/zeph/issues/3128). - -- **feat(security): OS-level file permission hardening for sensitive files** — adds - `zeph-common::fs_secure` module with `open_private_truncate`, `append_private`, - `write_private`, and `atomic_write_private` helpers. All sensitive files created by - Zeph (vault secrets, SQLite databases, debug dumps, audit JSONL, router state, ACP - permission files, init config) are now created with mode 0600 (owner read/write only), - independent of process umask. `atomic_write_private` uses `O_EXCL` on the temp file - and fsyncs before rename for crash safety. 
`zeph doctor` now reports a warning when - the vault file has group/world-readable bits set and correctly fails on unexpected - metadata errors. Closes [#3121](https://github.com/bug-ops/zeph/issues/3121). - -- **feat(session): session recap on resume (#3064)** — adds `/recap` command and - `[session.recap]` config section. When a session has a persisted digest, a brief recap is shown - before the first user message on resume. The cached digest is now always loaded on startup - regardless of `memory.digest.enabled` (C3-bis fix). Input is sanitized with credential redaction - and injection-pattern stripping before reaching the LLM. Provider is configurable via - `session.recap.provider`; falls back to the primary provider when unset. - -- **feat(llm): configurable prompt cache TTL with 1-hour Claude variant** — adds `CacheTtl` enum - (`Ephemeral` | `OneHour`) to `zeph-llm`. Setting `prompt_cache_ttl = "1h"` in a Claude provider - block enables the `extended-cache-ttl-2025-04-25` beta and extends cached prefix lifetime to 1 - hour at approximately 2× write cost. Default behaviour (omit or set `"ephemeral"`) is byte-identical - to the previous wire format — no rollout risk for existing deployments. Closes - [#3096](https://github.com/bug-ops/zeph/issues/3096). - -- **feat(core): `invoke_skill` tool (`SkillInvokeExecutor`)** — new `invoke_skill` tool that - returns a skill body as tool output with trust-aware sanitization. Blocked skills are refused - before any body read; non-Trusted bodies pass through `sanitize_skill_text`; Quarantined bodies - are additionally wrapped with `wrap_quarantined`. A per-turn trust snapshot - (`Arc>`) is written by `prepare_context` and read by the executor without - re-querying SQLite on every tool call. `invoke_skill` and `load_skill` are added to - `QUARANTINE_DENIED` and `READONLY_TOOLS`. CLI subcommand `zeph skill invoke [args]` - with the same trust-aware pipeline. Closes [#3105](https://github.com/bug-ops/zeph/issues/3105). 
- -- **feat(plugins): `zeph-plugins` crate + `zeph plugin` CLI** — new `zeph-plugins` crate with - `PluginManager` for install/remove/list of plugin packages. Enforces tighten-only config overlay - validation at install time (union on `blocked_commands`, intersection on `allowed_commands`, max - on `disambiguation_threshold`; keys outside this safelist are rejected with `UnsafeOverlay`). - **Note:** runtime application of overlay values to the live config is validated and stored in - `plugin.toml` but not yet merged into the running `Config` struct — scheduled for a follow-up PR. - MCP allowlist validation, skill name conflict detection (managed, bundled, other-plugin), path - traversal defense (canonicalize + `starts_with(root)`), and recursive `.bundled` marker stripping - via `walkdir`. CLI subcommand `zeph plugin list|add|remove`. TUI slash command - `/plugins list|add|remove` wired through `AgentAccess::handle_plugins` and TUI command palette. - Implements [#2806](https://github.com/bug-ops/zeph/issues/2806). - -### Fixed - **fix(tools): add `invoke_skill` to adversarial policy, VIGIL, and tool-filter always-on lists** (#3133) — `AdversarialPolicyConfig::default_exempt_tools()`, `vigil::default_exempt_tools()`, and @@ -3159,98 +3200,39 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). mechanical lint fixes: `map_or(false, …)` → `is_some_and`, `Duration::from_secs(N*60)` → `Duration::from_mins`, `from_millis(N000)` → `from_secs`, unused `use super::*` import, doc-markdown bare identifiers wrapped in backticks, `&[x.clone()]` → `std::slice::from_ref`, - `_handle` field renamed to `guard` (RAII semantics), `assert!(CONST > 0)` → `const { assert! }`, - underscore-prefixed binding used in test, no-effect closure binding. Affected files: - `crates/zeph-llm`, `crates/zeph-tools`, `crates/zeph-a2a`, `crates/zeph-index`, - `crates/zeph-skills`, `src/`. 
- -- **fix(config): make `migrate-config --in-place` fully idempotent** — refactored - `merge_table_commented` to collect comment lines and append them via raw-string guards - scoped to the target section body. Removed `append_comment_to_table_suffix` which - misused `toml_edit`'s `table.decor().suffix()` API (always empty after a write→re-parse - roundtrip), causing 45 entries re-added on every subsequent run and section-header - inline comment corruption. Added `section_body()` helper to prevent false-positive - cross-section suppression. Closes [#3116](https://github.com/bug-ops/zeph/issues/3116). - -- **chore(deps): remove stale RUSTSEC-2026-0097 ignore entry from `deny.toml`** — - advisory was patched in rand 0.8.6 (landed in #3104); the ignore entry and its - incorrect comment (claiming rand ≥0.9.3 was needed) are now removed. - Closes [#3113](https://github.com/bug-ops/zeph/issues/3113). - -### Performance - -- **perf(orchestration): cache forward adjacency in `CascadeDetector`** — eliminates per-call - O(N+E) forward-adjacency rebuild inside `descendant_count` for diamond/fan-in DAGs. - The map is computed once on first use and reused until `reset()` is called (on graph mutation - via `inject_tasks`). Converts `is_cascading`, `deprioritized_tasks`, and `evaluate_abort` - from `&self` to `&mut self`; call-sites in `scheduler.rs` updated accordingly. - Closes [#3094](https://github.com/bug-ops/zeph/issues/3094). - -### Fixed - -- **fix(mcp): wire `forward_output_schema` for Compatible, Gemini, and Ollama providers** — - `forward_output_schema = true` was silently ignored for all backends except Claude and OpenAi. - `CompatibleProvider` now exposes `with_output_schema_forwarding()` (delegating to the inner - `OpenAiProvider`) and the factory wires it for the Compatible branch. Gemini and Ollama emit a - `WARN` log when the setting is enabled, since neither backend supports the feature, so users are - no longer silently misled. Closes #3111. 
-- **fix(mcp): raise default `output_schema_hint_bytes` from 512 to 1024** — the 512-byte default - caused stub fallback for most real-world MCP tool schemas, making `forward_output_schema` - largely ineffective at default config. Closes #3084. -- **fix(mcp): distinguish stub event name from success event** — WARN emitted when - `output_schema_hint_bytes` budget is exceeded now uses `event = "mcp.output_schema.stub_used"` - instead of the misleading `mcp.output_schema.forwarded_to_llm`. Fixed in openai and claude - backends. Closes #3087. - -### Added - -- **feat(orchestration): persist task graph state to `SQLite` across scheduler ticks; `/plan resume ` hydrates from disk** — `GraphPersistence` is now wired into `OrchestrationState` and saved once per scheduler tick plus two defensive saves around plan completion. `/plan resume ` supports a full status×action matrix: `Paused` (hydrate), `Running` (crash-recovery: reset in-flight tasks to `Ready`), `Failed` (hydrate for retry), `Completed`/`Canceled` (refuse). New config key `orchestration.persistence_enabled` (default `true`). Follow-up `/plan gc` for TTL-based pruning tracked as P3/enhancement. Closes #3107. - -- **feat(orchestration): AdaptOrch topology advisor** — 16-arm Thompson Beta-bandit that classifies - goals into `TaskClass` variants and samples `TopologyHint` (Sequential / Parallel / Cascade / - Adaptive) to inject into the planner prompt. Outcomes are recorded synchronously; state persists - to disk on graceful shutdown. Enabled via `[orchestration.adaptorch]` config block. - Closes #2434. -- **feat(llm): Collaborative Entropy (`CoE`) routing** — per-call `ChatExtras { entropy }` returned - by `chat_with_extras()` for all providers (OpenAI, Compatible, Ollama, Mock). Router uses - intra-entropy threshold and inter-divergence `(1-cosine)/2` to escalate uncertain primary - responses to a configured secondary provider. Gated to `Ema` and `Thompson` routing strategies. - Configured via `[llm.coe]` block. 
Closes #2505. -- **feat(orchestration): VeriMAP per-subtask predicate gate (#2269).** - Adds a verification predicate gate to `DagScheduler`. Each `TaskNode` may carry a - `verify_predicate: Option` (natural-language criterion). After task - completion, the scheduler emits `SchedulerAction::VerifyPredicate`; the agent loop - calls `PredicateEvaluator::evaluate()` via the configured LLM provider and records the - outcome via `DagScheduler::record_predicate_outcome()`. Downstream tasks are blocked - until `predicate_outcome.is_some()`. Failed predicates inject a remediation task (re-run - with prior failure context). New config fields: `verify_predicate_enabled`, - `predicate_provider`, `max_predicate_replans`. New types: `VerifyPredicate`, - `PredicateOutcome`, `PredicateEvaluator`. New error variant: - `OrchestrationError::PredicateNotSupported`. Planner emits `verify_criteria` field in - plan JSON when enabled. -- **feat(orchestration): error lineage and cascade abort defense (#2407).** - Introduces `ErrorLineage` side-table on `DagScheduler` that tracks consecutive failure - chains across `depends_on` paths. When N consecutive nodes fail (configurable via - `cascade_chain_threshold`, default 3), or a region's fan-out failure rate reaches - `cascade_failure_rate_abort_threshold` (default 0.0 = opt-in), the DAG is aborted - immediately with `OrchestrationError::CascadeAborted`. New config fields: - `cascade_chain_threshold`, `cascade_failure_rate_abort_threshold`, `lineage_ttl_secs`. - New types: `lineage::ErrorLineage`, `lineage::LineageEntry`, `lineage::LineageKind`, - `cascade::AbortDecision`. Audit log emits one structured `tracing::error!` per abort - with full lineage path and cause discriminator. + `_handle` field renamed to `guard` (RAII semantics), `assert!(CONST > 0)` → `const { assert! }`, + underscore-prefixed binding used in test, no-effect closure binding. 
Affected files: + `crates/zeph-llm`, `crates/zeph-tools`, `crates/zeph-a2a`, `crates/zeph-index`, + `crates/zeph-skills`, `src/`. -### Security +- **fix(config): make `migrate-config --in-place` fully idempotent** — refactored + `merge_table_commented` to collect comment lines and append them via raw-string guards + scoped to the target section body. Removed `append_comment_to_table_suffix` which + misused `toml_edit`'s `table.decor().suffix()` API (always empty after a write→re-parse + roundtrip), causing 45 entries re-added on every subsequent run and section-header + inline comment corruption. Added `section_body()` helper to prevent false-positive + cross-section suppression. Closes [#3116](https://github.com/bug-ops/zeph/issues/3116). -- Add deny-first Seatbelt rules for 37 well-known credential paths in macOS sandbox workspace - profile (`.ssh`, `.aws`, `.config/zeph`, git credentials, AI agent caches, vault tokens, etc.) - — closes [#3086](https://github.com/bug-ops/zeph/issues/3086) -- **sandbox/macos: Workspace profile no longer scopes file reads to `/usr|/bin|/sbin|/lib`.** The - profile now grants global `(allow file-read*)` so bash can load dylibs from the DYLD shared cache - on macOS 14/15. This means write and network protections remain enforced but read-secret - protection has regressed. Follow-up P1 issue tracked for deny-first rules on `~/.ssh`, - `~/.aws`, `~/Library/Keychains`. ([#3077](https://github.com/bug-ops/zeph/issues/3077)) +- **chore(deps): remove stale RUSTSEC-2026-0097 ignore entry from `deny.toml`** — + advisory was patched in rand 0.8.6 (landed in #3104); the ignore entry and its + incorrect comment (claiming rand ≥0.9.3 was needed) are now removed. + Closes [#3113](https://github.com/bug-ops/zeph/issues/3113). + + +- **fix(mcp): wire `forward_output_schema` for Compatible, Gemini, and Ollama providers** — + `forward_output_schema = true` was silently ignored for all backends except Claude and OpenAi. 
+ `CompatibleProvider` now exposes `with_output_schema_forwarding()` (delegating to the inner + `OpenAiProvider`) and the factory wires it for the Compatible branch. Gemini and Ollama emit a + `WARN` log when the setting is enabled, since neither backend supports the feature, so users are + no longer silently misled. Closes #3111. +- **fix(mcp): raise default `output_schema_hint_bytes` from 512 to 1024** — the 512-byte default + caused stub fallback for most real-world MCP tool schemas, making `forward_output_schema` + largely ineffective at default config. Closes #3084. +- **fix(mcp): distinguish stub event name from success event** — WARN emitted when + `output_schema_hint_bytes` budget is exceeded now uses `event = "mcp.output_schema.stub_used"` + instead of the misleading `mcp.output_schema.forwarded_to_llm`. Fixed in openai and claude + backends. Closes #3087. -### Fixed - **fix(sandbox/macos): workspace profile now grants `file-read*` and `process-info*` so bash can load dylibs from the DYLD shared cache on macOS 14/15.** Previous `/usr|/bin|/sbin|/lib` subpath @@ -3275,83 +3257,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). the loop parks and waits for natural task completion; cancellation on EOF only fires when the running set is empty. Adds `DagScheduler::has_running_tasks()` and 5 new tests. 
([#3063](https://github.com/bug-ops/zeph/issues/3063)) -### Added - -- **feat(sandbox): `--init` wizard has a dedicated OS sandbox step, `migrate-config` inserts a - commented-out `[tools.sandbox]` block, `config/default.toml` and `docker-compose.yml` carry the - section so legacy configs can upgrade without manual editing.** - ([#3070](https://github.com/bug-ops/zeph/issues/3070)) - -### Changed - -- **fix(sandbox): `build_sandbox(strict=true)` now returns `SandboxError::Unavailable` on - unsupported platforms instead of silently falling back to noop.** Previously a misconfigured - strict sandbox on Windows or a Linux build without the `sandbox` feature degraded to - `NoopSandbox` with only a WARN log entry, hiding the misconfiguration. - ([#3070](https://github.com/bug-ops/zeph/issues/3070)) - -- **chore(msrv): bump workspace MSRV from 1.88 to 1.94.** Brings the declared - `rust-version` in line with APIs already in use (`Duration::from_mins`, - `Duration::from_hours`, stable in 1.91) and unlocks `with_added_extension` - (1.91), `str::floor_char_boundary` (1.91), `Vec::extract_if` (1.87, no - gate change) and `<[T]>::array_windows::<N>()` (1.94). CI now enforces the - MSRV via a dedicated `msrv` job that mirrors the `lint-clippy` feature - matrix (`desktop,ide,server,chat,pdf,scheduler` and `bench`) and is wired - into the `ci-status` gate. The `docker/Dockerfile.dev` base image is - bumped from `rust:1.88-slim` to `rust:1.94-slim`.
- -- feat(security): add egress network logging to `zeph-tools` — `EgressEvent` struct with - per-hop emission (success, non-2xx, connection failure, body-too-large, redirect-blocked, - domain/scheme/SSRF blocked), bounded `mpsc::channel(256)` + `Arc` drop counter, - `EgressConfig` (enable/disable logging categories), `correlation_id` field on `AuditEntry`, - and `AuditLogger::log_egress()` for JSONL egress records - -- feat(security): add VIGIL verify-before-commit gate to `zeph-core` — `VigilGate` regex - tripwire that runs before `ContentSanitizer` on every tool output; `VigilConfig` in - `zeph-config` (`[security.vigil]`) with `enabled`, `strict_mode`, `sanitize_max_chars`, - `extra_patterns`, `exempt_tools`; `VigilRiskLevel` on `AuditEntry`; `VigilFlag` in - `SecurityEventCategory`; `vigil_flags_total` / `vigil_blocks_total` counters in - `MetricsSnapshot`; `FailureKind::SecurityBlocked` for skill outcome tracking; subagent - sessions exempt via `SecurityState::vigil = None` (FR-009); fail-open on invalid config - -- fix(profiling): emit periodic system resource metrics on `TRACE` instead of `INFO` to keep - routine RSS/CPU/thread/fd snapshots out of normal logs; `target = "system.metrics"` is - unchanged - -### Added - -- **mcp: forward MCP tool `outputSchema` to LLM tool declarations** (`#2931`): When - `mcp.forward_output_schema = true` (default: `false`), Zeph appends a bounded - "Expected output schema" hint to the tool description sent to the LLM, enabling - more accurate tool-result parsing and typed tool chaining. Schema content is sanitized - through the existing injection pipeline (drop-on-injection policy); the hint is capped - at `mcp.output_schema_hint_bytes` (default: 512). The tool-cache key now covers - `description` and `output_schema` to prevent stale cache hits on server reconnects. 
- -- **cli: `zeph doctor` startup diagnostic subcommand** (`#2930`): `zeph doctor - [--config <path>] [--json]` runs 15 preflight checks — config parse, vault - accessibility and key permissions, LLM provider reachability (read-only HTTP probe), - SQLite availability, Qdrant health (when configured), skills registry, filesystem - writability, and MCP server connectivity — and prints `[OK]` / `[WARN]` / `[FAIL]` - per check. Exits 0 if no failures, 1 if any `FAIL`. All output is redacted through - `scrub_content` (secrets, paths, URL-embedded credentials). - -- **core: URL-credential redaction in `scrub_content`**: `redact.rs::scrub_content` now - strips `scheme://user:pass@host` patterns before secret and path redaction, preventing - basic-auth credentials from leaking through error messages. - -- **skills: hub skill install pipeline** (`#2806`, `#3040`): `SkillManager` now exposes - `install_from_path` and `install_from_url` that copy a skill package into `managed_dir` - with `TrustLevel::Quarantined` as the default. During install, `.bundled` marker files are - recursively stripped from the package to prevent trust escalation (a forged `.bundled` - would otherwise suppress the injection-pattern scanner for the installed skill). Symlinks - in the source package are skipped rather than copied. If stripping fails, the partially - installed directory is cleaned up before the error is propagated. `SkillRegistry` gains a - `with_hub_dirs` builder method and a `reload`-safe `hub_dirs` field: hub-managed skills - ignore any `.bundled` marker even if one reappears post-install (defense-in-depth). Hub - dirs are preserved across hot-reloads via `std::mem::take`.
- -### Fixed - Wire `with_managed_skills_dir` to populate `hub_dirs` in `SkillRegistry`, activating M1 defense-in-depth for builder-constructed agents (closes #3044) - **tools: sandbox security hardening** (`#2808`): fixed six critical security defects @@ -3423,6 +3328,70 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). so the TUI panel shows the correct count at startup. The list is refined per-turn by `rebuild_system_prompt` as usual. +### Security + +- **sha256 integrity registry for plugin manifests (#3148)** — the integrity registry at + `/.plugin-integrity.toml` defends against accidental corruption and naive in-tree + tampering (a skill's bash block editing its own `.plugin.toml` without knowing the sibling + data-root location). Known limits: not cryptographically signed (vault-signed digests are a + future P3); concurrent `plugin add` races are not locked (last writer wins — see module doc); + plugins installed before this release load without verification until reinstalled. + + +- Add deny-first Seatbelt rules for 37 well-known credential paths in macOS sandbox workspace + profile (`.ssh`, `.aws`, `.config/zeph`, git credentials, AI agent caches, vault tokens, etc.) + — closes [#3086](https://github.com/bug-ops/zeph/issues/3086) +- **sandbox/macos: Workspace profile no longer scopes file reads to `/usr|/bin|/sbin|/lib`.** The + profile now grants global `(allow file-read*)` so bash can load dylibs from the DYLD shared cache + on macOS 14/15. This means write and network protections remain enforced but read-secret + protection has regressed. Follow-up P1 issue tracked for deny-first rules on `~/.ssh`, + `~/.aws`, `~/Library/Keychains`. ([#3077](https://github.com/bug-ops/zeph/issues/3077)) + +### Performance + +- **perf(orchestration): cache forward adjacency in `CascadeDetector`** — eliminates per-call + O(N+E) forward-adjacency rebuild inside `descendant_count` for diamond/fan-in DAGs. 
+ The map is computed once on first use and reused until `reset()` is called (on graph mutation + via `inject_tasks`). Converts `is_cascading`, `deprioritized_tasks`, and `evaluate_abort` + from `&self` to `&mut self`; call-sites in `scheduler.rs` updated accordingly. + Closes [#3094](https://github.com/bug-ops/zeph/issues/3094). + +### Changed + +- **fix(sandbox): `build_sandbox(strict=true)` now returns `SandboxError::Unavailable` on + unsupported platforms instead of silently falling back to noop.** Previously a misconfigured + strict sandbox on Windows or a Linux build without the `sandbox` feature degraded to + `NoopSandbox` with only a WARN log entry, hiding the misconfiguration. + ([#3070](https://github.com/bug-ops/zeph/issues/3070)) + +- **chore(msrv): bump workspace MSRV from 1.88 to 1.94.** Brings the declared + `rust-version` in line with APIs already in use (`Duration::from_mins`, + `Duration::from_hours`, stable in 1.91) and unlocks `with_added_extension` + (1.91), `str::floor_char_boundary` (1.91), `Vec::extract_if` (1.87, no + gate change) and `<[T]>::array_windows::<N>()` (1.94). CI now enforces the + MSRV via a dedicated `msrv` job that mirrors the `lint-clippy` feature + matrix (`desktop,ide,server,chat,pdf,scheduler` and `bench`) and is wired + into the `ci-status` gate. The `docker/Dockerfile.dev` base image is + bumped from `rust:1.88-slim` to `rust:1.94-slim`.
+ +- feat(security): add egress network logging to `zeph-tools` — `EgressEvent` struct with + per-hop emission (success, non-2xx, connection failure, body-too-large, redirect-blocked, + domain/scheme/SSRF blocked), bounded `mpsc::channel(256)` + `Arc` drop counter, + `EgressConfig` (enable/disable logging categories), `correlation_id` field on `AuditEntry`, + and `AuditLogger::log_egress()` for JSONL egress records + +- feat(security): add VIGIL verify-before-commit gate to `zeph-core` — `VigilGate` regex + tripwire that runs before `ContentSanitizer` on every tool output; `VigilConfig` in + `zeph-config` (`[security.vigil]`) with `enabled`, `strict_mode`, `sanitize_max_chars`, + `extra_patterns`, `exempt_tools`; `VigilRiskLevel` on `AuditEntry`; `VigilFlag` in + `SecurityEventCategory`; `vigil_flags_total` / `vigil_blocks_total` counters in + `MetricsSnapshot`; `FailureKind::SecurityBlocked` for skill outcome tracking; subagent + sessions exempt via `SecurityState::vigil = None` (FR-009); fail-open on invalid config + +- fix(profiling): emit periodic system resource metrics on `TRACE` instead of `INFO` to keep + routine RSS/CPU/thread/fd snapshots out of normal logs; `target = "system.metrics"` is + unchanged + ## [0.19.1] - 2026-04-15 ### Added @@ -3538,6 +3507,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). pre-allocation from ~200KB to a few KB per context assembly pass when using large-context providers (Claude, GPT-4). + ## [0.19.0] - 2026-04-13 ### Changed @@ -3571,20 +3541,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). `sync_mcp_registry` (replaced with owned `McpToolOwned`). `McpCommand` is now registered in the agent registry; `/mcp` removed from `dispatch_slash_command`. -### Security - -- **Path traversal fix in `ImageCommand`** (`#2937`): `handle_image_as_string` in - `zeph-core` now rejects absolute paths (e.g. `/etc/passwd`) in addition to `../` - traversal sequences. 
Mirrors the equivalent fix applied to the CLI channel in `#2933`. - -- **Upgrade `zeph-experiments` to `rand 0.10`** (`#2929`): removed unsound `rand 0.8.5` - dependency (RUSTSEC-2026-0097). `rand` now resolves via `workspace = true` to the safe - `rand 0.10.1`. Updated call sites from `Rng::gen_range` to `RngExt::random_range` per - the rand 0.10 API. All other previously inline-versioned dependencies (`serde`, - `serde_json`, `thiserror`, `tracing`) in `zeph-experiments` also migrated to workspace - references. - -### Changed - **Compaction internals: owned-type refactoring** (`#2935`, `#2936`): refactored `validate_compaction`, `summarize_messages_with_deps`, and `archive_tool_outputs` to take owned @@ -3690,6 +3646,31 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). `SkillMatcherBackend::Qdrant` are `#[cfg(feature = "qdrant")]`. Default builds of `zeph-skills` compile without `zeph-memory` or `qdrant-client`. + +- **MemoryState decomposition** (`#2897`): split the 37-field flat `MemoryState` struct into 4 + concern-separated sub-structs (`MemoryPersistenceState`, `MemoryCompactionState`, + `MemoryExtractionState`, `MemorySubsystemState`), each in its own file under + `crates/zeph-core/src/agent/state/`. All call sites migrated. No behavioral changes. + +- **AgentBuilder consolidation** (`#2899`): added `BuildError` enum with `build()` validation + method; 93 builder methods reorganized into 15 doc-section groups; `with_orchestration` replaces + 3 separate methods; `with_experiment` replaces 2 separate methods. Production bootstrap in + `src/runner.rs` now calls `.build()?`. No behavioral changes except early misconfiguration + detection. + +### Security + +- **Path traversal fix in `ImageCommand`** (`#2937`): `handle_image_as_string` in + `zeph-core` now rejects absolute paths (e.g. `/etc/passwd`) in addition to `../` + traversal sequences. Mirrors the equivalent fix applied to the CLI channel in `#2933`. 
+ +- **Upgrade `zeph-experiments` to `rand 0.10`** (`#2929`): removed unsound `rand 0.8.5` + dependency (RUSTSEC-2026-0097). `rand` now resolves via `workspace = true` to the safe + `rand 0.10.1`. Updated call sites from `Rng::gen_range` to `RngExt::random_range` per + the rand 0.10 API. All other previously inline-versioned dependencies (`serde`, + `serde_json`, `thiserror`, `tracing`) in `zeph-experiments` also migrated to workspace + references. + ### Added - **Turn domain type** (`#2895`): introduced `Turn` as a first-class domain entity in @@ -3844,26 +3825,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). `llm.embed` spans. All instrumentation uses `#[cfg_attr(feature = "profiling", ...)]` — zero binary overhead without the feature flag. -### Performance - -- **Budget-first context assembly** (`#2817`): `prepare_context` now skips zero-budget context - sources entirely instead of spawning fetchers that immediately return nothing. Sources guarded: - summaries, cross-session, semantic recall + document RAG (gated together on `semantic_recall`), - code context, graph facts, persona facts, trajectory hints, and tree memory. Corrections fetch - remains unconditional (safety-critical). Added `BudgetAllocation::active_sources()` helper for - observability tracing. `memory_first_keep_tail` scan is capped at 50 messages to prevent O(N) - backward scans on very long sessions; the cap only fires at a non-ToolResult boundary so - tool-call/result pairs are never split. - -- **Turn-local embedding reuse** (`#2819`): added `TurnEmbedCache` (per-`chat()` call, keyed by - `String`, 2-4 entries) to avoid redundant embed calls within a single turn. The query embedding - computed for the quality gate is now cached and reused if the same text is requested again. - `spawn_asi_update` accepts an optional pre-computed embedding so the quality-gate response - embedding is passed directly instead of being re-embedded by the ASI background task. 
Added - session-level `embed_call_count` / `embed_cache_hits` counters (exposed via - `RouterProvider::embed_cache_metrics()`). - -### Added - **`zeph-bench` result writer and resume support** (`#2833`, `#2835`): added `results` module to `zeph-bench` with `BenchRun`, `ScenarioResult`, `Aggregate`, and `RunStatus` types (all `Serialize`/`Deserialize`). `ResultWriter` writes `results.json` (leaderboard-compatible, superset of `LongMemEval` submission format) and `summary.md` (Markdown table: scenario_id / score / response_excerpt / error) via atomic temp-file rename. `BenchRun::recompute_aggregate()` updates aggregate stats in place. `ResultWriter::load_existing()` enables `--resume`: callers load a partial run, obtain `completed_ids()`, skip already-done scenarios, and append new results before calling `write()` again. Partial runs are persisted with `status: interrupted`. Output directory is created automatically (single level). @@ -3883,12 +3844,31 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). `[llm.candle]`). Worker panic maps to `LlmError::Inference("inference worker died")`. Embed path is unchanged (`Arc` was already lock-free). -- **Turn-level latency metrics** (`#2820`): introduced `TurnTimings` struct with four - `u64` fields (`prepare_context_ms`, `llm_chat_ms`, `tool_exec_ms`, `persist_message_ms`). - `MetricsSnapshot` gains `last_turn_timings`, `avg_turn_timings`, `max_turn_timings` - (M3: tail-latency visibility), and `timing_sample_count`. Rolling window of 10 turns - maintained in agent state. TUI Resources panel displays the latency section after the - first completed turn. +- **Turn-level latency metrics** (`#2820`): introduced `TurnTimings` struct with four + `u64` fields (`prepare_context_ms`, `llm_chat_ms`, `tool_exec_ms`, `persist_message_ms`). + `MetricsSnapshot` gains `last_turn_timings`, `avg_turn_timings`, `max_turn_timings` + (M3: tail-latency visibility), and `timing_sample_count`. 
Rolling window of 10 turns + maintained in agent state. TUI Resources panel displays the latency section after the + first completed turn. + +### Performance + +- **Budget-first context assembly** (`#2817`): `prepare_context` now skips zero-budget context + sources entirely instead of spawning fetchers that immediately return nothing. Sources guarded: + summaries, cross-session, semantic recall + document RAG (gated together on `semantic_recall`), + code context, graph facts, persona facts, trajectory hints, and tree memory. Corrections fetch + remains unconditional (safety-critical). Added `BudgetAllocation::active_sources()` helper for + observability tracing. `memory_first_keep_tail` scan is capped at 50 messages to prevent O(N) + backward scans on very long sessions; the cap only fires at a non-ToolResult boundary so + tool-call/result pairs are never split. + +- **Turn-local embedding reuse** (`#2819`): added `TurnEmbedCache` (per-`chat()` call, keyed by + `String`, 2-4 entries) to avoid redundant embed calls within a single turn. The query embedding + computed for the quality gate is now cached and reused if the same text is requested again. + `spawn_asi_update` accepts an optional pre-computed embedding so the quality-gate response + embedding is passed directly instead of being re-embedded by the ASI background task. Added + session-level `embed_call_count` / `embed_cache_hits` counters (exposed via + `RouterProvider::embed_cache_metrics()`). ### Fixed @@ -3905,19 +3885,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - **MCP handshake timeout not enforced** (`#2815`): `connect()`, `connect_url()`, and `connect_url_with_headers()` now wrap `handler.serve(transport)` with `tokio::time::timeout(timeout, ...)`, returning `McpError::Timeout` on expiry. `list_tools()` applies the same guard to `list_all_tools()`. 
Previously, a stalled MCP server during the initialize handshake or tool listing would block `connect_all()` indefinitely, causing TUI startup to hang at "Connecting tools..." forever. Only `call_tool` had a timeout; the fix brings the other paths to parity. -### Changed - -- **MemoryState decomposition** (`#2897`): split the 37-field flat `MemoryState` struct into 4 - concern-separated sub-structs (`MemoryPersistenceState`, `MemoryCompactionState`, - `MemoryExtractionState`, `MemorySubsystemState`), each in its own file under - `crates/zeph-core/src/agent/state/`. All call sites migrated. No behavioral changes. - -- **AgentBuilder consolidation** (`#2899`): added `BuildError` enum with `build()` validation - method; 93 builder methods reorganized into 15 doc-section groups; `with_orchestration` replaces - 3 separate methods; `with_experiment` replaces 2 separate methods. Production bootstrap in - `src/runner.rs` now calls `.build()?`. No behavioral changes except early misconfiguration - detection. - ## [0.18.6] - 2026-04-08 ### Removed @@ -3926,6 +3893,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - **Retain `async-trait` in `zeph-core`, `zeph-mcp`, `zeph-acp`** (`#2781`): these crates depend on `rmcp` and `agent-client-protocol` which re-export `#[async_trait]` macros; removal is blocked by upstream and tracked for future cleanup. + +- **zeph-core module re-exports of entire crates** (`#2784`): removed `pub mod experiments/orchestration/subagent` shims from `zeph-core/src/lib.rs`; all callers migrated to direct crate imports (`zeph_orchestration::`, `zeph_experiments::`, `zeph_subagent::`). + +- **Dead accessor migration** (`#2783`): deleted `crates/zeph-core/src/agent/accessors.rs` (239 lines of unused accessor methods behind blanket `#[allow(dead_code)]`). 
+ +- **Dead code behind `#[allow(dead_code)]`** (`#2785`): removed `validate_density_budgets`, `apply_density_budget` from compaction strategy; removed `max_tokens` field from `PlanVerifier`. + ### Refactored - **Consolidate `AgentBuilder` methods: 30% reduction** (`#2804`): added `Default`/`new()` constructors to all sub-structs (`McpState`, `IndexState`, `DebugState`, `SecurityState`, `SkillState`, `ToolState`, `SessionState`, `LifecycleState`, `ProviderState`, `MetricsState`, `ExperimentState`, `FeedbackState`, `RuntimeConfig`, etc.), simplified `new_with_registry_arc()` from ~260 to ~50 lines, and removed 40 of 132 `pub fn with_*` builder methods (30.3%). Groups of related single-field setters were consolidated into batch methods: `with_skill_matching_config` (disambiguation/two-stage/confusability), `with_memory_formatting_config` (guidelines/digest/context-strategy), `with_trajectory_and_category_config`, `with_focus_and_sidequest_config`. Unused methods `with_skill_prompt_mode`, `with_result_cache_config`, `with_tool_schema_filter`, `with_classifier_metrics`, `with_server_compaction` removed. All call sites in `src/` and `crates/zeph-core/src/` updated. No behavioral changes. @@ -3950,14 +3924,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - **Remove dead `chat_with_named_provider` methods from `AnyProvider`** (`#2786`): deleted `chat_with_named_provider` and `chat_with_named_provider_and_tools` that accepted a `name` parameter but ignored it. Updated 7 call sites across 6 files to use `chat()` / `chat_with_tools()` directly. Removed dead `provider_name` field from `AdversarialPolicyLlmAdapter`, dead `persona_provider` field from `PersonaExtractionConfig`, and dead `model` field from `SubAgentLoopArgs`. 
-### Removed - -- **zeph-core module re-exports of entire crates** (`#2784`): removed `pub mod experiments/orchestration/subagent` shims from `zeph-core/src/lib.rs`; all callers migrated to direct crate imports (`zeph_orchestration::`, `zeph_experiments::`, `zeph_subagent::`). - -- **Dead accessor migration** (`#2783`): deleted `crates/zeph-core/src/agent/accessors.rs` (239 lines of unused accessor methods behind blanket `#[allow(dead_code)]`). - -- **Dead code behind `#[allow(dead_code)]`** (`#2785`): removed `validate_density_budgets`, `apply_density_budget` from compaction strategy; removed `max_tokens` field from `PlanVerifier`. - ### Added - **Embed backfill progress tracking with bounded memory and concurrency** (`#2765`): `embed_missing` now accepts a `watch::Sender>` and reports `done/total` progress after each message. Messages are processed in micro-batches of 32 with `buffer_unordered(4)` concurrency, reducing peak memory from all-at-once to ~32 messages worth of content and cutting wall-clock time 3-4x via parallel HTTP embedding calls. The TUI status bar shows `Backfilling embeddings: N/M (X%)` during backfill and clears on completion. @@ -4030,6 +3996,19 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - **Filter policy-decision language from `key_facts` at store time** (`#2724`): facts containing transient enforcement language (`"blocked"`, `"skipped"`, `"cannot access"`, `"permission denied"`, etc.) are now rejected before embedding and Qdrant insertion. Such facts describe a single-turn policy state, not stable world knowledge, and caused the agent to believe previously-blocked tool calls were permanently unavailable. New helper `is_policy_decision_fact` performs case-insensitive substring matching. 
- **Thread leak in `store::compression_predictor` tests** (`#2716`): `#[tokio::test]` tests in `store::compression_predictor::tests` now call `store.pool().close().await` before returning, ensuring all sqlx-sqlite background connection threads fully exit before nextest measures the thread count. Previously, pool drop only signalled threads to stop without waiting, causing nextest to attribute lingering connection threads as a LEAK for the concurrently-running pure `compression_predictor::tests::training_on_high_ratio_improves_high_ratio_prediction` test. + +- **Thread leak in `shared_agent_deps_has_document_and_graph_config_fields` test** (`#2710`): the test constructed `GraphConfig` via `..Default::default()` which transitively initializes lazy globals, leaving background threads alive and causing nextest to report a thread leak. Replaced `GraphConfig::default()` with a never-called closure that performs only compile-time type-checking of the `enabled` field, eliminating runtime construction. `DocumentConfig` is now constructed with explicit field literals (5 fields, no nesting) instead of `..Default::default()`. + +- **Graph episode FK constraint on `link_entity_to_episode`** (`#2704`): `INSERT INTO graph_episode_entities` failed with `FOREIGN KEY constraint failed` when the entity was pruned or missing at link time. Changed to `INSERT OR IGNORE` so missing entities are silently skipped. Downgraded the episode-linking failure log from `warn!` to `debug!` in `zeph-memory/src/semantic/graph.rs` — the error is non-actionable when caused by intentional entity pruning. +- **Thread leak in `apply_summary_provider_none_returns_agent_unchanged` test** (`#2698`): the plain `#[test]` function constructed an `Agent` that spawns background Tokio tasks; without a runtime to shut them down, `cargo nextest` reported a thread leak. Converted the test to `#[tokio::test] async fn` so the runtime tears down tasks cleanly on exit. 
+ +- **`zeph-db` fts.rs duplicate function definitions with `--all-features`** (`#2695`): all SQLite-specific FTS helper functions and the test module in `crates/zeph-db/src/fts.rs` were guarded with `#[cfg(feature = "sqlite")]`. When both `sqlite` and `postgres` features are active the compiler saw 17 duplicate definitions. Guards changed to `#[cfg(all(feature = "sqlite", not(feature = "postgres")))]` so the two implementations are mutually exclusive. +- **`rl_embed_dim` hardcoded to 1536** (`#2694`): `runner.rs` and `daemon.rs` used `config.skills.rl_embed_dim.unwrap_or(1536)` for the RL routing head dimension. The fallback of 1536 matches OpenAI embeddings but silently breaks sessions that use a different embedding provider (e.g. Ollama nomic-embed-text, dim=768). Replaced with `resolve_rl_embed_dim` which: (1) uses the explicit `rl_embed_dim` config value when set, (2) probes the actual embedding provider with a single call to determine the output dimension at runtime, (3) falls back to 1536 with a `WARN` instructing the operator to set `skills.rl_embed_dim` explicitly when the probe fails. +- **Explicitly shutdown MCP manager before runtime exit to kill child processes** (`#2693`): `runner.rs` and `daemon.rs` now retain an `Arc` reference and call `shutdown_all_shared().await` before `agent.shutdown()`, ensuring stdio child processes (e.g. `node svelte-mcp`, `mcpls`) are killed while the tokio runtime is still active. Previously, `ChildWithCleanup::drop` in `rmcp` used `tokio::spawn` to kill the child, which raced with runtime shutdown and silently dropped the future. `acp.rs` now also calls `agent.shutdown().await` after `agent.run()`, which was missing entirely. 
+- **Wire `trajectory_config` and `category_config` into `AgentBuilder` in all binary entry points** (`#2690`): `runner.rs`, `acp.rs`, and `daemon.rs` were never calling `with_trajectory_config` / `with_category_config`, so trajectory extraction and category auto-tagging were silently disabled at runtime despite being enabled in config. +- **`build_skill_matcher` now uses embedding provider** (`#2686`): `runner.rs`, `acp.rs`, and `daemon.rs` were passing the main chat provider to `build_skill_matcher` instead of the configured embedding provider, causing a Qdrant dimension mismatch on every session startup and falling back to returning all skills. +- **`mcp.tool_discovery.embedding_provider` config field now respected in `runner.rs`** (`#2684`): `create_mcp_registry` was always called with the main chat provider instead of the configured embed provider. The runner now resolves `config.mcp.tool_discovery.embedding_provider` via `create_named_provider`, matching the pattern used by the agent setup path. Falls back to the main provider when the field is empty or resolution fails. + ### Added - **Per-provider cost breakdown** (`#2730`): `CostTracker::record_usage` now accepts `provider_name`, `cache_read_tokens`, and `cache_write_tokens` in addition to input/output tokens. Cache pricing is applied per-provider type (Claude: cache read = 10% of prompt, cache write = 125%; `OpenAI`: cache read = 50%; others: 0%). Per-provider totals (input, cache_read, cache_write, output tokens, cost, request count) are accumulated in `CostState::providers` and exposed via `CostTracker::provider_breakdown()`. `MetricsSnapshot` gains `provider_cost_breakdown: Vec<(String, ProviderUsage)>`. The `/status` CLI command and TUI `/cost` view both render a per-provider table sorted by cost descending. Daily reset clears the breakdown alongside the spending total. Helper `reset_if_new_day` ensures consistent state across `record_usage` and `check_budget`. 
@@ -4054,20 +4033,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - **Category-aware memory** (`#2428`): nullable `category TEXT` column added to `messages` table (migration 070) with a partial index for filtered recall. `SearchFilter` gains an optional `category` field that adds a Qdrant `FieldCondition` when set. New `remember_categorized` / `recall_with_category` methods on `SemanticMemory`. Auto-tagging from active skill/tool context wired via `save_message_with_category`. New `[memory.category]` config section with `enabled` and `auto_tag` fields. - **`TiMem` temporal-hierarchical memory tree** (`#2262`): hierarchical `memory_tree` SQLite table (migration 071) stores leaf nodes at level 0 and LLM-merged summaries at higher levels. A background consolidation loop (`[memory.tree]`) clusters unconsolidated leaf nodes by cosine similarity and merges each cluster into a parent node via LLM summarization. Each cluster merge runs in its own SQLite transaction (prevents `SQLITE_BUSY` contention). Traversal from leaf to root via `traverse_tree_up`. New `[memory.tree]` config section with `enabled`, `consolidation_provider`, `sweep_interval_secs`, `batch_size`, `similarity_threshold`, `max_level`, `min_cluster_size`, `recall_top_k`, `context_budget_tokens`. CLI: `zeph memory tree`. TUI: `/memory tree`. -### Fixed - -- **Thread leak in `shared_agent_deps_has_document_and_graph_config_fields` test** (`#2710`): the test constructed `GraphConfig` via `..Default::default()` which transitively initializes lazy globals, leaving background threads alive and causing nextest to report a thread leak. Replaced `GraphConfig::default()` with a never-called closure that performs only compile-time type-checking of the `enabled` field, eliminating runtime construction. `DocumentConfig` is now constructed with explicit field literals (5 fields, no nesting) instead of `..Default::default()`. 
- -- **Graph episode FK constraint on `link_entity_to_episode`** (`#2704`): `INSERT INTO graph_episode_entities` failed with `FOREIGN KEY constraint failed` when the entity was pruned or missing at link time. Changed to `INSERT OR IGNORE` so missing entities are silently skipped. Downgraded the episode-linking failure log from `warn!` to `debug!` in `zeph-memory/src/semantic/graph.rs` — the error is non-actionable when caused by intentional entity pruning. -- **Thread leak in `apply_summary_provider_none_returns_agent_unchanged` test** (`#2698`): the plain `#[test]` function constructed an `Agent` that spawns background Tokio tasks; without a runtime to shut them down, `cargo nextest` reported a thread leak. Converted the test to `#[tokio::test] async fn` so the runtime tears down tasks cleanly on exit. - -- **`zeph-db` fts.rs duplicate function definitions with `--all-features`** (`#2695`): all SQLite-specific FTS helper functions and the test module in `crates/zeph-db/src/fts.rs` were guarded with `#[cfg(feature = "sqlite")]`. When both `sqlite` and `postgres` features are active the compiler saw 17 duplicate definitions. Guards changed to `#[cfg(all(feature = "sqlite", not(feature = "postgres")))]` so the two implementations are mutually exclusive. -- **`rl_embed_dim` hardcoded to 1536** (`#2694`): `runner.rs` and `daemon.rs` used `config.skills.rl_embed_dim.unwrap_or(1536)` for the RL routing head dimension. The fallback of 1536 matches OpenAI embeddings but silently breaks sessions that use a different embedding provider (e.g. Ollama nomic-embed-text, dim=768). Replaced with `resolve_rl_embed_dim` which: (1) uses the explicit `rl_embed_dim` config value when set, (2) probes the actual embedding provider with a single call to determine the output dimension at runtime, (3) falls back to 1536 with a `WARN` instructing the operator to set `skills.rl_embed_dim` explicitly when the probe fails. 
-- **Explicitly shutdown MCP manager before runtime exit to kill child processes** (`#2693`): `runner.rs` and `daemon.rs` now retain an `Arc` reference and call `shutdown_all_shared().await` before `agent.shutdown()`, ensuring stdio child processes (e.g. `node svelte-mcp`, `mcpls`) are killed while the tokio runtime is still active. Previously, `ChildWithCleanup::drop` in `rmcp` used `tokio::spawn` to kill the child, which raced with runtime shutdown and silently dropped the future. `acp.rs` now also calls `agent.shutdown().await` after `agent.run()`, which was missing entirely. -- **Wire `trajectory_config` and `category_config` into `AgentBuilder` in all binary entry points** (`#2690`): `runner.rs`, `acp.rs`, and `daemon.rs` were never calling `with_trajectory_config` / `with_category_config`, so trajectory extraction and category auto-tagging were silently disabled at runtime despite being enabled in config. -- **`build_skill_matcher` now uses embedding provider** (`#2686`): `runner.rs`, `acp.rs`, and `daemon.rs` were passing the main chat provider to `build_skill_matcher` instead of the configured embedding provider, causing a Qdrant dimension mismatch on every session startup and falling back to returning all skills. -- **`mcp.tool_discovery.embedding_provider` config field now respected in `runner.rs`** (`#2684`): `create_mcp_registry` was always called with the main chat provider instead of the configured embed provider. The runner now resolves `config.mcp.tool_discovery.embedding_provider` via `create_named_provider`, matching the pattern used by the agent setup path. Falls back to the main provider when the field is empty or resolution fails. - ## [0.18.4] - 2026-04-06 ### Added @@ -4118,6 +4083,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- **ML classifier false-positive on utility gate synthetic output** (`#2635`): `sanitize_tool_output` now skips the ML injection classifier for bodies that start with `[skipped]` or `[stopped]`. These strings are produced internally by the utility gate and are trusted content; classifying them produced noisy `WARN` log entries and incremented `classifier_tool_suspicious` metrics, which could mask real injection events. - **Utility gate explicit-request bypass never fired** (`#2641`): text-only user messages created via `Message::from_legacy` have `parts: vec![]` with text stored only in `content`. The bypass detection in `tool_execution/native.rs` was reading `m.parts` and thus always produced an empty string, so `has_explicit_tool_request` always returned `false`. Fixed by falling back to `m.content` when `parts` is empty. + ## [0.18.3] - 2026-04-04 ### Added @@ -4228,6 +4194,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Remove dead `ModelOrchestrator` and related `LlmRoutingStrategy::Task` code superseded by `RouterProvider` (#2540) + ## [0.18.2] - 2026-03-31 ### Added @@ -4247,18 +4214,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- feat(init): add `[tools.file]` wizard step to `--init` interactive config wizard — two prompts for `deny_read` and conditional `allow_read` comma-separated glob lists, wired into `build_config()` (#2525) - docs: add File Read Sandbox page to mdBook under Reference / Security (`book/src/reference/security/file-sandbox.md`), linked in `SUMMARY.md` (#2527) -### Fixed - -- fix(classifiers): add configurable `pii_ner_allowlist` to `ClassifiersConfig` — tokens matching an allowlist entry (case-insensitive) are never redacted by the piiranha NER model, suppressing false positives such as "Zeph" → `[PII:CITY]`; default entries: `["Zeph", "Rust", "OpenAI", "Ollama", "Claude"]`; list is empty-able via config to disable the feature (closes #2537) -- fix(classifiers): document that macOS Apple Silicon requires `--features full,metal` for piiranha NER GPU acceleration; without `metal`, the 1.1 GB model exceeds the 30s timeout on CPU and falls back to regex-only PII detection (closes #2538) -- fix(mcp): elicitation deadlock in `run_inline_tool_loop` (phase 3, closes #2542) — `run_inline_tool_loop` now wraps each `execute_tool_call_erased` call in `tokio::select!` that concurrently drains `elicitation_rx`; `handle_elicitation_event` changed to `pub(super)` for cross-module access; regression test added with a blocking executor that simulates the real MCP deadlock scenario -- fix(tools): propagate `claim_source` from `ToolOutput` into the post-execution audit entry in `AdversarialPolicyGateExecutor`; `write_audit` now accepts an explicit `claim_source` parameter so the field is no longer hardcoded to `None` for successful executions (closes #2535) -- fix(tools): `extract_paths` now detects relative path tokens that contain `/` but do not start with `/` or `./` (e.g. 
`src/main.rs`, `.local/foo/bar`); URL schemes (`://`) and shell variable assignments (`KEY=value`) are excluded from matching (closes #2536) - -- fix(mcp): replace unbounded elicitation mpsc channel with a bounded channel (default capacity 16) to prevent memory exhaustion from misbehaving MCP servers; requests that arrive when the queue is full are auto-declined with a warning log instead of accumulating indefinitely; capacity is configurable via `[mcp] elicitation_queue_capacity` (closes #2524) -- fix(mcp): pre-existing `clippy::non_exhaustive_omitted_patterns`, `match_single_binding`, and `uninlined_format_args` warnings in elicitation CLI prompt builder and test code (caught while adding bounded-channel support) - -### Added - security(mcp): warn user before prompting for elicitation fields whose names match sensitive patterns (password, token, secret, key, credential, auth, private, passphrase, pin, etc.); warning shows the server name and field name so the user can make an informed decision; configurable via `[mcp] elicitation_warn_sensitive_fields` (default `true`) (closes #2523) @@ -4274,7 +4229,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - fix(memory): orphaned tool-pair messages removed by `sanitize_tool_pairs` are now soft-deleted from SQLite after each `load_history` call, preventing the same orphan warnings from reappearing on every restart; deletion is non-fatal (warning on error, debug on success) (closes #2507) - fix(memory): `strip_mid_history_orphans` now soft-deletes messages whose `content` field contains only legacy tool bracket strings (e.g. 
`[tool_use: ...]`) after all `ToolUse`/`ToolResult` parts are stripped; previously the non-empty `content` prevented soft-delete and caused the orphan `WARN` to repeat on every session restart (closes #2529) -### Added - metrics: add `sanitizer_injection_fp_local` counter for injection flags on local (`ToolResult`) sources (#2515) - metrics: add `pii_ner_timeouts` counter for NER classifier timeout events (#2516) @@ -4296,6 +4250,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - config: `[mcp] lock_tool_list`, `[mcp] default_env_isolation`, `[[mcp.servers]] env_isolation` fields - config: `[a2a] ibct_keys`, `[a2a] ibct_signing_key_vault_ref`, `[a2a] ibct_ttl_secs` fields +### Fixed + +- fix(classifiers): add configurable `pii_ner_allowlist` to `ClassifiersConfig` — tokens matching an allowlist entry (case-insensitive) are never redacted by the piiranha NER model, suppressing false positives such as "Zeph" → `[PII:CITY]`; default entries: `["Zeph", "Rust", "OpenAI", "Ollama", "Claude"]`; the list can be set to empty via config to disable the feature (closes #2537) +- fix(classifiers): document that macOS Apple Silicon requires `--features full,metal` for piiranha NER GPU acceleration; without `metal`, the 1.1 GB model exceeds the 30s timeout on CPU and falls back to regex-only PII detection (closes #2538) +- fix(mcp): elicitation deadlock in `run_inline_tool_loop` (phase 3, closes #2542) — `run_inline_tool_loop` now wraps each `execute_tool_call_erased` call in `tokio::select!` that concurrently drains `elicitation_rx`; `handle_elicitation_event` changed to `pub(super)` for cross-module access; regression test added with a blocking executor that simulates the real MCP deadlock scenario +- fix(tools): propagate `claim_source` from `ToolOutput` into the post-execution audit entry in `AdversarialPolicyGateExecutor`; `write_audit` now accepts an explicit `claim_source` parameter so the field is no longer hardcoded to `None` for successful 
executions (closes #2535) +- fix(tools): `extract_paths` now detects relative path tokens that contain `/` but do not start with `/` or `./` (e.g. `src/main.rs`, `.local/foo/bar`); URL schemes (`://`) and shell variable assignments (`KEY=value`) are excluded from matching (closes #2536) + +- fix(mcp): replace unbounded elicitation mpsc channel with a bounded channel (default capacity 16) to prevent memory exhaustion from misbehaving MCP servers; requests that arrive when the queue is full are auto-declined with a warning log instead of accumulating indefinitely; capacity is configurable via `[mcp] elicitation_queue_capacity` (closes #2524) +- fix(mcp): pre-existing `clippy::non_exhaustive_omitted_patterns`, `match_single_binding`, and `uninlined_format_args` warnings in elicitation CLI prompt builder and test code (caught while adding bounded-channel support) + ### Removed - **BREAKING**: `RoutingConfig` and `RoutingStrategy` removed from `zeph-config` — superseded by `StoreRoutingConfig` / `StoreRoutingStrategy`; the `[memory.routing]` TOML section is no longer recognised (use `[memory.store_routing]` instead) @@ -4307,6 +4272,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) - fix(db): in-memory SQLite pool forced to `max_connections = 1` so all queries share the same connection and the migrated schema — previously each additional connection in the pool opened a separate empty in-memory database, causing `no such column: superseded_by` in 91 graph tests after migration 056 was introduced (closes #2468) + +- fix(memory): use `wait=true` on Qdrant upsert to eliminate testcontainer timing race — points are now indexed and queryable immediately after `upsert` returns (closes #2413) +- fix(acp): populate `authMethods` in `initialize` response with `Agent` auth method — ACP clients now receive `[{type: "agent", id: "zeph", name: "Zeph"}]` in the `authMethods` field of every `InitializeResponse` (closes #2422) +- fix(acp): serve agent identity manifest at `GET /agent.json` — new endpoint gated on `discovery_enabled`, returns `id`, `name`, `version`, `description`, and `distribution` fields for ACP Registry discovery (closes #2422) +- fix(acp): eliminate IPI wiring duplication in `acp.rs` `spawn_acp_agent` — extract `apply_three_class_classifier_with_cfg` and `apply_causal_analyzer_with_cfg` helpers in `agent_setup.rs`; `spawn_acp_agent` now delegates to shared helpers instead of inlining classifier construction (closes #2370) +- fix(acp): discovery endpoint already reflects `ProtocolVersion::LATEST` — confirmed fixed in PR #2423; no code change required (closes #2412) +- fix(security): extend MCP env var blocklist — `PATH`, `HTTP_PROXY`, `HTTPS_PROXY`, `ALL_PROXY`, `NO_PROXY`, `BASH_ENV`, `ENV`, `PYTHONPATH`, `NODE_PATH`, `RUBYLIB` are now stripped from 
ACP-provided env vars for MCP stdio child processes (closes #2437) +- fix(tools): `AuditLogger::log` now emits `tracing::error!` when `serde_json` serialization fails instead of silently dropping the audit entry (closes #2438) +- fix(security): scrub credential env vars (`ZEPH_*`, `AWS_*`, `ANTHROPIC_*`, `OPENAI_*`, `AZURE_*`, `GCP_*`, `GOOGLE_*`, `HF_*`, `HUGGING*`) from `ShellExecutor` subprocess environment to prevent exfiltration via shell commands; configurable via `[tools.shell] env_blocklist` (closes #2449) + ### Added - feat(tools): `[tools.shell] max_snapshot_bytes` config option to limit transaction snapshot size — returns `SnapshotFailed` when cumulative copied bytes exceed the limit; `0` means unlimited (default) @@ -4358,16 +4333,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - feat(skills): confusability report (`SkillMatcher::confusability_report`) — O(n²) pairwise cosine similarity with `spawn_blocking` offload; lists excluded skills whose embedding failed; disabled by default (`confusability_threshold = 0.0`) (#2268) - feat(core): catch_unwind guard for all RuntimeLayer hook invocations (before_chat, after_chat, before_tool, after_tool) — panicking hooks no longer crash the agent turn (#2363) -### Fixed -- fix(memory): use `wait=true` on Qdrant upsert to eliminate testcontainer timing race — points are now indexed and queryable immediately after `upsert` returns (closes #2413) -- fix(acp): populate `authMethods` in `initialize` response with `Agent` auth method — ACP clients now receive `[{type: "agent", id: "zeph", name: "Zeph"}]` in the `authMethods` field of every `InitializeResponse` (closes #2422) -- fix(acp): serve agent identity manifest at `GET /agent.json` — new endpoint gated on `discovery_enabled`, returns `id`, `name`, `version`, `description`, and `distribution` fields for ACP Registry discovery (closes #2422) -- fix(acp): eliminate IPI wiring duplication in `acp.rs` `spawn_acp_agent` — extract 
`apply_three_class_classifier_with_cfg` and `apply_causal_analyzer_with_cfg` helpers in `agent_setup.rs`; `spawn_acp_agent` now delegates to shared helpers instead of inlining classifier construction (closes #2370) -- fix(acp): discovery endpoint already reflects `ProtocolVersion::LATEST` — confirmed fixed in PR #2423; no code change required (closes #2412) -- fix(security): extend MCP env var blocklist — `PATH`, `HTTP_PROXY`, `HTTPS_PROXY`, `ALL_PROXY`, `NO_PROXY`, `BASH_ENV`, `ENV`, `PYTHONPATH`, `NODE_PATH`, `RUBYLIB` are now stripped from ACP-provided env vars for MCP stdio child processes (closes #2437) -- fix(tools): `AuditLogger::log` now emits `tracing::error!` when `serde_json` serialization fails instead of silently dropping the audit entry (closes #2438) -- fix(security): scrub credential env vars (`ZEPH_*`, `AWS_*`, `ANTHROPIC_*`, `OPENAI_*`, `AZURE_*`, `GCP_*`, `GOOGLE_*`, `HF_*`, `HUGGING*`) from `ShellExecutor` subprocess environment to prevent exfiltration via shell commands; configurable via `[tools.shell] env_blocklist` (closes #2449) +- perf(memory): consolidation sweep embeds all candidates concurrently via `futures::future::join_all` instead of sequentially, reducing latency for large batches (#2365) ### Added (tests) @@ -4382,10 +4349,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- fix(memory): consolidation LLM prompt splits into `Role::System` (instructions) + `Role::User` (memory entries) to prevent adversarial content from influencing consolidation decisions (#2362) - fix(memory): `TopologyOp::Update` now performs an actual in-place content UPDATE on the target row instead of duplicating `apply_consolidation_merge` logic; `target_id` is no longer ignored (#2364) -### Changed - -- perf(memory): consolidation sweep embeds all candidates concurrently via `futures::future::join_all` instead of sequentially, reducing latency for large batches (#2365) - ## [0.18.0] - 2026-03-29 ### Changed @@ -4403,35 +4366,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) -### Added - -- feat(db): `numbered_placeholder(n)` and `placeholder_list(start, count)` helpers in `zeph-db` for dialect-agnostic dynamic SQL construction (#2386) - -- docker: add all missing ZEPH_* env vars to docker-compose.yml and docker-compose.dev.yml (64 vars added, ZEPH_MEMORY_SEMANTIC_RECALL_LIMIT renamed to ZEPH_MEMORY_RECALL_LIMIT) -- docker: add scripts/check-env-vars.sh drift-prevention script - -### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) -### Added - -- feat(tui): interactive subagent management — `a` key focuses the 
`SubAgents` sidebar panel; `j`/`k` navigate the agent list; `Enter` loads the selected agent's JSONL transcript into the main chat area; `Esc` returns to the main conversation or closes panel focus; status bar shows `Viewing: ` when a subagent transcript is active; chat title changes to `Subagent: ` during transcript view; transcript is reloaded automatically when the agent's turn count increases; partial last-line writes from active agents are silently discarded to prevent spurious parse errors; transcript is truncated to the last 200 entries with a `[showing last N of M messages]` indicator; empty subagent list shows a placeholder message; `Tab` panel cycling now includes `SubAgents`; all new keybindings documented in the `?` help overlay (#2376) -- feat(core): `SubAgentMetrics` gains `transcript_dir: Option` field populated from `SubAgentHandle::transcript_dir` in `refresh_subagent_metrics()`; `SubAgentManager::agent_transcript_dir()` accessor added (#2376) -- feat(routing): `LlmRoutingStrategy::Bandit` — PILOT-inspired LinUCB contextual bandit routing with online learning, LRU embedding cache, hard embed timeout with Thompson fallback during cold start, and reward signal combining heuristic quality and cost penalty; adds `[llm.router.bandit]` config section with `alpha`, `dim`, `cost_weight`, `decay_factor`, `embedding_provider`, `embedding_timeout_ms`, `cache_size` fields; bandit state persisted to `~/.config/zeph/router_bandit_state.json` with atomic writes (closes #2230) -- feat(config): SLM provider recommendations — `config/default.toml` documents which subsystems (`triage_provider`, `compress_provider`, `probe_provider`, `scene_provider`, `admission_provider`, `feedback_provider`, `planner_provider`, etc.) 
are suitable for lightweight models; `BanditConfig` `embedding_provider` and `extract_model` include inline SLM guidance; multi-model configuration example added (closes #2192) -- feat(db): Phase 3 DevEx integration — `--init` wizard backend selection (SQLite vs PostgreSQL), `--migrate-config` adds `database_url` placeholder under `[memory]`, `zeph db migrate` CLI subcommand runs pending migrations with URL validation and redacted output, `build-postgres` CI job checks `zeph-db --no-default-features --features postgres` compile; `is_postgres_url()` ungated (available in all builds); `zeph-db` and `zeph-memory` workspace deps set to `default-features = false` for correct mutual-exclusivity under `--no-default-features --features postgres` (follow-up #2374 tracks remaining call site fixes) -- feat(db): `zeph-db` Phase 2 PostgreSQL backend — 52 PostgreSQL migration files with full DDL translation (`BIGSERIAL`, `TIMESTAMPTZ`, `BYTEA`, `BOOLEAN`, `tsvector`/GIN for FTS, `plpgsql` trigger functions); `Dialect::EPOCH_NOW` constant and `epoch_from_col()` method for backend-portable epoch extraction; FTS helper functions in `fts.rs` covering all 5 query patterns (basic search, ranked search, prefix search, graph entity FTS, ORDER BY direction); bootstrap guard rejects `postgres://` URLs in SQLite-compiled binary with actionable error; `bytemuck` removed from `zeph-memory` — vector serialization uses `to_le_bytes`/`from_le_bytes` throughout (`db_vector_store.rs`, `response_cache.rs`); `MemoryConfig::database_url` field + `ZEPH_DATABASE_URL` vault key for runtime backend selection; testcontainers integration tests (`#[ignore = "requires Docker"]`) for migrations, idempotency, CRUD, and FTS trigger verification; dialect and FTS unit tests added -- feat(db): `zeph-db` Phase 1 SQLite — compile-time database abstraction layer via `DatabaseDriver` trait, `Dialect` trait (`INSERT_IGNORE`, `CONFLICT_NOTHING`, `COLLATE_NOCASE`, `AUTO_PK`, `ilike()`), `DbConfig` with WAL/busy_timeout 
pool setup, `begin_write()` for `BEGIN IMMEDIATE` write serialization, `sql!()` macro for portable `?`→`$N` placeholder rewriting, `redact_url()` with `LazyLock`, and 52 SQLite migrations; consumer crates (`zeph-memory`, `zeph-scheduler`, `zeph-mcp`, `zeph-index`) migrated to `DbPool`/`DbConfig` type aliases; all SQLite-specific SQL fragments replaced with `Dialect` constants or standard SQL (`CURRENT_TIMESTAMP`); `DbVectorStore`, `DbGraphStore`, `DbStore` replace `Sqlite`-prefixed names - -### Internal - -- refactor(db): Phase 1 sqlx cleanup — remove direct `sqlx` dependency from `zeph-scheduler`, `zeph-orchestration`, `zeph-mcp`, `zeph-index`, and `zeph-core`; all consumers now use `zeph-db` re-exports (`zeph_db::query*`, `zeph_db::SqlxError`, `zeph_db::FromRow`); add `numbered_placeholder(n)` and `placeholder_list(start, count)` helpers to `zeph-db` for backend-portable numbered bind positions; fix 8 dynamic `format!()`-built SQL queries in `graph/store/mod.rs` (BFS, centrality, community IDs, batch edges, entity fetch, mark-processed) to use `placeholder_list`/`numbered_placeholder` instead of SQLite-only `?` literals; add `#[cfg]` dialect guard for `json_each`/`jsonb_array_elements_text` in `entity_community_ids`; re-export `sqlx::query_builder::QueryBuilder` and `sqlx` from `zeph-db` (#2386) -- feat(memory): A-MAC adaptive admission control — `AdmissionControl` in `zeph-memory::admission` evaluates 5 factors (future utility via LLM, factual confidence via hedging heuristics, semantic novelty via Qdrant top-3 cosine search, temporal recency fixed at 1.0 at write time, content-type prior by role) and rejects messages scoring below a configurable threshold; `remember()` now returns `Result>` and `remember_with_parts()` returns `Result<(Option, bool)>` — `None` means admission rejected, no panic, no silent drop; `unsummarized_count` only incremented when a message is truly persisted; `[memory.admission]` config block added with `enabled`, `threshold`, 
`fast_path_margin`, `admission_provider`, and `weights` fields; runtime weight normalization eliminates the fragile sum-to-1.0 constraint; `memory_save` tool returns a human-readable rejection message when admission fails (#2317) -- feat(memory): `MemScene` consolidation — `mem_scenes` and `mem_scene_members` SQLite tables (migration 049) store entity profiles derived from clusters of semantic-tier messages; `start_scene_consolidation_loop()` runs a background sweep on a configurable interval independent of tier promotion; greedy nearest-neighbor cosine clustering groups messages above `scene_similarity_threshold`; LLM generates a short label and 2–3 sentence profile per scene with JSON fallback; `[memory.tiers]` config gains `scene_enabled`, `scene_similarity_threshold`, `scene_batch_size`, `scene_provider` fields (#2332) -- feat(core): `compress_context` native tool — always available when `context-compression` feature is enabled regardless of `CompressionStrategy`; compresses the current conversation (excluding pinned Knowledge and system messages) via LLM, appends the summary to the Knowledge block, and removes original messages from history; guarded by `Arc` concurrency lock in `FocusState` (`try_acquire_compression()` / `release_compression()`); blocked when a focus session is active; `CompressionStrategy::Autonomous` variant added; `compress_provider` field added to `CompressionConfig` (#2218) - -### Fixed - fix(core): `MemoryFirst` context drain no longer orphans `role=tool` messages when a tool-call turn lands at the keep-tail boundary, preventing OpenAI HTTP 400 errors (#2366) - fix(tools): wire `is_reasoning_model()` and `record_reasoning_quality_failure()` into the agent tool execution path; quality failures (`ToolNotFound`, `InvalidParameters`, `TypeMismatch`) from reasoning models (o1, o3, o4-mini, QwQ, DeepSeek-R1, Claude extended-thinking) now call `record_reasoning_quality_failure()` on the anomaly detector which emits a `reasoning_amplification` 
WARN log; `reasoning_model_warning = false` suppresses the WARN while still counting the error; `AuditEntry.error_phase` now populated from `ToolErrorCategory::phase()` in `ShellExecutor`, `WebScrapeExecutor`, and pre-execution verifier audit entries (#2357) @@ -4443,12 +4380,44 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - fix(core): `/reset` command now handled in `handle_builtin_command` as an alias for `/clear` with confirmation reply; previously fell through to LLM inference in all channels (#2339) - fix(telegram-e2e): reduce `scenario_long_output` prompt from 400 to 100 items and `first_timeout` from 90s to 60s to avoid LLM timeout under load (#2340) -### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) + +- fix(mcp): wire `EmbeddingAnomalyGuard` into `McpManager` — `with_embedding_guard()` builder added; `call_tool()` calls `guard.check_async()` fire-and-forget after every successful tool call; `build_tool_setup()` in `agent_setup.rs` creates and wires the guard when `security.content_isolation.embedding_guard.enabled = true`; background drain task logs anomalous and regex-injection events at `warn!` level; `AnyProvider::embed_fn()` return type annotated with `+ use<>` to prevent lifetime overcapture in Edition 2024 (#2331) + +- fix(a2a): A2A response shift — replace `try_recv()` drain with a blocking drain-until-`Flush` loop; the agent loop always emits `Flush` as the definitive end-of-turn sentinel after `FullMessage`, so waiting for it eliminates the TOCTOU race where tail events (`Usage`, `SessionTitle`, `Flush`) arrived after the drain window and leaked into the next request 
(#2326) +- fix(a2a): drain timeout — the drain-until-`Flush` loop in `AgentTaskProcessor::process()` is now guarded by a configurable `tokio::time::timeout`; if the agent loop panics while holding the sender `Arc`, the drain no longer blocks indefinitely but logs a `warn` and proceeds; `A2aServerConfig` gains `drain_timeout_ms` (default `30_000`) (#2329) +- fix(mcp): silent drop of embedding guard result on closed receiver — replace `let _ = tx.send(result)` with an explicit `is_err()` check that logs a `WARN` when the result channel is closed; prevents silent failure hiding in `EmbeddingAnomalyGuard::check_async()` (#2313) +- fix(mcp): wire `DefaultMcpProber` and `TrustScoreStore` into all three bootstrap paths (runner, daemon, ACP) via new `wire_trust_calibration()` in `zeph-core::bootstrap`; `TrustCalibrationConfig.enabled` now has effect at startup (#2315) +- fix(mcp): `TrustScoreStore::apply_delta()` operated on stale pre-decay score; new `load_and_apply_delta()` reads with decay applied in-memory before writing back, preventing inflated base scores after long server idle periods; both call sites in `manager.rs` updated (#2323) +- fix(config): `EmbeddingGuardConfig::threshold` now validated to `(0.0, 1.0]` at deserialization time; `min_samples` validated to `>= 1`; invalid values produce a descriptive config error at startup (#2322) +- fix(daemon): stale PID file from a crashed run no longer blocks startup — read and liveness-check the existing PID before writing; remove the file if the process is dead, error if the process is still alive (#2295) +- fix(mcp): `prune_tools` `max_tools == 0` now means no cap on LLM-selected candidates (#2294) +- fix(security): sanitize MCP tool descriptions and names before interpolating into the pruning prompt — strip control characters, cap description at 200 chars and name at 64 chars (#2297) +- fix(mcp): document and enforce `always_include` semantics — pinned tools bypass the `max_tools` cap; cap applies only to 
LLM-selected candidates (#2296) +- fix(security): sanitizer classifier 401 on HuggingFace download — add `hf_token: Option` to `ClassifiersConfig` and `CandleConfig`, resolved from vault key `ZEPH_HF_TOKEN` in `resolve_secrets()`; all five `hf_hub::api::sync::Api::new()` call sites replaced with `ApiBuilder::new().with_token(hf_token).build()`; add `scan_user_input: bool` (default `false`) to `ClassifiersConfig` to gate DeBERTa classifier on direct user chat messages and eliminate false positives on benign greetings and arithmetic; upgrade silent `warn!` fallback in `classify_injection` to `error!`; add `tracing::error!` at cached load-failure return path in `CandleClassifier` to surface permanent classifier degradation (#2292) +- fix(tui): MCP Tools panel and Resources widget now show per-server connection status — `connect_all()` returns `(Vec, Vec)` with per-server id, connected flag, tool count, and error string; `MetricsSnapshot` gains `mcp_connected_count` and `mcp_servers: Vec`; Resources panel shows `N/M connected, K tools`; Skills panel shows per-server OK (green) / FAIL (red) rows above the tool list; `mcp_server_count` now reflects total configured servers, not just connected ones (#2277) +- fix(skills): tighten `system_prompt_leak` pattern to require an extraction verb or interrogative before "system prompt"; eliminates false-positive WARN for user-installed skills (e.g. 
`mcp-generate`) whose documentation describes where MCP tool output appears in the system prompt (#2274) +- `zeph-memory`: compat deserializer for pre-v0.17.1 `MessagePart` SQLite records; SQLite migration resets legacy-format `parts` rows to `[]` (#2278) + ### Added +- feat(db): `numbered_placeholder(n)` and `placeholder_list(start, count)` helpers in `zeph-db` for dialect-agnostic dynamic SQL construction (#2386) + +- docker: add all missing ZEPH_* env vars to docker-compose.yml and docker-compose.dev.yml (64 vars added, ZEPH_MEMORY_SEMANTIC_RECALL_LIMIT renamed to ZEPH_MEMORY_RECALL_LIMIT) +- docker: add scripts/check-env-vars.sh drift-prevention script + + +- feat(tui): interactive subagent management — `a` key focuses the `SubAgents` sidebar panel; `j`/`k` navigate the agent list; `Enter` loads the selected agent's JSONL transcript into the main chat area; `Esc` returns to the main conversation or closes panel focus; status bar shows `Viewing: ` when a subagent transcript is active; chat title changes to `Subagent: ` during transcript view; transcript is reloaded automatically when the agent's turn count increases; partial last-line writes from active agents are silently discarded to prevent spurious parse errors; transcript is truncated to the last 200 entries with a `[showing last N of M messages]` indicator; empty subagent list shows a placeholder message; `Tab` panel cycling now includes `SubAgents`; all new keybindings documented in the `?` help overlay (#2376) +- feat(core): `SubAgentMetrics` gains `transcript_dir: Option` field populated from `SubAgentHandle::transcript_dir` in `refresh_subagent_metrics()`; `SubAgentManager::agent_transcript_dir()` accessor added (#2376) +- feat(routing): `LlmRoutingStrategy::Bandit` — PILOT-inspired LinUCB contextual bandit routing with online learning, LRU embedding cache, hard embed timeout with Thompson fallback during cold start, and reward signal combining heuristic quality and cost penalty; adds 
`[llm.router.bandit]` config section with `alpha`, `dim`, `cost_weight`, `decay_factor`, `embedding_provider`, `embedding_timeout_ms`, `cache_size` fields; bandit state persisted to `~/.config/zeph/router_bandit_state.json` with atomic writes (closes #2230) +- feat(config): SLM provider recommendations — `config/default.toml` documents which subsystems (`triage_provider`, `compress_provider`, `probe_provider`, `scene_provider`, `admission_provider`, `feedback_provider`, `planner_provider`, etc.) are suitable for lightweight models; `BanditConfig` `embedding_provider` and `extract_model` include inline SLM guidance; multi-model configuration example added (closes #2192) +- feat(db): Phase 3 DevEx integration — `--init` wizard backend selection (SQLite vs PostgreSQL), `--migrate-config` adds `database_url` placeholder under `[memory]`, `zeph db migrate` CLI subcommand runs pending migrations with URL validation and redacted output, `build-postgres` CI job checks `zeph-db --no-default-features --features postgres` compile; `is_postgres_url()` ungated (available in all builds); `zeph-db` and `zeph-memory` workspace deps set to `default-features = false` for correct mutual-exclusivity under `--no-default-features --features postgres` (follow-up #2374 tracks remaining call site fixes) +- feat(db): `zeph-db` Phase 2 PostgreSQL backend — 52 PostgreSQL migration files with full DDL translation (`BIGSERIAL`, `TIMESTAMPTZ`, `BYTEA`, `BOOLEAN`, `tsvector`/GIN for FTS, `plpgsql` trigger functions); `Dialect::EPOCH_NOW` constant and `epoch_from_col()` method for backend-portable epoch extraction; FTS helper functions in `fts.rs` covering all 5 query patterns (basic search, ranked search, prefix search, graph entity FTS, ORDER BY direction); bootstrap guard rejects `postgres://` URLs in SQLite-compiled binary with actionable error; `bytemuck` removed from `zeph-memory` — vector serialization uses `to_le_bytes`/`from_le_bytes` throughout (`db_vector_store.rs`, 
`response_cache.rs`); `MemoryConfig::database_url` field + `ZEPH_DATABASE_URL` vault key for runtime backend selection; testcontainers integration tests (`#[ignore = "requires Docker"]`) for migrations, idempotency, CRUD, and FTS trigger verification; dialect and FTS unit tests added +- feat(db): `zeph-db` Phase 1 SQLite — compile-time database abstraction layer via `DatabaseDriver` trait, `Dialect` trait (`INSERT_IGNORE`, `CONFLICT_NOTHING`, `COLLATE_NOCASE`, `AUTO_PK`, `ilike()`), `DbConfig` with WAL/busy_timeout pool setup, `begin_write()` for `BEGIN IMMEDIATE` write serialization, `sql!()` macro for portable `?`→`$N` placeholder rewriting, `redact_url()` with `LazyLock`, and 52 SQLite migrations; consumer crates (`zeph-memory`, `zeph-scheduler`, `zeph-mcp`, `zeph-index`) migrated to `DbPool`/`DbConfig` type aliases; all SQLite-specific SQL fragments replaced with `Dialect` constants or standard SQL (`CURRENT_TIMESTAMP`); `DbVectorStore`, `DbGraphStore`, `DbStore` replace `Sqlite`-prefixed names + + - feat(core): `RuntimeLayer` middleware trait — `before_chat`, `after_chat`, `before_tool`, `after_tool` hooks for LLM call and tool dispatch interception; `NoopLayer` no-op implementation; `BeforeToolResult` type alias; `LayerContext` carries `conversation_id` and `turn_number`; `Agent` holds a `runtime_layers: Vec>` (empty by default, zero-cost); hooks called in `call_chat_with_tools` before and after every LLM round-trip and tool execution (#2286) - feat(memory): All-Mem lifelong memory consolidation — background `start_consolidation_loop` sweeps `messages` table per conversation, clusters semantically similar unconsolidated messages via greedy cosine similarity, calls LLM to propose `TopologyOp` (Merge/Update), applies atomically in a single SQLite transaction; migration 049 adds `consolidated INTEGER` column and `memory_consolidation_sources` join table; `ConsolidationConfig` in `[memory.consolidation]` controls `enabled`, `confidence_threshold`, 
`sweep_interval_secs`, `sweep_batch_size`, `similarity_threshold`; loop cancelled cleanly on shutdown via `CancellationToken` (#2270) - feat(graph): MAGMA edge-type weight multipliers — `edge_type_weight()` assigns per-type multipliers (Causal 1.2, Semantic 1.0, Temporal 0.9, Entity 0.8); `composite_score()` in `Edge` now multiplies by `edge_type_weight`; spreading activation spread formula incorporates the multiplier alongside `evolved_weight` and `recency_weight` (#2231) @@ -4470,6 +4439,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - feat(memory): crossover context strategy — `[memory] context_strategy` config field (`full_history` | `memory_first` | `adaptive`) selects how context is assembled; `memory_first` de-emphasizes recent history in favor of semantic memory retrieval; `adaptive` starts as `full_history` and switches to `memory_first` after `crossover_turn_threshold` turns; budget allocation updated to deduct digest tokens before percentage splits (#2288) - feat(memory): `guidelines_provider` field in `[memory.compression_guidelines]` wires the ACON compression-guidelines updater to a named provider from `[[llm.providers]]` instead of always inheriting the primary conversational provider; empty value falls back to primary (#2201) +### Internal + +- refactor(db): Phase 1 sqlx cleanup — remove direct `sqlx` dependency from `zeph-scheduler`, `zeph-orchestration`, `zeph-mcp`, `zeph-index`, and `zeph-core`; all consumers now use `zeph-db` re-exports (`zeph_db::query*`, `zeph_db::SqlxError`, `zeph_db::FromRow`); add `numbered_placeholder(n)` and `placeholder_list(start, count)` helpers to `zeph-db` for backend-portable numbered bind positions; fix 8 dynamic `format!()`-built SQL queries in `graph/store/mod.rs` (BFS, centrality, community IDs, batch edges, entity fetch, mark-processed) to use `placeholder_list`/`numbered_placeholder` instead of SQLite-only `?` literals; add `#[cfg]` dialect guard for 
`json_each`/`jsonb_array_elements_text` in `entity_community_ids`; re-export `sqlx::query_builder::QueryBuilder` and `sqlx` from `zeph-db` (#2386) +- feat(memory): A-MAC adaptive admission control — `AdmissionControl` in `zeph-memory::admission` evaluates 5 factors (future utility via LLM, factual confidence via hedging heuristics, semantic novelty via Qdrant top-3 cosine search, temporal recency fixed at 1.0 at write time, content-type prior by role) and rejects messages scoring below a configurable threshold; `remember()` now returns `Result>` and `remember_with_parts()` returns `Result<(Option, bool)>` — `None` means admission rejected, no panic, no silent drop; `unsummarized_count` only incremented when a message is truly persisted; `[memory.admission]` config block added with `enabled`, `threshold`, `fast_path_margin`, `admission_provider`, and `weights` fields; runtime weight normalization eliminates the fragile sum-to-1.0 constraint; `memory_save` tool returns a human-readable rejection message when admission fails (#2317) +- feat(memory): `MemScene` consolidation — `mem_scenes` and `mem_scene_members` SQLite tables (migration 049) store entity profiles derived from clusters of semantic-tier messages; `start_scene_consolidation_loop()` runs a background sweep on a configurable interval independent of tier promotion; greedy nearest-neighbor cosine clustering groups messages above `scene_similarity_threshold`; LLM generates a short label and 2–3 sentence profile per scene with JSON fallback; `[memory.tiers]` config gains `scene_enabled`, `scene_similarity_threshold`, `scene_batch_size`, `scene_provider` fields (#2332) +- feat(core): `compress_context` native tool — always available when `context-compression` feature is enabled regardless of `CompressionStrategy`; compresses the current conversation (excluding pinned Knowledge and system messages) via LLM, appends the summary to the Knowledge block, and removes original messages from history; guarded by `Arc` 
concurrency lock in `FocusState` (`try_acquire_compression()` / `release_compression()`); blocked when a focus session is active; `CompressionStrategy::Autonomous` variant added; `compress_provider` field added to `CompressionConfig` (#2218) + ### Security - feat(security): `InjectionEnforcementMode` enum (`Warn` | `Block`) added to `ClassifiersConfig`; default is `Warn` (soft-signal mode); `classify_injection()` in `ContentSanitizer` returns `Suspicious` instead of `Blocked` in `Warn` mode for scores above the hard threshold; ML classification wired into `sanitize_tool_output()` in the tool execution pipeline; original body (before spotlight wrapping) is classified to avoid false positives on `` delimiter tags; serde validation added to `injection_threshold` and `injection_threshold_soft` fields; `has_classifier_backend()` guard prevents duplicate detection events when no ML backend is configured (#2193) @@ -4488,25 +4464,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - feat(mcp): MCPShield three-phase trust calibration — Phase 1: `DefaultMcpProber` scans resource/prompt descriptions for injection patterns on connect; Phase 2: `AuditEntry` gains `mcp_server_id`, `injection_flagged`, `embedding_anomalous` fields; Phase 3: `TrustScoreStore` (SQLite-backed) with asymmetric decay (scores > 0.5 decay toward 0.5 at 1%/day, scores ≤ 0.5 require explicit `record_success()`), atomic `INSERT ... 
ON CONFLICT DO UPDATE` delta updates; `TrustCalibrationConfig` added under `[mcp.trust_calibration]` (#2217) - feat(mcp): embedding anomaly guard — `EmbeddingAnomalyGuard` runs fire-and-forget cosine-distance checks against a per-server centroid in a background `tokio::spawn` task; cold-start (< `min_samples` clean outputs) falls back to synchronous regex injection detection; results delivered via `mpsc::UnboundedSender`; `EmbeddingGuardConfig` added under `[security.content_isolation.embedding_guard]` (#2217) -### Fixed - -- fix(mcp): wire `EmbeddingAnomalyGuard` into `McpManager` — `with_embedding_guard()` builder added; `call_tool()` calls `guard.check_async()` fire-and-forget after every successful tool call; `build_tool_setup()` in `agent_setup.rs` creates and wires the guard when `security.content_isolation.embedding_guard.enabled = true`; background drain task logs anomalous and regex-injection events at `warn!` level; `AnyProvider::embed_fn()` return type annotated with `+ use<>` to prevent lifetime overcapture in Edition 2024 (#2331) - -- fix(a2a): A2A response shift — replace `try_recv()` drain with a blocking drain-until-`Flush` loop; the agent loop always emits `Flush` as the definitive end-of-turn sentinel after `FullMessage`, so waiting for it eliminates the TOCTOU race where tail events (`Usage`, `SessionTitle`, `Flush`) arrived after the drain window and leaked into the next request (#2326) -- fix(a2a): drain timeout — the drain-until-`Flush` loop in `AgentTaskProcessor::process()` is now guarded by a configurable `tokio::time::timeout`; if the agent loop panics while holding the sender `Arc`, the drain no longer blocks indefinitely but logs a `warn` and proceeds; `A2aServerConfig` gains `drain_timeout_ms` (default `30_000`) (#2329) -- fix(mcp): silent drop of embedding guard result on closed receiver — replace `let _ = tx.send(result)` with an explicit `is_err()` check that logs a `WARN` when the result channel is closed; prevents silent failure 
hiding in `EmbeddingAnomalyGuard::check_async()` (#2313) -- fix(mcp): wire `DefaultMcpProber` and `TrustScoreStore` into all three bootstrap paths (runner, daemon, ACP) via new `wire_trust_calibration()` in `zeph-core::bootstrap`; `TrustCalibrationConfig.enabled` now has effect at startup (#2315) -- fix(mcp): `TrustScoreStore::apply_delta()` operated on stale pre-decay score; new `load_and_apply_delta()` reads with decay applied in-memory before writing back, preventing inflated base scores after long server idle periods; both call sites in `manager.rs` updated (#2323) -- fix(config): `EmbeddingGuardConfig::threshold` now validated to `(0.0, 1.0]` at deserialization time; `min_samples` validated to `>= 1`; invalid values produce a descriptive config error at startup (#2322) -- fix(daemon): stale PID file from a crashed run no longer blocks startup — read and liveness-check the existing PID before writing; remove the file if the process is dead, error if the process is still alive (#2295) -- fix(mcp): `prune_tools` `max_tools == 0` now means no cap on LLM-selected candidates (#2294) -- fix(security): sanitize MCP tool descriptions and names before interpolating into the pruning prompt — strip control characters, cap description at 200 chars and name at 64 chars (#2297) -- fix(mcp): document and enforce `always_include` semantics — pinned tools bypass the `max_tools` cap; cap applies only to LLM-selected candidates (#2296) -- fix(security): sanitizer classifier 401 on HuggingFace download — add `hf_token: Option` to `ClassifiersConfig` and `CandleConfig`, resolved from vault key `ZEPH_HF_TOKEN` in `resolve_secrets()`; all five `hf_hub::api::sync::Api::new()` call sites replaced with `ApiBuilder::new().with_token(hf_token).build()`; add `scan_user_input: bool` (default `false`) to `ClassifiersConfig` to gate DeBERTa classifier on direct user chat messages and eliminate false positives on benign greetings and arithmetic; upgrade silent `warn!` fallback in 
`classify_injection` to `error!`; add `tracing::error!` at cached load-failure return path in `CandleClassifier` to surface permanent classifier degradation (#2292) -- fix(tui): MCP Tools panel and Resources widget now show per-server connection status — `connect_all()` returns `(Vec, Vec)` with per-server id, connected flag, tool count, and error string; `MetricsSnapshot` gains `mcp_connected_count` and `mcp_servers: Vec`; Resources panel shows `N/M connected, K tools`; Skills panel shows per-server OK (green) / FAIL (red) rows above the tool list; `mcp_server_count` now reflects total configured servers, not just connected ones (#2277) -- fix(skills): tighten `system_prompt_leak` pattern to require an extraction verb or interrogative before "system prompt"; eliminates false-positive WARN for user-installed skills (e.g. `mcp-generate`) whose documentation describes where MCP tool output appears in the system prompt (#2274) -- `zeph-memory`: compat deserializer for pre-v0.17.1 `MessagePart` SQLite records; SQLite migration resets legacy-format `parts` rows to `[]` (#2278) - ## [0.17.1] - 2026-03-27 ### Fixed @@ -4529,10 +4486,27 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- triage routing: `debug_request_json` now reflects the actual selected tier provider instead of always showing the first-tier model (#2229) - triage routing: removed context size metadata (`msg_count`/`token_estimate`) from classification prompt to prevent bias toward higher tiers in long conversations (#2228) -### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) + +- fix(tools): `TrustGateExecutor` now delegates `is_tool_retryable()` to its inner executor; previously the missing delegation caused Phase 2 transient retry to never fire for HTTP-based tools (e.g. `fetch` returning 503) because the default `false` short-circuited the retry guard (#2223) +- fix(tools): `ToolErrorFeedback.retryable` is now set from `category.is_retryable()` instead of being hardcoded `false`; transient categories (ServerError, NetworkError, RateLimited, Timeout) now correctly report `retryable: true` in LLM feedback; suggestion text for transient categories changed from "The system will retry automatically." to "The system will retry if possible." 
to avoid contradicting a `retryable: false` state at exhaustion (#2222) +- fix(memory): deferred tool pair summarization no longer leaves orphaned `tool_use` messages in the DB; `apply_deferred_summaries()` now accumulates `(db_id, summary_text)` pairs into `deferred_db_hide_ids`/`deferred_db_summaries` fields on `Agent`, flushed atomically to SQLite via `apply_tool_pair_summaries()` (sets `agent_visible=0` on hidden pairs and inserts summary messages in one transaction); `db_id: Option` added to `MessageMetadata` (`#[serde(skip)]`) and populated from DB on `load_history_filtered()`; eliminates unbounded growth of `WARN stripping orphaned mid-history tool_use parts` on every subsequent session restore (#2243) + +- fix(memory): reject self-loop edges in graph extractor — `extract_and_store` skips edges where source and target resolve to the same entity ID; `insert_edge_typed` returns `MemoryError::InvalidInput` for same-ID pairs; migration `044` removes existing self-loops and adds a BEFORE INSERT trigger to enforce the constraint at the DB level (#2215) + +- fix(security): agent no longer calls `fetch`/`web_scrape` with hallucinated URLs; three-layer defense: (1) tool descriptions now explicitly prohibit constructing or inferring URLs from entity names; (2) system prompt `## Guidelines` adds a fetch/URL grounding rule; (3) new `UrlGroundingVerifier` pre-execution gate blocks `fetch`, `web_scrape`, and `*_fetch` tool calls when the requested URL was not present in any user message in the session — returns "fetch rejected: URL was not provided by the user"; `user_provided_urls` extracted via `extract_flagged_urls` on every user turn, cleared on `/clear`; configurable via `[security.pre_execution_verify.url_grounding]` (#2191) + +- fix(core): permanent tool errors (e.g. 
HTTP 403) no longer cause OpenAI HTTP 400 "tool_calls must be followed by tool messages"; `attempt_self_reflection` is now deferred until after all `ToolResult` parts are assembled and pushed to message history, preserving the `Assistant{ToolUse} → User{ToolResults}` ordering required by the OpenAI and Claude APIs; `record_anomaly_outcome` errors are silently ignored so channel failures cannot abandon mid-batch ToolResult assembly; adds regression tests R-NTP-13 and R-NTP-14 for single and parallel permanent errors (#2197) +- fix(llm): `TriageRouter` now delegates `embed()` to the first embedding-capable tier provider instead of always returning `EmbeddingNotSupported`; `supports_embeddings()` reflects tier provider capability — resolves tool schema filter being silently disabled when `routing = "triage"` (#2174) +- fix(core): `/provider` switch and `/provider status` now display the configured `name` field from `[[llm.providers]]` instead of the provider type string (e.g. `"openai"`); `active_provider_name` stored in `RuntimeConfig` and updated on every switch (#2173) +- fix(llm): add missing `use crate::provider::MessageMetadata` import inside `#[cfg(test)]` in `candle_provider/template.rs`; `--features candle` alone now compiles and runs unit tests (`cargo nextest run -p zeph-llm --features candle --lib`) (#2189) +- fix(mcp): narrow `new_directive` injection pattern to require colon suffix, preventing false positive match on legitimate phrases like "new persona" in Todoist MCP tool descriptions; add regression test (#2170) +- fix(memory): run `PRAGMA wal_checkpoint(PASSIVE)` after FTS5 entity inserts to fix cross-session SYNAPSE seed lookup (#2166); checkpoint is called at `SqliteStore` startup (safety net) and after every `EntityResolver::resolve_batch` (targeted hook) +- fix(config): add `[security.guardrail]` stub to `default.toml` so `--migrate-config` injects commented guardrail defaults for configs that have `[security]` but no `[security.guardrail]` 
(#2158) +- ci: increase publish-crates timeout from 20 to 60 minutes and add `no-verify: true` to skip recompilation during publish (workspace has 21 crates; sequential publish with 15 s delays exceeded the previous limit) + ### Added - feat(classifiers): `ClassifierMetrics` ring buffer in `zeph-llm` — per-task (Injection, PII, Feedback) latency samples with p50/p95 percentiles computed via nearest-rank with `.round()`; capacity 100 samples per task; `record()` emits structured `tracing::info!` event with task, latency_ms, p50_ms, p95_ms, call_count; `snapshot()` returns `ClassifierMetricsSnapshot` for TUI consumption; `ContentSanitizer::with_classifier_metrics()` records Injection and PII call latencies; `LlmClassifier::with_metrics()` records Feedback call latencies; `MetricsSnapshot` extended with `classifier: ClassifierMetricsSnapshot`; TUI Resources panel shows compact classifier rows (calls/p50/p95) when at least one task has been called (#2249) @@ -4563,36 +4537,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- feat(orchestration): wire `planner_provider` — `[orchestration] planner_provider` now references a `[[llm.providers]]` name; `build_planner_provider()` resolves it at bootstrap; `LlmPlanner` receives the dedicated provider instead of always falling back to primary; `migrate_planner_model_to_provider()` comments out old `planner_model` values with a warning (#2172) - feat(config): unify STT provider under `[[llm.providers]]` (#2175) — `SttConfig` now holds only `provider` (name reference) and `language`; `model` and `base_url` move to a `stt_model` field on `ProviderEntry` (mirrors `embedding_model` pattern); `LlmConfig::stt_provider_entry()` resolves the active STT entry by name with auto-detect fallback to the first entry with `stt_model`; `LlmConfig::validate_stt()` checks cross-reference consistency; `migrate_stt_to_provider()` auto-converts old `[llm.stt]` `model`/`base_url` fields to `stt_model` on the matching provider entry (W2: forces explicit `name` on migrated entries); env vars `ZEPH_STT_MODEL` and `ZEPH_STT_BASE_URL` removed (log deprecation warning); unified dispatch in `runner.rs` respects `#[cfg(feature = "candle")]` and `#[cfg(feature = "stt")]` gates with explicit error log when feature is absent; TUI metrics `stt_model` now read from resolved `ProviderEntry` -- feat(memory): structured compaction probe categories — four functional probe dimensions (Recall, Artifact, Continuation, Decision) with per-category scoring, configurable `category_weights`, dedicated `probe_provider` config field resolving from `[[llm.providers]]`; TUI memory panel shows per-category breakdown (`Rec/Art/Con/Dec`) with threshold-based color coding; `last_probe_category_scores` added to `MetricsSnapshot`; debug dump includes category breakdown (#2164) - -### Breaking Changes - -- `TopologyClassifier::suggest_max_parallel()` removed; use `TopologyClassifier::analyze()` which returns `TopologyAnalysis` with `max_parallel`, `strategy`, `depth`, and `depths` fields. 
`DagScheduler::topology()` now returns `&TopologyAnalysis` instead of `Option`; use `.topology` field to access the `Topology` variant. -- `[[mcp.servers]]` entries added without `trust_level` now default to `untrusted`, which enforces SSRF validation. Previous behavior was equivalent to `trusted` (SSRF skipped). Run `zeph --migrate-config --in-place` to set `trust_level = "trusted"` on all existing servers automatically. -- `[orchestration] planner_model` renamed to `planner_provider`; now references a `[[llm.providers]]` name instead of a raw model string; empty = use primary provider. Run `zeph --migrate-config --in-place` to auto-migrate (old value is commented out; update it to a provider name). -- `[llm.stt].model` and `[llm.stt].base_url` fields removed from `SttConfig`; move them to `stt_model` / `base_url` on the corresponding `[[llm.providers]]` entry. Run `zeph --migrate-config --in-place` to auto-convert. -- `ZEPH_STT_MODEL` and `ZEPH_STT_BASE_URL` environment variables no longer applied; use provider-level config instead. 
-- feat(tui): Phase 2 dynamic metrics — add `stt_model`, `compaction_model`, `provider_temperature`, `provider_top_p`, `active_channel`, `embedding_model`, `token_budget`, `self_learning_enabled`, `semantic_cache_enabled` to `MetricsSnapshot`; status bar shows active model name and `ch:` segment; resources panel shows embedding model, token budget, and learning flag; `/provider` switch updates `provider_temperature`/`provider_top_p` in real time for Candle providers (#2160) -- feat(tui): Phase 1 dynamic metrics in TUI — 8 new fields in `MetricsSnapshot` (`embedding_model`, `token_budget`, `compaction_threshold`, `vault_backend`, `active_channel`, `self_learning_enabled`, `cache_enabled`, `autosave_enabled`); Resources panel redesigned with LLM/Session/Infra grouped sections and overflow collapse at height < 30; status bar shows active model name replacing the low-value Panel toggle indicator - -### Fixed - -- fix(tools): `TrustGateExecutor` now delegates `is_tool_retryable()` to its inner executor; previously the missing delegation caused Phase 2 transient retry to never fire for HTTP-based tools (e.g. `fetch` returning 503) because the default `false` short-circuited the retry guard (#2223) -- fix(tools): `ToolErrorFeedback.retryable` is now set from `category.is_retryable()` instead of being hardcoded `false`; transient categories (ServerError, NetworkError, RateLimited, Timeout) now correctly report `retryable: true` in LLM feedback; suggestion text for transient categories changed from "The system will retry automatically." to "The system will retry if possible." 
to avoid contradicting a `retryable: false` state at exhaustion (#2222) -- fix(memory): deferred tool pair summarization no longer leaves orphaned `tool_use` messages in the DB; `apply_deferred_summaries()` now accumulates `(db_id, summary_text)` pairs into `deferred_db_hide_ids`/`deferred_db_summaries` fields on `Agent`, flushed atomically to SQLite via `apply_tool_pair_summaries()` (sets `agent_visible=0` on hidden pairs and inserts summary messages in one transaction); `db_id: Option` added to `MessageMetadata` (`#[serde(skip)]`) and populated from DB on `load_history_filtered()`; eliminates unbounded growth of `WARN stripping orphaned mid-history tool_use parts` on every subsequent session restore (#2243) - -- fix(memory): reject self-loop edges in graph extractor — `extract_and_store` skips edges where source and target resolve to the same entity ID; `insert_edge_typed` returns `MemoryError::InvalidInput` for same-ID pairs; migration `044` removes existing self-loops and adds a BEFORE INSERT trigger to enforce the constraint at the DB level (#2215) - -- fix(security): agent no longer calls `fetch`/`web_scrape` with hallucinated URLs; three-layer defense: (1) tool descriptions now explicitly prohibit constructing or inferring URLs from entity names; (2) system prompt `## Guidelines` adds a fetch/URL grounding rule; (3) new `UrlGroundingVerifier` pre-execution gate blocks `fetch`, `web_scrape`, and `*_fetch` tool calls when the requested URL was not present in any user message in the session — returns "fetch rejected: URL was not provided by the user"; `user_provided_urls` extracted via `extract_flagged_urls` on every user turn, cleared on `/clear`; configurable via `[security.pre_execution_verify.url_grounding]` (#2191) - -- fix(core): permanent tool errors (e.g. 
HTTP 403) no longer cause OpenAI HTTP 400 "tool_calls must be followed by tool messages"; `attempt_self_reflection` is now deferred until after all `ToolResult` parts are assembled and pushed to message history, preserving the `Assistant{ToolUse} → User{ToolResults}` ordering required by the OpenAI and Claude APIs; `record_anomaly_outcome` errors are silently ignored so channel failures cannot abandon mid-batch ToolResult assembly; adds regression tests R-NTP-13 and R-NTP-14 for single and parallel permanent errors (#2197) -- fix(llm): `TriageRouter` now delegates `embed()` to the first embedding-capable tier provider instead of always returning `EmbeddingNotSupported`; `supports_embeddings()` reflects tier provider capability — resolves tool schema filter being silently disabled when `routing = "triage"` (#2174) -- fix(core): `/provider` switch and `/provider status` now display the configured `name` field from `[[llm.providers]]` instead of the provider type string (e.g. `"openai"`); `active_provider_name` stored in `RuntimeConfig` and updated on every switch (#2173) -- fix(llm): add missing `use crate::provider::MessageMetadata` import inside `#[cfg(test)]` in `candle_provider/template.rs`; `--features candle` alone now compiles and runs unit tests (`cargo nextest run -p zeph-llm --features candle --lib`) (#2189) -- fix(mcp): narrow `new_directive` injection pattern to require colon suffix, preventing false positive match on legitimate phrases like "new persona" in Todoist MCP tool descriptions; add regression test (#2170) -- fix(memory): run `PRAGMA wal_checkpoint(PASSIVE)` after FTS5 entity inserts to fix cross-session SYNAPSE seed lookup (#2166); checkpoint is called at `SqliteStore` startup (safety net) and after every `EntityResolver::resolve_batch` (targeted hook) -- fix(config): add `[security.guardrail]` stub to `default.toml` so `--migrate-config` injects commented guardrail defaults for configs that have `[security]` but no `[security.guardrail]` 
(#2158) -- ci: increase publish-crates timeout from 20 to 60 minutes and add `no-verify: true` to skip recompilation during publish (workspace has 21 crates; sequential publish with 15 s delays exceeded the previous limit) +- feat(memory): structured compaction probe categories — four functional probe dimensions (Recall, Artifact, Continuation, Decision) with per-category scoring, configurable `category_weights`, dedicated `probe_provider` config field resolving from `[[llm.providers]]`; TUI memory panel shows per-category breakdown (`Rec/Art/Con/Dec`) with threshold-based color coding; `last_probe_category_scores` added to `MetricsSnapshot`; debug dump includes category breakdown (#2164) + +### Breaking Changes + +- `TopologyClassifier::suggest_max_parallel()` removed; use `TopologyClassifier::analyze()` which returns `TopologyAnalysis` with `max_parallel`, `strategy`, `depth`, and `depths` fields. `DagScheduler::topology()` now returns `&TopologyAnalysis` instead of `Option`; use `.topology` field to access the `Topology` variant. +- `[[mcp.servers]]` entries added without `trust_level` now default to `untrusted`, which enforces SSRF validation. Previous behavior was equivalent to `trusted` (SSRF skipped). Run `zeph --migrate-config --in-place` to set `trust_level = "trusted"` on all existing servers automatically. +- `[orchestration] planner_model` renamed to `planner_provider`; now references a `[[llm.providers]]` name instead of a raw model string; empty = use primary provider. Run `zeph --migrate-config --in-place` to auto-migrate (old value is commented out; update it to a provider name). +- `[llm.stt].model` and `[llm.stt].base_url` fields removed from `SttConfig`; move them to `stt_model` / `base_url` on the corresponding `[[llm.providers]]` entry. Run `zeph --migrate-config --in-place` to auto-convert. +- `ZEPH_STT_MODEL` and `ZEPH_STT_BASE_URL` environment variables no longer applied; use provider-level config instead. 
+- feat(tui): Phase 2 dynamic metrics — add `stt_model`, `compaction_model`, `provider_temperature`, `provider_top_p`, `active_channel`, `embedding_model`, `token_budget`, `self_learning_enabled`, `semantic_cache_enabled` to `MetricsSnapshot`; status bar shows active model name and `ch:` segment; resources panel shows embedding model, token budget, and learning flag; `/provider` switch updates `provider_temperature`/`provider_top_p` in real time for Candle providers (#2160) +- feat(tui): Phase 1 dynamic metrics in TUI — 8 new fields in `MetricsSnapshot` (`embedding_model`, `token_budget`, `compaction_threshold`, `vault_backend`, `active_channel`, `self_learning_enabled`, `cache_enabled`, `autosave_enabled`); Resources panel redesigned with LLM/Session/Infra grouped sections and overflow collapse at height < 30; status bar shows active model name replacing the low-value Panel toggle indicator ## [0.17.0] - 2026-03-23 @@ -4600,6 +4555,19 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) + +- fix(config): vault token resolution no longer auto-creates channel configs — `ZEPH_TELEGRAM_TOKEN`, `ZEPH_DISCORD_TOKEN`, and `ZEPH_SLACK_BOT_TOKEN` are now only injected if the corresponding `[telegram]`/`[discord]`/`[slack]` section already exists in the config file; previously `get_or_insert` would create a bare config with empty `allowed_users`, causing immediate startup failure (#2132) +- fix(tools): normalize `..` components in `resolve_via_ancestors` to prevent sandbox bypass (#2125) +- fix(tui): tool output content no longer duplicated within a single entry (#2126) — `handle_tool_output_event` now truncates streaming chunks accumulated during ToolStart/ToolOutputChunk and replaces them with the canonical `body_display` from ToolOutput; streaming chunks served as a live preview and are discarded when the final output arrives, eliminating the double `$ cmd\noutput` appearance +- fix(policy): `PolicyGateExecutor::set_effective_trust` now updates `PolicyContext.trust_level` so trust_level-based policy rules are evaluated against the actual invoking skill trust tier instead of the hardcoded `Trusted` default (#2112) + + +- fix(sidequest): `rebuild_cursors` and `apply_eviction` now scan both `MessagePart::ToolOutput` and `MessagePart::ToolResult` — SideQuest eviction was silently a no-op for all native tool-use providers (OpenAI, Claude) because they store results as `ToolResult`, not `ToolOutput`; eviction now works correctly regardless of provider; `ToolResult` uses content sentinel `"[evicted by sidequest]"` for idempotency since the variant has no `compacted_at` field (#2114) 
+- fix(tools): `FileExecutor` now expands `~` in `allowed_paths` before canonicalization, preventing silent sandbox violations when config contains tilde-prefixed paths (#2115) +- fix(tui): remove duplicate ToolStart/ToolOutput events from `forward_tool_events_to_tui` bridge (#2116) — `ToolEvent::Started` and `ToolEvent::Completed` are now skipped in the bridge; `TuiChannel::send_tool_start` / `send_tool_output` (called via the `Channel` trait) are the sole source of these events +- fix(memory): AOI tier promotion FOREIGN KEY constraint violation (#2102) — `run_promotion_sweep()` used `ConversationId(0)` as a sentinel for promoted semantic facts, but `conversations` uses `AUTOINCREMENT` starting at 1 so id=0 never exists; replaced with the real `conversation_id` from the highest-ranked candidate in the cluster; `PromotionCandidate` now carries `conversation_id` propagated from `find_promotion_candidates()` SELECT; added FK regression guard test and `find_promotion_candidates` conversation_id assertion test +- fix(skills): convert unsupported `>-` YAML block scalar modifier to `>` in all 19 skill files in `.zeph/skills/` — resolves silent load failures for all rewritten skills; 9 new skills (archive, cron, database, json-yaml, network, process-management, qdrant, regex, ssh-remote, text-processing) were completely unavailable (#2087) + ### Added - test: add unit tests for /provider command handlers (#2152) @@ -4616,17 +4584,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- feat(config): remove legacy LLM config structs (#2139) — `CloudLlmConfig`, `OpenAiConfig`, `GeminiConfig`, `OllamaConfig` (top-level), `OrchestratorConfig`, `OrchestratorProviderConfig`, `CompatibleConfig` structs removed from `zeph-config`; `ProviderKind::Orchestrator` and `ProviderKind::Router` variants removed; legacy `LlmConfig` fields (`provider`, `base_url`, `model`, `cloud`, `openai`, `gemini`, `ollama`, `compatible`, `orchestrator`, `vision_model`) removed; all bootstrap paths now use `create_provider_from_pool()` exclusively; empty pool falls back to default Ollama on localhost; `--init` wizard Orchestrator option now generates a two-entry `[[llm.providers]]` pool instead of the removed `[llm.orchestrator]` section; `check_legacy_format()` simplified to always return `Ok(())` -### Removed - feat(config): remove legacy LLM config structs (#2139) — `CloudLlmConfig`, `OpenAiConfig`, `GeminiConfig`, `OllamaConfig` (top-level), `OrchestratorConfig`, `OrchestratorProviderConfig`, `CompatibleConfig` structs removed from `zeph-config`; `ProviderKind::Orchestrator` and `ProviderKind::Router` variants removed; legacy `LlmConfig` fields (`provider`, `base_url`, `model`, `cloud`, `openai`, `gemini`, `ollama`, `compatible`, `orchestrator`, `vision_model`) removed; all bootstrap paths now use `create_provider_from_pool()` exclusively; empty pool falls back to default Ollama on localhost; `--init` wizard Orchestrator option now generates a two-entry `[[llm.providers]]` pool instead of the removed `[llm.orchestrator]` section; `check_legacy_format()` simplified to always return `Ok(())` -### Fixed - -- fix(config): vault token resolution no longer auto-creates channel configs — `ZEPH_TELEGRAM_TOKEN`, `ZEPH_DISCORD_TOKEN`, and `ZEPH_SLACK_BOT_TOKEN` are now only injected if the corresponding `[telegram]`/`[discord]`/`[slack]` section already exists in the config file; previously `get_or_insert` would create a bare config with empty `allowed_users`, causing immediate 
startup failure (#2132) -- fix(tools): normalize `..` components in `resolve_via_ancestors` to prevent sandbox bypass (#2125) -- fix(tui): tool output content no longer duplicated within a single entry (#2126) — `handle_tool_output_event` now truncates streaming chunks accumulated during ToolStart/ToolOutputChunk and replaces them with the canonical `body_display` from ToolOutput; streaming chunks served as a live preview and are discarded when the final output arrives, eliminating the double `$ cmd\noutput` appearance -- fix(policy): `PolicyGateExecutor::set_effective_trust` now updates `PolicyContext.trust_level` so trust_level-based policy rules are evaluated against the actual invoking skill trust tier instead of the hardcoded `Trusted` default (#2112) - ### Testing - test(channels): add injectable test transport to `TelegramChannel` (#2121) — `new_test()` constructor under `#[cfg(test)]` exposes an `mpsc::Sender` so all channel behavioral paths can be tested without a real bot token or live Telegram API; 12 new tests cover `recv()` message delivery, `/reset` and `/skills` command routing, unknown-command passthrough, channel-close returning `None`, text accumulation in `send_chunk()`, `flush_chunks()` state clearing, the `/start` welcome path via wiremock, `flush_chunks()` with `message_id` via wiremock, and `confirm()` timeout/close/yes/no logic at the rx-timeout level; adds `wiremock` and tokio `test-util` to dev-dependencies @@ -4650,20 +4610,25 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- refactor(zeph-llm): remove redundant `schema` feature gate — `schemars` is now a mandatory dependency of `zeph-llm`; all `#[cfg(feature = "schema")]` / `#[cfg_attr(feature = "schema", ...)]` annotations removed; `chat_typed`, `chat_typed_erased`, structured output types, and the `extractor` module are always compiled (#2100) - Promote `scheduler` and `guardrail` features to the default feature set; users with `default-features = false` are unaffected -### Fixed - -- fix(sidequest): `rebuild_cursors` and `apply_eviction` now scan both `MessagePart::ToolOutput` and `MessagePart::ToolResult` — SideQuest eviction was silently a no-op for all native tool-use providers (OpenAI, Claude) because they store results as `ToolResult`, not `ToolOutput`; eviction now works correctly regardless of provider; `ToolResult` uses content sentinel `"[evicted by sidequest]"` for idempotency since the variant has no `compacted_at` field (#2114) -- fix(tools): `FileExecutor` now expands `~` in `allowed_paths` before canonicalization, preventing silent sandbox violations when config contains tilde-prefixed paths (#2115) -- fix(tui): remove duplicate ToolStart/ToolOutput events from `forward_tool_events_to_tui` bridge (#2116) — `ToolEvent::Started` and `ToolEvent::Completed` are now skipped in the bridge; `TuiChannel::send_tool_start` / `send_tool_output` (called via the `Channel` trait) are the sole source of these events -- fix(memory): AOI tier promotion FOREIGN KEY constraint violation (#2102) — `run_promotion_sweep()` used `ConversationId(0)` as a sentinel for promoted semantic facts, but `conversations` uses `AUTOINCREMENT` starting at 1 so id=0 never exists; replaced with the real `conversation_id` from the highest-ranked candidate in the cluster; `PromotionCandidate` now carries `conversation_id` propagated from `find_promotion_candidates()` SELECT; added FK regression guard test and `find_promotion_candidates` conversation_id assertion test -- fix(skills): convert unsupported 
`>-` YAML block scalar modifier to `>` in all 19 skill files in `.zeph/skills/` — resolves silent load failures for all rewritten skills; 9 new skills (archive, cron, database, json-yaml, network, process-management, qdrant, regex, ssh-remote, text-processing) were completely unavailable (#2087) - ## [0.16.1] - 2026-03-21 ### Fixed - fix(memory): align MAGMA entity extraction prompt taxonomy with EntityType enum — replace `technology` with `tool` and `language` as separate types with clarifying descriptions; prevents "unknown entity type, falling back to Concept" resolver warnings for programming languages and frameworks (#2079) + +- fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) + + +- fix(scheduler): make task injection format explicit about execution intent (#2073) — replace ambiguous `[Scheduled task] ` prefix with `Execute the following scheduled task now: ` to prevent LLM from cancelling bash tasks instead of executing them + + +- fix(sanitizer): classify `memory_search` tool output as `MemoryRetrieval` with `ConversationHistory` hint — prevents false positive injection flags on benign recalled content (e.g. user discussing "system prompt"), which previously caused Qdrant embedding to be skipped for the entire turn (#2057) +- fix(core): distinguish probe rejection from success in `/compact` handler (#2058) — when `memory.compression.probe` is enabled and compaction probe HardFails, the `/compact` command now correctly shows "Compaction rejected: summary quality below threshold. Original context preserved." instead of the misleading "Context compacted successfully." 
message; updated handler to explicitly match `CompactionOutcome::ProbeRejected` separately from `Compacted | NoChange` +- test(memory): add corrupted BLOB deserialization tests in semantic cache (#2033) — verify graceful degradation when bytemuck::try_cast_slice fails; 4 tests cover odd-length BLOBs (SizeMismatch), even-but-not-aligned BLOBs (REC-1), empty BLOBs, and mixed corrupt+valid rows; no panics, proper error handling, IEEE 754 NaN safety documented +- fix(ml): rubato 1.0.1 API upgrade and StreamChunk wrapping for candle provider (#1858) — updated `candle_whisper.rs` resample function for rubato 1.0.1 (SincFixedIn removed, replaced with Async::new_sinc); wrapped ChatStream output in StreamChunk::Content pattern in candle_provider; added audioadapter-buffers dependency (gated on candle feature) +- fix(tests): gate `subgoal_extraction_tests` module on `context-compression` feature (#2067) — module at line 3520 in `crates/zeph-core/src/agent/context/summarization.rs` called `parse_subgoal_extraction_response()` which is gated with `#[cfg(feature = "context-compression")]`, but the test module itself was missing the feature gate, causing E0425 (cannot find function) when running `cargo nextest run --workspace --lib --bins` without `--features full`; added `#[cfg(feature = "context-compression")]` attribute to the test module definition + ### Documentation - Add compaction probe documentation to context engineering guide (#2050) @@ -4672,9 +4637,25 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- docs: document `semantic_cache_max_candidates` and `semantic_cache_threshold` recall-vs-performance tradeoff with detailed doc comments and tuning guidance; add DEBUG-level diagnostic logs for semantic cache lookup lifecycle (candidate count, per-candidate scores, hit/miss verdicts) (#2031) -### Fixed -- fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) +- refactor(config): remove dead `PruningStrategy::TaskAwareMig` variant (#1851) — the variant was routed to the same scored path as `TaskAware` (MIG scoring was never applied); serde `Deserialize` is now hand-implemented to route through `FromStr` so that `task_aware_mig` in TOML configs falls back to `Reactive` with a warning instead of hard-erroring; CLI `--pruning-strategy` help and `--init` wizard updated to remove the option; `config/default.toml` comment updated +- fix(core): enforce `prune_protect_tokens` in scored (`TaskAware`) and MIG pruning paths (#1851) — messages inside the protected tail are now skipped during scored/MIG eviction, consistent with the existing oldest-first behavior; new helper `prune_protection_boundary()` shared across all three paths +- fix(core): cap LLM-returned task goal to 500 characters in `maybe_refresh_task_goal()` (#1851) — oversized goals are truncated with a `WARN` log to prevent unbounded memory growth + +- refactor(config): add `Config::validate()` check for `llm.semantic_cache_threshold`; rejects values outside [0.0, 1.0] and non-finite values (NaN, Inf) with a descriptive error including the env var override hint (#2036) + +- fix(channels): `AnyChannel` and `AppChannel` now forward all 16 `Channel` trait methods; previously 
`send_thinking_chunk`, `send_stop_hint`, `send_usage`, and `send_tool_start` fell through to trait defaults, silently dropping events (CHAN-01, epic #1978) +- fix(channels): Discord and Slack `confirm()` now deny after 30s timeout, matching the existing Telegram behavior; previously they blocked indefinitely waiting for user input (CHAN-02, epic #1978) + +- refactor(core): add state-group accessor methods to `Agent` for all sub-structs (`msg`, `memory_state`, `skill_state`, `runtime`, etc.); migration from direct field access is incremental per file (ABS-04, epic #1977) +- fix(llm): `convert_messages_structured()` now preserves `Recall`, `CodeContext`, `Summary`, and `CrossSession` variants in OpenAI tool-use messages instead of silently dropping them (ABS-05, epic #1977) +- refactor(core): `with_context_budget()` emits `tracing::warn` when `budget_tokens == 0`; `Agent::new()` has `debug_assert` for `max_active_skills > 0` (ABS-07, epic #1977) + +- refactor(llm): extract `UsageTracker` struct to consolidate duplicate token usage tracking across Claude, OpenAI, Ollama, and Gemini providers (DRY-01+06, epic #1975) +- refactor(memory): remove duplicate `BoxFuture` type alias from `in_memory_store.rs`; import canonical definition from `vector_store.rs` (DRY-05, epic #1975) +- refactor(channels): add `ChannelError::other()` helper; replace 15 `.map_err(|e| ChannelError::Other(e.to_string()))` sites in telegram, discord, slack, and cli channels (DRY-04, epic #1975) +- refactor: remove dead code: `FOCUS_REMINDER_PREFIX` constant, `FocusState::should_remind()`, `ToolRateLimiter::is_tripped()`, `CorrectionKind::Abandonment` variant, `SidequestState::parse_eviction_response()` (epic #1976) +- ci: expand feature matrix to test intermediate feature combinations: `orchestration`, `orchestration,graph-memory`, `daemon,acp`, `tui,scheduler` (epic #1976) ### Added @@ -4709,39 +4690,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- refactor(memory): wrap `ResponseCache::cleanup()` DELETE and UPDATE operations in a single SQLite transaction for atomicity (closes #2032) -### Fixed - -- fix(scheduler): make task injection format explicit about execution intent (#2073) — replace ambiguous `[Scheduled task] ` prefix with `Execute the following scheduled task now: ` to prevent LLM from cancelling bash tasks instead of executing them - -### Changed - -- refactor(config): remove dead `PruningStrategy::TaskAwareMig` variant (#1851) — the variant was routed to the same scored path as `TaskAware` (MIG scoring was never applied); serde `Deserialize` is now hand-implemented to route through `FromStr` so that `task_aware_mig` in TOML configs falls back to `Reactive` with a warning instead of hard-erroring; CLI `--pruning-strategy` help and `--init` wizard updated to remove the option; `config/default.toml` comment updated -- fix(core): enforce `prune_protect_tokens` in scored (`TaskAware`) and MIG pruning paths (#1851) — messages inside the protected tail are now skipped during scored/MIG eviction, consistent with the existing oldest-first behavior; new helper `prune_protection_boundary()` shared across all three paths -- fix(core): cap LLM-returned task goal to 500 characters in `maybe_refresh_task_goal()` (#1851) — oversized goals are truncated with a `WARN` log to prevent unbounded memory growth - -- refactor(config): add `Config::validate()` check for `llm.semantic_cache_threshold`; rejects values outside [0.0, 1.0] and non-finite values (NaN, Inf) with a descriptive error including the env var override hint (#2036) - -- fix(channels): `AnyChannel` and `AppChannel` now forward all 16 `Channel` trait methods; previously `send_thinking_chunk`, `send_stop_hint`, `send_usage`, and `send_tool_start` fell through to trait defaults, silently dropping events (CHAN-01, epic #1978) -- fix(channels): Discord and Slack `confirm()` now deny after 30s timeout, matching the existing Telegram behavior; previously 
they blocked indefinitely waiting for user input (CHAN-02, epic #1978) - -- refactor(core): add state-group accessor methods to `Agent` for all sub-structs (`msg`, `memory_state`, `skill_state`, `runtime`, etc.); migration from direct field access is incremental per file (ABS-04, epic #1977) -- fix(llm): `convert_messages_structured()` now preserves `Recall`, `CodeContext`, `Summary`, and `CrossSession` variants in OpenAI tool-use messages instead of silently dropping them (ABS-05, epic #1977) -- refactor(core): `with_context_budget()` emits `tracing::warn` when `budget_tokens == 0`; `Agent::new()` has `debug_assert` for `max_active_skills > 0` (ABS-07, epic #1977) - -- refactor(llm): extract `UsageTracker` struct to consolidate duplicate token usage tracking across Claude, OpenAI, Ollama, and Gemini providers (DRY-01+06, epic #1975) -- refactor(memory): remove duplicate `BoxFuture` type alias from `in_memory_store.rs`; import canonical definition from `vector_store.rs` (DRY-05, epic #1975) -- refactor(channels): add `ChannelError::other()` helper; replace 15 `.map_err(|e| ChannelError::Other(e.to_string()))` sites in telegram, discord, slack, and cli channels (DRY-04, epic #1975) -- refactor: remove dead code: `FOCUS_REMINDER_PREFIX` constant, `FocusState::should_remind()`, `ToolRateLimiter::is_tripped()`, `CorrectionKind::Abandonment` variant, `SidequestState::parse_eviction_response()` (epic #1976) -- ci: expand feature matrix to test intermediate feature combinations: `orchestration`, `orchestration,graph-memory`, `daemon,acp`, `tui,scheduler` (epic #1976) - -### Fixed - -- fix(sanitizer): classify `memory_search` tool output as `MemoryRetrieval` with `ConversationHistory` hint — prevents false positive injection flags on benign recalled content (e.g. 
user discussing "system prompt"), which previously caused Qdrant embedding to be skipped for the entire turn (#2057) -- fix(core): distinguish probe rejection from success in `/compact` handler (#2058) — when `memory.compression.probe` is enabled and compaction probe HardFails, the `/compact` command now correctly shows "Compaction rejected: summary quality below threshold. Original context preserved." instead of the misleading "Context compacted successfully." message; updated handler to explicitly match `CompactionOutcome::ProbeRejected` separately from `Compacted | NoChange` -- test(memory): add corrupted BLOB deserialization tests in semantic cache (#2033) — verify graceful degradation when bytemuck::try_cast_slice fails; 4 tests cover odd-length BLOBs (SizeMismatch), even-but-not-aligned BLOBs (REC-1), empty BLOBs, and mixed corrupt+valid rows; no panics, proper error handling, IEEE 754 NaN safety documented -- fix(ml): rubato 1.0.1 API upgrade and StreamChunk wrapping for candle provider (#1858) — updated `candle_whisper.rs` resample function for rubato 1.0.1 (SincFixedIn removed, replaced with Async::new_sinc); wrapped ChatStream output in StreamChunk::Content pattern in candle_provider; added audioadapter-buffers dependency (gated on candle feature) -- fix(tests): gate `subgoal_extraction_tests` module on `context-compression` feature (#2067) — module at line 3520 in `crates/zeph-core/src/agent/context/summarization.rs` called `parse_subgoal_extraction_response()` which is gated with `#[cfg(feature = "context-compression")]`, but the test module itself was missing the feature gate, causing E0425 (cannot find function) when running `cargo nextest run --workspace --lib --bins` without `--features full`; added `#[cfg(feature = "context-compression")]` attribute to the test module definition - ### Performance - perf(memory): add `expires_at` to `idx_response_cache_semantic` composite index (migration 038) — `get_semantic()` now filters expired rows within the 
index scan instead of post-filtering on the heap (#2030) @@ -4751,6 +4699,24 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) + +- fix(llm): OpenAI API 400 Bad Request on skill documentation queries (closes #1952) + - Root cause: `StructuredApiMessage.content` was `String` instead of `Option<String>`. When LLM called tools without preceding text, empty string `""` was serialized alongside `tool_calls`, but OpenAI API requires `null` (or absent) for messages with `tool_calls` + - Changed `content: String` → `content: Option<String>` with `#[serde(skip_serializing_if = "Option::is_none")]` + - Updated `convert_messages_structured` to emit `None` when text content is empty + - Fixed tool `arguments` JSON fallback: `unwrap_or_default()` → `unwrap_or_else(|_| "{}".to_owned())` + - Added regression test: `convert_messages_structured_assistant_tool_only_content_is_none` + - Error was intermittent because it only manifested when prior assistant turns had tool_calls without text and survived compression cycles +- fix(memory): `QdrantOps::ensure_collection` and `ensure_collection_with_quantization` now detect + vector dimension mismatches on existing collections and automatically recreate them instead of + silently returning `Ok(())` with stale dimensions (closes #1951) + - Affects all Qdrant-backed collections: `zeph_conversations`, `zeph_session_summaries`, + `zeph_key_facts`, `zeph_corrections`, `zeph_graph_entities`, and code-index collections + - Logs a `WARN`-level message with collection name, existing and required dimensions before + recreating; data loss is expected
and intentional when the embedding model changes + - Added four `#[ignore]` integration tests covering idempotency (same size) and recreation + (mismatched size) for both `ensure_collection` and `ensure_collection_with_quantization` + ### Added - refactor(orchestration): extract task orchestration into new `zeph-orchestration` crate (Epic #1973 Phase 1g, #1979) @@ -4842,25 +4808,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Covers `InstructionState`, `ExperimentState`, `MessageState`, `SessionState`, `RuntimeConfig`, `FeedbackState`, and `CompressionState` - Feature-gated tests: `experiments` and `context-compression` paths verified independently -### Fixed - -- fix(llm): OpenAI API 400 Bad Request on skill documentation queries (closes #1952) - - Root cause: `StructuredApiMessage.content` was `String` instead of `Option<String>`. When LLM called tools without preceding text, empty string `""` was serialized alongside `tool_calls`, but OpenAI API requires `null` (or absent) for messages with `tool_calls` - - Changed `content: String` → `content: Option<String>` with `#[serde(skip_serializing_if = "Option::is_none")]` - - Updated `convert_messages_structured` to emit `None` when text content is empty - - Fixed tool `arguments` JSON fallback: `unwrap_or_default()` → `unwrap_or_else(|_| "{}".to_owned())` - - Added regression test: `convert_messages_structured_assistant_tool_only_content_is_none` - - Error was intermittent because it only manifested when prior assistant turns had tool_calls without text and survived compression cycles -- fix(memory): `QdrantOps::ensure_collection` and `ensure_collection_with_quantization` now detect - vector dimension mismatches on existing collections and automatically recreate them instead of - silently returning `Ok(())` with stale dimensions (closes #1951) - - Affects all Qdrant-backed collections: `zeph_conversations`, `zeph_session_summaries`, - `zeph_key_facts`, `zeph_corrections`,
`zeph_graph_entities`, and code-index collections - - Logs a `WARN`-level message with collection name, existing and required dimensions before - recreating; data loss is expected and intentional when the embedding model changes - - Added four `#[ignore]` integration tests covering idempotency (same size) and recreation - (mismatched size) for both `ensure_collection` and `ensure_collection_with_quantization` - ## [0.15.3] - 2026-03-17 ### Fixed @@ -4871,10 +4818,28 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - `record_filter_metrics` extracted to `agent/utils.rs` as shared helper; called from all four metric-recording sites (3 native + 1 legacy) - Added two regression tests: normal native path and self-reflection remaining-tools path -### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) + +- fix(tui): graph metrics panel now shows correct entity/edge/community counts (closes #1938) + - `App::with_metrics_rx()` now eagerly reads the initial `MetricsSnapshot` value so counts are visible immediately on TUI startup, not skipped because `has_changed() = false` + - `spawn_graph_extraction()` in `zeph-memory` now returns `JoinHandle<()>`; a follow-up spawn in `persistence.rs` awaits the handle and re-reads graph counts from the DB after extraction completes, replacing the stale-zero read that happened synchronously before the fire-and-forget task finished +- fix(tui): implement `send_tool_start` in `TuiChannel` — native tool calls now emit a `ToolStart` event so the TUI shows a spinner and `$ command` header before tool output arrives (closes #1931); `handle_tool_output_event` now appends output 
content when finalizing a streaming tool message +- fix(tui): graph memory metrics (entities/edges/communities) now update every turn instead of only when graph extraction fires — `sync_graph_counts()` is now called per-turn in `process_user_message_inner` in addition to at startup (closes #1932) +- fix(context-compression): `extract_task_goal` is now fire-and-forget — spawns a background tokio task and returns immediately; result is applied at the start of the next Soft compaction (#1909). Eliminates the 5-second blocking LLM call on every compaction that made `task_aware`/`mig`/`task_aware_mig` strategies non-functional for cloud LLM providers. Timeout raised from 5s to 30s in the background task. Current compaction uses the cached goal from the previous turn with no latency impact. +- fix(llm): `/model` list no longer returns 404 for standard OpenAI config — `list_models_remote` was constructing `{base_url}/v1/models` when `base_url` already contains `/v1`; corrected to `{base_url}/models` (closes #1903) +- fix(core): corrections now stored even when `LearningConfig::enabled = false` (closes #1910) +- fix(memory): sync session summaries to Qdrant on compact_context happy path (#1911) — `store_session_summary()` was only called in fallback branches; now also called after a successful `replace_conversation()` in both `compact_context` variants +- Wire `[agent.focus]` and `[memory.sidequest]` config to `AgentBuilder` in all bootstrap paths (`runner.rs`, `daemon.rs`, `acp.rs`); previously both configs were parsed but never applied, causing focus and sidequest to always use defaults (`enabled = false`) (closes #1907) +- fix(memory): use deterministic UUID v5 for session summary Qdrant point to prevent duplicates on repeated compaction (#1917) +- fix(tui): clear "saving to graph..." 
spinner immediately after `spawn_graph_extraction` — spinner was never cleared since the spawn is fire-and-forget; status is now reset to `""` right after scheduling the background task (closes #1924) +- fix(graph-memory): prevent structural noise from polluting `zeph_graph_entities` graph (closes #1912) + - Skip graph extraction entirely for `Role::User` messages containing `ToolResult` parts — tool outputs (TOML, JSON, command output) are structural data, not conversational content (FIX-1) + - Exclude `ToolResult` user messages from the context window passed to the extraction LLM call (FIX-2) + - Add `min_entity_name_bytes = 3` to `MemoryWriteValidationConfig` and enforce it in `validate_graph_extraction`; also added a matching guard in `EntityResolver::resolve()` via `MIN_ENTITY_NAME_BYTES` constant (FIX-3) + - Revise extraction prompt: restrict entity types to `person`, `project`, `technology`, `organization`, `concept`; add explicit rules against extracting structural data (config keys, file paths, tool names, TOML/JSON keys), short tokens, and raw command output (FIX-4) + ### Added - test(memory): add integration tests for `store_session_summary` → Qdrant upsert roundtrip (closes #1916) — four `#[ignore]` tests in `crates/zeph-memory/tests/qdrant_integration.rs` using testcontainers: `store_session_summary_roundtrip`, `store_session_summary_multiple_conversations`, `store_shutdown_summary_full_roundtrip`, `search_session_summaries_returns_empty_when_no_data`; each test guards against silent Qdrant disconnection and verifies both the Qdrant vector path and (where applicable) the SQLite content path @@ -4908,26 +4873,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- obs(orchestration): `tasks_skipped` counter now correctly incremented in both `GraphStatus::Completed` and `GraphStatus::Failed` arms of `finalize_plan_execution` - obs(orchestration): `/status` command shows an `Orchestration:` block (plans, tasks completed/failed/skipped) when `orchestration.enabled = true` and at least one plan has been executed -### Fixed - -- fix(tui): graph metrics panel now shows correct entity/edge/community counts (closes #1938) - - `App::with_metrics_rx()` now eagerly reads the initial `MetricsSnapshot` value so counts are visible immediately on TUI startup, not skipped because `has_changed() = false` - - `spawn_graph_extraction()` in `zeph-memory` now returns `JoinHandle<()>`; a follow-up spawn in `persistence.rs` awaits the handle and re-reads graph counts from the DB after extraction completes, replacing the stale-zero read that happened synchronously before the fire-and-forget task finished -- fix(tui): implement `send_tool_start` in `TuiChannel` — native tool calls now emit a `ToolStart` event so the TUI shows a spinner and `$ command` header before tool output arrives (closes #1931); `handle_tool_output_event` now appends output content when finalizing a streaming tool message -- fix(tui): graph memory metrics (entities/edges/communities) now update every turn instead of only when graph extraction fires — `sync_graph_counts()` is now called per-turn in `process_user_message_inner` in addition to at startup (closes #1932) -- fix(context-compression): `extract_task_goal` is now fire-and-forget — spawns a background tokio task and returns immediately; result is applied at the start of the next Soft compaction (#1909). Eliminates the 5-second blocking LLM call on every compaction that made `task_aware`/`mig`/`task_aware_mig` strategies non-functional for cloud LLM providers. Timeout raised from 5s to 30s in the background task. Current compaction uses the cached goal from the previous turn with no latency impact. 
-- fix(llm): `/model` list no longer returns 404 for standard OpenAI config — `list_models_remote` was constructing `{base_url}/v1/models` when `base_url` already contains `/v1`; corrected to `{base_url}/models` (closes #1903) -- fix(core): corrections now stored even when `LearningConfig::enabled = false` (closes #1910) -- fix(memory): sync session summaries to Qdrant on compact_context happy path (#1911) — `store_session_summary()` was only called in fallback branches; now also called after a successful `replace_conversation()` in both `compact_context` variants -- Wire `[agent.focus]` and `[memory.sidequest]` config to `AgentBuilder` in all bootstrap paths (`runner.rs`, `daemon.rs`, `acp.rs`); previously both configs were parsed but never applied, causing focus and sidequest to always use defaults (`enabled = false`) (closes #1907) -- fix(memory): use deterministic UUID v5 for session summary Qdrant point to prevent duplicates on repeated compaction (#1917) -- fix(tui): clear "saving to graph..." 
spinner immediately after `spawn_graph_extraction` — spinner was never cleared since the spawn is fire-and-forget; status is now reset to `""` right after scheduling the background task (closes #1924) -- fix(graph-memory): prevent structural noise from polluting `zeph_graph_entities` graph (closes #1912) - - Skip graph extraction entirely for `Role::User` messages containing `ToolResult` parts — tool outputs (TOML, JSON, command output) are structural data, not conversational content (FIX-1) - - Exclude `ToolResult` user messages from the context window passed to the extraction LLM call (FIX-2) - - Add `min_entity_name_bytes = 3` to `MemoryWriteValidationConfig` and enforce it in `validate_graph_extraction`; also added a matching guard in `EntityResolver::resolve()` via `MIN_ENTITY_NAME_BYTES` constant (FIX-3) - - Revise extraction prompt: restrict entity types to `person`, `project`, `technology`, `organization`, `concept`; add explicit rules against extracting structural data (config keys, file paths, tool names, TOML/JSON keys), short tokens, and raw command output (FIX-4) - ### Security - Suppress CodeQL `rust/cleartext-logging` false positives on intentional debug/trace log sites (closes #1905) @@ -4939,6 +4884,27 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) + +- fix(policy): `/policy status` now reports the correct total rule count when rules are loaded from an external `policy_file` — previously `handle_policy_command()` used `policy_config.rules.len()` which only counted inline TOML rules; the handler now compiles the enforcer to get the merged count, falling back to the inline count on compile error (closes #1898) + +- fix(orchestration): scheduler deadlock no longer emits misleading "Plan failed. 0/N tasks failed" message — non-terminal tasks are now marked `Canceled` at deadlock time (mirrors `cancel_all()` semantics); the done message now distinguishes pure deadlock ("Plan canceled. N/M tasks did not run."), mixed failure+cancellation ("Plan failed. 
X/M tasks failed, Y canceled:"), and normal failure paths (closes #1879) +- sec(policy): `load_policy_file()` now canonicalizes the path before reading and rejects policy files whose canonical path escapes the process working directory — mirrors the symlink boundary check already present in `load_instructions()`; adds `PolicyCompileError::FileEscapesRoot` variant (closes #1872) +- fix(security): all MCP tools are now denied for quarantined skills — `TrustGateExecutor` tracks registered MCP tool IDs via `mcp_tool_ids_handle()` and blocks any call whose ID appears in the set; `is_quarantine_denied()` suffix matching provides defence-in-depth for MCP tools matching the `QUARANTINE_DENIED` list (fixes #1876) +- fix(policy): accept "shell"/"sh" as aliases for "bash" tool_id in policy rules — `ShellExecutor` registers as `tool_id="bash"` but users write `tool="shell"` in TOML rules; `resolve_tool_alias()` in `PolicyEnforcer` normalizes both sides (compile-time rule names and runtime tool_id) so `tool="shell"`, `tool="bash"`, and `tool="sh"` all match correctly (closes #1877) +- fix(security): `/policy check` no longer leaks process environment variables into trace output — `PolicyContext.env` is now an empty `HashMap` for the diagnostic command (#1873); added optional `--trust-level ` argument to simulate non-default trust tiers (`trusted`, `verified`, `quarantined`, `blocked`); `TrustLevel` now implements `FromStr` +- fix(policy): remove `PolicyEffect::AllowIf` variant — it was declared but evaluated identically to `Allow`, creating misleading TOML documentation; conditions are expressed via rule fields directly (closes #1871) +- fix(core): overflow notice no longer embeds `overflow:` prefix — notice format changed from `[full output stored as overflow:{uuid} — ...]` to `[full output stored — ID: {uuid} — ...]` so the LLM does not pass `overflow:` to `read_overflow`, which only accepts bare UUIDs; `read_overflow` now also accepts and strips the legacy `overflow:` prefix 
for backwards compatibility (closes #1868) +- fix(memory): session summary timeout now attempts plain-text fallback instead of silently returning `None` — when the structured LLM call in `call_llm_for_session_summary()` times out, the agent falls back to a plain `chat()` call (same path already used on structured call error); extracted `plain_text_summary_fallback()` helper to avoid code duplication; added `shutdown_summary_timeout_secs` (default: 10) to `[memory]` config to replace the hardcoded 5s limit (closes #1869) +- fix(security): redact JWT Bearer tokens in `redact_sensitive()` — `Authorization: Bearer ` headers and standalone JWT strings (`eyJ...`) are now replaced with `[REDACTED]`/`[REDACTED_JWT]` before `compression_failure_pairs` SQLite insert (closes #1847) +- fix(memory): widen soft compaction window — lower `soft_compaction_threshold` default from `0.70` to `0.60`, widening the soft tier firing range from 20% to 30% of the context budget; prevents large tool outputs (10–30k tokens) from jumping directly past soft into hard compaction; add `maybe_soft_compact_mid_iteration()` called after per-tool summarization in native and legacy tool loops so context pressure is relieved without touching turn counters, cooldown, or triggering LLM calls; config validation that `soft < hard` was already enforced and remains in place (closes #1828) +- fix(security): redact secrets and filesystem paths in compression_failure_pairs before SQLite storage (#1801) +- fix(llm): strip URL path in `parse_host_port` — Ollama `base_url` with `/v1` suffix no longer produces 404 on embed calls (#1832) +- Qdrant collection dimension mismatch when switching embedding models on collections with 0 points (#1815) +- fix(debug): trace.json now written inside per-session subdir, preventing overwrites (#1814) +- A-MEM note linking never created `similar_to` edges because `EntityResolver` in `extract_and_store` was constructed without `with_embedding_store()`, leaving 
`zeph_graph_entities` unpopulated; pass the Qdrant embedding store through to the resolver so entity embeddings are stored and note linking can find semantically similar entities across sessions (#1817) +- graph-memory: entity embeddings now correctly stored in Qdrant — `EntityResolver` was built without a provider in `extract_and_store()`, causing `store_entity_embedding()` to never be called and `zeph_graph_entities` collection to remain empty (fixes #1829) +- fix(core): JIT tool reference injection now works after overflow migration to SQLite — `OVERFLOW_NOTICE_PREFIX` and `extract_overflow_ref()` updated to match the `overflow:{uuid}` format; pruned tool output notices now read `[tool output pruned; use read_overflow {uuid} to retrieve]` instead of a stale file-path reference (closes #1818) + ### Added - feat(core): context compression subsystem — Focus Agent, SWE-Pruner/COMI, and SideQuest eviction behind `context-compression` feature flag (closes #1850, #1851, #1885) @@ -4975,38 +4941,23 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- refactor(acp): centralize ACP session config wiring via `AgentSessionConfig::from_config()` and `Agent::apply_session_config()` (#1812) — replaces ~25 individually-copied scalar fields in `SharedAgentDeps` and redundant builder call blocks in `spawn_acp_agent`, `runner.rs`, and `daemon.rs` with a single struct; eliminates hardcoded `0.20` literal (now `CONTEXT_BUDGET_RESERVE_RATIO`); fixes missing `with_orchestration_config` and `with_server_compaction` in daemon sessions -### Fixed - -- fix(policy): `/policy status` now reports the correct total rule count when rules are loaded from an external `policy_file` — previously `handle_policy_command()` used `policy_config.rules.len()` which only counted inline TOML rules; the handler now compiles the enforcer to get the merged count, falling back to the inline count on compile error (closes #1898) - -- fix(orchestration): scheduler deadlock no longer emits misleading "Plan failed. 0/N tasks failed" message — non-terminal tasks are now marked `Canceled` at deadlock time (mirrors `cancel_all()` semantics); the done message now distinguishes pure deadlock ("Plan canceled. N/M tasks did not run."), mixed failure+cancellation ("Plan failed. 
X/M tasks failed, Y canceled:"), and normal failure paths (closes #1879) -- sec(policy): `load_policy_file()` now canonicalizes the path before reading and rejects policy files whose canonical path escapes the process working directory — mirrors the symlink boundary check already present in `load_instructions()`; adds `PolicyCompileError::FileEscapesRoot` variant (closes #1872) -- fix(security): all MCP tools are now denied for quarantined skills — `TrustGateExecutor` tracks registered MCP tool IDs via `mcp_tool_ids_handle()` and blocks any call whose ID appears in the set; `is_quarantine_denied()` suffix matching provides defence-in-depth for MCP tools matching the `QUARANTINE_DENIED` list (fixes #1876) -- fix(policy): accept "shell"/"sh" as aliases for "bash" tool_id in policy rules — `ShellExecutor` registers as `tool_id="bash"` but users write `tool="shell"` in TOML rules; `resolve_tool_alias()` in `PolicyEnforcer` normalizes both sides (compile-time rule names and runtime tool_id) so `tool="shell"`, `tool="bash"`, and `tool="sh"` all match correctly (closes #1877) -- fix(security): `/policy check` no longer leaks process environment variables into trace output — `PolicyContext.env` is now an empty `HashMap` for the diagnostic command (#1873); added optional `--trust-level ` argument to simulate non-default trust tiers (`trusted`, `verified`, `quarantined`, `blocked`); `TrustLevel` now implements `FromStr` -- fix(policy): remove `PolicyEffect::AllowIf` variant — it was declared but evaluated identically to `Allow`, creating misleading TOML documentation; conditions are expressed via rule fields directly (closes #1871) -- fix(core): overflow notice no longer embeds `overflow:` prefix — notice format changed from `[full output stored as overflow:{uuid} — ...]` to `[full output stored — ID: {uuid} — ...]` so the LLM does not pass `overflow:` to `read_overflow`, which only accepts bare UUIDs; `read_overflow` now also accepts and strips the legacy `overflow:` prefix 
for backwards compatibility (closes #1868) -- fix(memory): session summary timeout now attempts plain-text fallback instead of silently returning `None` — when the structured LLM call in `call_llm_for_session_summary()` times out, the agent falls back to a plain `chat()` call (same path already used on structured call error); extracted `plain_text_summary_fallback()` helper to avoid code duplication; added `shutdown_summary_timeout_secs` (default: 10) to `[memory]` config to replace the hardcoded 5s limit (closes #1869) -- fix(security): redact JWT Bearer tokens in `redact_sensitive()` — `Authorization: Bearer ` headers and standalone JWT strings (`eyJ...`) are now replaced with `[REDACTED]`/`[REDACTED_JWT]` before `compression_failure_pairs` SQLite insert (closes #1847) -- fix(memory): widen soft compaction window — lower `soft_compaction_threshold` default from `0.70` to `0.60`, widening the soft tier firing range from 20% to 30% of the context budget; prevents large tool outputs (10–30k tokens) from jumping directly past soft into hard compaction; add `maybe_soft_compact_mid_iteration()` called after per-tool summarization in native and legacy tool loops so context pressure is relieved without touching turn counters, cooldown, or triggering LLM calls; config validation that `soft < hard` was already enforced and remains in place (closes #1828) -- fix(security): redact secrets and filesystem paths in compression_failure_pairs before SQLite storage (#1801) -- fix(llm): strip URL path in `parse_host_port` — Ollama `base_url` with `/v1` suffix no longer produces 404 on embed calls (#1832) -- Qdrant collection dimension mismatch when switching embedding models on collections with 0 points (#1815) -- fix(debug): trace.json now written inside per-session subdir, preventing overwrites (#1814) -- A-MEM note linking never created `similar_to` edges because `EntityResolver` in `extract_and_store` was constructed without `with_embedding_store()`, leaving 
`zeph_graph_entities` unpopulated; pass the Qdrant embedding store through to the resolver so entity embeddings are stored and note linking can find semantically similar entities across sessions (#1817) -- graph-memory: entity embeddings now correctly stored in Qdrant — `EntityResolver` was built without a provider in `extract_and_store()`, causing `store_entity_embedding()` to never be called and `zeph_graph_entities` collection to remain empty (fixes #1829) -- fix(core): JIT tool reference injection now works after overflow migration to SQLite — `OVERFLOW_NOTICE_PREFIX` and `extract_overflow_ref()` updated to match the `overflow:{uuid}` format; pruned tool output notices now read `[tool output pruned; use read_overflow {uuid} to retrieve]` instead of a stale file-path reference (closes #1818) - ## [0.15.1] - 2026-03-15 ### Fixed - fix(memory): `save_compression_guidelines` now uses a single atomic `INSERT ... SELECT COALESCE(MAX(version), 0) + 1` statement instead of a read-then-write pattern, eliminating the TOCTOU race where two concurrent callers could insert duplicate version numbers; migration 033 adds a `UNIQUE(version)` constraint to the `compression_guidelines` table with row-level deduplication for pre-existing corrupt data (closes #1799) -### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) + +- fix(memory): add `edge_history_limit` config field to `[memory.graph]` (default 100); `GraphStore::edge_history()` already accepted a `limit` parameter but callers had no config-driven default — future TUI/API call sites must read `config.memory.graph.edge_history_limit` instead of hardcoding a value (closes #1778) +- 
fix(llm): `cascade_chat` and `cascade_chat_stream` no longer store an empty-string provider response as `best_seen`; a provider returning `""` is now skipped for best-seen tracking so the caller receives an explicit error instead of a silent empty response on all-fail fallback (#1754) +- fix(tui): skip ACP stdio/both autostart when `--tui` is active; stdio and TUI are mutually exclusive (both own stdin/stdout); HTTP transport is still allowed alongside TUI when `acp-http` feature is enabled (#1729) +- fix(mcp): suppress MCP child process stderr in TUI mode to prevent ratatui display corruption; `McpManager` gains `with_suppress_stderr` builder method (#1729) +- fix(llm): `cascade_chat_stream` now tracks best-seen response across early providers (#1722); on token budget exhaustion with a would-escalate response the highest-scoring prior response is returned; when the last provider fails and an early provider succeeded, the best-seen response is returned instead of propagating the error — achieving parity with `cascade_chat` +- fix(llm): `cascade_chat` and `cascade_chat_stream` now return the best-seen response when `escalations_remaining == 0` and the current response would have triggered escalation, matching the existing budget-exhaustion behaviour and closing the parity gap with `best_seen` tracking (#1755) + ### Added - feat(memory,core): ACON failure-driven compression guidelines (#1647) — after a hard compaction, the agent watches subsequent LLM responses for two-signal context-loss indicators (uncertainty phrase + prior-context reference); confirmed failure pairs are stored in SQLite (`compression_failure_pairs`); a background updater wakes periodically, calls the LLM to synthesise updated guidelines from accumulated pairs, sanitizes the output to strip prompt injection, and persists the result; guidelines are injected into every future compaction prompt via a `` block; `CompressionGuidelinesConfig` in `[memory.compression_guidelines]` (disabled by default); 
addresses all critic findings including two-signal false-positive guard, `enabled` guard ordering, LLM timeout, prompt injection sanitization, field truncation, and cleanup policy @@ -5052,7 +5003,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - feat(llm): `--extended-context` CLI flag enables Claude 1M context window for the session; overrides `llm.cloud.enable_extended_context` from config and emits a cost warning (tokens above 200K use long-context pricing) (#1685) - test(llm): add `build_request` integration test for extended context enabled path, asserting `anthropic-beta` header contains `context-1m-2025-08-07` (#1687) -### Changed - perf(tools): cache leaf string values extracted from each tool call's input JSON in `ToolCallDag`; expose via `string_values_for(idx)` and reuse in `native.rs` tier dispatch to eliminate the redundant `extract_string_values` traversal (closes #1714) - refactor(mcp,core): extract the 17 injection-detection regexes into `zeph_mcp::sanitize::RAW_INJECTION_PATTERNS` (`pub const`); `zeph-core`'s `ContentSanitizer` now compiles its `INJECTION_PATTERNS` from this single shared slice instead of maintaining a duplicate list — any future pattern change is automatically reflected in both sanitization layers. Also fixes two patterns in `zeph-core` that were missing the `(?i)` case-insensitive flag (`xml_tag_injection`, `markdown_image_exfil`) which existed in the `zeph-mcp` copy but had drifted out (closes #1747) @@ -5077,15 +5027,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- refactor: eliminate all `#[allow(clippy::too_many_lines)]` suppressions workspace-wide (#1734); extract helper functions from `loopback_event_to_updates`, `prompt`, `new_session`, `load_session`, `fork_session`, `resume_session`, `set_session_config_option` in `zeph-acp`, and `push_event` in `zeph-tui`; zero behavior change -### Fixed - -- fix(memory): add `edge_history_limit` config field to `[memory.graph]` (default 100); `GraphStore::edge_history()` already accepted a `limit` parameter but callers had no config-driven default — future TUI/API call sites must read `config.memory.graph.edge_history_limit` instead of hardcoding a value (closes #1778) -- fix(llm): `cascade_chat` and `cascade_chat_stream` no longer store an empty-string provider response as `best_seen`; a provider returning `""` is now skipped for best-seen tracking so the caller receives an explicit error instead of a silent empty response on all-fail fallback (#1754) -- fix(tui): skip ACP stdio/both autostart when `--tui` is active; stdio and TUI are mutually exclusive (both own stdin/stdout); HTTP transport is still allowed alongside TUI when `acp-http` feature is enabled (#1729) -- fix(mcp): suppress MCP child process stderr in TUI mode to prevent ratatui display corruption; `McpManager` gains `with_suppress_stderr` builder method (#1729) -- fix(llm): `cascade_chat_stream` now tracks best-seen response across early providers (#1722); on token budget exhaustion with a would-escalate response the highest-scoring prior response is returned; when the last provider fails and an early provider succeeded, the best-seen response is returned instead of propagating the error — achieving parity with `cascade_chat` -- fix(llm): `cascade_chat` and `cascade_chat_stream` now return the best-seen response when `escalations_remaining == 0` and the current response would have triggered escalation, matching the existing budget-exhaustion behaviour and closing the parity gap with `best_seen` tracking (#1755) - ## 
[0.15.0] - 2026-03-14 ### Changed @@ -5097,6 +5038,26 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). is still accepted via serde alias and maps to `hard_compaction_threshold`. `deferred_apply_threshold` is removed — absorbed into the soft compaction tier. + +- `zeph-core`: parallel tool dispatch now respects intra-turn `tool_use_id` dependencies — independent calls execute concurrently, dependent calls execute in topological tiers (closes #1646). A lightweight `ToolCallDag` (Kahn's algorithm) partitions tool calls into parallel tiers; when no dependencies exist the existing `join_all` fast path is used with zero overhead. Dependent calls whose prerequisite failed or requires confirmation receive a synthetic error. Cycle detection falls back to sequential execution of all calls. +- **Claude 3 model ID retirement** (#1625): replaced retired Claude 3 model IDs (`claude-3-opus`, `claude-3`, `claude:claude-3-5-sonnet`) with `claude-sonnet-4-6` in test files. `ClaudeProvider::new()` now emits a `tracing::warn!` when the configured model starts with `claude-3`, alerting users with stale configs before the first API call fails. + + +- Share a single `QdrantOps` instance (one gRPC channel) across all subsystems at startup: `AppBuilder::new()` constructs `QdrantOps` once when `vector_backend = "qdrant"` and propagates it via clone (O(1) `Arc` bump) to `SemanticMemory`, `QdrantSkillMatcher`, `McpToolRegistry`, and `CodeStore`. Previously 4+ independent gRPC channels were created. Invalid `qdrant_url` when `vector_backend = "qdrant"` is now a hard startup error instead of a silent `None`. URL-based constructors (`QdrantSkillMatcher::new`, `McpToolRegistry::new`, `CodeStore::new`) are replaced by `::with_ops(ops)` variants. 
(#1337) +- Consolidate `is_private_ip` (SSRF IP check) into `zeph-tools::net::is_private_ip` (canonical superset with CGNAT `100.64.0.0/10`); update `zeph-mcp`, `zeph-acp`, `zeph-tools/scrape` to use it; upgrade A2A's own copy with CGNAT range (DEDUP-01) +- Consolidate `cosine_similarity` into `zeph-memory::math::cosine_similarity` (single-pass loop, length guard); update all callers in `zeph-memory` and `zeph-skills` (DEDUP-02) +- Restore parallel tool execution: `handle_native_tool_calls()` now runs all independent tool calls concurrently via `join_all` bounded by `max_parallel_tools` semaphore (previously serialized by PR #1340). Phase 2 retries only transient failures on executors that explicitly opt in (`WebScrapeExecutor`); `ShellExecutor` is never retried. Self-reflection early-return paths emit actual parallel results instead of synthetic `[skipped]` messages. Fixes PERF-1 (#1403) +- Add `text::truncate_chars(&str, usize) -> &str` to `zeph-core::text`; replace `context/mod.rs::truncate_chars` with a re-export of the canonical version (DEDUP-03) +- Split all four `#[cfg(test)]` blocks from `agent/mod.rs` (~3190 lines) into `agent/tests.rs`; reduce `agent/mod.rs` from 6282 to ~3096 lines (SPLIT-01) +- Split `zeph-acp/agent.rs` into `agent/mod.rs` (2137 lines), `agent/helpers.rs` (547 lines helpers), `agent/tests.rs` (3396 lines tests); reduce main impl file from 6097 to 2137 lines (SPLIT-02) +- Update insta snapshot `config_default_snapshot` to reflect removal of deprecated `[lsp]` config section +- Split `agent/tool_execution.rs` (5426 lines) into `tool_execution/mod.rs`, `tool_execution/legacy.rs`, `tool_execution/native.rs` for improved navigability (ARCH-06) +- Split `agent/context.rs` (5590 lines) into `context/mod.rs`, `context/assembly.rs`, `context/summarization.rs` for improved navigability (ARCH-07) +- Replace 11-parameter `Channel::send_tool_output` signature with `ToolOutputEvent` struct; replace 4-parameter `send_tool_start` with `ToolStartEvent` 
struct (ARCH-02) +- Extract `SecurityState` struct (sanitizer, quarantine_summarizer, exfiltration_guard, flagged_urls) and `DebugState` struct (debug_dumper, dump_format, anomaly_detector, logging_config) from `Agent` struct; access via `agent.security.*` and `agent.debug_state.*` (ARCH-01) +- Expand `AgentError` with `Shutdown`, `ContextExhausted`, `ToolTimeout`, `SchemaValidation` variants; change `Agent::run` return type from `anyhow::Result<()>` to `Result<(), AgentError>` (ARCH-10) +- Add `AgentTestHarness` builder struct with `new()`, `with_responses()`, `with_registry()`, `with_tool_outputs()`, and `build()` to the test module for cleaner agent unit tests (ARCH-08) + ### Fixed - Context compaction loop when budget too tight: added cooldown guard (`compaction_cooldown_turns`, default 2), counterproductive summary guard (marks exhausted when net freed tokens is zero — summary consumed all freed space), exhaustion guard (marks exhausted when context remains above threshold after compaction — further attempts unlikely to help), and user-visible warning when compaction is exhausted (#1708) @@ -5114,46 +5075,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Fix anomaly detector not recording outcomes for native tool_use providers (Claude, OpenAI, Gemini) (#1677) - OpenAI: tools with no parameters (empty struct schemas) no longer cause `400 Bad Request` in strict mode; `parameters` field is omitted for no-param tools, matching the Gemini provider behavior (fixes #1673) -### Changed - -- `zeph-core`: parallel tool dispatch now respects intra-turn `tool_use_id` dependencies — independent calls execute concurrently, dependent calls execute in topological tiers (closes #1646). A lightweight `ToolCallDag` (Kahn's algorithm) partitions tool calls into parallel tiers; when no dependencies exist the existing `join_all` fast path is used with zero overhead. 
Dependent calls whose prerequisite failed or requires confirmation receive a synthetic error. Cycle detection falls back to sequential execution of all calls. -- **Claude 3 model ID retirement** (#1625): replaced retired Claude 3 model IDs (`claude-3-opus`, `claude-3`, `claude:claude-3-5-sonnet`) with `claude-sonnet-4-6` in test files. `ClaudeProvider::new()` now emits a `tracing::warn!` when the configured model starts with `claude-3`, alerting users with stale configs before the first API call fails. - -### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) -### Added - -- **Integration test for `ConfirmationRequired` dependency propagation in tiered dispatch** (closes #1713): added `confirmation_propagation_tests` module to `zeph-core` agent tests with two tests — `confirmation_required_propagates_to_dependent_tier` verifies that a tier-1 dependent tool receives a synthetic `ToolResult::Error` containing "Skipped: a prerequisite tool failed or requires confirmation" when the tier-0 prerequisite returns `ConfirmationRequired`; `independent_tool_not_affected_by_confirmation_required` verifies that an independent tool in the same dispatch executes normally. -- **Cascade routing strategy** (closes #1339): new `RouterStrategy::Cascade` in `zeph-llm`. When `strategy = "cascade"` is configured, the router tries providers in chain order (cheapest first) and escalates to the next provider only when the response is classified as degenerate (empty, repetitive, incoherent). 
The heuristic classifier (`ClassifierMode::Heuristic`, default) detects degenerate outputs only — not semantic failures; `ClassifierMode::Judge` (requires `summary_model`) provides LLM-based quality scoring with automatic fallback to heuristic on failure. Key behaviors: network/API errors do not consume the escalation budget; the best-seen response is returned on exhaustion (not `NoProviders`); `max_cascade_tokens` caps cumulative token cost across escalation levels; cascade is intentionally skipped for `chat_with_tools`; Thompson/EMA outcome tracking is not contaminated by quality-based failures. Config: `[llm.router.cascade]` section with `quality_threshold` (default 0.5), `max_escalations` (default 2), `classifier_mode`, `window_size`, `max_cascade_tokens`. - -- **Gemini `thinking_level` / `thinking_budget` support** (closes #1652): `GeminiThinkingLevel` enum (`Minimal/Low/Medium/High`, lowercase serde matching Gemini API) and `GeminiThinkingConfig` struct (`thinkingLevel`, `thinkingBudget`, `includeThoughts`, camelCase per API spec) added to `zeph-llm`. `GeminiProvider` gains builder methods `with_thinking_level()`, `with_thinking_budget()` (fallible — validates -1/0/1..=32768, returns `LlmError` on out-of-range), and `with_include_thoughts()`. `GeminiConfig` in `zeph-core` gains `thinking_level`, `thinking_budget`, and `include_thoughts` optional fields. Thinking config is wired at all three `GeminiProvider` construction sites (primary, orchestrator, router). `--init` wizard adds a `thinking_level` select prompt in the Gemini section. Applies to Gemini 3+ (`thinkingLevel`) and Gemini 2.5 (`thinkingBudget`) models. -- **Async parallel dispatch in `DagScheduler`** (closes #1628): `DagScheduler::tick()` now dispatches all ready tasks in a single tick instead of capping at `max_parallel - running_in_graph`. 
Concurrency is enforced by `SubAgentManager` which returns `ConcurrencyLimit` when capacity is exceeded; tasks revert to `Ready` and are retried on the next tick. Event buffer guard in `wait_event()` changed from `max_parallel * 2` to `graph.tasks.len() * 2` to prevent dropped completion events during parallel bursts. Added `record_batch_backoff(any_success, any_concurrency_failure)` for batch-aware backoff tracking: the `consecutive_spawn_failures` counter now increments once per all-failed tick rather than once per rejected spawn, preventing incorrect exponential backoff after concurrent rejections from the same batch. - -- **Claude server-side context compaction** (`compact-2026-01-12` beta, closes #1626): `ClaudeProvider` gains `server_compaction: bool` and sends `context_management: { type: "enabled", trigger_tokens: N }` in all request bodies when enabled. The `compact-2026-01-12` beta header is appended alongside any existing beta headers. SSE parser is now stateful (`ClaudeSseState`) and accumulates `compaction`-typed content blocks across events, emitting `StreamChunk::Compaction(summary)`. Non-streaming path stores the compaction summary via `take_compaction_summary()` on the trait. Agent loop (both native and legacy streaming paths) handles compaction by pruning old messages and inserting a synthetic `MessagePart::Compaction` assistant turn for round-trip fidelity. Client-side `maybe_compact` and `maybe_proactive_compress` return early when server compaction is active. New metric `server_compaction_events` tracks compaction occurrences. Configurable via `[llm.cloud] server_compaction = true`, `--server-compaction` CLI flag, and `--init` wizard. 
-- **COV-03 scheduler-loop integration test** (#1611): adds `scheduler_loop_queues_non_cancel_message` to `agent/tests.rs`, verifying end-to-end that a non-cancel message delivered via `channel.recv()` during `run_scheduler_loop` is passed to `enqueue_or_merge()` and appears in `agent.message_queue` after the loop exits. Complements the `enqueue_or_merge` unit tests in `message_queue.rs`. -- **Claude 1M extended context window** (#1649): adds `enable_extended_context: bool` to `CloudLlmConfig` (default `false`). When enabled, `ClaudeProvider` injects `anthropic-beta: context-1m-2025-08-07` into all API requests, unlocking the 1M token context window for Opus 4.6 and Sonnet 4.6. `context_window()` now returns `1_000_000` instead of `200_000` when the flag is set, so the auto-budget correctly scales to 1M. All four Claude construction sites in `bootstrap/provider.rs` wire the flag (summary provider intentionally skips it — summaries are capped at 4096 tokens). `--init` wizard adds a Confirm prompt after the thinking mode question. An INFO log is emitted at provider construction when extended context is active. - -- **Gemini SSE TODO for Phase 4 (streaming tool use)**: added a TODO comment in `parse_gemini_sse_event()` documenting that `GeminiStreamPart` lacks a `function_call` field and that `functionCall` SSE chunks are silently dropped. `chat_with_tools()` uses the non-streaming endpoint today, so this is safe; the TODO tracks Phase 4 work (extend `GeminiStreamPart` and handle `functionCall` parts in the SSE loop). Closes #1639. -- **Gemini `uppercase_types` test coverage** (#1636): added unit tests for `number`, `boolean`, `array`, and `null` JSON Schema type names in `crates/zeph-llm/src/gemini.rs`. Previously only `string`, `object`, and `integer` were covered; `array` test also verifies recursive `items.type` uppercasing. 
-- **Gemini schema conversion edge case tests** (#1637): adds 5 unit tests in `zeph-llm` covering previously untested paths: `oneOf` Option<T> pattern, null-first `anyOf` order, unknown `$ref` fallback (→ `OBJECT`/`"unresolved reference"`), nested multi-level `$ref` chain (A→B→C), and parameterless tools declarations guard. Part of #1592. -- **Router debug logging**: `RouterProvider` now emits `tracing::debug!` on every provider selection — Thompson selections include `alpha`, `beta`, and `mode` (exploit/explore); EMA selections include `latency_ema_ms`. Closes #1388. - -- **`/scheduler list` command and `list_tasks` tool**: adds `list_jobs_full()` to `JobStore` returning a new `ScheduledTaskInfo` struct with `name`, `kind`, `task_mode`, `cron_expr`, and `next_run` fields. Adds a `list_tasks` LLM tool to `SchedulerExecutor` (fenced block dispatch, registered in `tool_definitions()`). Adds `/scheduler list` slash command in `zeph-core` (dispatches through `tool_executor.execute_tool_call_erased` — no new cross-crate dependency). `/scheduler` with no subcommand also lists tasks; unknown subcommands show help. `/scheduler` entry added to the help registry, feature-gated on `scheduler`. Closes #1423. -- **5-field cron expression support in scheduler**: `normalize_cron_expr()` now accepts standard 5-field cron expressions (e.g. `*/5 * * * *`) by auto-prepending `0` for the seconds field. All three parse sites (`ScheduledTask::periodic`, `SchedulerExecutor::schedule_periodic`, `load_config_tasks`) and the DB persistence path now use the normalized 6-field form. Closes #1422. - -- **Chunked edge loading in community detection**: `detect_communities` now loads edges in configurable chunks (keyset pagination via `WHERE id > ?1 LIMIT ?2`) instead of loading all edges at once, reducing peak memory proportional to chunk size on large graphs. Configurable via `GraphConfig.lpa_edge_chunk_size` (default 10,000); `chunk_size = 0` falls back to the legacy full-stream path. 
Closes #1259. - -- **Gemini provider** (Phase 6): real remote model discovery via `GET /v1beta/models`. `GeminiProvider::list_models_remote()` fetches all available Gemini models, filters to `generateContent`-capable ones (excluding embedding-only models such as `text-embedding-004`), maps to `RemoteModelInfo` (strips `models/` prefix, populates `context_window` from `inputTokenLimit`), and persists via `ModelCache`. `AnyProvider::list_models_remote()` now delegates to the real implementation instead of the hardcoded static list. Authentication uses the existing `x-goog-api-key` header; request is retried via `send_with_retry` for transient 429/503 errors; 401/403 return a specific auth error message. Part of epic #1592, closes #1598. -- **Gemini provider** (Phase 5): `embedContent` endpoint for semantic embeddings. `GeminiConfig` gains an optional `embedding_model` field (e.g. `text-embedding-004`); when set, `supports_embeddings()` returns `true`. `embed()` calls `POST /v1beta/models/{model}:embedContent?key=...` with `taskType: RETRIEVAL_QUERY`. Error handling reuses `parse_gemini_error()` — 429 RESOURCE_EXHAUSTED correctly maps to `LlmError::RateLimited`. Empty string is rejected in `with_embedding_model()`. Configured embedding model appears in `list_models()`. Bootstrap wires `embedding_model` at primary provider creation sites (`create_named_provider`, `create_provider_from_config`). Compatible with the existing Qdrant/SemanticMemory pipeline. Part of epic #1592, closes #1597. -- **Gemini provider** (Phase 4): vision / multimodal input via `inlineData` parts. `MessagePart::Image` is now converted to `{ "inlineData": { "mimeType": "...", "data": "" } }` parts in `contents[].parts[]`. Multiple images per message and mixed text + image parts in a single message are both supported. `supports_vision()` returns `true` for all Gemini 2.0+ models. Part of epic #1592, closes #1596. 
-- **Gemini provider** (Phase 3): native tool use / function calling via `tools` + `functionDeclarations`. `supports_tool_use()` returns `true`. `chat_with_tools()` converts `ToolDefinition` to Gemini `functionDeclarations` with a schema normalization pipeline: `$ref`/`$defs` inlining (depth 8), allowlist cleanup (`anyOf`/`oneOf` Option → `nullable`), and type name uppercasing. Tool calls parsed from `functionCall` parts into `ChatResponse::ToolUse` with UUID-generated IDs (Gemini provides none). Tool results sent as `functionResponse` parts in a user message with a name lookup from conversation history. `toolConfig.functionCallingConfig.mode` set to `AUTO`. Empty declarations fall back to regular `chat()`. Part of epic #1592, closes #1595. -- **Gemini provider** (Phase 2): SSE streaming via `streamGenerateContent?alt=sse`. `chat_stream()` now produces `StreamChunk::Content` chunks; Gemini 2.5 thinking parts (`thought: true`) are emitted as `StreamChunk::Thinking`. `supports_streaming()` returns `true`. `GeminiProvider` gains `status_tx: Option` field with `with_status_tx()`/`set_status_tx()` builders; `AnyProvider::set_status_tx()` now propagates the sender to the Gemini arm. Both streaming and non-streaming paths use `status_tx` for retry notifications. API key stays in the `x-goog-api-key` header (never in URL query params). Part of epic #1592, closes #1594. -- **Gemini provider** (Phase 1): new `GeminiProvider` in `crates/zeph-llm/src/gemini.rs` supporting basic `generateContent` chat via the Google Gemini API. Authentication via `x-goog-api-key` header (not URL query param). System prompt extracted to `systemInstruction` top-level field; assistant role mapped to `"model"`. Consecutive same-role messages merged to satisfy Gemini's strict `user`/`model` alternation requirement. First-message guard: if the first content is a `"model"` turn, a synthetic empty user message is prepended. 
Configurable `base_url` (default `https://generativelanguage.googleapis.com/v1beta`), `model` (default `gemini-2.0-flash`), and `max_tokens`. JSON serialized once before retry loop. HTTP 429 and 503 retried via shared `send_with_retry()`. `ProviderKind::Gemini`, `GeminiConfig`, and `[llm.gemini]` TOML section added; `ZEPH_GEMINI_API_KEY` vault key supported; `--init` wizard updated. Part of epic #1592, closes #1593. -- **Opus 4.6 effort parameter GA**: `ThinkingCapability` gains `prefers_effort: bool` (true for `claude-opus-4-6`). `build_thinking_param()` now auto-converts `ThinkingConfig::Extended { budget_tokens }` to adaptive thinking with an `effort` level for Opus 4.6 models, emitting a `tracing::warn!` deprecation notice. `budget_to_effort()` maps budget values to `ThinkingEffort` levels (`< 5000` → Low, `< 15000` → Medium, `>= 15000` → High). `build_request()` strips trailing assistant messages for Opus 4.6 with thinking enabled (no-prefill constraint). Closes #1627. - -### Fixed - Fix anomaly detector not recording outcomes for native tool_use providers (Claude, OpenAI, Gemini) (#1677) @@ -5168,11 +5092,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - **ACP graph memory extraction silently disabled**: `spawn_acp_agent` in `src/acp.rs` now calls `agent.with_graph_config()` with the `[memory.graph]` config section. Previously the `graph_config` field in `MemoryState` defaulted to `GraphConfig { enabled: false }`, causing `maybe_spawn_graph_extraction()` to return early for every ACP session regardless of user configuration. Closes #1633. - **ACP anomaly detector and orchestration config not wired**: `spawn_acp_agent` in `src/acp.rs` now calls `agent.with_anomaly_detector()` when `[tools.anomaly] enabled = true` and `agent.with_orchestration_config()` unconditionally — mirroring the `runner.rs` pattern. 
Previously both `debug_state.anomaly_detector` and `orchestration_config` defaulted to their disabled values, silently disabling tool-output anomaly detection and plan orchestration for all ACP sessions (Zed, Helix, VS Code) regardless of TOML configuration. Closes #1643, #1642. -### Tests - -- Added regression test `execute_confirmed_blocked_command_rejected` in `zeph-tools`: asserts that `execute_confirmed()` with a blocklisted command returns `ToolError::Blocked`, covering the code path fixed in #1529 (closes #1530). - -### Fixed - **ACP sessions now receive document RAG and graph memory configuration**: `spawn_acp_agent` was not calling `with_document_config()` or `with_graph_config()`, so `DocumentConfig::default()` (`rag_enabled = false`) and `GraphConfig::default()` (`enabled = false`) were silently applied regardless of TOML settings. Both configs are now propagated through `SharedAgentDeps` and applied to every ACP session, matching the behavior in `runner.rs`. Closes #1634 and #1633. - `ModelOrchestrator` no longer logs `INFO falling back to default provider` on every request when no router chain is configured (the normal orchestrator path). The message is now `DEBUG` when no chain providers were attempted; `INFO` is kept only when a chain was configured but all providers failed — the genuine fallback case. Closes #1484. @@ -5180,14 +5099,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- Prevent `DagScheduler` deadlock when `SubAgentManager` concurrency is exhausted during planning phase (#1619): default `max_concurrent` raised from 1 to 5; `SubAgentManager` now supports slot reservation (`reserve_slots` / `release_reservation`); startup warning when `max_concurrent < max_parallel + 1` - HTTP 503 (`SERVICE_UNAVAILABLE`) responses are now retried by `send_with_retry()` alongside 429, benefiting all LLM providers (#1593) -### Security - -- SEC-001: Replace `DefaultHasher` with a process-scoped `RandomState`-seeded SipHash-1-3 in `tool_args_hash()` to prevent adversarial hash collision bypasses of the repeat-detection window (#1399) -- SEC-002: Replace `SystemTime::now().subsec_nanos()` jitter with `rand::rng().random_range()` in `retry_backoff_ms()` to eliminate predictable retry timing that could be exploited by an adversary (#1400) -- SEC-003: Truncate tool names to 256 bytes at UTF-8 boundaries before storing in the `recent_tool_calls` sliding window to prevent unbounded memory growth from adversarially long names (#1401) -- SEC-004: Add `max_retry_duration_secs` (default 30) wall-clock retry budget to `AgentConfig`; the retry loop in `handle_native_tool_calls()` breaks when the budget is exhausted even if attempts remain, preventing indefinite retry loops (#1402) - -### Fixed - `/plan cancel` is now delivered during active plan execution: `run_scheduler_loop()` polls `channel.recv()` concurrently with `scheduler.wait_event()` via `tokio::select!`. Receiving `/plan cancel` calls `cancel_all()`, processes the returned `Cancel` actions to abort sub-agent tasks, and exits the loop with `GraphStatus::Canceled`. Non-cancel messages received during execution are queued for processing after plan completion. Fixes #1603. @@ -5213,12 +5124,40 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- ACP `[acp] enabled = true` in config now auto-starts the server without requiring `--acp` CLI flag; `--acp` and `--acp-http` remain functional and bypass the config field (#1574, #1590) - `apply_code_index()` now starts `CodeIndexer` and `IndexWatcher` for native-tool-use providers so the tree-sitter index is available to the `search_code` tool regardless of provider type (#1556, #1591) -### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) ### Added +- **Integration test for `ConfirmationRequired` dependency propagation in tiered dispatch** (closes #1713): added `confirmation_propagation_tests` module to `zeph-core` agent tests with two tests — `confirmation_required_propagates_to_dependent_tier` verifies that a tier-1 dependent tool receives a synthetic `ToolResult::Error` containing "Skipped: a prerequisite tool failed or requires confirmation" when the tier-0 prerequisite returns `ConfirmationRequired`; `independent_tool_not_affected_by_confirmation_required` verifies that an independent tool in the same dispatch executes normally. +- **Cascade routing strategy** (closes #1339): new `RouterStrategy::Cascade` in `zeph-llm`. When `strategy = "cascade"` is configured, the router tries providers in chain order (cheapest first) and escalates to the next provider only when the response is classified as degenerate (empty, repetitive, incoherent). The heuristic classifier (`ClassifierMode::Heuristic`, default) detects degenerate outputs only — not semantic failures; `ClassifierMode::Judge` (requires `summary_model`) provides LLM-based quality scoring with automatic fallback to heuristic on failure. 
Key behaviors: network/API errors do not consume the escalation budget; the best-seen response is returned on exhaustion (not `NoProviders`); `max_cascade_tokens` caps cumulative token cost across escalation levels; cascade is intentionally skipped for `chat_with_tools`; Thompson/EMA outcome tracking is not contaminated by quality-based failures. Config: `[llm.router.cascade]` section with `quality_threshold` (default 0.5), `max_escalations` (default 2), `classifier_mode`, `window_size`, `max_cascade_tokens`. + +- **Gemini `thinking_level` / `thinking_budget` support** (closes #1652): `GeminiThinkingLevel` enum (`Minimal/Low/Medium/High`, lowercase serde matching Gemini API) and `GeminiThinkingConfig` struct (`thinkingLevel`, `thinkingBudget`, `includeThoughts`, camelCase per API spec) added to `zeph-llm`. `GeminiProvider` gains builder methods `with_thinking_level()`, `with_thinking_budget()` (fallible — validates -1/0/1..=32768, returns `LlmError` on out-of-range), and `with_include_thoughts()`. `GeminiConfig` in `zeph-core` gains `thinking_level`, `thinking_budget`, and `include_thoughts` optional fields. Thinking config is wired at all three `GeminiProvider` construction sites (primary, orchestrator, router). `--init` wizard adds a `thinking_level` select prompt in the Gemini section. Applies to Gemini 3+ (`thinkingLevel`) and Gemini 2.5 (`thinkingBudget`) models. +- **Async parallel dispatch in `DagScheduler`** (closes #1628): `DagScheduler::tick()` now dispatches all ready tasks in a single tick instead of capping at `max_parallel - running_in_graph`. Concurrency is enforced by `SubAgentManager` which returns `ConcurrencyLimit` when capacity is exceeded; tasks revert to `Ready` and are retried on the next tick. Event buffer guard in `wait_event()` changed from `max_parallel * 2` to `graph.tasks.len() * 2` to prevent dropped completion events during parallel bursts. 
Added `record_batch_backoff(any_success, any_concurrency_failure)` for batch-aware backoff tracking: the `consecutive_spawn_failures` counter now increments once per all-failed tick rather than once per rejected spawn, preventing incorrect exponential backoff after concurrent rejections from the same batch. + +- **Claude server-side context compaction** (`compact-2026-01-12` beta, closes #1626): `ClaudeProvider` gains `server_compaction: bool` and sends `context_management: { type: "enabled", trigger_tokens: N }` in all request bodies when enabled. The `compact-2026-01-12` beta header is appended alongside any existing beta headers. SSE parser is now stateful (`ClaudeSseState`) and accumulates `compaction`-typed content blocks across events, emitting `StreamChunk::Compaction(summary)`. Non-streaming path stores the compaction summary via `take_compaction_summary()` on the trait. Agent loop (both native and legacy streaming paths) handles compaction by pruning old messages and inserting a synthetic `MessagePart::Compaction` assistant turn for round-trip fidelity. Client-side `maybe_compact` and `maybe_proactive_compress` return early when server compaction is active. New metric `server_compaction_events` tracks compaction occurrences. Configurable via `[llm.cloud] server_compaction = true`, `--server-compaction` CLI flag, and `--init` wizard. +- **COV-03 scheduler-loop integration test** (#1611): adds `scheduler_loop_queues_non_cancel_message` to `agent/tests.rs`, verifying end-to-end that a non-cancel message delivered via `channel.recv()` during `run_scheduler_loop` is passed to `enqueue_or_merge()` and appears in `agent.message_queue` after the loop exits. Complements the `enqueue_or_merge` unit tests in `message_queue.rs`. +- **Claude 1M extended context window** (#1649): adds `enable_extended_context: bool` to `CloudLlmConfig` (default `false`). 
When enabled, `ClaudeProvider` injects `anthropic-beta: context-1m-2025-08-07` into all API requests, unlocking the 1M token context window for Opus 4.6 and Sonnet 4.6. `context_window()` now returns `1_000_000` instead of `200_000` when the flag is set, so the auto-budget correctly scales to 1M. All four Claude construction sites in `bootstrap/provider.rs` wire the flag (summary provider intentionally skips it — summaries are capped at 4096 tokens). `--init` wizard adds a Confirm prompt after the thinking mode question. An INFO log is emitted at provider construction when extended context is active. + +- **Gemini SSE TODO for Phase 4 (streaming tool use)**: added a TODO comment in `parse_gemini_sse_event()` documenting that `GeminiStreamPart` lacks a `function_call` field and that `functionCall` SSE chunks are silently dropped. `chat_with_tools()` uses the non-streaming endpoint today, so this is safe; the TODO tracks Phase 4 work (extend `GeminiStreamPart` and handle `functionCall` parts in the SSE loop). Closes #1639. +- **Gemini `uppercase_types` test coverage** (#1636): added unit tests for `number`, `boolean`, `array`, and `null` JSON Schema type names in `crates/zeph-llm/src/gemini.rs`. Previously only `string`, `object`, and `integer` were covered; `array` test also verifies recursive `items.type` uppercasing. +- **Gemini schema conversion edge case tests** (#1637): adds 5 unit tests in `zeph-llm` covering previously untested paths: `oneOf` Option<T> pattern, null-first `anyOf` order, unknown `$ref` fallback (→ `OBJECT`/`"unresolved reference"`), nested multi-level `$ref` chain (A→B→C), and parameterless tools declarations guard. Part of #1592. +- **Router debug logging**: `RouterProvider` now emits `tracing::debug!` on every provider selection — Thompson selections include `alpha`, `beta`, and `mode` (exploit/explore); EMA selections include `latency_ema_ms`. Closes #1388. 
+ +- **`/scheduler list` command and `list_tasks` tool**: adds `list_jobs_full()` to `JobStore` returning a new `ScheduledTaskInfo` struct with `name`, `kind`, `task_mode`, `cron_expr`, and `next_run` fields. Adds a `list_tasks` LLM tool to `SchedulerExecutor` (fenced block dispatch, registered in `tool_definitions()`). Adds `/scheduler list` slash command in `zeph-core` (dispatches through `tool_executor.execute_tool_call_erased` — no new cross-crate dependency). `/scheduler` with no subcommand also lists tasks; unknown subcommands show help. `/scheduler` entry added to the help registry, feature-gated on `scheduler`. Closes #1423. +- **5-field cron expression support in scheduler**: `normalize_cron_expr()` now accepts standard 5-field cron expressions (e.g. `*/5 * * * *`) by auto-prepending `0` for the seconds field. All three parse sites (`ScheduledTask::periodic`, `SchedulerExecutor::schedule_periodic`, `load_config_tasks`) and the DB persistence path now use the normalized 6-field form. Closes #1422. + +- **Chunked edge loading in community detection**: `detect_communities` now loads edges in configurable chunks (keyset pagination via `WHERE id > ?1 LIMIT ?2`) instead of loading all edges at once, keeping peak memory proportional to the chunk size on large graphs. Configurable via `GraphConfig.lpa_edge_chunk_size` (default 10,000); `chunk_size = 0` falls back to the legacy full-stream path. Closes #1259. + +- **Gemini provider** (Phase 6): real remote model discovery via `GET /v1beta/models`. `GeminiProvider::list_models_remote()` fetches all available Gemini models, filters to `generateContent`-capable ones (excluding embedding-only models such as `text-embedding-004`), maps to `RemoteModelInfo` (strips `models/` prefix, populates `context_window` from `inputTokenLimit`), and persists via `ModelCache`. `AnyProvider::list_models_remote()` now delegates to the real implementation instead of the hardcoded static list. 
Authentication uses the existing `x-goog-api-key` header; request is retried via `send_with_retry` for transient 429/503 errors; 401/403 return a specific auth error message. Part of epic #1592, closes #1598. +- **Gemini provider** (Phase 5): `embedContent` endpoint for semantic embeddings. `GeminiConfig` gains an optional `embedding_model` field (e.g. `text-embedding-004`); when set, `supports_embeddings()` returns `true`. `embed()` calls `POST /v1beta/models/{model}:embedContent?key=...` with `taskType: RETRIEVAL_QUERY`. Error handling reuses `parse_gemini_error()` — 429 RESOURCE_EXHAUSTED correctly maps to `LlmError::RateLimited`. Empty string is rejected in `with_embedding_model()`. Configured embedding model appears in `list_models()`. Bootstrap wires `embedding_model` at primary provider creation sites (`create_named_provider`, `create_provider_from_config`). Compatible with the existing Qdrant/SemanticMemory pipeline. Part of epic #1592, closes #1597. +- **Gemini provider** (Phase 4): vision / multimodal input via `inlineData` parts. `MessagePart::Image` is now converted to `{ "inlineData": { "mimeType": "...", "data": "" } }` parts in `contents[].parts[]`. Multiple images per message and mixed text + image parts in a single message are both supported. `supports_vision()` returns `true` for all Gemini 2.0+ models. Part of epic #1592, closes #1596. +- **Gemini provider** (Phase 3): native tool use / function calling via `tools` + `functionDeclarations`. `supports_tool_use()` returns `true`. `chat_with_tools()` converts `ToolDefinition` to Gemini `functionDeclarations` with a schema normalization pipeline: `$ref`/`$defs` inlining (depth 8), allowlist cleanup (`anyOf`/`oneOf` Option → `nullable`), and type name uppercasing. Tool calls parsed from `functionCall` parts into `ChatResponse::ToolUse` with UUID-generated IDs (Gemini provides none). Tool results sent as `functionResponse` parts in a user message with a name lookup from conversation history. 
`toolConfig.functionCallingConfig.mode` set to `AUTO`. Empty declarations fall back to regular `chat()`. Part of epic #1592, closes #1595. +- **Gemini provider** (Phase 2): SSE streaming via `streamGenerateContent?alt=sse`. `chat_stream()` now produces `StreamChunk::Content` chunks; Gemini 2.5 thinking parts (`thought: true`) are emitted as `StreamChunk::Thinking`. `supports_streaming()` returns `true`. `GeminiProvider` gains `status_tx: Option` field with `with_status_tx()`/`set_status_tx()` builders; `AnyProvider::set_status_tx()` now propagates the sender to the Gemini arm. Both streaming and non-streaming paths use `status_tx` for retry notifications. API key stays in the `x-goog-api-key` header (never in URL query params). Part of epic #1592, closes #1594. +- **Gemini provider** (Phase 1): new `GeminiProvider` in `crates/zeph-llm/src/gemini.rs` supporting basic `generateContent` chat via the Google Gemini API. Authentication via `x-goog-api-key` header (not URL query param). System prompt extracted to `systemInstruction` top-level field; assistant role mapped to `"model"`. Consecutive same-role messages merged to satisfy Gemini's strict `user`/`model` alternation requirement. First-message guard: if the first content is a `"model"` turn, a synthetic empty user message is prepended. Configurable `base_url` (default `https://generativelanguage.googleapis.com/v1beta`), `model` (default `gemini-2.0-flash`), and `max_tokens`. JSON serialized once before retry loop. HTTP 429 and 503 retried via shared `send_with_retry()`. `ProviderKind::Gemini`, `GeminiConfig`, and `[llm.gemini]` TOML section added; `ZEPH_GEMINI_API_KEY` vault key supported; `--init` wizard updated. Part of epic #1592, closes #1593. +- **Opus 4.6 effort parameter GA**: `ThinkingCapability` gains `prefers_effort: bool` (true for `claude-opus-4-6`). 
`build_thinking_param()` now auto-converts `ThinkingConfig::Extended { budget_tokens }` to adaptive thinking with an `effort` level for Opus 4.6 models, emitting a `tracing::warn!` deprecation notice. `budget_to_effort()` maps budget values to `ThinkingEffort` levels (`< 5000` → Low, `< 15000` → Medium, `>= 15000` → High). `build_request()` strips trailing assistant messages for Opus 4.6 with thinking enabled (no-prefill constraint). Closes #1627. + + - **#1515**: Add `SubAgentError::ConcurrencyLimit { active: usize, max: usize }` variant to replace the fragile `Spawn(String)` concurrency message. `record_spawn_failure()` now accepts `&SubAgentError` and uses a typed `matches!` check instead of string matching. Both `spawn()` and `resume()` in `SubAgentManager` emit the new variant. Callers pass `&e` instead of `&e.to_string()`. - **#1516**: Add three edge-case tests for `DagScheduler` concurrency-deferral: running task is unaffected when a concurrent task defers (`test_concurrency_deferral_does_not_affect_running_task`), `max_parallel=0` stalls the scheduler without triggering deadlock detection (`test_max_concurrent_zero_no_infinite_loop`), and all tasks deferring with `ConcurrencyLimit` keep the graph in `Running` and retry on the next tick (`test_all_tasks_deferred_graph_stays_running`). - **#1457**: Add `plan_cancel_token: Option` to `Agent`. A fresh token is created in `handle_plan_confirm()` and passed into `run_scheduler_loop()`. The tick loop adds a `tokio::select!` branch on `cancel_token.cancelled()` at `wait_event()` (calls `cancel_all()` and breaks) and wraps `RunInline` execution so it can be interrupted. `handle_plan_cancel()` fires the token if a plan is in flight. `plan_cancel_token` is always cleared in both `Ok` and `Err` paths to prevent stale-token bugs. 
**Known limitation**: the delivery path for `/plan cancel` during active execution requires restructuring the agent message loop (#1603, SEC-M34-002; currently only reachable from concurrent-reader channels such as Telegram). @@ -5238,22 +5177,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - LSP hook debug tracing: `LspHookRunner::after_tool()`, `fetch_hover()`, and `fetch_diagnostics()` now emit `tracing` events for hook activation, skip reasons, symbol extraction, and MCP call attempts, making hook failures diagnosable from logs without source inspection (#1536, #1588) - **#1538**: Add `McpCaller` trait to `zeph-mcp` to abstract `McpManager` for unit testing; `MockMcpCaller` stub (feature `mock`) provides configurable FIFO responses and call recording. `fetch_diagnostics` and `fetch_hover_inner` now accept `&impl McpCaller`; 4 regression tests verify `file_path` (not `path`) is passed to `call_tool` for `get_diagnostics` and `get_hover`. -### Changed +### Tests -- Share a single `QdrantOps` instance (one gRPC channel) across all subsystems at startup: `AppBuilder::new()` constructs `QdrantOps` once when `vector_backend = "qdrant"` and propagates it via clone (O(1) `Arc` bump) to `SemanticMemory`, `QdrantSkillMatcher`, `McpToolRegistry`, and `CodeStore`. Previously 4+ independent gRPC channels were created. Invalid `qdrant_url` when `vector_backend = "qdrant"` is now a hard startup error instead of a silent `None`. URL-based constructors (`QdrantSkillMatcher::new`, `McpToolRegistry::new`, `CodeStore::new`) are replaced by `::with_ops(ops)` variants. 
(#1337) -- Consolidate `is_private_ip` (SSRF IP check) into `zeph-tools::net::is_private_ip` (canonical superset with CGNAT `100.64.0.0/10`); update `zeph-mcp`, `zeph-acp`, `zeph-tools/scrape` to use it; upgrade A2A's own copy with CGNAT range (DEDUP-01) -- Consolidate `cosine_similarity` into `zeph-memory::math::cosine_similarity` (single-pass loop, length guard); update all callers in `zeph-memory` and `zeph-skills` (DEDUP-02) -- Restore parallel tool execution: `handle_native_tool_calls()` now runs all independent tool calls concurrently via `join_all` bounded by `max_parallel_tools` semaphore (previously serialized by PR #1340). Phase 2 retries only transient failures on executors that explicitly opt in (`WebScrapeExecutor`); `ShellExecutor` is never retried. Self-reflection early-return paths emit actual parallel results instead of synthetic `[skipped]` messages. Fixes PERF-1 (#1403) -- Add `text::truncate_chars(&str, usize) -> &str` to `zeph-core::text`; replace `context/mod.rs::truncate_chars` with a re-export of the canonical version (DEDUP-03) -- Split all four `#[cfg(test)]` blocks from `agent/mod.rs` (~3190 lines) into `agent/tests.rs`; reduce `agent/mod.rs` from 6282 to ~3096 lines (SPLIT-01) -- Split `zeph-acp/agent.rs` into `agent/mod.rs` (2137 lines), `agent/helpers.rs` (547 lines helpers), `agent/tests.rs` (3396 lines tests); reduce main impl file from 6097 to 2137 lines (SPLIT-02) -- Update insta snapshot `config_default_snapshot` to reflect removal of deprecated `[lsp]` config section -- Split `agent/tool_execution.rs` (5426 lines) into `tool_execution/mod.rs`, `tool_execution/legacy.rs`, `tool_execution/native.rs` for improved navigability (ARCH-06) -- Split `agent/context.rs` (5590 lines) into `context/mod.rs`, `context/assembly.rs`, `context/summarization.rs` for improved navigability (ARCH-07) -- Replace 11-parameter `Channel::send_tool_output` signature with `ToolOutputEvent` struct; replace 4-parameter `send_tool_start` with `ToolStartEvent` 
struct (ARCH-02) -- Extract `SecurityState` struct (sanitizer, quarantine_summarizer, exfiltration_guard, flagged_urls) and `DebugState` struct (debug_dumper, dump_format, anomaly_detector, logging_config) from `Agent` struct; access via `agent.security.*` and `agent.debug_state.*` (ARCH-01) -- Expand `AgentError` with `Shutdown`, `ContextExhausted`, `ToolTimeout`, `SchemaValidation` variants; change `Agent::run` return type from `anyhow::Result<()>` to `Result<(), AgentError>` (ARCH-10) -- Add `AgentTestHarness` builder struct with `new()`, `with_responses()`, `with_registry()`, `with_tool_outputs()`, and `build()` to the test module for cleaner agent unit tests (ARCH-08) +- Added regression test `execute_confirmed_blocked_command_rejected` in `zeph-tools`: asserts that `execute_confirmed()` with a blocklisted command returns `ToolError::Blocked`, covering the code path fixed in #1529 (closes #1530). + +### Security + +- SEC-001: Replace `DefaultHasher` with a process-scoped `RandomState`-seeded SipHash-1-3 in `tool_args_hash()` to prevent adversarial hash collision bypasses of the repeat-detection window (#1399) +- SEC-002: Replace `SystemTime::now().subsec_nanos()` jitter with `rand::rng().random_range()` in `retry_backoff_ms()` to eliminate predictable retry timing that could be exploited by an adversary (#1400) +- SEC-003: Truncate tool names to 256 bytes at UTF-8 boundaries before storing in the `recent_tool_calls` sliding window to prevent unbounded memory growth from adversarially long names (#1401) +- SEC-004: Add `max_retry_duration_secs` (default 30) wall-clock retry budget to `AgentConfig`; the retry loop in `handle_native_tool_calls()` breaks when the budget is exhausted even if attempts remain, preventing indefinite retry loops (#1402) ### CI / Docs @@ -5304,6 +5237,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- The executor now inspects the response for actual fenced-block tool invocations by matching against registered `InvocationHint::FencedBlock` language tags via `extract_fenced_blocks()` - Plain text responses and markdown code fences that do not match any registered tool tag now return `Ok(None)`, allowing the agent loop to break normally; SEC-03 policy is preserved for genuine fenced-block tool invocations + ## [0.14.2] - 2026-03-09 ### Fixed @@ -5319,29 +5253,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Experiment engine now applies generation parameter variations (temperature, top_p, etc.) to the subject provider before evaluation, fixing all-zero delta scores (#1407). `AnyProvider::with_generation_overrides` clones and patches the provider; each variation is scored with its specific parameters rather than the unmodified baseline provider. `GenerationOverrides` moved to `zeph_llm::provider` and re-exported from `zeph_core::experiments::snapshot` for backwards compatibility. - Sub-agent transcript sweep no longer logs a spurious `transcript sweep failed` warning on first run when the transcript directory does not exist yet; the directory is now created automatically (#1397) -### Performance - -- Parallelize LLM summarization calls across communities in `detect_communities` using `tokio::task::JoinSet` bounded by `Arc`. New `GraphConfig.community_summary_concurrency` field (default: 4) controls the concurrency limit; `concurrency=1` provides sequential fallback (#1260) -- Incremental community detection: store BLAKE3 fingerprint (sorted entity IDs + intra-community edge IDs) per community in `graph_communities`. On refresh, only re-summarize communities whose membership changed; unchanged partitions skip LLM calls entirely. Adds migration 028 (`fingerprint TEXT` column). 
Second refresh with no graph changes triggers 0 LLM calls (#1262) - -### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) -### Added - -- Add `ErrorKind::{Transient, Permanent}` enum to `zeph-tools` and `ToolError::kind()` method for typed error classification. `Execution(io::Error)` is sub-classified by `io::ErrorKind`: transient variants (`TimedOut`, `WouldBlock`, `Interrupted`, `ConnectionReset`, `ConnectionAborted`, `BrokenPipe`) are retryable; `NotFound`, `PermissionDenied`, `AlreadyExists`, and all others are permanent (#1340) -- Add retry logic with exponential backoff for transient tool errors in the native `tool_use` path. Default: 2 retries, 500ms base delay, 5s cap, ~12.5% jitter. Configurable via `[agent] max_tool_retries` (default 2, max 5). Backoff sleep uses `tokio::select!` for cancellation-aware waiting. Debug dumps include `dump_tool_error()` with error kind (#1340) -- Add repeat-detection heuristic in `ToolOrchestrator`: tracks recent LLM-initiated tool calls in a sliding window (`VecDeque`); aborts with an error message when the same tool+args hash appears `>= tool_repeat_threshold` times within `2 * threshold` calls. Retry re-executions are excluded from the window. Configurable via `[agent] tool_repeat_threshold` (default 2, 0 to disable) (#1340) -- Rewrite all 19 native and ACP `ToolDefinition` descriptions to contract format with `Parameters / Returns / Errors / Example` sections for improved tool selection accuracy, especially on smaller local models (#1342) - -### Changed - -- Tool execution in native `tool_use` path is now sequential per call (previously parallel `join_all`). 
This enables per-call retry state without additional abstractions. Behavioral equivalence is preserved for the common case; parallel execution restoration is tracked in a follow-up issue (#1340) -- Validate `deferred_apply_threshold < compaction_threshold` ordering at config load and in `--init` wizard. Both thresholds also enforce finite (0.0, 1.0) exclusive range. Wizard re-prompts on violation instead of silently accepting. `tui_remote` now calls `Config::validate()` after load (#1302) -- Consolidate all project-level runtime artifacts under `.zeph/` directory. Default paths changed: `data/zeph.db` → `.zeph/data/zeph.db`, `skills/` → `.zeph/skills/`, `.local/debug` → `.zeph/debug`. Startup migration warning logs exact `mv` commands when old paths are detected. Explicit config paths are unaffected (#1353) - -### Fixed - Skill trust system was entirely non-functional: trust DB was never populated on skill load, `TrustGateExecutor` was defined but never wired into the executor chain, and trust commands always returned "not found". Fixed by populating `skill_trust` table after load/reload with source-based level (local→`local_level`, hub→`default_level`) and hash-mismatch detection, wrapping `CompositeExecutor` with `TrustGateExecutor` as the outermost layer, adding `set_effective_trust` to `ErasedToolExecutor` trait with forwarding through `DynExecutor`, overriding `set_effective_trust` in `impl ToolExecutor for TrustGateExecutor` (inherent method was shadowed by trait default no-op), and extending `Quarantined` trust blocking to `execute()`/`execute_confirmed()` paths (#1405) - Sub-agent LLM call no longer fails with `no route configured` when `model` is omitted in the agent definition. `ModelOrchestrator::chat_with_fallback` and `stream_with_fallback` now fall through to `default_provider` when no matching route chain exists, instead of returning `LlmError::NoRoute` early. 
Sub-agents with an explicit `model` field now route to the named provider via the new `chat_for_named` method, with fallback to default routing if the named provider fails (#1396) @@ -5364,16 +5278,22 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Compatible provider API key is now optional for local endpoints (localhost, private networks). Add `api_key` field to `[[llm.compatible]]` config as an alternative to vault secrets (#1345) - Claude adaptive thinking mode (`--thinking adaptive`) no longer fails with 400 Bad Request. Use correct API type `"adaptive"` instead of `"enabled"` without `budget_tokens`. Add `output_config.effort` support for adaptive effort levels (#1356) -### Breaking Changes -- Remove `daemon`, `mock`, `orchestration`, and `graph-memory` Cargo feature flags. All four are now compiled unconditionally into every build. Remove these flags from any `--features` lists or CI matrix entries. The `full` feature set no longer includes them. +- fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) -### Fixed +### Performance -- fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) +- Parallelize LLM summarization calls across communities in `detect_communities` using `tokio::task::JoinSet` bounded by `Arc<Semaphore>`.
New `GraphConfig.community_summary_concurrency` field (default: 4) controls the concurrency limit; `concurrency=1` provides sequential fallback (#1260) +- Incremental community detection: store BLAKE3 fingerprint (sorted entity IDs + intra-community edge IDs) per community in `graph_communities`. On refresh, only re-summarize communities whose membership changed; unchanged partitions skip LLM calls entirely. Adds migration 028 (`fingerprint TEXT` column). Second refresh with no graph changes triggers 0 LLM calls (#1262) ### Added +- Add `ErrorKind::{Transient, Permanent}` enum to `zeph-tools` and `ToolError::kind()` method for typed error classification. `Execution(io::Error)` is sub-classified by `io::ErrorKind`: transient variants (`TimedOut`, `WouldBlock`, `Interrupted`, `ConnectionReset`, `ConnectionAborted`, `BrokenPipe`) are retryable; `NotFound`, `PermissionDenied`, `AlreadyExists`, and all others are permanent (#1340) +- Add retry logic with exponential backoff for transient tool errors in the native `tool_use` path. Default: 2 retries, 500ms base delay, 5s cap, ~12.5% jitter. Configurable via `[agent] max_tool_retries` (default 2, max 5). Backoff sleep uses `tokio::select!` for cancellation-aware waiting. Debug dumps include `dump_tool_error()` with error kind (#1340) +- Add repeat-detection heuristic in `ToolOrchestrator`: tracks recent LLM-initiated tool calls in a sliding window (`VecDeque`); aborts with an error message when the same tool+args hash appears `>= tool_repeat_threshold` times within `2 * threshold` calls. Retry re-executions are excluded from the window. 
Configurable via `[agent] tool_repeat_threshold` (default 2, 0 to disable) (#1340) +- Rewrite all 19 native and ACP `ToolDefinition` descriptions to contract format with `Parameters / Returns / Errors / Example` sections for improved tool selection accuracy, especially on smaller local models (#1342) + + - Add configurable log file path (`[logging]` config section, `--log-file` CLI flag, `ZEPH_LOG_FILE`/`ZEPH_LOG_LEVEL` env overrides). File logging uses a separate level filter from `RUST_LOG`, supports daily/hourly/never rotation via `tracing-appender`, defaults to `.zeph/logs/zeph.log`. Single unified `init_tracing()` replaces scattered tracing init calls in `runner.rs`. TUI `/log` command shows current log config and recent entries; tail output is redacted via `scrub_content()` and capped at 512 chars/line and 4 KiB total. Init wizard `--init` includes a logging configuration step with level validation (#1355) - ACP gap closure (SDK v0.10): upgrade `agent-client-protocol` to 0.10; rename `kill_terminal_command` → `kill_terminal` throughout zeph-acp; advertise MCP capabilities with `http=true, sse=false` (SSE deprecated in MCP spec 2025-11-25); implement `ResourceLink` resolution with SSRF defense (post-fetch `remote_addr()` private-IP check eliminating DNS rebinding TOCTOU window, fail-closed on missing remote_addr, CGNAT 100.64.0.0/10 blocked, cwd boundary enforcement, pseudo-filesystem blocklist, binary-file null-byte detection, pre-flight size check, 10s timeout, 1 MiB cap, full XML-injection escaping via `xml_escape()` on both URI attribute and content body); add `StopReason::MaxTokens` / `MaxTurnRequests` mapping via `StopHint` channel event and `MAX_TOKENS_TRUNCATION_MARKER` constant detected in Claude text-only responses and OpenAI (`finish_reason="length"`) responses; add `SessionConfigOptionCategory` annotations to config options; emit fire-and-forget `ConfigOptionUpdate` notification for only the changed option on model/thinking/auto-approve changes. 
@@ -5394,22 +5314,31 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Add parameter variation engine for autonomous experiments (`experiments` feature flag): `SearchSpace` with `ParameterRange` (min/max/step/default, validation, quantization anchored at min), `ConfigSnapshot` for sandboxed parameter snapshots with `apply`/`diff`/`to_generation_overrides`, `VariationGenerator` trait with three pluggable strategies — `GridStep` (systematic sweep), `Random` (uniform sampling with rejection), `Neighborhood` (perturbation around baseline). One-at-a-time constraint ensures each variation changes exactly one parameter. Deduplication via `OrderedFloat`-based `HashSet`. Integer-aware handling for `TopK`/`RetrievalTopK` (#1315) - Add experiment loop engine for autonomous experiments (`experiments` feature flag): `ExperimentEngine` orchestrates the full vary-evaluate-decide cycle with progressive baseline (greedy hill climbing), `CancellationToken` graceful shutdown via `tokio::select!`, SQLite persistence of all results, `ExperimentSessionReport` with session summary and best config. Consecutive NaN guard (3-strike limit), baseline NaN early exit, cancellation-aware baseline evaluation. Parameter recording mode for Phase 4 MVP (#1316) +### Changed + +- Tool execution in native `tool_use` path is now sequential per call (previously parallel `join_all`). This enables per-call retry state without additional abstractions. Behavioral equivalence is preserved for the common case; parallel execution restoration is tracked in a follow-up issue (#1340) +- Validate `deferred_apply_threshold < compaction_threshold` ordering at config load and in `--init` wizard. Both thresholds also enforce finite (0.0, 1.0) exclusive range. Wizard re-prompts on violation instead of silently accepting. `tui_remote` now calls `Config::validate()` after load (#1302) +- Consolidate all project-level runtime artifacts under `.zeph/` directory. 
Default paths changed: `data/zeph.db` → `.zeph/data/zeph.db`, `skills/` → `.zeph/skills/`, `.local/debug` → `.zeph/debug`. Startup migration warning logs exact `mv` commands when old paths are detected. Explicit config paths are unaffected (#1353) + +### Breaking Changes + +- Remove `daemon`, `mock`, `orchestration`, and `graph-memory` Cargo feature flags. All four are now compiled unconditionally into every build. Remove these flags from any `--features` lists or CI matrix entries. The `full` feature set no longer includes them. + ## [0.14.1] - 2026-03-07 ### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) + +- Fix deferred tool pair summaries never being applied: `prepare_context` recomputes `cached_prompt_tokens` to a low post-pruning value each turn, so the token-based threshold (70% of budget) was never reached. Add count-based fallback: apply deferred summaries when `pending >= tool_call_cutoff`, preventing accumulated deferred summaries from being silently discarded as `[pruned]` content. + ### Added - Extend `[agent] summary_model` to support all provider backends: `claude[/]` (requires `ZEPH_CLAUDE_API_KEY`), `openai[/]` (requires `ZEPH_OPENAI_API_KEY`), `compatible/` (named entry from `[[llm.compatible]]`), `candle` (uses `[llm.candle]` config, feature-gated). Previously only `ollama/` was supported. 
- Add LSP code intelligence via mcpls: `step_mcpls` wizard step in `zeph --init` with PATH detection, workspace root prompt, and `[mcp.servers.mcpls]` config generation; add `mcpls` to MCP command allowlist in `zeph-mcp`; `docs/src/guides/lsp.md` with full setup guide and all 16 tool descriptions; `skills/code-analysis/SKILL.md` for LLM-guided LSP workflows (Phase 1, #1288, #1287) -### Fixed - -- Fix deferred tool pair summaries never being applied: `prepare_context` recomputes `cached_prompt_tokens` to a low post-pruning value each turn, so the token-based threshold (70% of budget) was never reached. Add count-based fallback: apply deferred summaries when `pending >= tool_call_cutoff`, preventing accumulated deferred summaries from being silently discarded as `[pruned]` content. - ### Changed - Deferred tool pair summarization: summaries are computed eagerly during the tool loop but applied lazily (Tier 0) when context usage exceeds `deferred_apply_threshold` (default 0.70), preserving the message prefix for Claude API prompt cache hits; add `deferred_apply_threshold` config option, `--init` wizard support, force-apply safety net before compaction drain (#1294) @@ -5423,7 +5352,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- Fix token counting using flattened `content` instead of structured `parts` — add `count_message_tokens` to `TokenCounter` that estimates tokens per `MessagePart` variant matching API payload structure, update 6 call sites in context budget tracking (#1280) -### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) @@ -5525,7 +5453,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Add `GraphConfig` to `[memory.graph]` TOML section: `enabled`, `extract_model`, `max_hops`, `recall_limit`, and 7 more tuning knobs (#1224) - Add `graph-memory` feature flag in root, `zeph-core`, and `zeph-memory` crates (included in `full`) (#1224) -### Changed - Arc-wrap `EmbeddingStore` in `SemanticMemory` for shared access in future background tasks (#1223) - Replace dual cfg-gated `try_join!` blocks in `prepare_context` with `FuturesUnordered` + `ContextSlot` enum for extensible concurrent context fetching (#1223) @@ -5577,6 +5504,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) + +- Telegram `confirm()` was blocking indefinitely on `rx.recv().await` with no timeout — now denied after 30s (#1147) + ### Added - `/agents` management UI: interactive CLI subcommand (`zeph agents list|show|create|edit|delete`) and TUI panel with 5-state FSM (List, Detail, Create form, Edit form, ConfirmDelete) for full CRUD of sub-agent definitions; CLI `edit` opens `$VISUAL`/`$EDITOR` with fallback to `vi`; TUI wizard covers name, description, model, permission_mode, max_turns, background fields; atomic file writes via `tempfile::NamedTempFile::persist()`; `AGENT_NAME_RE` validation on all create paths; extra confirmation for non-project scope delete in TUI (#1154) @@ -5633,33 +5563,26 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- E2E integration tests for sub-agent lifecycle: background spawn+collect and foreground spawn+secret-bridge (#1147) - Memory eviction subsystem with Ebbinghaus forgetting curve policy, two-phase SQLite+Qdrant sweep, and configurable retention (`[memory.eviction]`) (1.1) -### Fixed - -- Telegram `confirm()` was blocking indefinitely on `rx.recv().await` with no timeout — now denied after 30s (#1147) - ## [0.12.6] - 2026-03-04 ### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) -### Added - -- Hot reload for instruction files: `InstructionWatcher` in `zeph-core` subscribes to filesystem events via `notify-debouncer-mini` (500ms debounce) and reloads `instruction_blocks` in-place on `.md` file changes without agent restart (#1124) -- `InstructionReloadState` carries reload parameters (base dir, provider kinds, explicit files, auto-detect flag) through the agent select loop -- Explicit instruction file paths are boundary-checked against project root before being added to the watcher; TOCTOU-free load via canonicalize-before-open - -### Fixed - PERF-SC-04: `Scheduler::tick()` `Ok(None)` branch now computes and persists `next_run` via the cron schedule instead of treating missing `next_run` as "due now" — cron expressions are now respected at runtime (#1133) - `tick_interval_secs` from `[scheduler]` config and `--scheduler-tick` CLI flag now control the actual tick interval; previously hardcoded to 60s; zero/sub-1s values are clamped to 1s (#1136) -### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; 
updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) ### Added +- Hot reload for instruction files: `InstructionWatcher` in `zeph-core` subscribes to filesystem events via `notify-debouncer-mini` (500ms debounce) and reloads `instruction_blocks` in-place on `.md` file changes without agent restart (#1124) +- `InstructionReloadState` carries reload parameters (base dir, provider kinds, explicit files, auto-detect flag) through the agent select loop +- Explicit instruction file paths are boundary-checked against project root before being added to the watcher; TOCTOU-free load via canonicalize-before-open + + - `TaskMode` enum (`Periodic`/`OneShot`) and `TaskDescriptor` + `SchedulerMessage` mpsc channel: `Scheduler::new()` returns `(Self, Sender)` eliminating `Arc` deadlock risk; oneshot tasks are removed from the task list after execution (#1134) - `CustomTaskHandler`: injects `config["task"]` as a new agent turn via a dedicated mpsc channel at the scheduled time (same pattern as update notifications) (#1134) - `SchedulerExecutor` in `zeph-core`: LLM-facing `ToolExecutor` exposing three tools — `schedule_periodic` (6-field cron), `schedule_deferred` (ISO 8601 UTC future timestamp), `cancel_task`; all `send` paths use `try_send` to avoid blocking agent turns (#1135) @@ -5683,13 +5606,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) -### Added - -- `load_skill` tool in `zeph-core`: LLM can call `load_skill(skill_name)` at inference time to retrieve the full body of any registered skill by name. Non-TOP skills appear in the system prompt as metadata-only catalog entries; this tool enables on-demand access to their full instructions without expanding the system prompt (#1125) - -- Provider instruction file loader (`InstructionLoader`) in `zeph-core`: auto-detects `CLAUDE.md`, `AGENTS.md`, `GEMINI.md`, and `zeph.md` from the working directory and injects them into the system prompt with path-traversal protection (symlink boundary check, null byte guard, 256 KiB size cap) (#1122) - -### Fixed - `zeph.md` and `.zeph/zeph.md` are now loaded unconditionally regardless of provider or `auto_detect` setting; previously the early-return on `!auto_detect` skipped them when auto-detection was disabled and no explicit files were configured (#1122) - `[agent.instructions]` TOML config section: `auto_detect` (default `true`), `extra_files` list, and `max_size_bytes` cap (#1122) @@ -5718,7 +5634,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- ACP P2.2: `LoopbackEvent::ToolOutput` `diff` field now maps to `ToolCallContent::Diff` in `loopback_event_to_updates`, providing structured diff content in ACP tool call updates (#1065) - ACP P2.4: `/review [path]` slash command added to ACP agent; injects read-only constrained prompt; arg sanitized against `^[a-zA-Z0-9_./ -]{0,512}$` allowlist (SEC-P24-1); appears in `/help` and `build_available_commands` (#1065) -### Fixed - Context compaction (tier-1 pruning) now emits `compacting context...` status in TUI; tier-2 compaction no longer clears status prematurely before the next phase overwrites it (#1101) - Context build status changed from `building context...` to `recalling context...` for better clarity (#1100) @@ -5734,12 +5649,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - TUI Skills panel now shows Wilson score confidence bars immediately after skill match, not only after the first LLM outcome is recorded (`context.rs`: call `update_skill_confidence_metrics()` at skill resolution time) (#1077) - TUI event loop redraws on every tick unconditionally; previously the dirty-flag was never set by the tick arm, causing confidence bars to stay stale between user keypresses (#1077) -### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) ### Added +- `load_skill` tool in `zeph-core`: LLM can call `load_skill(skill_name)` at inference time to retrieve the full body of any registered skill by name. 
Non-TOP skills appear in the system prompt as metadata-only catalog entries; this tool enables on-demand access to their full instructions without expanding the system prompt (#1125) + +- Provider instruction file loader (`InstructionLoader`) in `zeph-core`: auto-detects `CLAUDE.md`, `AGENTS.md`, `GEMINI.md`, and `zeph.md` from the working directory and injects them into the system prompt with path-traversal protection (symlink boundary check, null byte guard, 256 KiB size cap) (#1122) + + - `zeph-core::testing` module (feature `mock`): reusable `MockChannel`, `MockToolExecutor`, `AgentTestHarness` builder — wires `MockProvider` + `MockChannel` + `MockToolExecutor` + `InMemoryVectorStore` into a ready-to-use agent for unit tests (#1113) - `zeph-llm::testing` module: wiremock fixture helpers for OpenAI (`/v1/chat/completions` happy path, 429, 401, 500, SSE stream with `finish_reason: stop`) and Claude (`/v1/messages` serde roundtrip, SSE stream, 429/529 overload) (#1109) - `zeph-memory::testing` module (feature `mock`): `mock_semantic_memory()` using `:memory:` SQLite + `InMemoryVectorStore` — no Docker required (#1110) @@ -5758,6 +5677,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) + +- fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) + +- ACP tool notifications: `claudeCode.toolName` is now always included in `_meta.claudeCode` for every `tool_call` and `tool_call_update`, regardless of whether `parentToolUseId` is present (#1037) +- ACP tool notifications: `locations` field is now populated on the initial `tool_call` for Read-kind tools by extracting the path from `params["file_path"]` or `params["path"]` at `ToolStart` time (#1040) +- ACP tool notifications: an intermediate `tool_call_update` (without `status`) carrying `_meta.claudeCode.toolResponse` is now emitted before the final status update for non-terminal tools (`AcpFileExecutor`), allowing IDEs to display structured file content (#1038) +- ACP tool notifications: an intermediate `tool_call_update` carrying `_meta.claudeCode.toolResponse` with `stdout`/`stderr`/`interrupted` fields is now emitted before `terminal_exit` for bash tools (`AcpShellExecutor`) (#1039) +- `version_id` always `NULL` in `skill_outcomes`: `record_skill_outcomes_batch()` now resolves + the active version ID before insert, enabling per-version metrics and accurate rollback (#1020) +- Panic on `/skill reject` without arguments: byte-slice guard replaced with safe path (#1020) +- Skill auto-promote 
skipped skills with no prior trust record in DB (early `Ok(None)` return) (#1022) +- XML injection: `skill.name()` and `skill.description()` are now escaped (`&`, `<`, `>`, `"`) + before interpolation into XML system prompt in all 4 prompt functions (pre-existing vulnerability, + fixed in scope of this epic) (#1023) + ### Added - `zeph ingest ` CLI subcommand: recursively ingests `.txt`, `.md`, `.pdf` files into Qdrant `zeph_documents` collection via `DocumentPipeline` (#1028) @@ -5772,22 +5706,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - `fetch` tool in `WebScrapeExecutor`: plain URL-to-text without CSS selector requirement, SSRF protection applied (#1055) - `DiagnosticsExecutor` with `diagnostics` tool: runs `cargo check --message-format=json` or `cargo clippy`, returns structured error/warning list (file, line, col, severity, message), capped output, graceful degradation if cargo absent (#1056) -### Changed - -- Renamed `FileExecutor` tool id `glob` → `find_path` to align with Zed IDE native tool surface (#1052) -- `READONLY_TOOLS` allowlist updated to current tool IDs: `read`, `find_path`, `grep`, `list_directory`, `web_scrape`, `fetch`; removed legacy `file_glob` (#1052) -- `DiagnosticsExecutor` uses `tokio::process::Command` instead of blocking `std::process::Command` -- Migrate dependency automation from Dependabot to self-hosted Renovate: adds `renovate.json` with MSRV-aware `constraintsFiltering: strict`, grouped minor/patch automerge, and a dedicated workflow at `.github/workflows/renovate.yml`; removes `dependabot.yml` and the `dependabot-automerge.yml` workflow (which used the insecure `pull_request_target` trigger) - -### Security - -- ACP tool notifications: `raw_response` (file content for `read_file`, stdout for `bash`) is now passed through `redact_json` before forwarding to `claudeCode.toolResponse`; prevents secrets from bypassing the `redact_secrets` pipeline when content reaches the IDE (SEC-ACP-001) - 
-### Fixed - -- fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) - -### Added - `FailureKind` enum on `SkillOutcome::ToolFailure` with 7 variants and `from_error()` heuristic classifier (`ExitNonzero`, `Timeout`, `PermissionDenied`, `WrongApproach`, `Partial`, `SyntaxError`, `Unknown`) (#1020) - `/skill reject ` command: records `user_rejection` outcome and immediately triggers @@ -5814,24 +5732,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). `RouterProvider` reorders providers by EMA score every N calls; enabled via `[llm] router_ema_enabled = true` (default: false), alpha default 0.1 (#1023) -### Performance - -- Parallelize agent startup initialization: `build_memory` + `build_tool_setup` run concurrently via `tokio::join!` (est. 
1-5s savings); `build_skill_matcher` + `build_cli_history` also parallelized; `warmup_provider` spawned as background task on CLI path overlapping with agent assembly (#1031) +### Changed -### Fixed -- ACP tool notifications: `claudeCode.toolName` is now always included in `_meta.claudeCode` for every `tool_call` and `tool_call_update`, regardless of whether `parentToolUseId` is present (#1037) -- ACP tool notifications: `locations` field is now populated on the initial `tool_call` for Read-kind tools by extracting the path from `params["file_path"]` or `params["path"]` at `ToolStart` time (#1040) -- ACP tool notifications: an intermediate `tool_call_update` (without `status`) carrying `_meta.claudeCode.toolResponse` is now emitted before the final status update for non-terminal tools (`AcpFileExecutor`), allowing IDEs to display structured file content (#1038) -- ACP tool notifications: an intermediate `tool_call_update` carrying `_meta.claudeCode.toolResponse` with `stdout`/`stderr`/`interrupted` fields is now emitted before `terminal_exit` for bash tools (`AcpShellExecutor`) (#1039) -- `version_id` always `NULL` in `skill_outcomes`: `record_skill_outcomes_batch()` now resolves - the active version ID before insert, enabling per-version metrics and accurate rollback (#1020) -- Panic on `/skill reject` without arguments: byte-slice guard replaced with safe path (#1020) -- Skill auto-promote skipped skills with no prior trust record in DB (early `Ok(None)` return) (#1022) -- XML injection: `skill.name()` and `skill.description()` are now escaped (`&`, `<`, `>`, `"`) - before interpolation into XML system prompt in all 4 prompt functions (pre-existing vulnerability, - fixed in scope of this epic) (#1023) +- Renamed `FileExecutor` tool id `glob` → `find_path` to align with Zed IDE native tool surface (#1052) +- `READONLY_TOOLS` allowlist updated to current tool IDs: `read`, `find_path`, `grep`, `list_directory`, `web_scrape`, `fetch`; removed legacy `file_glob` 
(#1052) +- `DiagnosticsExecutor` uses `tokio::process::Command` instead of blocking `std::process::Command` +- Migrate dependency automation from Dependabot to self-hosted Renovate: adds `renovate.json` with MSRV-aware `constraintsFiltering: strict`, grouped minor/patch automerge, and a dedicated workflow at `.github/workflows/renovate.yml`; removes `dependabot.yml` and the `dependabot-automerge.yml` workflow (which used the insecure `pull_request_target` trigger) -### Changed - `tool_kind_from_name`: `"glob"` now maps to `ToolKind::Search` (was `ToolKind::Other`) — consistent with other search-oriented tools (GAP-02) - `ToolOutput` struct: added `raw_response: Option` field for structured ACP intermediate notification payloads; all existing construction sites default to `None` - `LoopbackEvent::ToolOutput` variant: added `raw_response: Option` field; propagated through `Channel::send_tool_output` trait and all implementations @@ -5842,44 +5749,24 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - `correction_detection`, `correction_confidence_threshold`, `correction_recall_limit`, `correction_min_similarity` added to `[agent.learning]` config section (#1021) +### Security + +- ACP tool notifications: `raw_response` (file content for `read_file`, stdout for `bash`) is now passed through `redact_json` before forwarding to `claudeCode.toolResponse`; prevents secrets from bypassing the `redact_secrets` pipeline when content reaches the IDE (SEC-ACP-001) + +### Performance + +- Parallelize agent startup initialization: `build_memory` + `build_tool_setup` run concurrently via `tokio::join!` (est. 
1-5s savings); `build_skill_matcher` + `build_cli_history` also parallelized; `warmup_provider` spawned as background task on CLI path overlapping with agent assembly (#1031) + ## [0.12.3] - 2026-02-27 ### Fixed + - Skill matching fallback: when `QdrantSkillMatcher` returns an empty result set (embed error or Qdrant unavailable), the agent now falls back to all registered skills instead of running with an empty active-skill list - Orchestrator context window detection: `build_provider` now calls `auto_detect_context_window` for `AnyProvider::Orchestrator` so that `auto_budget_tokens` returns a correct value and `prepare_context` injects semantic recall, summaries, and cross-session memories -### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) -### Added -- `docs/src/guides/ide-integration.md` — IDE integration guide covering ACP stdio setup, Zed and VS Code configuration, and subagent visibility features (nesting, terminal streaming, agent following) (#1011) -- ACP context window usage widget: `unstable-session-usage` feature enabled in `zeph-acp` by default; `UsageUpdate` (`used`/`size` tokens) now emitted after each LLM response, populating the Context badge in Zed IDE (#1002) -- ACP project rules widget: `project_rules` field on `AcpServerConfig` and `ZephAcpAgent`; session start sends `_meta.projectRules` with basenames of loaded `.claude/rules/*.md` and skill files, populating the "N project rules" badge in Zed IDE (#1002) -- `collect_project_rules` helper in `src/acp.rs` aggregates rule file paths from `cwd/.claude/rules/*.md` and `AgentDeps::skill_paths` (#1002) -- `ZephAcpAgent::with_project_rules()` builder method for 
supplying rules list to the ACP agent (#1002) -- ACP session history: `GET /sessions` and `GET /sessions/{id}/messages` HTTP endpoints expose persisted session list and event log to ACP clients (#1004) -- Session resume: sending an existing `session_id` reconstructs conversation context from SQLite before the first LLM turn (#1004) -- Session title auto-inference: title truncated from the first user message (`title_max_chars`, default 60) and persisted after the first assistant reply (#1004) -- `[memory.sessions]` config section (`max_history`, `title_max_chars`) in `MemoryConfig` and `config/default.toml` (#1004) -- `sessions list/resume/delete` CLI subcommands (gated behind `acp` feature) (#1004) -- TUI session browser panel (`H` keybind) with `session:history` command palette entry (#1004) -- `SqliteStore::get_acp_session_info()` — single-session lookup with `title`, `updated_at`, `message_count` (#1004) -- `SqliteStore::list_acp_sessions(limit)` enriched with `title`, `updated_at`, `message_count`; `limit=0` returns all (#1004) -- Migration `017_acp_session_updated_at_trigger.sql` — auto-updates `updated_at` on every event insert (#1004) -- `zeph-core::text::truncate_to_chars()` Unicode-aware helper, replaces duplicated truncation in agent and CLI (#1004) -- `created_at` field in `AcpSessionEvent` and `SessionEventDto` REST response (#1004) -- `max_history` wired through `AcpServerConfig` and `ZephAcpAgent`; used in both HTTP handler and agent `list_sessions` (#1004) -- UUID validation on `session_id` path parameter in `session_messages_handler` — returns 400 on invalid input (#1004) -- Startup `tracing::warn!` when `auth_bearer_token` is None and HTTP transport is active (#1004) -- `--init` wizard prompts for `max_history` and `title_max_chars` (#1004) -- `zeph-acp`: `parent_tool_use_id` propagation through `LoopbackEvent::ToolStart/ToolOutput` → `AcpContext` → `loopback_event_to_updates`; subagent events carry `_meta.claudeCode.parentToolUseId` so IDEs can 
nest subagent output under the parent tool call card (#1008) -- `zeph-core`: `Agent::with_parent_tool_use_id()` builder method; `AgentBuilder` injects the parent tool call UUID when spawning subagents via `SubAgentManager` (#1008) -- `zeph-acp`: `AcpShellExecutor` terminal streaming — `stream_until_exit` helper polls output every 200 ms via `tokio::select!` and emits `ToolCallUpdate` with `_meta.terminal_output` per chunk and `_meta.terminal_exit` on completion; IDEs receive real-time bash output inside tool cards (#1009) -- `zeph-tools`: `locations: Option>` field on `ToolOutput`; `AcpFileExecutor` populates it with the absolute file path for `read_file`/`write_file` operations; `loopback_event_to_updates` forwards it as `ToolCall.location` for IDE file-following (#1010) -- Unit tests: `loopback_tool_start_parent_tool_use_id_injected_into_meta`, `loopback_tool_output_parent_tool_use_id_injected_into_meta`, `streaming_mode_emits_terminal_exit_notification`, `read_file_returns_location`, `write_file_returns_location` (#1008, #1009, #1010) - -### Fixed - ACP terminal release deferred until after `tool_call_update` notification: IDE now receives `ToolCallContent::Terminal` while the terminal is still alive, enabling tool output display in Zed ACP panel (#1013) - `TerminalMessage` enum (`Execute`/`Release`) decouples terminal lifecycle from execution in `zeph-acp`; `AcpShellExecutor::release_terminal()` signals the background handler instead of calling the ACP method inline (#1013) - `SessionEntry` retains a cloned `AcpShellExecutor` so the `prompt()` event loop can trigger deferred `terminal/release` after all `tool_call_update` notifications are dispatched (#1013) @@ -5899,7 +5786,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- `/model refresh` clears all provider caches in `~/.cache/zeph/models/` and re-fetches (#997) - ACP `AvailableCommandsUpdate` populated with model list on session start (#997) -### Fixed - `SubAgentConfig` in `zeph-core` config with `enabled`, `max_concurrent` (default 1), `extra_dirs` fields; wired into bootstrap via `with_subagent_manager()` on `AgentBuilder` (#973, #964) - Sub-agent definition discovery from `.zeph/agents/` (project scope) and `~/.config/zeph/agents/` (user scope) with priority-based deduplication (#964) - Skill injection into sub-agent system prompt: filtered skills prepended as fenced `skills` block at spawn time (#967) @@ -5922,7 +5808,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - `zeph-core`: `LoopbackEvent::Usage`, `SessionTitle`, `Plan` variants; `PlanItemStatus` enum; `Channel::send_usage` method - New `zeph-acp` feature flags: `unstable-session-usage`, `unstable-session-model`, `unstable-session-info-update`; all enabled by default -### Fixed - `zeph-acp`: tool output content now always appears in ACP tool call blocks (Zed IDE); removed `if !already_streamed` guard so `LoopbackEvent::ToolOutput` is emitted unconditionally for all channels including ACP (#1003) - `zeph-acp`: fenced-block tool execution path now generates a stable UUID `tool_call_id`, emits `ToolStart` before output, and passes the ID to `send_tool_output` — eliminating orphaned `ToolCallUpdate` events with empty ID (#1003) - `AcpShellExecutor`: `terminal_timeout_secs` config value was silently ignored; now correctly passed to `with_timeout` (#956) @@ -5931,6 +5816,34 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- `zeph-acp`: trim leading whitespace before slash-command prefix check to prevent bypass via `\n/command` input (#961) - `zeph-acp`: `/clear` now sends a sentinel to the agent loop to also clear in-memory `AgentContext` state and reset the token counter (#981) +### Added + +- `docs/src/guides/ide-integration.md` — IDE integration guide covering ACP stdio setup, Zed and VS Code configuration, and subagent visibility features (nesting, terminal streaming, agent following) (#1011) +- ACP context window usage widget: `unstable-session-usage` feature enabled in `zeph-acp` by default; `UsageUpdate` (`used`/`size` tokens) now emitted after each LLM response, populating the Context badge in Zed IDE (#1002) +- ACP project rules widget: `project_rules` field on `AcpServerConfig` and `ZephAcpAgent`; session start sends `_meta.projectRules` with basenames of loaded `.claude/rules/*.md` and skill files, populating the "N project rules" badge in Zed IDE (#1002) +- `collect_project_rules` helper in `src/acp.rs` aggregates rule file paths from `cwd/.claude/rules/*.md` and `AgentDeps::skill_paths` (#1002) +- `ZephAcpAgent::with_project_rules()` builder method for supplying rules list to the ACP agent (#1002) +- ACP session history: `GET /sessions` and `GET /sessions/{id}/messages` HTTP endpoints expose persisted session list and event log to ACP clients (#1004) +- Session resume: sending an existing `session_id` reconstructs conversation context from SQLite before the first LLM turn (#1004) +- Session title auto-inference: title truncated from the first user message (`title_max_chars`, default 60) and persisted after the first assistant reply (#1004) +- `[memory.sessions]` config section (`max_history`, `title_max_chars`) in `MemoryConfig` and `config/default.toml` (#1004) +- `sessions list/resume/delete` CLI subcommands (gated behind `acp` feature) (#1004) +- TUI session browser panel (`H` keybind) with `session:history` command palette entry (#1004) +- 
`SqliteStore::get_acp_session_info()` — single-session lookup with `title`, `updated_at`, `message_count` (#1004) +- `SqliteStore::list_acp_sessions(limit)` enriched with `title`, `updated_at`, `message_count`; `limit=0` returns all (#1004) +- Migration `017_acp_session_updated_at_trigger.sql` — auto-updates `updated_at` on every event insert (#1004) +- `zeph-core::text::truncate_to_chars()` Unicode-aware helper, replaces duplicated truncation in agent and CLI (#1004) +- `created_at` field in `AcpSessionEvent` and `SessionEventDto` REST response (#1004) +- `max_history` wired through `AcpServerConfig` and `ZephAcpAgent`; used in both HTTP handler and agent `list_sessions` (#1004) +- UUID validation on `session_id` path parameter in `session_messages_handler` — returns 400 on invalid input (#1004) +- Startup `tracing::warn!` when `auth_bearer_token` is None and HTTP transport is active (#1004) +- `--init` wizard prompts for `max_history` and `title_max_chars` (#1004) +- `zeph-acp`: `parent_tool_use_id` propagation through `LoopbackEvent::ToolStart/ToolOutput` → `AcpContext` → `loopback_event_to_updates`; subagent events carry `_meta.claudeCode.parentToolUseId` so IDEs can nest subagent output under the parent tool call card (#1008) +- `zeph-core`: `Agent::with_parent_tool_use_id()` builder method; `AgentBuilder` injects the parent tool call UUID when spawning subagents via `SubAgentManager` (#1008) +- `zeph-acp`: `AcpShellExecutor` terminal streaming — `stream_until_exit` helper polls output every 200 ms via `tokio::select!` and emits `ToolCallUpdate` with `_meta.terminal_output` per chunk and `_meta.terminal_exit` on completion; IDEs receive real-time bash output inside tool cards (#1009) +- `zeph-tools`: `locations: Option>` field on `ToolOutput`; `AcpFileExecutor` populates it with the absolute file path for `read_file`/`write_file` operations; `loopback_event_to_updates` forwards it as `ToolCall.location` for IDE file-following (#1010) +- Unit tests: 
`loopback_tool_start_parent_tool_use_id_injected_into_meta`, `loopback_tool_output_parent_tool_use_id_injected_into_meta`, `streaming_mode_emits_terminal_exit_notification`, `read_file_returns_location`, `write_file_returns_location` (#1008, #1009, #1010) + ## [0.12.2] - 2026-02-26 ### Fixed @@ -5986,9 +5899,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ### Security - `AcpConfig` now uses custom `impl std::fmt::Debug` that redacts `auth_token` as `[REDACTED]`, consistent with `A2aServerConfig` and `TelegramConfig` (#936) + ## [0.12.1] - 2026-02-25 ### Security + - Enforce `unsafe_code = "deny"` at workspace lint level; existing unavoidable unsafe blocks (mmap via candle, `std::env` in tests) annotated with `#[allow(unsafe_code)]` (#867) - Replace `HashMap` with `BTreeMap` in `AgeVaultProvider` to produce deterministic JSON key ordering on `vault.save()` (#876) - `WebScrapeExecutor`: redirect targets now validated against private/internal IP ranges to prevent SSRF via redirect chains (#871) @@ -5998,18 +5913,31 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- Shell sandbox: added `<(`, `>(`, `<<<`, `eval ` to default `confirm_patterns` to mitigate process substitution, here-string, and eval bypass vectors; documented known `find_blocked_command` limitations (#870) ### Performance + - `ClaudeProvider` caches pre-serialized `ToolDefinition` slices as `serde_json::Value`; cache is keyed by tool names and invalidated only when the set changes, eliminating per-call JSON construction overhead (#894) +- Replace `should_compact()` O(N) message scan with direct comparison against `cached_prompt_tokens` (#880) +- Cache `EnvironmentContext` on Agent; refresh only `git_branch` on skill reload instead of spawning a full git subprocess each time (#881) +- Hash doom-loop content in-place by feeding stable segments directly into the hasher, eliminating the intermediate normalized `String` allocation (#882) +- Fix double `count_tokens` call in `prune_stale_tool_outputs` for `ToolResult` parts; compute once and reuse (#883) +- Added composite covering index `(conversation_id, id)` on `messages` table (migration 015); replaces single-column index for filter+order access patterns in `oldest_message_ids` and `load_history_filtered` (#895) +- Replaced double-sort subquery in `load_history_filtered` with a CTE — eliminates redundant `ORDER BY` on the derived table (#896) +- Eliminate redundant `Vec` clone in `remove_tool_responses_middle_out` by taking ownership instead of borrowing; replace `HashSet` with `Vec::with_capacity` for small-N index tracking (#884, #888) +- Fast-path empty `parts_json == "[]"` deserialization in `load_history`, `load_history_filtered`, `message_by_id`, `messages_by_ids` to skip serde parse on the common empty case (#886) +- Replace `collect::>().join()` in `consolidate_summaries` with `String::with_capacity` + `write!` loop to eliminate intermediate allocation (#887) + ### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` 
description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) ### Added + - `sqlite_pool_size: u32` field in `MemoryConfig` (default 5) — pool size configurable via `[memory] sqlite_pool_size` in config.toml; `SqliteStore::with_pool_size()` wires the value into the connection pool builder (#893) - Background `tokio::spawn` cleanup task for `ResponseCache::cleanup_expired()` — interval configurable via `[memory] response_cache_cleanup_interval_secs` (default 3600s), first tick skipped to avoid startup overhead (#891) - 6 new unit tests for `unsummarized_count` counter logic and `sqlite_pool_size` config defaults/deserialization ### Changed + - Removed 4 `channel.send_status()` calls from `persist_message()` in `zeph-core` — each Telegram status update is a blocking API call; SQLite WAL inserts < 1ms don't warrant status reporting (#889) - `check_summarization()` no longer issues a `COUNT(*)` SQL query on every message save; replaced with in-memory `unsummarized_count: usize` counter on `MemoryState` — incremented on persist, reset on summarization (#890) - `tui_loop()` in `zeph-tui` skips `terminal.draw()` when no events occurred in the 250ms tick — reduces idle CPU usage (#892) @@ -6017,18 +5945,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- `[profile.ci]` build profile with thin LTO and 16 codegen-units for faster CI release builds (#878) - `schema` feature flag in `zeph-llm` gating `schemars` dependency and typed output API (#879) -### Performance -- Replace `should_compact()` O(N) message scan with direct comparison against `cached_prompt_tokens` (#880) -- Cache `EnvironmentContext` on Agent; refresh only `git_branch` on skill reload instead of spawning a full git subprocess each time (#881) -- Hash doom-loop content in-place by feeding stable segments directly into the hasher, eliminating the intermediate normalized `String` allocation (#882) -- Fix double `count_tokens` call in `prune_stale_tool_outputs` for `ToolResult` parts; compute once and reuse (#883) -- Added composite covering index `(conversation_id, id)` on `messages` table (migration 015); replaces single-column index for filter+order access patterns in `oldest_message_ids` and `load_history_filtered` (#895) -- Replaced double-sort subquery in `load_history_filtered` with a CTE — eliminates redundant `ORDER BY` on the derived table (#896) -- Eliminate redundant `Vec` clone in `remove_tool_responses_middle_out` by taking ownership instead of borrowing; replace `HashSet` with `Vec::with_capacity` for small-N index tracking (#884, #888) -- Fast-path empty `parts_json == "[]"` deserialization in `load_history`, `load_history_filtered`, `message_by_id`, `messages_by_ids` to skip serde parse on the common empty case (#886) -- Replace `collect::>().join()` in `consolidate_summaries` with `String::with_capacity` + `write!` loop to eliminate intermediate allocation (#887) - -### Changed - Replace default Ollama model `mistral:7b` with `qwen3:8b` across config defaults, tests, snapshots, and `--init` wizard; add `"qwen3"/"qwen"` as `ChatML` aliases in `ChatTemplate::parse_str` (#897) - Split 3177-line `src/main.rs` into focused modules: `runner.rs` (dispatch), `agent_setup.rs` (tool/MCP/feature setup), `tracing_init.rs`, `tui_bridge.rs`, 
`channel.rs`, `tests.rs` — `main.rs` reduced to 26 LOC (#839) - Split 1791-line `crates/zeph-core/src/bootstrap.rs` into submodule directory: `config.rs`, `health.rs`, `mcp.rs`, `provider.rs`, `skills.rs`, `tests.rs` — `bootstrap/mod.rs` reduced to 278 LOC (#840) @@ -6054,7 +5970,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) +- Permission cache key collision on anonymous tools — uses `tool_call_id` as fallback when title is absent (#779) + ### Added + - ACP custom methods framework via `ext_method` dispatch — `_session/list`, `_session/get`, `_session/delete`, `_session/export`, `_session/import`, `_agent/tools`, `_agent/working_dir/update` (#787) - Session export/import with SQLite transaction-backed atomic event replay (#787) - Auth hints in ACP `initialize` response meta (#787) @@ -6135,22 +6054,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- Body size limit (1 MiB), WS message size limit, max_sessions enforcement (503), CORS deny-all (#783) - SSE keepalive pings (15s interval) and idle reaper with `last_activity` tracking (#783) -### Fixed -- Permission cache key collision on anonymous tools — uses `tool_call_id` as fallback when title is absent (#779) - ### Changed + - CI: add CLA check for external contributors via `contributor-assistant/github-action` ## [0.11.6] - 2026-02-23 ### Fixed + - Auto-create parent directories for `sqlite_path` on startup (#756) -### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) ### Added + - `autosave_assistant` and `autosave_min_length` config fields in `MemoryConfig` — assistant responses skip embedding when disabled (#748) - `SemanticMemory::save_only()` — persist message to SQLite without generating a vector embedding (#748) - `ResponseCache` in `zeph-memory` — SQLite-backed LLM response cache with blake3 key hashing and TTL expiry (#750) @@ -6178,6 +6096,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Markdown table rendering in TUI chat panel — Unicode box-drawing borders, bold headers, column auto-width ### Changed + - Token estimation uses `chars/4` heuristic instead of `bytes/3` for better accuracy on multi-byte text (#742) ## [0.11.5] - 2026-02-22 @@ -6215,6 +6134,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
### Removed - `FilterConfig` per-filter config structs (`TestFilterConfig`, `GitFilterConfig`, `ClippyFilterConfig`, `CargoBuildFilterConfig`, `DirListingFilterConfig`, `LogDedupFilterConfig`) — filter params now in TOML strategy fields + ## [0.11.4] - 2026-02-21 ### Fixed @@ -6250,13 +6170,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - `allowed-tools` SKILL.md field now uses space-separated values per agentskills.io spec (was comma-separated) — **breaking change** for skills using comma-delimited allowed-tools (#686) - Skill resource files (references, scripts, assets) are no longer eagerly injected into the system prompt on skill activation; only filenames are listed as available resources — **breaking change** for skills relying on auto-injected reference content (#687) + ## [0.11.3] - 2026-02-20 ### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) +- False positive: "sudoku" no longer matched by "sudo" blocked pattern (word-boundary matching) +- PID file creation uses `OpenOptions::create_new(true)` (O_CREAT|O_EXCL) to prevent TOCTOU symlink attacks + ### Added + - `LoopbackChannel` / `LoopbackHandle` / `LoopbackEvent` in zeph-core — headless channel for daemon mode, pairs with a handle that exposes `input_tx` / `output_rx` for programmatic agent I/O - `ProcessorEvent` enum in zeph-a2a server — streaming event type replacing synchronous `ProcessResult`; `TaskProcessor::process` now accepts an `mpsc::Sender` and returns `Result<(), A2aError>` - `--daemon` CLI flag (feature `daemon+a2a`) — bootstraps a full agent + A2A JSON-RPC server under `DaemonSupervisor` with PID file lifecycle 
and Ctrl-C graceful shutdown @@ -6278,6 +6203,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Path separator rejection in MCP command validation to prevent symlink-based bypasses ### Changed + - `MessagePart::Image` variant now holds `Box` instead of inline fields, improving semantic grouping of image data - `Agent` simplified to `Agent` — ToolExecutor generic replaced with `Box`, reducing monomorphization - Shell command detection rewritten from substring matching to tokenizer-based pipeline with escape normalization, eliminating bypass vectors via backslash insertion, hex/octal escapes, quote splitting, and pipe chains @@ -6314,17 +6240,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Replace `anyhow` in `zeph-tui` with typed `TuiError` enum (Io, Channel); simplify `handle_event()` return to `()` - Sort `[workspace.dependencies]` alphabetically in root Cargo.toml -### Fixed -- False positive: "sudoku" no longer matched by "sudo" blocked pattern (word-boundary matching) -- PID file creation uses `OpenOptions::create_new(true)` (O_CREAT|O_EXCL) to prevent TOCTOU symlink attacks - ## [0.11.2] - 2026-02-19 ### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) +- Outdated config keys in default.toml: removed nonexistent `repo_id`, renamed `provider_type` to `type`, corrected candle defaults, fixed observability exporter default +- Add `wait(true)` to Qdrant upsert and delete operations for read-after-write consistency, fixing flaky `ingested_chunks_have_correct_payload` integration test (#567) +- Vault age backend now falls back to default directory for 
key/path when `--vault-key`/`--vault-path` are not provided, matching `zeph vault init` behavior (#613) + ### Added + - `base_url` and `language` fields in `[llm.stt]` config for OpenAI-compatible local whisper servers (e.g. whisper.cpp) - `ZEPH_STT_BASE_URL` and `ZEPH_STT_LANGUAGE` environment variable overrides - Whisper API provider now passes `language` parameter for accurate non-English transcription @@ -6333,12 +6260,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Full orchestrator example with cloud + local + STT in default.toml - All previously undocumented config keys in default.toml (`agent.auto_update_check`, `llm.stt`, `llm.vision_model`, `skills.disambiguation_threshold`, `tools.filters.*`, `tools.permissions`, `a2a.auth_token`, `mcp.servers.env`) -### Fixed -- Outdated config keys in default.toml: removed nonexistent `repo_id`, renamed `provider_type` to `type`, corrected candle defaults, fixed observability exporter default -- Add `wait(true)` to Qdrant upsert and delete operations for read-after-write consistency, fixing flaky `ingested_chunks_have_correct_payload` integration test (#567) -- Vault age backend now falls back to default directory for key/path when `--vault-key`/`--vault-path` are not provided, matching `zeph vault init` behavior (#613) - ### Changed + - Whisper STT provider no longer requires OpenAI API key when `base_url` points to a local server - Orchestrator sub-providers now resolve `base_url` and `embedding_model` via fallback chain: per-provider, parent section, global default @@ -6348,7 +6271,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) +- Restore `--vault`, `--vault-key`, `--vault-path` CLI flags lost during clap migration (#587) + ### Added + - Persistent CLI input history with rustyline: arrow key navigation, prefix search, line editing, SQLite-backed persistence across restarts (#604) - Clickable markdown links in TUI via OSC 8 hyperlinks — `[text](url)` renders as terminal-clickable link with URL sanitization and scheme allowlist (#580) - `@`-triggered fuzzy file picker in TUI input — type `@` to search project files by name/path/extension with real-time filtering (#600) @@ -6363,10 +6289,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - One-shot update check at startup when scheduler feature is disabled - `--init` wizard step for auto-update check configuration -### Fixed -- Restore `--vault`, `--vault-key`, `--vault-path` CLI flags lost during clap migration (#587) - ### Changed + - Refactor `AppBuilder::from_env()` to `AppBuilder::new()` with explicit CLI overrides - Eliminate redundant manual `std::env::args()` parsing in favor of clap - Add `ZEPH_VAULT_KEY` and `ZEPH_VAULT_PATH` environment variable support @@ -6379,7 +6303,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) +- Strip schema metadata and fix doom loop detection for native tool calls (#534) +- TUI freezes during fast LLM streaming and parallel tool execution: biased event loop with input priority and agent event batching (#500) +- Redundant syntax highlighting and markdown parsing on every TUI frame: per-message render cache with content-hash keying (#501) + ### Added + - Vision (image input) support across Claude, OpenAI, and Ollama providers (#490) - `MessagePart::Image` content type with base64 serialization - `LlmProvider::supports_vision()` trait method for runtime capability detection @@ -6426,18 +6355,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- Operation-level cancellation for LLM requests and tool executions (#538) ### Changed + - Consolidate Docker files into `docker/` directory (#539) - Typed deserialization for tool call params (#540) - CI: replace oraclelinux base image with debian bookworm-slim (#532) -### Fixed -- Strip schema metadata and fix doom loop detection for native tool calls (#534) -- TUI freezes during fast LLM streaming and parallel tool execution: biased event loop with input priority and agent event batching (#500) -- Redundant syntax highlighting and markdown parsing on every TUI frame: per-message render cache with content-hash keying (#501) - ## [0.10.0] - 2026-02-18 ### Fixed + - TUI status spinner not cleared after model warmup completes (#517) - Duplicate tool output rendering for shell-streamed tools in TUI (#516) - `send_tool_output` not forwarded through `AppChannel`/`AnyChannel` enum dispatch (#508) @@ -6451,11 +6377,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Duplicate `ToolEvent::Completed` emission in shell executor before filtering was applied (#480) - TUI feature gate compilation errors (#435) -### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) +- Closed #410 (clap dropped atty), #411 (rmcp updated quinn-udp), #413 (A2A body limit already present) + ### Added + - GitHub CLI skill with token-saving patterns (#507) - Parallel execution of native tool_use calls with configurable concurrency (#486) - TUI compact/detailed tool output toggle with 'e' key binding (#479) @@ -6498,6 +6426,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- `full` feature flag enabling all optional features ### Changed + - Remove `P` generic from `Agent`, `SemanticMemory`, `CodeRetriever` — provider resolved at construction (#423) - Architecture improvements, performance optimizations, security hardening (M24) (#417) - Extract bootstrap logic from main.rs into `zeph-core::bootstrap::AppBuilder` (#393): main.rs reduced from 2313 to 978 lines @@ -6510,16 +6439,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - CI updated to use `--features full` ### Breaking + - `LlmConfig.provider` changed from `String` to `ProviderKind` enum - Default features reduced -- users needing a2a, candle, mcp, openai, orchestrator, router, tui must enable explicitly or use `--features full` - Telegram channel rejects empty `allowed_users` at startup - Config with extreme values now rejected by `Config::validate()` ### Deprecated -- `ToolExecutor::execute()` string-based dispatch (use `execute_tool_call()` instead) -### Fixed -- Closed #410 (clap dropped atty), #411 (rmcp updated quinn-udp), #413 (A2A body limit already present) +- `ToolExecutor::execute()` string-based dispatch (use `execute_tool_call()` instead) ## [0.9.9] - 2026-02-17 @@ -6555,6 +6483,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Slack timestamp replay protection (reject requests >5min old) - Configurable Slack webhook bind address (`webhook_host`) + ## [0.9.8] - 2026-02-16 ### Fixed @@ -6583,6 +6512,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ### Changed - Consolidate all SQLite migrations into root `migrations/` directory (#354) + ## [0.9.7] - 2026-02-15 ### Performance @@ -6611,6 +6541,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- Upgraded rmcp to 0.15, toml to 1.0, uuid to 1.21 (#296) - Cleaned up deny.toml advisory and license configuration (#312) + ## [0.9.6] - 2026-02-15 ### Changed @@ -6643,13 +6574,23 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - `anyhow::Result` from Channel trait (replaced with `ChannelError`) - Direct `anyhow::Error` usage in agent module (replaced with `AgentError`) + ## [0.9.5] - 2026-02-14 ### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) +- Persist `MessagePart` data to SQLite via `remember_with_parts()` — pruning state now survives session restarts (#229) +- Clear tool output body from memory after Tier 1 pruning to reclaim heap (#230) +- TUI uptime display now updates from agent start time instead of always showing 0s (#259) +- `FileExecutor` `handle_write` now uses canonical path for security (TOCTOU prevention) (#260) +- `resolve_via_ancestors` trailing slash bug on macOS +- `vault.backend` from config now used as default backend; CLI `--vault` flag overrides config (#263) +- A2A error responses sanitized to prevent provider URL leakage + ### Added + - Pattern-based permission policy with glob matching per tool (allow/ask/deny), first-match-wins evaluation (#248) - Legacy blocked_commands and confirm_patterns auto-migrated to permission rules (#249) - Denied tools excluded from LLM system prompt (#250) @@ -6671,26 +6612,25 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- `AgentTaskProcessor` replaces `EchoTaskProcessor` for real A2A inference (#262) ### Changed + - ShellExecutor uses PermissionPolicy for all permission checks instead of legacy find_blocked_command/find_confirm_command - Replaced unmaintained dirs-next 2.0 with dirs 6.x - Batch messages retrieval in semantic recall: replaced N+1 query pattern with `messages_by_ids()` for improved performance -### Fixed -- Persist `MessagePart` data to SQLite via `remember_with_parts()` — pruning state now survives session restarts (#229) -- Clear tool output body from memory after Tier 1 pruning to reclaim heap (#230) -- TUI uptime display now updates from agent start time instead of always showing 0s (#259) -- `FileExecutor` `handle_write` now uses canonical path for security (TOCTOU prevention) (#260) -- `resolve_via_ancestors` trailing slash bug on macOS -- `vault.backend` from config now used as default backend; CLI `--vault` flag overrides config (#263) -- A2A error responses sanitized to prevent provider URL leakage - ## [0.9.4] - 2026-02-14 ### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) +- TUI chat line wrapping no longer eats 2 characters on word wrap (accent prefix width accounted for) +- TUI activity indicator moved to dedicated layout row (no longer overlaps content) +- Memory history loading now retrieves most recent messages instead of oldest +- Persisted tool output format includes tool name (`[tool output: bash]`) for proper display on restore +- `summarize_output` serde deserialization used `#[serde(default)]` yielding `false` instead of config default `true` + ### Added + - Bounded FIFO message queue (max 10) in agent 
loop: users can submit messages during inference, queued messages are delivered sequentially when response cycle completes - Channel trait extended with `try_recv()` (non-blocking poll) and `send_queue_count()` with default no-op impls - Consecutive user messages within 500ms merge window joined by newline @@ -6737,6 +6677,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - inject_semantic_recall, inject_code_context, inject_summaries now create typed MessagePart variants ### Changed + - `index` feature enabled by default (Code RAG pipeline active out of the box) - Agent error handler shows specific error context instead of generic message - TUI inline code rendered as blue with dark background glow instead of bright yellow @@ -6753,13 +6694,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Default `summarize_output`: false → true - Default `context_budget_tokens`: 0 (auto-detect from model) -### Fixed -- TUI chat line wrapping no longer eats 2 characters on word wrap (accent prefix width accounted for) -- TUI activity indicator moved to dedicated layout row (no longer overlaps content) -- Memory history loading now retrieves most recent messages instead of oldest -- Persisted tool output format includes tool name (`[tool output: bash]`) for proper display on restore -- `summarize_output` serde deserialization used `#[serde(default)]` yielding `false` instead of config default `true` - ## [0.9.3] - 2026-02-12 ### Fixed @@ -6778,6 +6712,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - `code_context` slot in `BudgetAllocation` for code RAG injection into agent context - `inject_code_context()` method in Agent for transient code chunk injection before semantic recall + ## [0.9.2] - 2026-02-12 ### Fixed @@ -6799,13 +6734,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- Progressive skill loading: matched skills get full body, remaining shown as description-only catalog via `` - ZEPH.md project config discovery: walk up directory tree, inject into system prompt as `` + ## [0.9.1] - 2026-02-12 ### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) +- Chat messages no longer overflow below the viewport when lines wrap +- Scroll no longer sticks at top after over-scrolling past content boundary + ### Added + - Mouse scroll support for TUI chat widget (scroll up/down via mouse wheel) - Splash screen with colored block-letter "ZEPH" banner on TUI startup - Conversation history loading into chat on TUI startup @@ -6813,17 +6753,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- Markdown rendering for all chat messages via `pulldown-cmark`: bold, italic, strikethrough, headings, code blocks, inline code, lists, blockquotes, horizontal rules - Scrollbar track with proportional thumb indicator in chat widget -### Fixed -- Chat messages no longer overflow below the viewport when lines wrap -- Scroll no longer sticks at top after over-scrolling past content boundary - ## [0.9.0] - 2026-02-12 ### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) +- Effective embedding model resolution: Qdrant subsystems now use the correct provider-specific embedding model name when provider is `openai` or orchestrator routes to OpenAI +- Skill watcher no longer loops in Docker containers (overlayfs phantom events) + ### Added + - ratatui-based TUI dashboard with real-time agent metrics (feature-gated `tui`, opt-in) - `TuiChannel` as new `Channel` implementation with bottom-up chat feed, input line, and status bar - `MetricsSnapshot` and `MetricsCollector` in zeph-core via `tokio::sync::watch` for live metrics transport @@ -6852,13 +6792,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- Qdrant registry sync after dynamic MCP add/remove for semantic tool matching ### Changed + - Docker images now include Node.js, npm, and Python 3 for MCP server runtime - `ServerEntry` uses `McpTransport` enum instead of flat command/args/env fields -### Fixed -- Effective embedding model resolution: Qdrant subsystems now use the correct provider-specific embedding model name when provider is `openai` or orchestrator routes to OpenAI -- Skill watcher no longer loops in Docker containers (overlayfs phantom events) - ## [0.8.2] - 2026-02-10 ### Changed @@ -6867,6 +6804,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - CI clippy uses default features instead of explicit feature list - Docker images now include skill runtime dependencies: `curl`, `wget`, `git`, `jq`, `file`, `findutils`, `procps-ng` + ## [0.8.1] - 2026-02-10 ### Fixed @@ -6896,6 +6834,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - `[timeouts]` config section with `llm_seconds`, `embedding_seconds`, `a2a_seconds` - LLM calls wrapped with `tokio::time::timeout` in agent loop + ## [0.8.0] - 2026-02-10 ### Fixed @@ -6968,6 +6907,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Added `blake3` 1.8 to workspace - Added `rmcp` 0.14 to workspace (MCP protocol SDK) + ## [0.7.1] - 2026-02-09 ### Fixed @@ -6985,6 +6925,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Agent tool output label from `[shell output]` to `[tool output]` - `ShellExecutor` block extraction now uses shared `extract_fenced_blocks()` + ## [0.7.0] - 2026-02-08 ### Fixed @@ -7009,6 +6950,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
- Added `AuthRequired` and `Unknown` variants to `TaskState` - `TaskStatusUpdateEvent` and `TaskArtifactUpdateEvent` gained `kind` field (`status-update`, `artifact-update`) + ## [0.6.0] - 2026-02-08 ### Fixed @@ -7028,6 +6970,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Optional `a2a` feature flag (enabled by default) to gate A2A functionality - 42 new unit tests for protocol types, JSON-RPC envelopes, agent card builder, discovery registry, and client operations + ## [0.5.0] - 2026-02-08 ### Fixed @@ -7057,16 +7000,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - `zeph-core` now depends on `zeph-skills` - `zeph-skills` now depends on `tokio` (sync, rt) and `notify` + ## [0.4.3] - 2026-02-08 ### Fixed + - Telegram "Bad Request: text must be non-empty" error when LLM returns whitespace-only content. Added `is_empty()` guard after `markdown_to_telegram` conversion in both `send()` and `send_or_edit()` (Issue #73) -### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) ### Added + - `Dockerfile.dev`: multi-stage build from source with cargo registry/build cache layers for fast rebuilds - `docker-compose.dev.yml`: full dev stack (Qdrant + Zeph) with debug tracing (`RUST_LOG`, `RUST_BACKTRACE=1`), uses host Ollama via `host.docker.internal` - `docker-compose.deps.yml`: Qdrant-only compose for native zeph execution on macOS @@ -7074,15 +7019,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [0.4.2] - 2026-02-08 ### Fixed + - Telegram MarkdownV2 parsing errors (Issue #69). 
Replaced manual character-by-character escaping with AST-based event-driven rendering using pulldown-cmark 0.13.0 - UTF-8 safe text chunking for messages exceeding Telegram's 4096-byte limit. Uses `str::is_char_boundary()` with newline preference to prevent splitting multi-byte characters (emoji, CJK) - Link URL over-escaping. Dedicated `escape_url()` method only escapes `)` and `\` per Telegram MarkdownV2 spec, fixing broken URLs like `https://example\.com` -### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) ### Added + - `TelegramRenderer` state machine for context-aware escaping: 19 special characters in text, only `\` and `` ` `` in code blocks - Markdown formatting support: bold, italic, strikethrough, headers, code blocks, links, lists, blockquotes - Comprehensive benchmark suite with criterion: 7 scenario groups measuring latency (2.83µs for 500 chars) and throughput (121-970 MiB/s) @@ -7090,6 +7036,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - 30 markdown unit tests covering formatting, escaping, edge cases, and UTF-8 chunking (99.32% line coverage) ### Changed + - `crates/zeph-channels/src/markdown.rs`: Complete rewrite with pulldown-cmark event-driven parser (449 lines) - `crates/zeph-channels/src/telegram.rs`: Removed `has_unclosed_code_block()` pre-flight check (no longer needed with AST parsing), integrated UTF-8 safe chunking - Dependencies: Added pulldown-cmark 0.13.0 (MIT) and criterion 0.8.0 (Apache-2.0/MIT) for benchmarking @@ -7102,6 +7049,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
### Changed - Docker Compose: Added environment variables for semantic memory configuration (`ZEPH_MEMORY_SEMANTIC_ENABLED`, `ZEPH_MEMORY_SEMANTIC_RECALL_LIMIT`) and Qdrant URL override (`ZEPH_QDRANT_URL`) to enable full semantic memory stack via `.env` file + ## [0.4.0] - 2026-02-08 ### Fixed @@ -7195,12 +7143,18 @@ let message_id = store.save_message(conv_id, "user", "hello").await?; - `OllamaProvider::new()` now accepts `embedding_model` parameter (breaking change, pre-v1.0) - Config schema: added `llm.embedding_model` field with serde default for backward compatibility + ## [0.3.0] - 2026-02-07 ### Fixed - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) + +- Shell command timeout now respects `config.tools.shell.timeout` (was hardcoded 30s in agent.rs) +- Removed duplicate bash parsing logic from agent.rs (now centralized in zeph-tools) +- Error message pattern leakage: blocked commands now show generic security policy message instead of leaking exact blocked pattern + ### Added #### M7 Phase 1: Tool Execution Framework - zeph-tools crate (Issue #39) @@ -7236,12 +7190,6 @@ let message_id = store.save_message(conv_id, "user", "hello").await?; - **CRITICAL fix for SEC-001**: Shell commands now filtered through ShellExecutor with DEFAULT_BLOCKED patterns (rm -rf /, sudo, mkfs, dd if=, curl, wget, nc, shutdown, reboot, halt). Resolves command injection vulnerability where agent.rs bypassed all security checks via inline bash execution. 
-### Fixed - -- Shell command timeout now respects `config.tools.shell.timeout` (was hardcoded 30s in agent.rs) -- Removed duplicate bash parsing logic from agent.rs (now centralized in zeph-tools) -- Error message pattern leakage: blocked commands now show generic security policy message instead of leaking exact blocked pattern - ### Changed **BREAKING CHANGES** (pre-1.0.0): @@ -7268,6 +7216,11 @@ let agent = Agent::new(provider, channel, &skills_prompt, executor); - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) + +#### M6 Phase 3: Security improvements +- Manual `Debug` implementation for `ClaudeProvider` to prevent API key leakage in debug output +- Error message sanitization: full Claude API errors logged via `tracing::error!()`, generic messages returned to users + ### Added #### M6 Phase 1: Streaming trait extension (Issue #35) @@ -7308,12 +7261,6 @@ let agent = Agent::new(provider, channel, &skills_prompt, executor); - tokio-stream dependency added for `StreamExt` trait - 6 new unit tests for channel streaming behavior -### Fixed - -#### M6 Phase 3: Security improvements -- Manual `Debug` implementation for `ClaudeProvider` to prevent API key leakage in debug output -- Error message sanitization: full Claude API errors logged via `tracing::error!()`, generic messages returned to users - ### Changed **BREAKING CHANGES** (pre-1.0.0): @@ -7328,6 +7275,8 @@ let agent = Agent::new(provider, channel, &skills_prompt, executor); - fix(tools): agent now prefers `memory_search` over `search_code` when recalling user-provided facts — updated `search_code` description to exclude user facts/preferences; updated `memory_search` description 
to emphasise user-provided context recall; session-level hint injected into the volatile system prompt block when `memory_save` was called in the current session (closes #2475) +- Replace vulnerable `serde_yml`/`libyml` with manual frontmatter parser (GHSA high + medium) + ### Added #### M0: Workspace bootstrap @@ -7388,10 +7337,8 @@ let agent = Agent::new(provider, channel, &skills_prompt, executor); - PR template with review checklist - LICENSE (MIT), CONTRIBUTING.md, SECURITY.md -### Fixed -- Replace vulnerable `serde_yml`/`libyml` with manual frontmatter parser (GHSA high + medium) - ### Changed + - Move dependency features from workspace root to individual crate manifests - Update README with badges, architecture overview, and pre-built binaries section @@ -7402,6 +7349,7 @@ let agent = Agent::new(provider, channel, &skills_prompt, executor); - Agent::run() uses tokio::select! to race channel messages against shutdown signal [0.16.0]: https://github.com/bug-ops/zeph/compare/v0.15.3...v0.16.0 + [Unreleased]: https://github.com/bug-ops/zeph/compare/v0.21.4...HEAD [0.21.4]: https://github.com/bug-ops/zeph/compare/v0.21.3...v0.21.4 [0.21.3]: https://github.com/bug-ops/zeph/compare/v0.21.2...v0.21.3 diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md index 96db80d35..94f11559b 100644 --- a/book/src/SUMMARY.md +++ b/book/src/SUMMARY.md @@ -45,6 +45,7 @@ - [Deploy with Docker](guides/docker.md) - [Daemon Mode](guides/daemon-mode.md) - [Prometheus Monitoring](guides/prometheus.md) +- [Worktree Isolation](guides/worktree.md) # Advanced diff --git a/book/src/advanced/tui.md b/book/src/advanced/tui.md index 02857a9e3..377a91ce5 100644 --- a/book/src/advanced/tui.md +++ b/book/src/advanced/tui.md @@ -82,6 +82,7 @@ When using `--connect`, the TUI renders token-by-token streaming from the remote | `Ctrl+J` | Insert newline (multiline input) | | `/` | Open slash-command autocomplete (when input is empty) | | `@` | Open file picker (fuzzy file search) | +| `Ctrl+R` | Open 
reverse search in session prompt history | | `Escape` | Switch to Normal mode | | `Ctrl+C` | Quit application | | `Ctrl+U` | Clear input line | @@ -123,6 +124,21 @@ Typing `@` in Insert mode opens a fuzzy file search popup above the input area. All other keys are blocked while the picker is visible. +### Prompt History (Reverse Search) + +Press `Ctrl+R` in Insert mode to search through your session prompt history. This is useful for re-running similar prompts without typing from scratch. + +| Key | Action | +|-----|--------| +| Any character | Search history by substring (incremental) | +| `Ctrl+R` / `Up` | Move to previous match | +| `Ctrl+S` | Move to next match | +| `Enter` | Insert selected prompt into input and close | +| `Backspace` | Remove last query character (dismisses if query is empty) | +| `Escape` | Close search without inserting | + +History is session-scoped (in-memory only, not persisted across restarts) and includes all prompts submitted in the current TUI session. Search matches are displayed in reverse chronological order (newest first). The search is case-insensitive. + ### Command Palette Press `Ctrl+P` in Insert mode to open the command palette. The palette provides read-only agent management commands for inspecting runtime state without leaving the TUI. diff --git a/book/src/guides/cocoon.md b/book/src/guides/cocoon.md index 12ce4e41a..f89cae1a1 100644 --- a/book/src/guides/cocoon.md +++ b/book/src/guides/cocoon.md @@ -174,6 +174,17 @@ When using the TUI dashboard with Cocoon enabled, check sidecar status and avail Status updates automatically every 30 seconds in the background. +### TON Balance Display + +The TUI sidebar can display your Cocoon TON balance in real time. By default, the balance is shown. 
To hide it for privacy (displays `*** TON` instead), configure: + +```toml +[cocoon] +show_balance = false # Hide TON balance in TUI sidebar (default: true) +``` + +This setting can also be configured interactively during the `zeph init` wizard under the Cocoon setup step. + ## Configuration Reference | Field | Type | Default | Description | diff --git a/book/src/guides/custom-skills.md b/book/src/guides/custom-skills.md index 5834889ba..de002ebd0 100644 --- a/book/src/guides/custom-skills.md +++ b/book/src/guides/custom-skills.md @@ -67,6 +67,48 @@ zeph vault set ZEPH_SECRET_GITHUB_ORG my-org See [Vault — Custom Secrets](../reference/security.md#custom-secrets) for full details. +### Skill Extensions (UI/Keybindings/Monitors) + +Add optional UI elements, keybindings, and performance monitors to your skill with the `extensions` field: + +```markdown +--- +name: my-skill +description: My skill with UI enhancements. +extensions: | + [[ui_elements]] + type = "button" + label = "Run My Action" + + [[keybindings]] + key = "Ctrl+M" + label = "Execute My Task" + + [[monitors]] + name = "status" + type = "gauge" + label = "Task Progress" +--- +``` + +The `extensions` field is optional and ignored if not recognized. Valid extension types: + +**UI Elements:** +- `type = "button"` — clickable button in TUI (label, optional tooltip) +- `type = "toggle"` — on/off toggle widget (label, default state) +- `type = "text-input"` — single-line text field (label, placeholder, max_length) + +**Keybindings:** +- `key` — keyboard sequence (e.g., `Ctrl+M`, `Alt+A`, `Shift+F12`) +- `label` — action description shown in keymap help + +**Monitors:** +- `name` — unique identifier (lowercase, hyphens) +- `type` — `gauge`, `counter`, `timer`, `status` +- `label` — display name + +If the `extensions` block fails to parse (malformed YAML/TOML), the skill loads normally but extensions are silently skipped — skill functionality is never blocked by extension metadata errors. 
+ ### Channel Allowlist Restrict a skill to specific I/O channels with `x-channels`. When set, the skill is excluded from matching on channels not in the list: diff --git a/book/src/guides/worktree.md b/book/src/guides/worktree.md new file mode 100644 index 000000000..a8120f71f --- /dev/null +++ b/book/src/guides/worktree.md @@ -0,0 +1,178 @@ +# Worktree Isolation for Sub-Agents + +Zeph can automatically create isolated git worktrees for background sub-agents to prevent file system conflicts and mutations when multiple agents work on the same repository simultaneously. + +## Why Worktree Isolation? + +When you spawn multiple sub-agents in parallel (e.g., one fixing bugs while another adds features), they both need to work on the codebase without interfering with each other. Without isolation: + +- Both agents modify the same working directory +- Changes from one agent can be overwritten by another +- File locks and git state conflicts cause failures +- Test results are unpredictable + +Worktree isolation solves this by giving each sub-agent its own independent checkout via git worktrees. Each worktree can have its own branch, uncommitted changes, and build artifacts. + +## Setup + +### 1. Enable in Configuration + +Add the `[worktree]` section to your config: + +```toml +[worktree] +enabled = true # Enable worktree isolation +bg_isolation = "worktree" # Isolation mode: "none" or "worktree" +base_ref = "head" # Worktree base ref: "head" or "fresh" +git_timeout_secs = 30 # Git operation timeout +``` + +Or during `zeph init` wizard: + +```bash +zeph init +# ... follow the prompts until "Worktree Isolation" step +``` + +### 2. Understand the Modes + +**`bg_isolation = "none"` (default):** Sub-agents operate in the same directory as the main agent. No isolation. + +**`bg_isolation = "worktree"`:** Each sub-agent gets its own git worktree cloned from the repository. 
This requires: +- A git repository at the current working directory (or ancestor) +- Git version 2.7.0 or later +- Sufficient disk space for shallow clones + +### 3. Choose a Base Reference + +When creating worktrees, Zeph uses one of two strategies: + +**`base_ref = "head"` (default):** Clone from the current working tree's HEAD commit. Sub-agents see the exact state of your repository at spawn time. +- **Pros**: Includes all your uncommitted changes and local branches +- **Cons**: If your working tree has broken tests or build errors, sub-agents inherit them + +**`base_ref = "fresh"`:** Clone from the main/master branch on the remote. Sub-agents start with a clean, build-ready state. +- **Pros**: Sub-agents always start from a known-good state +- **Cons**: Local changes are not visible to sub-agents; they may be out-of-sync with your working tree + +Choose based on your workflow: +- **Development flow**: Use `head` so sub-agents can see your in-progress work +- **CI/release flow**: Use `fresh` so sub-agents always test against the main branch + +### 4. Manage Worktrees + +List active worktrees: + +```bash +zeph worktree list +``` + +Output example: + +``` +Active Worktrees: +/home/user/repo/.git/worktrees/zeph-agent-a1b2c3d4 (created 2 minutes ago) +/home/user/repo/.git/worktrees/zeph-agent-e5f6g7h8 (created 1 minute ago) +``` + +Clean up stale worktrees (those no longer tracked by active sub-agents): + +```bash +zeph worktree clean +``` + +This is safe to run anytime — it only removes worktrees that are no longer in use. 
+ +## Spawning Sub-Agents with Isolation + +Once worktree isolation is enabled, background sub-agents automatically use isolated worktrees: + +``` +> /agent spawn code-reviewer Review the auth module +Code reviewer is working in isolated worktree at .git/worktrees/zeph-a1b2c3d4 +``` + +Sub-agents can: +- Read and modify files without affecting your main working tree +- Create branches and commits +- Run tests and builds +- Check out different commits + +## Override Base Reference + +At runtime, override the base ref for a specific sub-agent spawn: + +```bash +zeph --worktree-base-ref fresh +> /agent spawn validator Check main branch +``` + +This is useful for one-off runs where you want a different base than your config default. + +## Performance Considerations + +Worktree creation incurs a git clone operation: + +- **Shallow clones**: Zeph uses `--depth=1` to minimize download time +- **Time**: Typically 1-10 seconds depending on repository size +- **Disk space**: Shallow clones use ~20-30% of a full clone + +Example timings: + +| Repository Size | Time | Disk Space | +|-----------------|------|-----------| +| Small (< 100 MB) | 1-2s | 20-30 MB | +| Medium (100-500 MB) | 3-5s | 60-150 MB | +| Large (> 500 MB) | 10-30s | 200+ MB | + +If worktree creation is slow on your system: + +1. **Increase timeout**: `git_timeout_secs = 60` (default: 30) +2. **Verify network**: Shallow clones fetch from the remote; slow network slows clones +3. **Check disk**: Low disk space can slow git operations +4. **Use `head` mode**: Avoids a remote fetch; only copies the local worktree + +## Troubleshooting + +### "Git operation timed out" + +The git command to create or remove the worktree exceeded `git_timeout_secs`. Solutions: + +1. Increase timeout in config: `git_timeout_secs = 60` +2. Check your network connection +3. Verify the repository is healthy: `git gc`, `git fsck` + +### "Worktree already exists" + +A previous sub-agent's worktree wasn't cleaned up. 
Run: + +```bash +zeph worktree clean +``` + +If the directory still exists on disk but git doesn't know about it, remove it manually: + +```bash +rm -rf .git/worktrees/zeph-* +``` + +### Sub-agent fails in worktree + +Sub-agents run inside the worktree. If a sub-agent fails: + +1. Check the logs: `zeph --debug-dump` +2. Inspect the worktree: `cd .git/worktrees/zeph-<id> && git log --oneline` +3. File an issue with the worktree path and sub-agent logs + +### No worktrees listed, but sub-agents are spawning + +Worktree listing shows only **created** worktrees. Worktrees are removed automatically when: +- The sub-agent completes and is not being resumed +- You manually run `zeph worktree clean` + +If you want to inspect a completed sub-agent's changes before cleanup, resume the sub-agent or manually check the worktree directory before cleaning. + +## Next Steps + +- [Sub-Agent Orchestration](../advanced/sub-agents.md) — full sub-agent system documentation +- [Configuration Reference](../reference/configuration.md) — all worktree config options diff --git a/specs/004-memory/spec.md b/specs/004-memory/spec.md index 503fdddc8..d238729ff 100644 --- a/specs/004-memory/spec.md +++ b/specs/004-memory/spec.md @@ -246,6 +246,65 @@ embed_timeout_secs = 5 # per-embed timeout; 0 = disabled This is separate from `context.fidelity.max_embed_input_tokens` (which limits input size) — `embed_timeout_secs` limits wall-clock duration of the embed call itself. 
+## Benna-Fusi Multi-Timescale SYNAPSE Edges (#3709, #3710, #3994) + +### Fast/Slow Synaptic Variables (#3709) + +Graph `Edge` gains two additional floating-point fields alongside the existing `confidence`: + +| Field | Description | +|-------|-------------| +| `confidence_fast` | Short-timescale synaptic variable; high learning rate, fast decay | +| `confidence_slow` | Long-timescale synaptic variable; low learning rate, slow decay | + +Both variables evolve on every reassertion (APEX and legacy paths) via a two-timescale +leaky cascade: + +``` +confidence_fast ← (1 - η_fast) * confidence_fast + η_fast * new_evidence +confidence_slow ← (1 - η_slow) * confidence_slow + η_slow * new_evidence +``` + +SYNAPSE spreading activation uses an `α * fast + (1 − α) * slow` blend as the traversal weight. +The `slow` variable gates the conflict resolver's recency fallback. Rates (`α`, `η_fast`, +`η_slow`) are config-tunable and validated at startup. + +```toml +[memory.graph] +benna_fusi_alpha = 0.7 # blend weight for fast variable in spread +benna_fusi_eta_fast = 0.3 # learning rate for fast variable +benna_fusi_eta_slow = 0.05 # learning rate for slow variable +``` + +### MemORAI Graph Retrieval Improvements (#3710) + +Migration 096 adds `confidence_fast`, `confidence_slow`, and `turn_index` to `graph_edges` +(both SQLite and PostgreSQL schemas). A fail-open `MemoryWriteGate` prefilter in `insert_edges` +drops low-confidence, low-signal edges before storage. `turn_index` is threaded through +`GraphExtractionConfig` and both insert paths (APEX and legacy); population from the agent +turn counter is wired at extraction time. + +### DeepReasoning Query-Conditioned Routing (#3994) + +`memory.retrieval.deep_reasoning_query_conditioned = true` (opt-in, fail-open) routes +`DeepReasoning` tier calls through `recall_graph_hela` instead of the static-weight path. +The static-weight path remains the default when the flag is `false`. 
+ +```toml +[memory.retrieval] +deep_reasoning_query_conditioned = false # opt-in +``` + +### Key Invariants + +- `η_fast > η_slow` MUST be enforced at config validation — equal or reversed rates collapse the two-timescale model +- `α` MUST be in `[0.0, 1.0]` — validated at startup; out-of-range is a config error +- `confidence_fast` and `confidence_slow` are updated on every reassertion — NEVER skip the update for legacy insert paths +- Migration 096 is append-only — existing rows get `NULL` fast/slow until first reassertion; read code handles `NULL` gracefully +- `deep_reasoning_query_conditioned = true` must be fail-open — if `recall_graph_hela` errors, fall back to static-weight path + +--- + ## JoinSet and CancellationToken Fixes - `spawn_graph_extraction` now receives a `CancellationToken` from `LifecycleState` for clean shutdown (commit #4635) diff --git a/specs/009-orchestration/spec.md b/specs/009-orchestration/spec.md index 8551e87bf..3cbd21f0b 100644 --- a/specs/009-orchestration/spec.md +++ b/specs/009-orchestration/spec.md @@ -583,3 +583,37 @@ lineage_ttl_secs = 300 # must be > 0 - NEVER store lineage on `TaskNode` or serialize it to the database — lineage is a runtime-only signal - Audit log MUST emit ONE structured `tracing::error!` per abort with `root`, `chain_depth`, and `cause` +--- + +## `graph_dirty` Consistency (#4809, #4831, #4832, #4835, #4836, #4848) + +`graph_dirty` is the flag used by `GraphPersistence` to decide whether the in-memory DAG state +needs to be flushed to SQLite. A missing `graph_dirty = true` write after a terminal transition +causes silent status loss on crash or restart. 
+ +All state-mutating operations MUST set `graph_dirty = true`: + +| Operation | Affected method | +|-----------|----------------| +| Task transitions to `Completed` | `check_graph_completion` | +| DAG enters deadlock → transitions to `Failed` | `check_graph_completion` | +| Tasks injected via `inject_tasks()` | `inject_tasks` | +| Predicate outcome recorded | `record_predicate_outcome` | + +`refactor(orchestration)` (#4809) extracted `init_common()` to consolidate initialisation paths +and added a `graph_dirty` checkpoint after the common init block. + +### PlanCache Instrumentation (#4835, #4836) + +`PlanCache::new` and `PlanCache::evict` gain `#[tracing::instrument]` annotations (conditional +on the `profiling` feature). `new` records the current `embedding_model` as a span field. This +makes cache initialisation and eviction latency visible in local Chrome JSON traces. + +### Key Invariants + +- `graph_dirty = true` MUST be set in ALL task state transitions — a missing write is a durability bug +- Both terminal transitions (Completed and deadlock→Failed) MUST set `graph_dirty` in `check_graph_completion` +- `inject_tasks` MUST set `graph_dirty` after successful injection — not only on task completion +- `record_predicate_outcome` MUST set `graph_dirty` when an outcome is recorded +- PlanCache span names follow the `..` convention + diff --git a/specs/035-profiling/spec.md b/specs/035-profiling/spec.md index fe805ff48..99ffb6df2 100644 --- a/specs/035-profiling/spec.md +++ b/specs/035-profiling/spec.md @@ -518,7 +518,7 @@ When profiling is active, a `MetricsBridge` layer derives timing from span durat Profiling and tracing system is complete when: - [ ] Phase 1 foundation merged: TelemetryConfig, profiling feature, chrome layer, 4 agent span instruments, LLM instruments -- [ ] Phase 2 deep instrumentation merged: all subsystem spans, InstrumentedChannel wrappers, MetricsBridge validation +- [x] Phase 2 deep instrumentation merged: all subsystem spans, 
InstrumentedChannel wrappers, MetricsBridge validation - [ ] Phase 3 allocation + metrics merged: profiling-alloc feature, AllocLayer, sysinfo task - [ ] Phase 4 production tier merged: OTLP export, profiling-pyroscope, Grafana stack - [ ] Chrome traces export valid W3C format (verified with Perfetto UI) @@ -530,6 +530,48 @@ Profiling and tracing system is complete when: --- +## Phase 2 Shipped: Deep Instrumentation Roll-Out (#4788–#4864) + +Phase 2 deep instrumentation was completed across a series of PRs. Key changes: + +### `EnteredSpan`-across-`.await` Fixes (#4788, #4795, #4825, #4834, #4844) + +All occurrences of `let _guard = span.enter()` across `.await` points were replaced with +`.instrument(span)` (or `#[tracing::instrument]` on async fns). The `EnteredSpan`-across-await +pattern produces incorrect traces under the tokio multi-thread runtime because span guards are +`!Send` and may panic on thread-switch. Affected crates: `zeph-agent-context`, `zeph-context`, +`zeph-memory`. + +**Key invariant added:** NEVER use `let _guard = span.enter()` in an `async fn` body or across +an `.await` point. Always use `.instrument(span)` / `future.instrument(span)` or the +`#[tracing::instrument]` attribute macro. + +### Hot-Path `#[tracing::instrument]` Roll-Out (#4821, #4826, #4833, #4852) + +`#[cfg_attr(feature = "profiling", tracing::instrument(...))]` added to hot-path async +functions in: +- `zeph-agent-context` and `zeph-agent-persistence` (turn context build, persist, restore) +- `zeph-index`, `zeph-commands`, `zeph-subagent` +- `zeph-channels` (CLI, Telegram, Discord, Slack adapters) +- `zeph-orchestration` hot paths +- LLM provider hot paths (claude, openai, gemini, compatible, router) +- `zeph-mcp` (connect, list_tools, call_tool, shutdown) +- `zeph-skills` (match, hot_reload, scan) + +Span naming follows the `..` convention established in FR-036. 
+ +### ACP Instrumentation (#4851) + +`#[tracing::instrument]` added to permission check and all HTTP handlers in `zeph-acp`. + +### MCP and Telegram Span Naming Fix (#4864) + +Incorrect span names in `zeph-mcp` (`mcp.connect_url` named as `mcp.connect`) and missing +Telegram instrumentation were corrected. Telegram adapter now spans: `channel.telegram.send`, +`channel.telegram.send_chunk`, `channel.telegram.poll`. + +--- + ## 14. See Also - [[MOC-specs]] — Map of all specifications diff --git a/specs/041-experiments/spec.md b/specs/041-experiments/spec.md index 27cc94012..5262864bb 100644 --- a/specs/041-experiments/spec.md +++ b/specs/041-experiments/spec.md @@ -393,7 +393,32 @@ Is temperature=0.8 better than control? --- -## 11. See Also +## 11. Evaluator Phase 1 Parallelization (#4794, #4853) + +`Evaluator::evaluate` previously ran Phase 1 subject model calls sequentially. The evaluator +now mirrors the existing Phase 2 pattern: `FuturesUnordered` + `Arc<Semaphore>` bounded by +`parallel_evals` (default 3) for both phases. + +After all Phase 1 subject futures complete, results are sorted by case index to restore +deterministic ordering before Phase 2 begins. + +Error semantics are unchanged: any subject failure (`Llm` or `Timeout`) is fatal and +propagates immediately. + +```toml +[experiments] +parallel_evals = 3 # max concurrent subject model calls in Phase 1 and Phase 2 +``` + +### Key Invariants + +- Phase 1 parallelism is bounded by `parallel_evals` — NEVER unbounded concurrency +- Results MUST be sorted by case index before Phase 2 — NEVER rely on future completion order +- Fatal error semantics are preserved — Phase 1 error aborts the entire evaluation, same as Phase 2 + +--- + +## 12. 
See Also - [[MOC-specs]] — all specifications - [[029-feature-flags/spec]] — compile-time feature flags diff --git a/specs/044-subagent-lifecycle/spec.md b/specs/044-subagent-lifecycle/spec.md index 969c75ebd..fc46cf56c 100644 --- a/specs/044-subagent-lifecycle/spec.md +++ b/specs/044-subagent-lifecycle/spec.md @@ -210,7 +210,7 @@ AND memory content exceeding the token budget is truncated, not omitted entirely | `SubAgentManager` | Lifecycle manager | Concurrency limit, active handles map, cancellation registry | | `SubAgentHandle` | Reference to a running task | Task ID (UUID), status channel, cancellation token | | `SubAgentStatus` | Current state of a task | Variants: `Running`, `Completed`, `Failed`, `Cancelled` | -| `SpawnContext` | Parent-derived spawn state | `parent_messages`, `parent_cancel`, `parent_provider_name`, `spawn_depth`, `mcp_tool_names` | +| `SpawnContext` | Parent-derived spawn state | `parent_messages`, `parent_cancel`, `parent_provider_name`, `spawn_depth`, `mcp_tool_names`, `max_trust_level`, `inherited_tool_allowlist` | | `PermissionGrants` | TTL-bounded permission registry | Map of `GrantKind` → expiry timestamp | | `Grant` | Single permission grant | `kind: GrantKind`, `ttl_secs`, expiry instant | | `GrantKind` | Type of permission | Variants: `VaultSecret`, `Tool` | @@ -331,7 +331,45 @@ whether the worktree is still referenced by any other active subagent before rem --- -## 12. Open Questions +## 12. Transitive Constraint Propagation (#4681, #4690, #4693, #4694) + +Addresses constraint drift (arXiv:2605.10481): safety constraints set at orchestration time +were silently dropped when a subagent spawned its own subagents, allowing trust-level and +tool-allowlist escalation deep in delegation chains. 
+ +### New `SpawnContext` Fields + +| Field | Type | Description | +|-------|------|-------------| +| `max_trust_level` | `Option` | Maximum trust level allowed for skills invoked by this subagent or its children | +| `inherited_tool_allowlist` | `Option>` | Tool allowlist inherited from parent; used to restrict what the child may be granted | + +### `apply_constraint_propagation(def, ctx)` — `zeph-subagent` + +Called during `spawn()` and `resume()` before building the `FilteredToolExecutor`: + +1. **Trust clamping**: executor trust is set to `min(def.trust_level, ctx.max_trust_level)` — narrows only, never raises. +2. **Allowlist intersection**: if `ctx.inherited_tool_allowlist` is `Some(parent_set)`: + - `AllowList(child_set)` → `AllowList(child_set ∩ parent_set)` + - `InheritAll` → `AllowList(parent_set)` (parent set becomes the effective allowlist) + - `DenyList(deny_entries)` → `AllowList(parent_set \ deny_entries)` (fail-closed conversion) +3. Constraint narrowing is logged at `info` level for auditability. + +The propagated fields are passed transitively: when the newly spawned agent itself spawns +children, it sets `max_trust_level` and `inherited_tool_allowlist` from its own (already-clamped) +constraints. + +### Key Invariants + +- Constraint propagation MUST run in both `spawn()` and `resume()` — applying it in spawn only is incomplete +- Propagation MUST be transitive: grandchild constraints are bounded by the grandparent's, not just the parent's +- NEVER raise trust level via propagation — `min()` is the only allowed operation on `max_trust_level` +- `InheritAll` tool policy with a non-None parent allowlist MUST be converted to `AllowList(parent_set)` — `InheritAll` must not survive into a constrained delegation chain +- Constraint narrowing MUST be logged — silent narrowing is a security observability gap + +--- + +## 13. Open Questions None. 
diff --git a/specs/058-plugins/spec.md b/specs/058-plugins/spec.md index ecc20071d..c07c4bda1 100644 --- a/specs/058-plugins/spec.md +++ b/specs/058-plugins/spec.md @@ -524,7 +524,52 @@ This helps operators identify token-heavy plugins before they cause context pres --- -## 15. Open Questions +## 15. Multiple `--plugin-url` Values and `PluginName` Newtype (#4675, #4674, #4680) + +### Multiple `--plugin-url` Values + +The `--plugin-url` CLI flag now accepts multiple values in a single invocation: + +```bash +zeph --plugin-url https://example.com/plugin-a.tar.gz \ + --plugin-url https://example.com/plugin-b.tar.gz +``` + +Each URL is validated and downloaded in sequence. Validation enforces HTTPS-only (HTTP is +rejected at parse time). Download, extraction, and installation follow the same +`add_remote_ephemeral` path as single-URL invocation. + +### `PluginName` Newtype + +`PluginName(Arc<str>)` replaces `String` as the canonical type for plugin names throughout the +crate. The newtype: +- Derives `Clone`, `Eq`, `Hash`, `Display`, `Debug`, `Serialize`, `Deserialize` +- Validates the plugin name regex `^[a-z0-9][a-z0-9-]*$` in its constructor +- Provides `as_str() -> &str` for low-overhead string access + +All internal maps keyed on plugin names use `PluginName` instead of `String`. + +### `add_remote_ephemeral` Async Fix (#4845) + +`add_remote_ephemeral` previously used `std::fs` reads inside an async function. All file +I/O in `add_remote_ephemeral` is now `tokio::fs` to prevent blocking the async executor. + +### `.bundled` Marker Stripping in Ephemeral Plugins (#4676) + +`extract_archive_safe` now also strips `.bundled` marker files from ephemeral plugin archives +during extraction, before any skill loading. This prevents a scenario where an ephemeral plugin +bundle includes `.bundled` markers that would incorrectly grant `Trusted` trust to its skills. 
+ +### Key Invariants + +- `--plugin-url` MUST reject non-HTTPS URLs — `http://` is a hard block, not a warning +- `PluginName` constructor MUST validate the regex — NEVER create a `PluginName` from a string that has not been validated +- `add_remote_ephemeral` MUST use `tokio::fs` — NEVER use `std::fs` inside an async fn +- `.bundled` markers MUST be stripped during ephemeral extraction — NEVER load ephemeral plugins with bundled trust + +--- + +## 16. Open Questions None. From c888f91470d930715b56604e2b45b6864b8f0469 Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Fri, 5 Jun 2026 22:44:53 +0200 Subject: [PATCH 5/7] fix(docs): remove broken link to non-existent guides/plugins.md --- book/src/reference/cli.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/book/src/reference/cli.md b/book/src/reference/cli.md index 68a0fea20..c30ee4bea 100644 --- a/book/src/reference/cli.md +++ b/book/src/reference/cli.md @@ -668,7 +668,7 @@ Configuration: Set `[session.recap]` in your config to control which LLM provide | `--log-file ` | Override the log file path for this session. Set to empty string (`""`) to disable file logging. See [Logging](../concepts/logging.md) | | `--tafc` | Enable Think-Augmented Function Calling for this session, overriding `tools.tafc.enabled`. See [Tools — TAFC](../concepts/tools.md#think-augmented-function-calling-tafc) | | `--debug-dump [PATH]` | Write LLM requests/responses and raw tool output to files. Omit `PATH` to use `debug.output_dir` from config (default: `.zeph/debug`). See [Debug Dump](../advanced/debug-dump.md) | -| `--plugin-url ` | Load a plugin from a remote URL for this session only (ephemeral). Accepts multiple values. Use `url@sha256` syntax to pin a version, e.g., `--plugin-url https://example.com/plugin.tar.gz@abc123def456`. Requires HTTPS. See [Plugins](../guides/plugins.md) | +| `--plugin-url ` | Load a plugin from a remote URL for this session only (ephemeral). Accepts multiple values. 
Use `url@sha256` syntax to pin a version, e.g., `--plugin-url https://example.com/plugin.tar.gz@abc123def456`. Requires HTTPS. | | `--worktree-base-ref ` | Override the base ref for worktree creation: `head` (current HEAD) or `fresh` (clone main). Requires `[worktree] enabled = true`. See [Worktree Isolation](../guides/worktree.md) | | `--version` | Print version and exit | | `--help` | Print help and exit | From f73a748d8913440f5a56eace01fec74f8dd28dba Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Fri, 5 Jun 2026 22:58:04 +0200 Subject: [PATCH 6/7] refactor(specs): rename parity-claude-code-3918 to 065-ephemeral-plugins-provider-overrides Update all wikilinks, frontmatter aliases, and index entries. Remove vendor name from spec titles. Add specs for judge_provider fallback chain (#4780), Discord rate-limit retry (#4746), and command auth guard (#4762). --- specs/002-agent-loop/spec.md | 18 +++++++ specs/005-skills/spec.md | 50 +++++++++++++++++++ specs/007-channels/spec.md | 12 +++++ specs/011-tui/spec.md | 33 ++++++++++++ specs/016-agent-feedback/spec.md | 21 +++++++- specs/040-sanitizer/spec.md | 27 ++++++++++ specs/042-zeph-commands/spec.md | 21 ++++++++ specs/055-cocoon/spec.md | 17 +++++-- .../brd.md | 14 +++--- .../nfr.md | 10 ++-- .../plan.md | 8 +-- .../spec.md | 20 ++++---- .../srs.md | 10 ++-- .../tasks.md | 8 +-- specs/MOC-specs.md | 1 + specs/README.md | 18 +++---- 16 files changed, 239 insertions(+), 49 deletions(-) rename specs/{parity-claude-code-3918 => 065-ephemeral-plugins-provider-overrides}/brd.md (86%) rename specs/{parity-claude-code-3918 => 065-ephemeral-plugins-provider-overrides}/nfr.md (91%) rename specs/{parity-claude-code-3918 => 065-ephemeral-plugins-provider-overrides}/plan.md (96%) rename specs/{parity-claude-code-3918 => 065-ephemeral-plugins-provider-overrides}/spec.md (90%) rename specs/{parity-claude-code-3918 => 065-ephemeral-plugins-provider-overrides}/srs.md (95%) rename specs/{parity-claude-code-3918 => 
065-ephemeral-plugins-provider-overrides}/tasks.md (93%) diff --git a/specs/002-agent-loop/spec.md b/specs/002-agent-loop/spec.md index 3f86df200..946dd6cf7 100644 --- a/specs/002-agent-loop/spec.md +++ b/specs/002-agent-loop/spec.md @@ -490,3 +490,21 @@ Starting from PR #3597, `OmniMem::recall()` logs retrieval failures into the - Logged queries are truncated to 512 characters before storage — no unbounded writes - Failure logs are NOT surfaced to the LLM or the user; they are operator/self-improvement data only - `outcome_type = "memory_miss"` is a stable string — consumers (scheduler micro-benchmark) depend on it + +--- + +## Unknown `ChatResponse` Variant Handling (#4786) + +`tier_loop` previously panicked (via `unreachable!`) on a `ChatResponse` variant it did not +recognize. This was hit in practice when a new provider-specific variant was added without +updating all match arms. + +Fix: unknown `ChatResponse` variants are now handled gracefully: +- The unknown variant is logged at `WARN` with the variant name (using `{:?}`) +- The turn continues with an empty content response rather than panicking + +### Key Invariant + +- NEVER use `unreachable!` in `tier_loop`'s `ChatResponse` match — use a wildcard arm that + logs and recovers. New variants must be added to the match before shipping, but an unknown + variant at runtime must not crash the agent. diff --git a/specs/005-skills/spec.md b/specs/005-skills/spec.md index b87cd8bb3..129662285 100644 --- a/specs/005-skills/spec.md +++ b/specs/005-skills/spec.md @@ -638,6 +638,56 @@ breaking existing struct literals. --- +## Stage-2 LLM Semantic Scan for Third-Party Skills (#3947, #4696) + +Defends against Semantic Compliance Hijacking (SCH) attacks (arXiv:2605.14460) where malicious +third-party skills encode harmful instructions in SKILL.md without explicit code payloads that +Stage-1 regex patterns would catch. + +### `SkillSemanticScanner` + +`crates/zeph-skills/src/semantic_scanner.rs`. 
Uses `chat_typed_erased` with a configurable +fast provider. Content cap: 8 KiB with head+tail sampling for larger skills. + +XML delimiter-escape neutralization: any `` sequences in the skill body are +neutralized before interpolation into the prompt to prevent prompt-frame escapes. + +Verdicts: + +| `ScanVerdict` | Action | +|--------------|--------| +| `Allow` | Skill passes; proceed with installation/execution | +| `Warn` | Advisory; skill logged at WARN but not blocked | +| `Block` | Skill blocked; installation or execution rejected | + +Unknown LLM output tokens fall back to `Block` (fail-closed). + +### Integration Points + +- **Plugin add**: `zeph-plugins` calls `scan_targets()` to extract SKILL.md candidates from an + archive before installation. The `zeph-plugins` crate itself remains LLM-free; the scan + is performed in `zeph-core` via `semantic_scan_plugin_add`, which wires the scanner. +- **Fail-closed on config error**: `semantic_scan = true` with an empty `semantic_scan_provider` + returns a config error — never proceeds with an unconfigured scanner. 
+ +### Config + +```toml +[skills] +semantic_scan = false # opt-in Stage-2 semantic scan +semantic_scan_provider = "" # [[llm.providers]] name (required when semantic_scan = true) +``` + +### Key Invariants + +- NEVER proceed when `semantic_scan = true` and `semantic_scan_provider` is empty +- XML delimiter-escape neutralization (`` → escaped form) MUST run before interpolation — NEVER interpolate raw skill content +- Unknown scanner output tokens MUST produce `Block` verdict — NEVER default to `Allow` on parse failure +- `scan_targets()` in `zeph-plugins` extracts candidates without LLM calls — keeps `zeph-plugins` LLM-free +- NEVER apply Stage-2 scan to bundled skills (`.bundled` marker) — bundled skills are pre-vetted + +--- + ## Stage-1 Advisory SKILL.md Scan (#4132) Before executing a skill, the system runs a lightweight static scan over the SKILL.md body diff --git a/specs/007-channels/spec.md b/specs/007-channels/spec.md index 8a9208b58..f514e8a2f 100644 --- a/specs/007-channels/spec.md +++ b/specs/007-channels/spec.md @@ -287,3 +287,15 @@ without tracking, creating silent orphan tasks on shutdown. A shared `StreamingBuffer` abstraction was extracted from channel-specific code. Discord and Slack adapters now use the shared buffer for streaming chunk accumulation. Stub `elicit()` methods were added to Discord and Slack channels for future elicitation support. + +### Discord API 429 Rate-Limit Retry (#4746) + +`DiscordChannel` now handles HTTP 429 responses from the Discord API by reading the +`Retry-After` header and retrying the request after the specified delay. Previously, +HTTP 429 responses were propagated as `ChannelError::Send` and the message was dropped. 
+ +#### Key Invariants + +- On HTTP 429, the channel MUST wait `Retry-After` seconds before retrying — never drop the message +- If `Retry-After` is absent or unparseable, fall back to a fixed 1-second delay +- Retry applies to `send()` and `send_chunk()` only — `send_typing()` is non-critical and may be dropped on 429 diff --git a/specs/011-tui/spec.md b/specs/011-tui/spec.md index 04aa977a0..2b073cd4f 100644 --- a/specs/011-tui/spec.md +++ b/specs/011-tui/spec.md @@ -265,6 +265,39 @@ Issue #3054. When the user pastes multi-line content into the TUI input: --- +## Ctrl+R Prompt History Reverse-Search (#4649, #4657, #4678) + +`ReverseSearchState` widget adds in-session prompt history reverse-search accessible via +`Ctrl+R` keybinding from TUI Insert mode. + +### Behavior + +- Scope: in-memory prompt history for the current session only (no cross-session persistence) +- `Ctrl+R`: enter reverse-search mode; input field shows search query, history filtered live +- `Ctrl+R` again: cycle to the next older match +- `Enter`: confirm match, place in input composer, exit reverse-search mode +- `Esc`: exit reverse-search mode without replacing input + +### Key Dispatch Order + +Key dispatch in TUI Insert mode checks reverse-search **before** slash-autocomplete (invariant C4). +If the user is in reverse-search mode, all keystrokes route to `ReverseSearchState` — slash +autocomplete does not activate. + +### Char-Safe Rendering + +The reverse-search render uses `floor_char_boundary()` for truncation to prevent panics on +multibyte UTF-8 input (Cyrillic, CJK characters). NEVER truncate at a byte boundary. 
+ +### Key Invariants (Ctrl+R) + +- Reverse-search scope is single-session, in-memory only — NEVER persist across restarts or share across slots +- Key dispatch MUST check reverse-search before slash-autocomplete (C4 invariant) +- Render truncation MUST use `floor_char_boundary()` — NEVER byte-index truncation +- `Esc` MUST exit without side-effects — NEVER modify the composer input on dismiss + +--- + ## Key Invariants - Metrics updated every turn — not only when a specific event fires diff --git a/specs/016-agent-feedback/spec.md b/specs/016-agent-feedback/spec.md index 95b31d3dc..1ba092bef 100644 --- a/specs/016-agent-feedback/spec.md +++ b/specs/016-agent-feedback/spec.md @@ -452,7 +452,26 @@ let detector = FeedbackDetector::new(0.6); --- -## 9. Related Specs +## 10. `judge_provider` Three-Level Fallback (#4780) + +`build_judge_provider` resolves the LLM provider for `JudgeDetector` calls via a three-level +fallback chain: + +1. **`judge_provider`**: named lookup in `[[llm.providers]]` +2. **`judge_model`**: legacy field — construct a provider from the model name string +3. **Primary provider**: agent's default LLM provider + +A previous regression caused `build_judge_provider` to return `None` on named lookup failure +instead of falling through to the `judge_model` branch. This was fixed in #4780. The three-level +chain is now correctly restored and tested. + +### Key Invariant + +- NEVER return `None` from `build_judge_provider` on `judge_provider` lookup failure — fall through to `judge_model` → primary provider chain + +--- + +## 11. 
Related Specs - **015-self-learning**: Uses `FeedbackDetector` to identify user corrections for skill refinement - **002-agent-loop**: Calls feedback detector on every user message to detect implicit corrections diff --git a/specs/040-sanitizer/spec.md b/specs/040-sanitizer/spec.md index 4ce07d0af..f9e5a78ef 100644 --- a/specs/040-sanitizer/spec.md +++ b/specs/040-sanitizer/spec.md @@ -452,6 +452,33 @@ max_external_content_size = 25000 --- +## Unicode Bypass Extension (#4757, #4760, #4802) + +`UNICODE_BYPASS_RE` (the regex used to detect Unicode homoglyph / invisible-character injection +bypass attempts) has been extended with additional codepoint ranges: + +| PR | Codepoints Added | Rationale | +|----|-----------------|-----------| +| #4757 | U+2060 (WORD JOINER) | Used to split injection keywords invisibly | +| #4760 | `\p{Cf}` (Unicode Format characters), U+034F (COMBINING GRAPHEME JOINER) | Broader format-character class that covers a family of invisible bypass vectors | +| #4802 | U+034F explicitly re-listed for clarity alongside `\p{Cf}` | Documentation fix; no functional change | + +`CausalIpiConfig.provider` was previously hardcoded; it now resolves from the named provider +registry (same `*_provider` pattern used by all other subsystems). The config field: + +```toml +[sanitizer.causal_ipi] +provider = "" # [[llm.providers]] name; empty = primary provider fallback +``` + +### Key Invariants + +- `UNICODE_BYPASS_RE` MUST cover `\p{Cf}` (all Unicode Format characters) — point-by-point additions are insufficient; use the Unicode category +- U+2060 (WORD JOINER) is in `\p{Cf}` but MUST also be listed explicitly for documentation clarity +- `CausalIpiConfig.provider` MUST resolve via the named provider registry — NEVER hardcode a provider + +--- + ## 15. 
See Also - [[MOC-specs]] — all specifications diff --git a/specs/042-zeph-commands/spec.md b/specs/042-zeph-commands/spec.md index 976383b21..52a444d51 100644 --- a/specs/042-zeph-commands/spec.md +++ b/specs/042-zeph-commands/spec.md @@ -232,6 +232,27 @@ recap_provider = "" # provider for recap LLM call; empty = primary pro --- +## 12. `requires_auth` Guard on Advanced/Debugging Handlers (#4762) + +Handlers in the `Advanced` and `Debugging` `SlashCategory` categories now carry a +`requires_auth: bool` field on their `CommandInfo`. When `requires_auth = true` and the +channel is unauthenticated (e.g. a public Telegram group without an `allowed_users` match), +dispatch returns `CommandOutput::Message("Unauthorized")` without invoking the handler. + +### Affected Handlers + +All handlers registered under `SlashCategory::Advanced` and `SlashCategory::Debugging` have +`requires_auth = true` by default. The check is performed in `CommandRegistry::dispatch()`, +before calling `handler.handle()`. + +### Key Invariants + +- `requires_auth = true` handlers MUST be rejected before the handler body runs — never inside the handler +- Handlers in `Session` / `Configuration` / `Memory` / `Skills` / `Planning` categories default to `requires_auth = false` +- The `NullSink` used in tests bypasses auth gating — tests must use a dedicated test context that sets `is_authenticated = true` where needed + +--- + ## 9. Open Questions None. diff --git a/specs/055-cocoon/spec.md b/specs/055-cocoon/spec.md index d9680960a..77328405e 100644 --- a/specs/055-cocoon/spec.md +++ b/specs/055-cocoon/spec.md @@ -547,13 +547,22 @@ sidecar. > the localhost restriction to allow arbitrary remote hosts would negate TEE > confidentiality benefits and is not planned. > -> **4. `ton_balance` side-channel** +> **4. `ton_balance` side-channel (MITIGATED #4649, #4657)** > `CocoonHealth.ton_balance` is returned by `/stats` and displayed in the TUI > sidebar. 
In shared-access or shared-screen scenarios, an observer with TUI > visibility can infer the user's spending volume and usage pattern from -> balance changes over time. This is not a TEE break but is a privacy -> consideration. Operators in multi-user environments should consider making -> balance display opt-in or redacting the value in the TUI status area. +> balance changes over time. This is mitigated by `cocoon.show_balance` (default `true`): +> when `false`, the TUI renders `*** TON` instead of the real balance value. +> Operators in multi-user or shared-screen environments should set `show_balance = false`. +> +> ```toml +> [cocoon] +> show_balance = true # set false to redact balance in TUI sidebar +> ``` +> +> Migration step 53 (config 2026-05 series) adds a commented `[cocoon]` section to +> existing configs for discoverability. The `--init` wizard prompts for this value +> in the Cocoon setup section. > > **5. GPU-TEE overhead** > Intel TDX provides CPU-level TEE protection; NVIDIA H100 Confidential diff --git a/specs/parity-claude-code-3918/brd.md b/specs/065-ephemeral-plugins-provider-overrides/brd.md similarity index 86% rename from specs/parity-claude-code-3918/brd.md rename to specs/065-ephemeral-plugins-provider-overrides/brd.md index c750a1e80..abd69a8d4 100644 --- a/specs/parity-claude-code-3918/brd.md +++ b/specs/065-ephemeral-plugins-provider-overrides/brd.md @@ -1,7 +1,7 @@ --- aliases: + - Ephemeral Plugins and Provider Overrides BRD - Parity BRD 3918 - - Claude Code Parity Business Requirements tags: - sdd - brd @@ -11,22 +11,22 @@ tags: created: 2026-05-29 status: approved related: - - "[[specs/parity-claude-code-3918/spec]]" - - "[[specs/parity-claude-code-3918/srs]]" - - "[[specs/parity-claude-code-3918/nfr]]" + - "[[specs/065-ephemeral-plugins-provider-overrides/spec]]" + - "[[specs/065-ephemeral-plugins-provider-overrides/srs]]" + - "[[specs/065-ephemeral-plugins-provider-overrides/nfr]]" - "[[specs/058-plugins/spec]]" - 
"[[specs/003-llm-providers/spec]]" --- -# BRD: Claude Code v2.1.141–v2.1.143 Parity (GitHub #3918) +# BRD: Ephemeral Plugin Loading and Provider Override Persistence (GitHub #3918) ## 1. Business Context -Zeph is an AI agent targeting parity with the Claude Code CLI UX where features are directly applicable to Zeph's architecture. GitHub issue #3918 tracks parity assessment for Claude Code v2.1.141–v2.1.143 release notes. This document defines the business case for the two actionable gaps identified in that assessment. +Zeph is an AI agent that tracks capability gaps identified through competitive analysis. GitHub issue #3918 covers a gap assessment for release v2.1.141–v2.1.143. This document defines the business case for the two actionable gaps identified in that assessment. ## 2. Problem Statement -Claude Code v2.1.141–v2.1.143 introduced: +The assessed release introduced: 1. **`--plugin-url` flag** — load a plugin from a URL for the duration of one session, with no permanent installation. Zeph has permanent plugin installation but no session-scoped ephemeral loading. 2. **Background session provider persistence** — model selection and reasoning effort are preserved when the agent wakes from an idle background state. Zeph persists the provider *name* but discards per-session parameter overrides (reasoning effort, temperature) on process restart. 
diff --git a/specs/parity-claude-code-3918/nfr.md b/specs/065-ephemeral-plugins-provider-overrides/nfr.md similarity index 91% rename from specs/parity-claude-code-3918/nfr.md rename to specs/065-ephemeral-plugins-provider-overrides/nfr.md index 494e875fd..307b1d73b 100644 --- a/specs/parity-claude-code-3918/nfr.md +++ b/specs/065-ephemeral-plugins-provider-overrides/nfr.md @@ -1,7 +1,7 @@ --- aliases: + - Ephemeral Plugins and Provider Overrides NFR - Parity NFR 3918 - - Claude Code Parity Non-Functional Requirements tags: - sdd - nfr @@ -11,12 +11,12 @@ tags: created: 2026-05-29 status: approved related: - - "[[specs/parity-claude-code-3918/brd]]" - - "[[specs/parity-claude-code-3918/srs]]" - - "[[specs/parity-claude-code-3918/spec]]" + - "[[specs/065-ephemeral-plugins-provider-overrides/brd]]" + - "[[specs/065-ephemeral-plugins-provider-overrides/srs]]" + - "[[specs/065-ephemeral-plugins-provider-overrides/spec]]" --- -# NFR: Claude Code v2.1.141–v2.1.143 Parity (GitHub #3918) +# NFR: Ephemeral Plugin Loading and Provider Override Persistence (GitHub #3918) ISO/IEC 25010:2011 quality model. 
diff --git a/specs/parity-claude-code-3918/plan.md b/specs/065-ephemeral-plugins-provider-overrides/plan.md similarity index 96% rename from specs/parity-claude-code-3918/plan.md rename to specs/065-ephemeral-plugins-provider-overrides/plan.md index ec2722df5..c3a939ae6 100644 --- a/specs/parity-claude-code-3918/plan.md +++ b/specs/065-ephemeral-plugins-provider-overrides/plan.md @@ -1,7 +1,7 @@ --- aliases: + - Ephemeral Plugins and Provider Overrides Plan - Parity Plan 3918 - - Claude Code Parity Implementation Plan tags: - sdd - plan @@ -11,11 +11,11 @@ tags: created: 2026-05-29 status: approved related: - - "[[specs/parity-claude-code-3918/spec]]" - - "[[specs/parity-claude-code-3918/tasks]]" + - "[[specs/065-ephemeral-plugins-provider-overrides/spec]]" + - "[[specs/065-ephemeral-plugins-provider-overrides/tasks]]" --- -# Implementation Plan: Claude Code v2.1.141–v2.1.143 Parity (GitHub #3918) +# Implementation Plan: Ephemeral Plugin Loading and Provider Override Persistence (GitHub #3918) ## Recommended Implementation Order diff --git a/specs/parity-claude-code-3918/spec.md b/specs/065-ephemeral-plugins-provider-overrides/spec.md similarity index 90% rename from specs/parity-claude-code-3918/spec.md rename to specs/065-ephemeral-plugins-provider-overrides/spec.md index c1614f52a..82083ead0 100644 --- a/specs/parity-claude-code-3918/spec.md +++ b/specs/065-ephemeral-plugins-provider-overrides/spec.md @@ -1,7 +1,7 @@ --- aliases: + - Ephemeral Plugins and Provider Overrides - Parity Spec 3918 - - Claude Code Parity Implementation Spec tags: - sdd - spec @@ -9,20 +9,20 @@ tags: - plugins - provider-persistence created: 2026-05-29 -status: approved +status: implemented related: - "[[MOC-specs]]" - "[[constitution]]" - - "[[specs/parity-claude-code-3918/brd]]" - - "[[specs/parity-claude-code-3918/srs]]" - - "[[specs/parity-claude-code-3918/nfr]]" - - "[[specs/parity-claude-code-3918/plan]]" + - "[[specs/065-ephemeral-plugins-provider-overrides/brd]]" + - 
"[[specs/065-ephemeral-plugins-provider-overrides/srs]]" + - "[[specs/065-ephemeral-plugins-provider-overrides/nfr]]" + - "[[specs/065-ephemeral-plugins-provider-overrides/plan]]" - "[[specs/058-plugins/spec]]" - "[[specs/003-llm-providers/spec]]" - "[[specs/010-security/spec]]" --- -# Spec: Claude Code v2.1.141–v2.1.143 Parity (GitHub #3918) +# Spec: Ephemeral Plugin Loading and Provider Override Persistence (GitHub #3918) > This spec is the authoritative implementation contract for the two actionable parity gaps > identified in GitHub issue #3918. It is derived from the architect plan @@ -37,9 +37,9 @@ related: |-----|---------|----------|-----------| | `--plugin-url` session-scoped loading | **Implement** | P2 | Download infra exists; missing ephemeral variant + HTTPS gate | | Session provider override persistence | **Implement** | P2 | Persistence infra exists; missing overrides blob per channel | -| `worktree.baseRef` config | Defer | P3 | Requires native worktree subsystem (does not exist) | -| `worktree.bgIsolation: none` | Defer | P3 | Depends on worktree.baseRef | -| Ctrl+R cross-project history | Defer | P3 | Zeph TUI has no prompt-history infrastructure | +| `worktree.baseRef` config | **Implemented** | P3 | `worktree.base_ref: fresh\|head` in spec-063; `--init` wizard via `step_worktree()` (#4847) | +| `worktree.bgIsolation: none` | Partially deferred | P3 | `bg_isolation` field added to `WorktreeConfig` via `step_worktree()` (#4847); full child-process isolation still deferred | +| Ctrl+R cross-project history | **Implemented (single-session)** | P3 | `ReverseSearchState` widget with Ctrl+R keybinding added in TUI (#4678); cross-session scope deferred | Deferred gaps **must** have follow-up GitHub issues filed. See `tasks.md`. 
diff --git a/specs/parity-claude-code-3918/srs.md b/specs/065-ephemeral-plugins-provider-overrides/srs.md similarity index 95% rename from specs/parity-claude-code-3918/srs.md rename to specs/065-ephemeral-plugins-provider-overrides/srs.md index f1ba50f82..a77b03b07 100644 --- a/specs/parity-claude-code-3918/srs.md +++ b/specs/065-ephemeral-plugins-provider-overrides/srs.md @@ -1,7 +1,7 @@ --- aliases: + - Ephemeral Plugins and Provider Overrides SRS - Parity SRS 3918 - - Claude Code Parity Software Requirements tags: - sdd - srs @@ -11,12 +11,12 @@ tags: created: 2026-05-29 status: approved related: - - "[[specs/parity-claude-code-3918/brd]]" - - "[[specs/parity-claude-code-3918/spec]]" - - "[[specs/parity-claude-code-3918/nfr]]" + - "[[specs/065-ephemeral-plugins-provider-overrides/brd]]" + - "[[specs/065-ephemeral-plugins-provider-overrides/spec]]" + - "[[specs/065-ephemeral-plugins-provider-overrides/nfr]]" --- -# SRS: Claude Code v2.1.141–v2.1.143 Parity (GitHub #3918) +# SRS: Ephemeral Plugin Loading and Provider Override Persistence (GitHub #3918) ISO/IEC/IEEE 29148:2018 compliant. Requirements use EARS notation. 
diff --git a/specs/parity-claude-code-3918/tasks.md b/specs/065-ephemeral-plugins-provider-overrides/tasks.md similarity index 93% rename from specs/parity-claude-code-3918/tasks.md rename to specs/065-ephemeral-plugins-provider-overrides/tasks.md index 98b655671..88b5edd27 100644 --- a/specs/parity-claude-code-3918/tasks.md +++ b/specs/065-ephemeral-plugins-provider-overrides/tasks.md @@ -1,7 +1,7 @@ --- aliases: + - Ephemeral Plugins and Provider Overrides Tasks - Parity Tasks 3918 - - Claude Code Parity Task Breakdown tags: - sdd - tasks @@ -11,11 +11,11 @@ tags: created: 2026-05-29 status: approved related: - - "[[specs/parity-claude-code-3918/plan]]" - - "[[specs/parity-claude-code-3918/spec]]" + - "[[specs/065-ephemeral-plugins-provider-overrides/plan]]" + - "[[specs/065-ephemeral-plugins-provider-overrides/spec]]" --- -# Task Breakdown: Claude Code v2.1.141–v2.1.143 Parity (GitHub #3918) +# Task Breakdown: Ephemeral Plugin Loading and Provider Override Persistence (GitHub #3918) All tasks reference the implementation plan in `plan.md`. diff --git a/specs/MOC-specs.md b/specs/MOC-specs.md index 070a5f247..b190a9ce8 100644 --- a/specs/MOC-specs.md +++ b/specs/MOC-specs.md @@ -228,6 +228,7 @@ status: moc | 044 | [[044-subagent-lifecycle/spec\|Subagent Lifecycle]] | specify | approved | | 063 | [[063-worktree-subsystem/spec\|Worktree Subsystem]] | specify | approved | | 064 | [[064-durable-execution/spec\|Durable Execution]] | specify | approved | +| 065 | [[065-ephemeral-plugins-provider-overrides/spec\|Ephemeral Plugins & Provider Overrides]] | specify | implemented | --- diff --git a/specs/README.md b/specs/README.md index 99ce375d0..f05c90b0a 100644 --- a/specs/README.md +++ b/specs/README.md @@ -25,7 +25,7 @@ See `[[constitution]]` for project-wide non-negotiable rules. 
## Numbering Scheme -Spec IDs (001–044) follow a logical grouping: +Spec IDs (001–065) follow a logical grouping: - **001–010**: Foundational contracts and core systems (invariants, loop, providers, memory, skills, tools, channels, mcp, orchestration, security) - **011–020**: User-facing features and operational integration (TUI, graph memory, protocols, self-learning, filtering, indexing, scheduler, gateway, config loading) @@ -45,9 +45,9 @@ Spec IDs (001–044) follow a logical grouping: - **053**: SpeculationEngine — speculative tool execution (SSE decoding path, PASTE skill activation, ToolStartEvent{speculative:true}) - **055**: Cocoon distributed compute integration — CocoonProvider, CocoonClient, `zeph cocoon doctor`, TUI palette entries, vault key ZEPH_COCOON_ACCESS_HASH - **062**: Context-Adaptive Memory (CAM) — three-level fidelity (Full/Compressed/Placeholder), heuristic FidelityScorer, proactive AgeMem regrade, PlannedToolHint for PAACE; GitHub #4016, #4017, #4018 -- **063**: Worktree subsystem — `zeph-worktree` crate, `worktree.base_ref: fresh|head`, CwdGuard serialisation, startup probe, CLI commands; GitHub #4655 +- **063**: Worktree subsystem — `zeph-worktree` crate, `worktree.base_ref: fresh|head`, CwdGuard serialisation, startup probe, CLI commands, `git_timeout_secs` (default 30, migration step 55), `--init` wizard `step_worktree()`; GitHub #4655, #4704, #4784, #4847 - **064**: Durable execution — `zeph-durable` Layer-0 crate, journal/replay, `DurableStep`, `EffectClass`, `JournalWriter` actor, AEAD payload cipher, `DurablePromise`/timers, dedicated `durable.db`, P1-P4 integration adapters (agent-loop, orchestration, scheduler, subagent); `restate` feature flag -- **parity-claude-code-3918**: Claude Code v2.1.141–v2.1.143 parity — `--plugin-url` ephemeral loading, provider override persistence; GitHub #3918 +- **065**: Ephemeral plugin loading and provider override persistence — `--plugin-url` HTTPS-only session-scoped plugin loading (`TempDir` 
lifetime, blocking scan), provider parameter override persistence (`reasoning_effort`/`temperature` via `channel_preferences` key-value row, no ALTER TABLE); GitHub #3918 --- @@ -70,8 +70,8 @@ Spec IDs (001–044) follow a logical grouping: | Doc | Feature | Crate | |---|---|---| | `002-agent-loop/spec.md` | Agent loop, turn lifecycle, context pressure, HiAgent subgoal-aware compaction | `zeph-core` | -| `003-llm-providers/spec.md` | LlmProvider trait, AnyProvider, prompt caching, configurable `CacheTtl` (ephemeral/1h) | `zeph-llm` | -| `004-memory/spec.md` | SQLite + Qdrant, compaction, semantic response cache, anchored summarization, compaction probe, importance scoring, A-MAC admission control, MemScene consolidation, multi-vector chunking, GAAMA episode nodes, BATS budget hints, Focus compression, SleepGate forgetting pass, persona memory, trajectory memory, category-aware memory, TiMem tree, microcompact, autoDream, MagicDocs, embed backfill batching | `zeph-memory` | +| `003-llm-providers/spec.md` | LlmProvider trait, AnyProvider, prompt caching, configurable `CacheTtl` (ephemeral/1h), SSE `StreamLimits` (max_tool_json_bytes/max_thinking_bytes/max_compaction_bytes), o-series `context_window` branch, embedding helpers in `LlmConfig` | `zeph-llm` | +| `004-memory/spec.md` | SQLite + Qdrant, compaction, semantic response cache, anchored summarization, compaction probe, importance scoring, A-MAC admission control, MemScene consolidation, multi-vector chunking, GAAMA episode nodes, BATS budget hints, Focus compression, SleepGate forgetting pass, persona memory, trajectory memory, category-aware memory, TiMem tree, microcompact, autoDream, MagicDocs, embed backfill batching, Benna-Fusi two-timescale SYNAPSE edges (`confidence_fast`/`confidence_slow`, migration 096), MemORAI `MemoryWriteGate` prefilter, `deep_reasoning_query_conditioned` flag (#3709, #3710, #3994) | `zeph-memory` | | `004-memory/004-7-memory-apex-magma.md` | APEX-MEM append-only MAGMA: edge 
supersession, ontology normalization, SYNAPSE conflict resolution (#3223); BeliefMem probabilistic pre-commitment edge layer with Noisy-OR accumulation, temporal decay, promotion threshold (#3706) | `zeph-memory` | | `004-memory/004-8-memory-typed-pages.md` | ClawVM typed page compaction: PageType classification, minimum-fidelity invariants, compaction audit log (#3221) | `zeph-context`, `zeph-memory` | | `004-memory/004-9-memory-write-gate.md` | MemReader write quality gate: three-signal scorer, rule-based MVP, optional LLM scoring (#3222) | `zeph-memory` | @@ -81,13 +81,13 @@ Spec IDs (001–044) follow a logical grouping: | `004-memory/004-13-memory-memcot.md` | MemCoT: SemanticStateAccumulator, Zoom-In evidence localization, Zoom-Out causal expansion (#3592) | `zeph-memory` | | `004-memory/004-14-memory-tiering-rfc-decision.md` | RFC #4217 decision: memory tiering architecture analysis (MEMTIER, BudgetMem, Multi-Layer, LCM, MemRouter); adopt frequency signal + tier-aware gating + cost-aware routing (#4217) | `zeph-memory` | | `004-memory/004-15-memory-skill-coevolution-rfc-decision.md` | RFC #4218 decision: memory–skill coevolution analysis (MemQ, δ-mem, EvolveMem, SAGE-GraphMem, NanoResearch, Cognifold); adopt Cognifold idle-time folding + EvolveMem feedback routing; defer MemQ to P3 (#4218) | `zeph-memory`, `zeph-skills` | -| `005-skills/spec.md` | SKILL.md format, registry, matching, hot-reload, skill trust governance, two-stage matching, Wilson score confidence intervals, hub install pipeline, agent-invocable skills (`invoke_skill`) | `zeph-skills` | +| `005-skills/spec.md` | SKILL.md format, registry, matching, hot-reload, skill trust governance, two-stage matching, Wilson score confidence intervals, hub install pipeline, agent-invocable skills (`invoke_skill`), recursive WalkDir discovery (max depth 16), `SkillExtensions` manifest parser, concurrent semantic scan (`buffer_unordered(4)`, 300s timeout), skill egress attribution in 
`ToolCall`/`AuditEntry`/`EgressEvent` | `zeph-skills` | | `006-tools/spec.md` | ToolExecutor, CompositeExecutor, TAFC, schema filter, result cache, dependency graph, tool invocation phase taxonomy, native `tool_use` only; `invoke_skill`/`load_skill` utility-gate exemption | `zeph-tools` | | `007-channels/spec.md` | Channel trait, AnyChannel dispatch, streaming, channel feature parity, `stream_interval_ms` (Bot API 10.0, #3727); `TelegramApiClient` 30s `REQUEST_TIMEOUT` on reqwest client (#3780); Telegram reaction moderation tools `telegram_delete_reaction` / `telegram_delete_all_reactions` (#3770); CJK false-positive fix in FeedbackDetector; `send_status` added to Discord and Slack adapters (#4228) | `zeph-channels` | | `007-channels/007-1-telegram-guest-mode.md` | Telegram Guest Mode — `guest_message` update handling, `answerGuestQuery` routing, `allowed_users` access control, single-shot streaming (#3729) | `zeph-channels`, `zeph-core`, `zeph-config` | | `007-channels/007-2-telegram-bot-to-bot.md` | Telegram Bot-to-Bot — `setManagedBotAccessSettings` startup, `allowed_bots` authorization, reply-chain loop prevention, `is_from_bot` metadata (#3730) | `zeph-channels`, `zeph-core`, `zeph-config` | | `008-mcp/spec.md` | MCP client, server lifecycle, semantic tool discovery, per-message pruning cache, injection detection, tool collision detection, caller identity propagation, tool quota, structured error codes, OAP authorization, elicitation (2025-06-18) | `zeph-mcp` | -| `009-orchestration/spec.md` | DAG planner, DagScheduler, AgentRouter, /plan command, plan template cache, VMAO adaptive replanning, cascade-aware DAG routing, VeriMAP predicate gate, AdaptOrch topology advisor, CoE entropy routing, graph persistence in scheduler loop | `zeph-orchestration` | +| `009-orchestration/spec.md` | DAG planner, DagScheduler, AgentRouter, /plan command, plan template cache, VMAO adaptive replanning, cascade-aware DAG routing, VeriMAP predicate gate, AdaptOrch topology 
advisor, CoE entropy routing, graph persistence in scheduler loop, `graph_dirty` consistency on all terminal transitions and `inject_tasks`/`record_predicate_outcome` (#4809, #4831, #4848), `PlanCache` tracing instrumentation | `zeph-orchestration` | | `010-security/spec.md` | Vault, shell sandbox, content isolation, SSRF protection, IPI defense, PII NER circuit breaker, cross-tool injection correlation, AgentRFC protocol audit, MCP→ACP boundary enforcement, credential env-var scrubbing, file permission hardening (`fs_secure`), Seatbelt deny-first secret-path rules | cross-cutting | | `010-security/010-5-egress-logging.md` | Egress logging sub-spec: `EgressEvent` per outbound HTTP call, `AuditEntry.correlation_id`, bounded mpsc telemetry (256 + drop counter), TUI Security panel surface | `zeph-tools`, `zeph-core`, `zeph-tui` | | `010-security/010-6-vigil-intent-anchoring.md` | VIGIL verify-before-commit sub-spec: pre-sanitizer regex tripwire with Block/Sanitize action, per-turn `current_turn_intent`, subagent exemption, non-retryable blocks via `error_category="vigil_blocked"` | `zeph-core`, `zeph-tools`, `zeph-config` | @@ -127,7 +127,7 @@ Spec IDs (001–044) follow a logical grouping: | `042-experiments/spec.md` | Experiments & Runtime Feature Gating: `[experiments]` config section, ExperimentConfig, rollout percentage, experiment results reporting, CLI subcommands; distinct from compile-time feature flags | `zeph-experiments` | | `043-zeph-commands/spec.md` | Slash command registry, `CommandHandler` object-safe trait, `CommandRegistry` with longest-word-boundary dispatch, `ChannelSink` abstraction, static `COMMANDS` list; `/recap` command, `/session` TUI commands; no dependency on `zeph-core` | `zeph-commands` | | `044-zeph-common/spec.md` | Shared primitives: `Secret` (zeroize-on-drop), `ToolName` (Arc), `SessionId` (UUID v4), `ToolDefinition`, `SkillTrustLevel`, `PolicyLlmClient`; no `zeph-*` peer dependencies | `zeph-common` | -| 
`045-subagent-lifecycle/spec.md` | Full `zeph-subagent` crate: `SubAgentDef` parsing, `SubAgentManager` spawning and concurrency cap, `PermissionGrants` TTL, `FilteredToolExecutor` policy gate (case-insensitive tool ID normalization via `normalize_tool_id`, #3765), `MemoryAwareExecutor` for `memory: user` subagents (#3771), transcript JSONL persistence, lifecycle hooks, memory injection | `zeph-subagent` | +| `045-subagent-lifecycle/spec.md` | Full `zeph-subagent` crate: `SubAgentDef` parsing, `SubAgentManager` spawning and concurrency cap, `PermissionGrants` TTL, `FilteredToolExecutor` policy gate (case-insensitive tool ID normalization via `normalize_tool_id`, #3765), `MemoryAwareExecutor` for `memory: user` subagents (#3771), transcript JSONL persistence, lifecycle hooks, memory injection, transitive constraint propagation (`max_trust_level`, `inherited_tool_allowlist` in `SpawnContext`, `apply_constraint_propagation()` in spawn and resume, #4681) | `zeph-subagent` | | `046-interop-protocol-gaps/spec.md` | Agent interoperability protocol gap analysis (arXiv:2505.02279): capability matrix for MCP, ACP, A2A, ANP vs. 
Zeph; protocol selection guidance; ANP as P4 research; ACP re-negotiation as P3 follow-up | cross-cutting | | `047-march-quality/spec.md` | MARCH Proposer+Checker self-check pipeline: post-response factual consistency, information-asymmetry checker, `self-check` feature flag, per-turn `MarchVerdict`, Prometheus metrics (#3226) | `zeph-core` | | `048-cli-modes/spec.md` | CLI execution modes: `--bare` (skip scheduler/indexer/eviction), `--json` (JSONL event stream), `-y` (auto-approve), `/loop` command (supervised loop with inline errors), `/recap` command (#3170, #3218) | `zeph-channels`, binary | @@ -149,4 +149,4 @@ Spec IDs (001–044) follow a logical grouping: | `062-context-adaptive-memory/spec.md` | Context-Adaptive Memory (CAM): three-level fidelity (Full/Compressed/Placeholder), heuristic FidelityScorer, proactive AgeMem regrade trigger, PlannedToolHint struct for PAACE DAG lookahead; MVP: heuristic scoring only; GitHub #4016, #4017, #4018 | `zeph-common`, `zeph-context`, `zeph-agent-context` | | `063-worktree-subsystem/spec.md` | Worktree subsystem: new `zeph-worktree` crate, `worktree.base_ref: fresh\|head`, CwdGuard process-level serialisation (INV-1..INV-4), startup capability probe, `zeph worktree list/clean` CLI; defers concurrent isolation to `bgIsolation` (#4656); GitHub #4655 | `zeph-worktree` (new), `zeph-config`, `zeph-subagent` | | `064-durable-execution/spec.md` | Durable execution: `zeph-durable` Layer-0 crate, append-only journal, `DurableStep`/`DurableContext` (`&self` + `AtomicU32`), `EffectClass`+`OnAmbiguous`, `JournalWriter` actor (mpsc, ACK, group-commit), AEAD `PayloadCipher` (XChaCha20-Poly1305, vault-keyed), `DurablePromise`/resolver-token auth, `DurableTimer`, dedicated `durable.db` pool+migrations, `ReplayDivergence` guard, `read_execution_range` cursor, `restate` feature flag; P1 agent-loop, P2 orchestration `/plan resume`, P3 scheduler exactly-once, P4 subagent promise | `zeph-durable` (new), `zeph-agent-tools`, 
`zeph-orchestration`, `zeph-scheduler`, `zeph-subagent` | -| `parity-claude-code-3918/spec.md` | Claude Code v2.1.141–v2.1.143 parity gaps: `--plugin-url` ephemeral plugin loading (HTTPS-only, blocking scan, TempDir lifetime) + provider parameter override persistence (reasoning_effort, temperature via `channel_preferences` key-value row, no ALTER TABLE); defers worktree.baseRef, bgIsolation, Ctrl+R; GitHub #3918 | `zeph-plugins`, `zeph-core`, `zeph-config`, `zeph-commands` | +| `065-ephemeral-plugins-provider-overrides/spec.md` | Ephemeral plugin loading and provider override persistence: `--plugin-url` (HTTPS-only, blocking scan, TempDir lifetime) + provider parameter override persistence (reasoning_effort, temperature via `channel_preferences` key-value row, no ALTER TABLE); worktree.baseRef and single-session Ctrl+R since implemented (specs 063, 011), full bgIsolation and cross-session Ctrl+R still deferred; GitHub #3918 | `zeph-plugins`, `zeph-core`, `zeph-config`, `zeph-commands` | From d93c706fac532b4b785440da04ba1f594e1dadea Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Fri, 5 Jun 2026 23:04:59 +0200 Subject: [PATCH 7/7] docs(zeph-worktree): add README --- crates/zeph-worktree/README.md | 96 ++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 crates/zeph-worktree/README.md diff --git a/crates/zeph-worktree/README.md b/crates/zeph-worktree/README.md new file mode 100644 index 000000000..20a25ffc6 --- /dev/null +++ b/crates/zeph-worktree/README.md @@ -0,0 +1,96 @@ +# zeph-worktree + +[![Crates.io](https://img.shields.io/crates/v/zeph-worktree)](https://crates.io/crates/zeph-worktree) +[![docs.rs](https://img.shields.io/docsrs/zeph-worktree)](https://docs.rs/zeph-worktree) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](../../LICENSE) +[![MSRV](https://img.shields.io/badge/MSRV-1.95-blue)](https://www.rust-lang.org) + +Git worktree lifecycle management for Zeph subagents. + +## Overview + +`zeph-worktree` creates, removes, lists, and reconciles per-subagent git worktrees.
Each background subagent that opts into filesystem isolation gets a dedicated worktree cloned from the host repository, preventing concurrent agents from clobbering each other's working trees. + +The crate is intentionally narrow in scope — it wraps `git worktree` subprocess calls with full path sanitization, capability probing, and a configurable timeout. It has no dependency on `zeph-core`, `zeph-subagent`, or `zeph-channels`. + +## Key types + +| Type | Description | +|------|-------------| +| `DefaultWorktreeManager` | Production `WorktreeManager` — the type stored by `SubAgentManager` | +| `WorktreeManager` | Generic manager parameterised over `GitRunner`; injectable for testing | +| `WorktreeHandle` | Live record of one managed worktree (path, branch, subagent ID, creation time) | +| `DefaultGitRunner` | Production git invocation backend with configurable timeout | +| `GitRunner` | Trait for abstracting git subprocess calls | +| `WorktreeError` | All errors this crate can produce | + +## Usage + +```toml +[dependencies] +zeph-worktree = { path = "crates/zeph-worktree" } +``` + +```rust +use std::path::PathBuf; +use zeph_config::WorktreeConfig; +use zeph_worktree::{DefaultWorktreeManager, git_runner::DefaultGitRunner, manager::probe_capabilities}; + +#[tokio::main] +async fn main() -> Result<(), zeph_worktree::WorktreeError> { + let repo = PathBuf::from("/path/to/repo"); + let runner = DefaultGitRunner::new(); + + // Verify git ≥ 2.5 is available and the path is a repository. + probe_capabilities(&runner, &repo).await?; + + let mgr = DefaultWorktreeManager::new(repo, WorktreeConfig::default(), runner)?; + + // Create a worktree for a subagent. + let handle = mgr.create("agent-42").await?; + println!("Worktree at {:?}", handle.path); + + // List all tracked worktrees. + let all = mgr.list(); + println!("{} active worktrees", all.len()); + + // Remove the worktree (force = false). 
+ mgr.remove(&handle, false).await?; + + Ok(()) +} +``` + +> [!IMPORTANT] +> Call `probe_capabilities` once at bootstrap. It checks that `git` ≥ 2.5 is in `PATH` and that the target path is a git repository. A missing git binary is caught here, not at first spawn. + +## Configuration + +`WorktreeManager` is driven by `WorktreeConfig` from `zeph-config`: + +```toml +[worktree] +enabled = true +bg_isolation = "worktree" # "none" | "worktree" +base_ref = "head" # "head" | "fresh" +git_timeout_secs = 30 # clamped to max(1, value) +cleanup_on_completion = true +``` + +| Field | Default | Description | +|-------|---------|-------------| +| `enabled` | `false` | Enable worktree isolation for background subagents | +| `bg_isolation` | `"none"` | `"worktree"` creates a dedicated worktree; `"none"` only holds the CWD lock | +| `base_ref` | `"head"` | `"head"` branches off current HEAD; `"fresh"` fetches and branches off `origin/<default-branch>` | +| `git_timeout_secs` | `30` | Per-command timeout for all git subprocess calls | +| `cleanup_on_completion` | `true` | Remove the worktree when the subagent finishes | + +## Invariants + +- Path sanitization rejects absolute paths, `..` components, and names starting with `-` before any git call. +- `base_ref = "fresh"` never silently falls back to HEAD on fetch failure — it returns an error. +- `git_timeout_secs = 0` is clamped to `1` by `DefaultGitRunner`. + +## License + +MIT — see [LICENSE](../../LICENSE).