From f5f507074293b1c301fa51bdd83a8e2e31aa88f5 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 22 Jun 2026 07:47:20 +0000 Subject: [PATCH 1/3] feat(firewall): payload sizing, byte budgets, tiktoken counter, honest summaries Group the four context-firewall issues that share a single implementation path (payload measurement and the bounded Frame) into one change: - #207: add allocation-free firewall.estimated_size and use it for the raw-mode budget check instead of len(json.dumps(...)), so large payloads are no longer fully serialised just to be measured. - #211: add opt-in byte budgets to HandleStore (max_total_bytes evicts oldest-first; max_entry_bytes rejects an over-cap payload with the new HandleTooLarge error). Defaults stay None so behaviour is unchanged. - #218: ship firewall.make_tiktoken_counter implementing the TokenCounter seam with a real tokenizer (lazy import, cached encoder, configurable encoding), wired via BudgetManager(token_counter=...). - #174: stop averaging boolean columns in summaries (report true/false counts) and surface truncation with an explicit "N more facts omitted" marker. To stay within the shrink-only module-size ratchet, the HandleStore.expand query pipeline moves verbatim into a new handle_query module (also reduces the handles.py ceiling debt tracked by #188). Docs and CHANGELOG updated. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01HjetuNJX1EUJ2eVGfoLyHt --- CHANGELOG.md | 19 ++ docs/context_firewall.md | 64 ++++- pyproject.toml | 4 + src/weaver_kernel/__init__.py | 8 +- src/weaver_kernel/errors.py | 21 +- src/weaver_kernel/firewall/__init__.py | 4 + src/weaver_kernel/firewall/size_estimate.py | 96 +++++++ src/weaver_kernel/firewall/summarize.py | 61 ++-- .../firewall/token_counting_tiktoken.py | 101 +++++++ src/weaver_kernel/firewall/transform.py | 4 +- src/weaver_kernel/handle_query.py | 232 ++++++++++++++++ src/weaver_kernel/handles.py | 261 ++++++------------ tests/test_architecture.py | 1 - tests/test_doctests.py | 1 + tests/test_firewall.py | 42 +++ tests/test_handles.py | 68 +++++ tests/test_size_estimate.py | 71 +++++ tests/test_token_counting_tiktoken.py | 90 ++++++ 18 files changed, 927 insertions(+), 221 deletions(-) create mode 100644 src/weaver_kernel/firewall/size_estimate.py create mode 100644 src/weaver_kernel/firewall/token_counting_tiktoken.py create mode 100644 src/weaver_kernel/handle_query.py create mode 100644 tests/test_size_estimate.py create mode 100644 tests/test_token_counting_tiktoken.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 4fe2310..2fa1d80 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- **Context-firewall sizing, budgeting & summary fidelity.** A grouped pass over + how the firewall measures, bounds, and represents payloads: + - **Allocation-free size estimation (#207).** `firewall.estimated_size` walks + a value to approximate its serialised length instead of `json.dumps`-ing the + whole payload just to measure it; the raw-mode budget check now uses it, so + multi-MB results are no longer fully serialised for sizing. + - **Byte-aware handle budgeting (#211).** `HandleStore` accepts optional + `max_total_bytes` (evict oldest-first) and `max_entry_bytes` (reject an + over-cap payload with the new `HandleTooLarge` error). Both default to + `None`, leaving existing behaviour unchanged; `current_bytes` exposes + resident usage. + - **tiktoken token counter (#218).** `firewall.make_tiktoken_counter()` + implements the `TokenCounter` seam with a real tokenizer (configurable + encoding, default `cl100k_base`), wired via `BudgetManager(token_counter=…)`. + `tiktoken` is imported lazily so the base install stays dependency-light; + install the existing `weaver-kernel[tiktoken]` extra to use it. + - **Honest summaries (#174).** Boolean columns are summarised as true/false + counts instead of being averaged as numbers, and truncated fact lists carry + an explicit "N more facts omitted" marker. - **CI / supply-chain hardening.** A focused pass over the build pipeline and repository automation: - **Bare-install CI job (#208).** Installs `weaver-kernel` with no extras, diff --git a/docs/context_firewall.md b/docs/context_firewall.md index af65471..c32e91d 100644 --- a/docs/context_firewall.md +++ b/docs/context_firewall.md @@ -19,6 +19,12 @@ Budgets( ) ``` +The character size used for budget comparisons is computed by an allocation-free +estimator (`weaver_kernel.firewall.estimated_size`) that walks the structure +rather than serialising it with `json.dumps` — so a multi-MB raw result is never +fully serialised just to measure it. The estimate is deterministic and tracks +the serialised length closely; only threshold comparisons depend on it. + ## Response modes | Mode | What you get | When to use | @@ -51,6 +57,29 @@ expanded = kernel.expand(handle, query={"fields": ["id", "name"]}, principal=pri expanded = kernel.expand(handle, query={"filter": {"status": "unpaid"}}, principal=principal) ``` +### Bounding handle memory by size + +The store holds *raw, pre-firewall* datasets, and entry count is a poor proxy +for memory — one deployment's 10k entries are kilobytes, another's are +gigabytes. `HandleStore` accepts two optional byte budgets (both `None` = +disabled, so default behaviour is unchanged): + +```python +from weaver_kernel import HandleStore + +store = HandleStore( + max_total_bytes=512 * 1024 * 1024, # evict oldest-first until within budget + max_entry_bytes=64 * 1024 * 1024, # reject a single over-cap payload +) +``` + +Sizes are estimated with the same `estimated_size` walk used for budgets. +`max_total_bytes` evicts oldest-first after each store (never the just-stored +entry); `max_entry_bytes` rejects an over-cap payload with `HandleTooLarge` +rather than truncating it, keeping expansion faithful to the original dataset. +Expanding an evicted handle raises the usual `HandleNotFound`. Tighter budgets +mean more "handle expired/evicted" experiences — tune for your workload. + ## Redaction When a capability has `SensitivityTag.PII` or `SensitivityTag.PCI`: @@ -84,11 +113,18 @@ never becomes a sensitive-data sink (see `docs/security.md`). ## Summarization Summaries are produced deterministically: -- **list of dicts** → row count + top keys + numeric stats + categorical distributions +- **list of dicts** → row count + top keys + numeric stats + categorical/boolean + distributions - **dict** → key list + per-value type/value - **string** → truncated to 500 chars - **other** → repr() truncated to 200 chars +Boolean columns are reported as `True`/`False` counts, never averaged (a `bool` +is an `int` subclass in Python, so "mean of `is_active` = 0.7" is nonsense). When +the fact list is capped by `max_facts`, the final fact is an explicit omission +marker (`… (N more facts omitted; full data via handle)`) so a truncated summary +is never mistaken for a complete one. + ## Cross-invocation budgets The per-invocation `Budgets` above cap a single Frame. A separate @@ -129,21 +165,27 @@ remaining drops *below* 5% does `handle_only` take over. `budget_remaining` in the returned `DryRunResult`, so callers can preview what their next live invocation would actually return. -Plug a different token counter (for example, a `tiktoken`-based one) via the -`TokenCounter` protocol: +The default counter (`default_token_counter`) is a character-based +`len(json.dumps(value)) // 4` approximation with no extra dependencies. For real +token counts, install the `tiktoken` extra and use the shipped factory: ```python -import tiktoken # pip install weaver-kernel[tiktoken] -enc = tiktoken.encoding_for_model("gpt-4o") +from weaver_kernel.firewall import BudgetManager, make_tiktoken_counter -def tiktoken_counter(value): - return len(enc.encode(str(value))) - -manager = BudgetManager(total_budget=128_000, token_counter=tiktoken_counter) +# pip install weaver-kernel[tiktoken] +manager = BudgetManager( + total_budget=128_000, + token_counter=make_tiktoken_counter(), # default cl100k_base + # token_counter=make_tiktoken_counter("o200k_base"), # GPT-4o / o-series +) ``` -The default counter (`default_token_counter`) is a character-based -`len(json.dumps(value)) // 4` approximation with no extra dependencies. +`make_tiktoken_counter` resolves and caches the encoder eagerly, so a missing +extra (`ImportError`) or an unknown encoding name (`FirewallError`) fails at +construction rather than mid-budgeting. The encoding is explicit because models +tokenize differently — name the one you budget against. `tiktoken` is imported +lazily, so `import weaver_kernel` never pulls the heavyweight dependency. Any +callable matching the `TokenCounter` protocol works too. ## Streaming diff --git a/pyproject.toml b/pyproject.toml index 00facb1..b9e84a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -137,3 +137,7 @@ ignore_missing_imports = true [[tool.mypy.overrides]] module = "weaver_contracts.*" ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "tiktoken.*" +ignore_missing_imports = true diff --git a/src/weaver_kernel/__init__.py b/src/weaver_kernel/__init__.py index f51c38d..94a3b39 100644 --- a/src/weaver_kernel/__init__.py +++ b/src/weaver_kernel/__init__.py @@ -40,9 +40,7 @@ from weaver_kernel import SQLiteTraceStore, JsonlTraceStore from weaver_kernel import SQLiteRevocationStore, InMemoryRevocationStore from weaver_kernel import verify_chain, ChainVerificationResult, TraceRecord - from weaver_kernel import ( - TraceStoreProtocol, RevocationStoreProtocol, HandleStoreProtocol, - ) + from weaver_kernel import TraceStoreProtocol, RevocationStoreProtocol, HandleStoreProtocol LLM tool-format adapters:: @@ -62,7 +60,7 @@ DriverError, FirewallError, AdapterParseError, BudgetExhausted, BudgetConfigError, CapabilityNotFound, CapabilityAlreadyRegistered, - HandleNotFound, HandleExpired, HandleConstraintViolation, + HandleNotFound, HandleExpired, HandleTooLarge, HandleConstraintViolation, NamespaceNotFound, FederationError, ManifestError, ManifestSignatureError, TrustPolicyError, DiscoveryError, ) @@ -91,6 +89,7 @@ HandleConstraintViolation, HandleExpired, HandleNotFound, + HandleTooLarge, ManifestError, ManifestSignatureError, NamespaceNotFound, @@ -246,6 +245,7 @@ "HandleConstraintViolation", "HandleExpired", "HandleNotFound", + "HandleTooLarge", "ManifestError", "ManifestSignatureError", "NamespaceNotFound", diff --git a/src/weaver_kernel/errors.py b/src/weaver_kernel/errors.py index be7aa2b..b6a0209 100644 --- a/src/weaver_kernel/errors.py +++ b/src/weaver_kernel/errors.py @@ -78,9 +78,13 @@ class BudgetExhausted(AgentKernelError): class BudgetConfigError(AgentKernelError): - """Raised when a :class:`~weaver_kernel.firewall.budget_manager.BudgetManager` is - constructed with invalid parameters, or asked to allocate/record/release - a negative amount. + """Raised when a budget is constructed with invalid parameters. + + Covers the :class:`~weaver_kernel.firewall.budget_manager.BudgetManager` + (non-positive ``total_budget``/``default_request``, or a negative + allocate/record/release amount) and the + :class:`~weaver_kernel.handles.HandleStore` byte budgets (non-positive + ``max_total_bytes``/``max_entry_bytes``). Used in place of bare :class:`ValueError` so callers can catch budget configuration mistakes without swallowing unrelated stdlib errors. @@ -128,6 +132,17 @@ class HandleExpired(AgentKernelError): """Raised when a handle's TTL has elapsed.""" +class HandleTooLarge(AgentKernelError): + """Raised when a single handle payload exceeds the store's per-entry byte cap. + + Fires from :meth:`~weaver_kernel.handles.HandleStore.store` only when the + store was configured with ``max_entry_bytes`` and the estimated size of the + data to store exceeds it. The data is *not* retained — rejecting an + over-cap payload outright (rather than silently truncating it) keeps handle + expansion faithful to the original dataset and bounds resident raw data. + """ + + class HandleConstraintViolation(AgentKernelError): """Raised when a handle expansion request violates the grant's constraints. diff --git a/src/weaver_kernel/firewall/__init__.py b/src/weaver_kernel/firewall/__init__.py index 17a45d4..98a7ebc 100644 --- a/src/weaver_kernel/firewall/__init__.py +++ b/src/weaver_kernel/firewall/__init__.py @@ -3,8 +3,10 @@ from .budget_manager import BudgetManager from .budgets import Budgets from .redaction import redact +from .size_estimate import estimated_size from .summarize import summarize from .token_counting import TokenCounter, default_token_counter +from .token_counting_tiktoken import make_tiktoken_counter from .transform import Firewall __all__ = [ @@ -13,6 +15,8 @@ "Firewall", "TokenCounter", "default_token_counter", + "estimated_size", + "make_tiktoken_counter", "redact", "summarize", ] diff --git a/src/weaver_kernel/firewall/size_estimate.py b/src/weaver_kernel/firewall/size_estimate.py new file mode 100644 index 0000000..b68d63a --- /dev/null +++ b/src/weaver_kernel/firewall/size_estimate.py @@ -0,0 +1,96 @@ +"""Allocation-free payload size estimation for the firewall hot path. + +``estimated_size`` approximates ``len(json.dumps(value, default=str))`` by +walking the structure and summing approximate character contributions, *without* +serialising the value to an intermediate string. The firewall runs on every +egress path and only needs the size to compare against budget thresholds, so a +single allocation-free pass (with an optional early exit) replaces a full JSON +serialisation that would double peak memory on large payloads. + +The estimate is intentionally approximate but **deterministic**: the same input +always yields the same number, and the walk visits container members in their +native (insertion) order. It is used both by +:mod:`~weaver_kernel.firewall.transform` (raw-mode budget warning, issue #207) +and by :class:`~weaver_kernel.handles.HandleStore` (byte-size budgeting, +issue #211). +""" + +from __future__ import annotations + +from typing import Any + +# Approximate JSON literal widths and structural overhead. json.dumps with the +# default separators emits ``", "`` between items and ``": "`` between a key and +# its value; object/array delimiters add two characters per container. These +# constants keep the estimate close to the real serialised length without +# reproducing json's exact escaping rules (bounded error is acceptable because +# only threshold comparisons depend on the result). +_NULL_LEN = 4 # "null" +_TRUE_LEN = 4 # "true" +_FALSE_LEN = 5 # "false" +_QUOTES = 2 # surrounding "" on a string +_CONTAINER_DELIMS = 2 # {} or [] +_ITEM_SEP = 2 # ", " +_KV_SEP = 2 # ": " + + +def estimated_size(value: Any, *, limit: int | None = None) -> int: + """Approximate the serialised character size of *value* without serialising. + + Walks *value* iteratively (so deeply nested structures cannot exhaust the + recursion limit) summing approximate JSON character contributions. ``bool`` + is handled before ``int`` because ``bool`` is an ``int`` subclass in Python. + + Args: + value: Any value the firewall might serialise. Non-JSON types fall back + to ``len(str(value))`` plus string quoting, mirroring the + ``default=str`` behaviour of the previous ``json.dumps`` measurement. + limit: Optional early-exit threshold. When the running total exceeds + *limit* the walk stops and returns a value greater than *limit* — use + this when only the boolean ``size > limit`` decision is needed, not + the exact size. + + Returns: + A non-negative integer approximating ``len(json.dumps(value, + default=str))``. + + Example: + >>> estimated_size(None) + 4 + >>> estimated_size("hi") + 4 + >>> estimated_size([1, 2, 3]) + 9 + """ + total = 0 + stack: list[Any] = [value] + while stack: + cur = stack.pop() + if cur is None: + total += _NULL_LEN + elif cur is True: + total += _TRUE_LEN + elif cur is False: + total += _FALSE_LEN + elif isinstance(cur, str): + total += len(cur) + _QUOTES + elif isinstance(cur, bool): # pragma: no cover - True/False caught above + total += _TRUE_LEN + elif isinstance(cur, (int, float)): + total += len(str(cur)) + elif isinstance(cur, dict): + n = len(cur) + total += _CONTAINER_DELIMS + max(0, n - 1) * _ITEM_SEP + for key, val in cur.items(): + total += len(str(key)) + _QUOTES + _KV_SEP + stack.append(val) + elif isinstance(cur, (list, tuple)): + n = len(cur) + total += _CONTAINER_DELIMS + max(0, n - 1) * _ITEM_SEP + stack.extend(cur) + else: + # Mirrors json.dumps(default=str): rendered as a quoted string. + total += len(str(cur)) + _QUOTES + if limit is not None and total > limit: + return total + return total diff --git a/src/weaver_kernel/firewall/summarize.py b/src/weaver_kernel/firewall/summarize.py index a9f1274..e0e9295 100644 --- a/src/weaver_kernel/firewall/summarize.py +++ b/src/weaver_kernel/firewall/summarize.py @@ -36,6 +36,30 @@ def summarize(data: Any, *, max_facts: int = 20) -> list[str]: return [repr(data)[:200]] +# ── Helpers ───────────────────────────────────────────────────────────────── + + +def _is_number(value: Any) -> bool: + """True for real numbers, excluding ``bool`` (an ``int`` subclass).""" + return isinstance(value, (int, float)) and not isinstance(value, bool) + + +def _truncate_facts(facts: list[str], max_facts: int) -> list[str]: + """Cap *facts* at *max_facts*, surfacing omission explicitly. + + Silent truncation lets an LLM treat a partial summary as complete. When + facts are dropped, the final slot becomes an explicit marker stating how + many were omitted; an untruncated list is returned unchanged so it carries + no marker (issue #174). + """ + if max_facts <= 0: + return [] + if len(facts) <= max_facts: + return facts + omitted = len(facts) - (max_facts - 1) + return [*facts[: max_facts - 1], f"… ({omitted} more facts omitted; full data via handle)"] + + # ── Specialised handlers ────────────────────────────────────────────────────── @@ -51,9 +75,15 @@ def _summarize_list_of_dicts(rows: list[dict[str, Any]], *, max_facts: int) -> l top_keys = sorted(key_counts, key=lambda k: -key_counts[k])[:10] facts.append(f"Top keys: {', '.join(top_keys)}") - # Numeric stats + # Numeric stats. ``bool`` is a subclass of ``int`` in Python, so a boolean + # column would otherwise be "averaged" into a meaningless mean (e.g. the + # mean of ``is_active`` = 0.7); exclude bools here and report them as + # true/false counts below instead (issue #174). numeric_keys = [ - k for k in top_keys if all(isinstance(r.get(k), (int, float)) for r in rows if k in r) + k + for k in top_keys + if all(_is_number(r.get(k)) for r in rows if k in r) + and any(_is_number(r.get(k)) for r in rows if k in r) ] for k in numeric_keys[:5]: values = [float(r[k]) for r in rows if k in r] @@ -62,14 +92,18 @@ def _summarize_list_of_dicts(rows: list[dict[str, Any]], *, max_facts: int) -> l f"{k}: min={min(values):.2f}, max={max(values):.2f}, " f"avg={sum(values) / len(values):.2f}" ) - if len(facts) >= max_facts: - break - # Status / categorical counts (string fields with few distinct values) + # Categorical / boolean counts for the remaining string-or-bool columns. for k in top_keys[:5]: if k in numeric_keys: continue - values_str = [str(r[k]) for r in rows if k in r and isinstance(r[k], str)] + present = [r[k] for r in rows if k in r] + bool_vals = [v for v in present if isinstance(v, bool)] + if bool_vals and len(bool_vals) == len(present): + true_count = sum(1 for v in bool_vals if v) + facts.append(f"{k}: True={true_count}, False={len(bool_vals) - true_count}") + continue + values_str = [str(v) for v in present if isinstance(v, str)] if not values_str: continue distinct = sorted(set(values_str)) @@ -77,15 +111,13 @@ def _summarize_list_of_dicts(rows: list[dict[str, Any]], *, max_facts: int) -> l counts = {v: values_str.count(v) for v in distinct} summary = ", ".join(f"{v}={counts[v]}" for v in sorted(counts)) facts.append(f"{k} distribution: {summary}") - if len(facts) >= max_facts: - break - return facts[:max_facts] + return _truncate_facts(facts, max_facts) def _summarize_dict(data: dict[str, Any], *, max_facts: int) -> list[str]: facts: list[str] = [f"Keys: {', '.join(sorted(data.keys())[:20])}"] - for k, v in list(data.items())[: max_facts - 1]: + for k, v in data.items(): if isinstance(v, (int, float)): facts.append(f"{k}: {v}") elif isinstance(v, str): @@ -96,16 +128,13 @@ def _summarize_dict(data: dict[str, Any], *, max_facts: int) -> list[str]: facts.append(f"{k}: dict with keys [{', '.join(list(v.keys())[:5])}]") else: facts.append(f"{k}: {repr(v)[:80]}") - if len(facts) >= max_facts: - break - return facts[:max_facts] + return _truncate_facts(facts, max_facts) def _summarize_plain_list(data: list[Any], *, max_facts: int) -> list[str]: facts = [f"List of {len(data)} items"] - for item in data[: max_facts - 1]: - facts.append(repr(item)[:100]) - return facts[:max_facts] + facts.extend(repr(item)[:100] for item in data) + return _truncate_facts(facts, max_facts) def _summarize_string(data: str, *, max_facts: int) -> list[str]: diff --git a/src/weaver_kernel/firewall/token_counting_tiktoken.py b/src/weaver_kernel/firewall/token_counting_tiktoken.py new file mode 100644 index 0000000..15e15fa --- /dev/null +++ b/src/weaver_kernel/firewall/token_counting_tiktoken.py @@ -0,0 +1,101 @@ +"""tiktoken-backed token counter for the context firewall (issue #218). + +Implements the :class:`~weaver_kernel.firewall.token_counting.TokenCounter` +seam with a real tokenizer instead of the ``chars // 4`` heuristic, so +cross-invocation budgets count the same tokens an OpenAI-family model would. +Install the optional dependency with ``pip install weaver-kernel[tiktoken]`` +and attach the counter to a :class:`~weaver_kernel.firewall.BudgetManager`:: + + from weaver_kernel.firewall import BudgetManager, make_tiktoken_counter + + manager = BudgetManager(token_counter=make_tiktoken_counter()) + +``tiktoken`` is imported lazily inside the factory, never at module import +time, so importing :mod:`weaver_kernel` without the extra stays free of the +heavyweight dependency. +""" + +from __future__ import annotations + +import json +from functools import lru_cache +from typing import Any + +from ..errors import FirewallError +from .token_counting import TokenCounter + +DEFAULT_ENCODING = "cl100k_base" +"""Default tiktoken encoding. + +``cl100k_base`` covers GPT-4 / GPT-3.5-turbo and the ``text-embedding-3`` models +and is the broadest-compatibility choice. Pass ``encoding="o200k_base"`` for the +GPT-4o / o-series family. Anthropic models tokenize differently — name the +encoding explicitly for the model you budget against rather than relying on this +default. +""" + + +@lru_cache(maxsize=8) +def _load_encoding(name: str) -> Any: + """Load and memoise a tiktoken encoding (constructing one is expensive). + + Args: + name: A tiktoken encoding name, e.g. ``"cl100k_base"``. + + Returns: + The tiktoken ``Encoding`` instance. + + Raises: + ImportError: If the ``tiktoken`` extra is not installed. + FirewallError: If *name* is not a known tiktoken encoding. + """ + try: + import tiktoken + except ImportError as exc: + raise ImportError( + "tiktoken-based token counting requires the optional dependency " + "'tiktoken>=0.6'. Install it with: pip install 'weaver-kernel[tiktoken]'" + ) from exc + try: + return tiktoken.get_encoding(name) + except (ValueError, KeyError) as exc: + raise FirewallError( + f"Unknown tiktoken encoding {name!r}; use a name such as " + f"'cl100k_base' or 'o200k_base'." + ) from exc + + +def make_tiktoken_counter(encoding: str = DEFAULT_ENCODING) -> TokenCounter: + """Build a :class:`TokenCounter` that counts real tokens via tiktoken. + + The encoder is resolved (and cached) eagerly so a missing extra or an + unknown encoding name fails at construction rather than on the first count. + Counting is deterministic: strings are encoded directly; other values are + rendered with ``json.dumps(..., default=str)`` first, mirroring the default + counter's treatment of structured data. + + Args: + encoding: tiktoken encoding name (see :data:`DEFAULT_ENCODING`). + + Returns: + A callable implementing the :class:`TokenCounter` protocol. + + Raises: + ImportError: If the ``tiktoken`` extra is not installed. + FirewallError: If *encoding* is not a known tiktoken encoding. + """ + _load_encoding(encoding) + + def count(value: Any) -> int: + if value is None: + return 0 + if isinstance(value, str): + text = value + else: + try: + text = json.dumps(value, default=str) + except (TypeError, ValueError): + text = str(value) + return len(_load_encoding(encoding).encode(text)) + + return count diff --git a/src/weaver_kernel/firewall/transform.py b/src/weaver_kernel/firewall/transform.py index 70c89a6..252457a 100644 --- a/src/weaver_kernel/firewall/transform.py +++ b/src/weaver_kernel/firewall/transform.py @@ -3,7 +3,6 @@ from __future__ import annotations import datetime -import json import logging from collections.abc import AsyncIterator from dataclasses import replace @@ -18,6 +17,7 @@ ) from .budgets import Budgets from .redaction import StreamRedactor, redact +from .size_estimate import estimated_size from .summarize import summarize logger = logging.getLogger(__name__) @@ -132,7 +132,7 @@ def transform( }, ) else: - raw_size = len(json.dumps(data, default=str)) + raw_size = estimated_size(data) if raw_size > self._budgets.max_chars: warnings.append( f"raw output ({raw_size} chars) exceeds budget " diff --git a/src/weaver_kernel/handle_query.py b/src/weaver_kernel/handle_query.py new file mode 100644 index 0000000..fa12870 --- /dev/null +++ b/src/weaver_kernel/handle_query.py @@ -0,0 +1,232 @@ +"""Handle expansion query logic, factored out of :mod:`weaver_kernel.handles`. + +:func:`expand_handle` holds the pagination / field-selection / filtering / +redaction pipeline that turns a stored raw dataset into a bounded +:class:`~weaver_kernel.models.Frame`. It lives here (rather than as a method on +:class:`~weaver_kernel.handles.HandleStore`) so the store module stays within +the module-size budget while the query logic remains independently testable. + +The data fetch is injected as a ``fetch_data`` callable so this module never +imports :class:`~weaver_kernel.handles.HandleStore` — the store stays the single +owner of TTL/eviction state and passes ``lambda: self.get(handle_id)``. +""" + +from __future__ import annotations + +import datetime +from collections.abc import Callable +from typing import Any + +from .errors import HandleConstraintViolation +from .firewall.redaction import redact +from .models import Frame, Handle, Provenance, ResponseMode +from .policy_reasons import DenialReason + + +def expand_handle( + handle: Handle, + *, + fetch_data: Callable[[], Any], + query: dict[str, Any], + action_id: str = "", + response_mode: ResponseMode = "table", + principal_id: str = "", + max_depth: int | None = None, +) -> Frame: + """Expand *handle* with optional pagination, field selection, and filtering. + + Args: + handle: The handle to expand. + fetch_data: Zero-argument callable returning the stored dataset. Raising + :class:`~weaver_kernel.errors.HandleExpired` / + :class:`~weaver_kernel.errors.HandleNotFound` from here propagates + unchanged. + query: Query parameters controlling the expansion (``offset``, ``limit``, + ``fields``, ``filter``). + action_id: Audit action ID to embed in the returned Frame. + response_mode: Response mode for the returned Frame. + principal_id: Principal performing the expansion. When the handle was + created with a non-empty ``principal_id``, this must match. + max_depth: Redaction recursion cap for the projected rows. + + Returns: + A :class:`Frame` containing the slice of data. + + Raises: + HandleConstraintViolation: If the requested expansion exceeds the grant's + persisted constraints or is requested by a different principal. + """ + # ── Principal binding ───────────────────────────────────────────────── + # A handle bound to a principal is not a bearer credential. If the + # handle has a non-empty principal_id, the caller MUST present a + # matching principal_id — omitting it counts as a mismatch (otherwise + # any caller in the same process could expand by passing ""). + if handle.principal_id and handle.principal_id != principal_id: + raise HandleConstraintViolation( + f"Handle '{handle.handle_id}' was granted to principal " + f"'{handle.principal_id}' and cannot be expanded by " + f"'{principal_id or ''}'.", + reason_code=DenialReason.HANDLE_PRINCIPAL_MISMATCH, + ) + + # ── Query input validation ──────────────────────────────────────────── + # Validate user-supplied query types up front so callers see a stable + # HandleConstraintViolation (with INVALID_CONSTRAINT) instead of a + # bare TypeError/ValueError from inside the expansion logic. Public + # interfaces must not leak stdlib exceptions (see AGENTS.md). + raw_filter = query.get("filter") + if raw_filter is not None and not isinstance(raw_filter, dict): + raise HandleConstraintViolation( + f"Handle expand 'filter' must be a dict, got {type(raw_filter).__name__}.", + reason_code=DenialReason.INVALID_CONSTRAINT, + ) + raw_fields = query.get("fields") + if raw_fields is not None and not isinstance(raw_fields, list | tuple): + raise HandleConstraintViolation( + f"Handle expand 'fields' must be a list, got {type(raw_fields).__name__}.", + reason_code=DenialReason.INVALID_CONSTRAINT, + ) + + # ── Grant-constraint rechecks ───────────────────────────────────────── + granted_max_rows = handle.constraints.get("max_rows") + granted_fields = handle.constraints.get("allowed_fields") or [] + granted_scope = handle.constraints.get("scope") or {} + + requested_fields: list[str] = list(raw_fields or []) + if granted_fields and requested_fields: + disallowed = [f for f in requested_fields if f not in granted_fields] + if disallowed: + raise HandleConstraintViolation( + f"Handle '{handle.handle_id}' grant restricts fields to " + f"{sorted(granted_fields)}; request asked for " + f"{sorted(disallowed)}.", + reason_code=DenialReason.HANDLE_CONSTRAINT_VIOLATION, + ) + + filter_in: dict[str, Any] = raw_filter or {} + if granted_scope: + for sk, sv in granted_scope.items(): + if sk in filter_in and filter_in[sk] != sv: + raise HandleConstraintViolation( + f"Handle '{handle.handle_id}' is scoped to " + f"{sk}={sv!r}; request filter " + f"{sk}={filter_in[sk]!r} is outside that scope.", + reason_code=DenialReason.HANDLE_CONSTRAINT_VIOLATION, + ) + + data = fetch_data() + rows: list[Any] = data if isinstance(data, list) else [data] + + # ── Filtering ────────────────────────────────────────────────────────── + # Grant scope is AND-merged into the request filter so the caller + # cannot bypass it by omitting the scope key. + filter_spec: dict[str, Any] = dict(filter_in) + for sk, sv in granted_scope.items(): + filter_spec.setdefault(sk, sv) + if filter_spec: + rows = [ + r + for r in rows + if isinstance(r, dict) and all(r.get(k) == v for k, v in filter_spec.items()) + ] + + # ── Pagination ──────────────────────────────────────────────────────── + try: + offset = int(query.get("offset", 0)) + except (TypeError, ValueError) as exc: + raise HandleConstraintViolation( + f"Handle expand 'offset' must be an integer, got {query.get('offset')!r}.", + reason_code=DenialReason.INVALID_CONSTRAINT, + ) from exc + if offset < 0: + raise HandleConstraintViolation( + f"Handle expand 'offset' must be non-negative, got {offset}.", + reason_code=DenialReason.INVALID_CONSTRAINT, + ) + requested_limit_raw = query.get("limit") + requested_limit: int | None + if requested_limit_raw is None: + requested_limit = None + else: + try: + requested_limit = int(requested_limit_raw) + except (TypeError, ValueError) as exc: + raise HandleConstraintViolation( + f"Handle expand 'limit' must be an integer, got {requested_limit_raw!r}.", + reason_code=DenialReason.INVALID_CONSTRAINT, + ) from exc + # A negative limit would slice as rows[offset:offset+limit] and could + # return rows in excess of a (possibly zero) grant cap — reject it + # rather than silently bypassing max_rows. + if requested_limit < 0: + raise HandleConstraintViolation( + f"Handle expand 'limit' must be non-negative, got {requested_limit}.", + reason_code=DenialReason.INVALID_CONSTRAINT, + ) + limit = len(rows) if requested_limit is None else requested_limit + + if isinstance(granted_max_rows, int) and granted_max_rows >= 0: + if requested_limit is not None and requested_limit > granted_max_rows: + raise HandleConstraintViolation( + f"Handle '{handle.handle_id}' grant caps rows at " + f"{granted_max_rows}; request asked for " + f"limit={requested_limit}.", + reason_code=DenialReason.HANDLE_CONSTRAINT_VIOLATION, + ) + limit = min(limit, granted_max_rows) + + rows = rows[offset : offset + limit] + + # ── Field selection ─────────────────────────────────────────────────── + # If the grant restricts fields and the caller did not ask for any, + # apply the grant's allowed_fields as the default projection so + # disallowed fields cannot leak through an unscoped expand call. + effective_fields: list[str] + if requested_fields: + effective_fields = requested_fields + elif granted_fields: + effective_fields = list(granted_fields) + else: + effective_fields = [] + if effective_fields: + rows = [ + {k: v for k, v in r.items() if k in effective_fields} if isinstance(r, dict) else r + for r in rows + ] + + if not rows: + table_preview: list[Any] = [] + elif isinstance(rows[0], dict): + table_preview = rows + else: + table_preview = [{"value": r} for r in rows] + + # ── Redaction ─────────────────────────────────────────────────────────── + # expand() builds its Frame directly from the raw stored dataset, which + # is persisted pre-firewall. Field-level grant constraints + # (allowed_fields / scope) are already enforced by the projection above, + # so allowed_fields is intentionally not re-passed here; but a permitted + # field can still carry inline secrets (e.g. a Bearer token in a `note` + # value). Route the projected rows through the same redactor the + # Firewall applies on first invocation — using the firewall's configured + # max_depth when the caller threads it — so the I-01 boundary holds on + # the expansion path too (see docs/agent-context/invariants.md). + if max_depth is None: + redacted_preview, warnings = redact(table_preview) + else: + redacted_preview, warnings = redact(table_preview, max_depth=max_depth) + + return Frame( + action_id=action_id, + capability_id=handle.capability_id, + response_mode=response_mode, + table_preview=redacted_preview, + warnings=warnings, + handle=handle, + provenance=Provenance( + capability_id=handle.capability_id, + principal_id=principal_id, + invoked_at=datetime.datetime.now(tz=datetime.timezone.utc), + action_id=action_id, + ), + ) diff --git a/src/weaver_kernel/handles.py b/src/weaver_kernel/handles.py index 9049e8d..408957e 100644 --- a/src/weaver_kernel/handles.py +++ b/src/weaver_kernel/handles.py @@ -6,10 +6,10 @@ import uuid from typing import Any -from .errors import HandleConstraintViolation, HandleExpired, HandleNotFound -from .firewall.redaction import redact -from .models import Frame, Handle, Provenance, ResponseMode -from .policy_reasons import DenialReason +from .errors import BudgetConfigError, HandleExpired, HandleNotFound, HandleTooLarge +from .firewall.size_estimate import estimated_size +from .handle_query import expand_handle +from .models import Frame, Handle, ResponseMode class HandleStore: @@ -19,6 +19,16 @@ class HandleStore: or explicitly via :meth:`evict_expired`. A *max_entries* cap prevents unbounded memory growth in long-lived processes — when the cap is exceeded the oldest entries are dropped after expired ones are cleared. + + Two optional **byte budgets** bound memory by size rather than entry count, + since one deployment's 10k entries are kilobytes and another's are gigabytes + (issue #211). Both default to ``None`` (disabled), leaving behaviour + unchanged until configured: + + - ``max_entry_bytes`` rejects a single over-cap payload with + :class:`~weaver_kernel.errors.HandleTooLarge` (the data is never stored). + - ``max_total_bytes`` evicts oldest-first after each store until aggregate + estimated residency is within budget. """ _EVICT_INTERVAL: int = 128 # run evict_expired() every N store() calls @@ -28,12 +38,27 @@ def __init__( default_ttl_seconds: int = 3600, *, max_entries: int = 10_000, + max_total_bytes: int | None = None, + max_entry_bytes: int | None = None, ) -> None: + if max_total_bytes is not None and max_total_bytes <= 0: + raise BudgetConfigError("max_total_bytes must be positive when set") + if max_entry_bytes is not None and max_entry_bytes <= 0: + raise BudgetConfigError("max_entry_bytes must be positive when set") self._default_ttl = default_ttl_seconds self._max_entries = max_entries + self._max_total_bytes = max_total_bytes + self._max_entry_bytes = max_entry_bytes self._store_count = 0 self._data: dict[str, Any] = {} self._meta: dict[str, Handle] = {} + self._sizes: dict[str, int] = {} + self._total_bytes = 0 + + @property + def current_bytes(self) -> int: + """Estimated total bytes of data currently resident in the store.""" + return self._total_bytes # ── Storage ─────────────────────────────────────────────────────────────── @@ -61,7 +86,18 @@ def store( Returns: A :class:`Handle` referencing the stored data. + + Raises: + HandleTooLarge: If ``max_entry_bytes`` is set and the estimated size + of *data* exceeds it. """ + size = estimated_size(data) + if self._max_entry_bytes is not None and size > self._max_entry_bytes: + raise HandleTooLarge( + f"Handle data for '{capability_id}' is ~{size} bytes, exceeding the " + f"per-entry cap of {self._max_entry_bytes} bytes; not stored." + ) + ttl = ttl_seconds if ttl_seconds is not None else self._default_ttl now = datetime.datetime.now(tz=datetime.timezone.utc) handle = Handle( @@ -75,21 +111,32 @@ def store( ) self._data[handle.handle_id] = data self._meta[handle.handle_id] = handle + self._sizes[handle.handle_id] = size + self._total_bytes += size # Periodic eviction of expired entries self._store_count += 1 if self._store_count % self._EVICT_INTERVAL == 0: self.evict_expired() - # Cap enforcement: evict oldest entries when over the limit + # Cap enforcement: evict oldest entries when over the entry limit if len(self._meta) > self._max_entries: self.evict_expired() # clear expired first overflow = len(self._meta) - self._max_entries if overflow > 0: - oldest = sorted(self._meta, key=lambda hid: self._meta[hid].created_at) - for hid in oldest[:overflow]: - self._data.pop(hid, None) - self._meta.pop(hid, None) + for hid in self._oldest_ids()[:overflow]: + self._drop(hid) + + # Byte-budget enforcement: evict oldest until within the total budget, + # never evicting the entry we just stored (the caller holds its handle). + if self._max_total_bytes is not None and self._total_bytes > self._max_total_bytes: + self.evict_expired() + for hid in self._oldest_ids(): + if self._total_bytes <= self._max_total_bytes: + break + if hid == handle.handle_id: + continue # never evict the entry we just stored + self._drop(hid) return handle @@ -114,8 +161,7 @@ def get(self, handle_id: str) -> Any: now = datetime.datetime.now(tz=datetime.timezone.utc) if handle.expires_at <= now: # Lazy eviction - del self._data[handle_id] - del self._meta[handle_id] + self._drop(handle_id) raise HandleExpired( f"Handle '{handle_id}' expired at {handle.expires_at.isoformat()}." ) @@ -158,6 +204,9 @@ def expand( - ``fields`` (list[str]): Only include these fields. - ``filter`` (dict[str, Any]): Basic equality filter (all conditions AND-ed). + The query pipeline lives in :func:`weaver_kernel.handle_query.expand_handle`; + this method binds it to the store's TTL-checked data fetch. + Args: handle: The handle to expand. query: Query parameters controlling the expansion. @@ -180,179 +229,14 @@ def expand( grant's persisted constraints (``max_rows``, ``allowed_fields``, ``scope``) or is requested by a different principal. """ - # ── Principal binding ───────────────────────────────────────────────── - # A handle bound to a principal is not a bearer credential. If the - # handle has a non-empty principal_id, the caller MUST present a - # matching principal_id — omitting it counts as a mismatch (otherwise - # any caller in the same process could expand by passing ""). - if handle.principal_id and handle.principal_id != principal_id: - raise HandleConstraintViolation( - f"Handle '{handle.handle_id}' was granted to principal " - f"'{handle.principal_id}' and cannot be expanded by " - f"'{principal_id or ''}'.", - reason_code=DenialReason.HANDLE_PRINCIPAL_MISMATCH, - ) - - # ── Query input validation ──────────────────────────────────────────── - # Validate user-supplied query types up front so callers see a stable - # HandleConstraintViolation (with INVALID_CONSTRAINT) instead of a - # bare TypeError/ValueError from inside the expansion logic. Public - # interfaces must not leak stdlib exceptions (see AGENTS.md). - raw_filter = query.get("filter") - if raw_filter is not None and not isinstance(raw_filter, dict): - raise HandleConstraintViolation( - f"Handle expand 'filter' must be a dict, got {type(raw_filter).__name__}.", - reason_code=DenialReason.INVALID_CONSTRAINT, - ) - raw_fields = query.get("fields") - if raw_fields is not None and not isinstance(raw_fields, list | tuple): - raise HandleConstraintViolation( - f"Handle expand 'fields' must be a list, got {type(raw_fields).__name__}.", - reason_code=DenialReason.INVALID_CONSTRAINT, - ) - - # ── Grant-constraint rechecks ───────────────────────────────────────── - granted_max_rows = handle.constraints.get("max_rows") - granted_fields = handle.constraints.get("allowed_fields") or [] - granted_scope = handle.constraints.get("scope") or {} - - requested_fields: list[str] = list(raw_fields or []) - if granted_fields and requested_fields: - disallowed = [f for f in requested_fields if f not in granted_fields] - if disallowed: - raise HandleConstraintViolation( - f"Handle '{handle.handle_id}' grant restricts fields to " - f"{sorted(granted_fields)}; request asked for " - f"{sorted(disallowed)}.", - reason_code=DenialReason.HANDLE_CONSTRAINT_VIOLATION, - ) - - filter_in: dict[str, Any] = raw_filter or {} - if granted_scope: - for sk, sv in granted_scope.items(): - if sk in filter_in and filter_in[sk] != sv: - raise HandleConstraintViolation( - f"Handle '{handle.handle_id}' is scoped to " - f"{sk}={sv!r}; request filter " - f"{sk}={filter_in[sk]!r} is outside that scope.", - reason_code=DenialReason.HANDLE_CONSTRAINT_VIOLATION, - ) - - data = self.get(handle.handle_id) - rows: list[Any] = data if isinstance(data, list) else [data] - - # ── Filtering ────────────────────────────────────────────────────────── - # Grant scope is AND-merged into the request filter so the caller - # cannot bypass it by omitting the scope key. - filter_spec: dict[str, Any] = dict(filter_in) - for sk, sv in granted_scope.items(): - filter_spec.setdefault(sk, sv) - if filter_spec: - rows = [ - r - for r in rows - if isinstance(r, dict) and all(r.get(k) == v for k, v in filter_spec.items()) - ] - - # ── Pagination ──────────────────────────────────────────────────────── - try: - offset = int(query.get("offset", 0)) - except (TypeError, ValueError) as exc: - raise HandleConstraintViolation( - f"Handle expand 'offset' must be an integer, got {query.get('offset')!r}.", - reason_code=DenialReason.INVALID_CONSTRAINT, - ) from exc - if offset < 0: - raise HandleConstraintViolation( - f"Handle expand 'offset' must be non-negative, got {offset}.", - reason_code=DenialReason.INVALID_CONSTRAINT, - ) - requested_limit_raw = query.get("limit") - requested_limit: int | None - if requested_limit_raw is None: - requested_limit = None - else: - try: - requested_limit = int(requested_limit_raw) - except (TypeError, ValueError) as exc: - raise HandleConstraintViolation( - f"Handle expand 'limit' must be an integer, got {requested_limit_raw!r}.", - reason_code=DenialReason.INVALID_CONSTRAINT, - ) from exc - # A negative limit would slice as rows[offset:offset+limit] and could - # return rows in excess of a (possibly zero) grant cap — reject it - # rather than silently bypassing max_rows. - if requested_limit < 0: - raise HandleConstraintViolation( - f"Handle expand 'limit' must be non-negative, got {requested_limit}.", - reason_code=DenialReason.INVALID_CONSTRAINT, - ) - limit = len(rows) if requested_limit is None else requested_limit - - if isinstance(granted_max_rows, int) and granted_max_rows >= 0: - if requested_limit is not None and requested_limit > granted_max_rows: - raise HandleConstraintViolation( - f"Handle '{handle.handle_id}' grant caps rows at " - f"{granted_max_rows}; request asked for " - f"limit={requested_limit}.", - reason_code=DenialReason.HANDLE_CONSTRAINT_VIOLATION, - ) - limit = min(limit, granted_max_rows) - - rows = rows[offset : offset + limit] - - # ── Field selection ─────────────────────────────────────────────────── - # If the grant restricts fields and the caller did not ask for any, - # apply the grant's allowed_fields as the default projection so - # disallowed fields cannot leak through an unscoped expand call. - effective_fields: list[str] - if requested_fields: - effective_fields = requested_fields - elif granted_fields: - effective_fields = list(granted_fields) - else: - effective_fields = [] - if effective_fields: - rows = [ - {k: v for k, v in r.items() if k in effective_fields} if isinstance(r, dict) else r - for r in rows - ] - - if not rows: - table_preview: list[Any] = [] - elif isinstance(rows[0], dict): - table_preview = rows - else: - table_preview = [{"value": r} for r in rows] - - # ── Redaction ─────────────────────────────────────────────────────────── - # expand() builds its Frame directly from the raw stored dataset, which - # is persisted pre-firewall. Field-level grant constraints - # (allowed_fields / scope) are already enforced by the projection above, - # so allowed_fields is intentionally not re-passed here; but a permitted - # field can still carry inline secrets (e.g. a Bearer token in a `note` - # value). Route the projected rows through the same redactor the - # Firewall applies on first invocation — using the firewall's configured - # max_depth when the caller threads it — so the I-01 boundary holds on - # the expansion path too (see docs/agent-context/invariants.md). - if max_depth is None: - redacted_preview, warnings = redact(table_preview) - else: - redacted_preview, warnings = redact(table_preview, max_depth=max_depth) - - return Frame( + return expand_handle( + handle, + fetch_data=lambda: self.get(handle.handle_id), + query=query, action_id=action_id, - capability_id=handle.capability_id, response_mode=response_mode, - table_preview=redacted_preview, - warnings=warnings, - handle=handle, - provenance=Provenance( - capability_id=handle.capability_id, - principal_id=principal_id, - invoked_at=datetime.datetime.now(tz=datetime.timezone.utc), - action_id=action_id, - ), + principal_id=principal_id, + max_depth=max_depth, ) # ── Maintenance ─────────────────────────────────────────────────────────── @@ -366,6 +250,15 @@ def evict_expired(self) -> int: now = datetime.datetime.now(tz=datetime.timezone.utc) expired = [hid for hid, h in self._meta.items() if h.expires_at <= now] for hid in expired: - self._data.pop(hid, None) - self._meta.pop(hid, None) + self._drop(hid) return len(expired) + + def _oldest_ids(self) -> list[str]: + """Handle IDs ordered oldest-first by creation time (deterministic).""" + return sorted(self._meta, key=lambda hid: self._meta[hid].created_at) + + def _drop(self, handle_id: str) -> None: + """Remove a handle's data, metadata, and size accounting.""" + self._data.pop(handle_id, None) + self._meta.pop(handle_id, None) + self._total_bytes -= self._sizes.pop(handle_id, 0) diff --git a/tests/test_architecture.py b/tests/test_architecture.py index 8aa9642..5af2c4d 100644 --- a/tests/test_architecture.py +++ b/tests/test_architecture.py @@ -54,7 +54,6 @@ "adapters/_base.py": 459, "kernel/_invoke.py": 400, "firewall/transform.py": 377, - "handles.py": 371, "adapters/openai.py": 358, "stores/sqlite.py": 350, "tokens.py": 336, diff --git a/tests/test_doctests.py b/tests/test_doctests.py index 7d9f440..f1e3815 100644 --- a/tests/test_doctests.py +++ b/tests/test_doctests.py @@ -18,6 +18,7 @@ # false failures. _DOCTESTED_MODULES = [ "weaver_kernel.firewall.token_counting", + "weaver_kernel.firewall.size_estimate", ] diff --git a/tests/test_firewall.py b/tests/test_firewall.py index 5471030..eda9257 100644 --- a/tests/test_firewall.py +++ b/tests/test_firewall.py @@ -297,6 +297,48 @@ def test_summarize_dict_max_facts() -> None: assert len(facts) <= 2 +# ── summarize() bool handling + truncation marker (#174) ──────────────────────── + + +def test_summarize_bool_column_is_not_averaged() -> None: + rows = [{"active": True}, {"active": True}, {"active": False}] + facts = summarize(rows) + # A bool column must never be reported as a numeric mean/min/max. + assert not any("avg=" in f for f in facts) + assert any(f == "active: True=2, False=1" for f in facts) + + +def test_summarize_numeric_column_still_averaged() -> None: + rows = [{"score": 10}, {"score": 20}, {"score": 30}] + facts = summarize(rows) + assert any(f == "score: min=10.00, max=30.00, avg=20.00" for f in facts) + + +def test_summarize_truncation_marker_when_facts_omitted() -> None: + rows = [{"a": i, "b": i, "c": i, "d": i, "e": i} for i in range(5)] + facts = summarize(rows, max_facts=3) + assert len(facts) == 3 + assert "more facts omitted" in facts[-1] + + +def test_summarize_no_marker_when_all_facts_fit() -> None: + rows = [{"score": i} for i in range(3)] + facts = summarize(rows, max_facts=20) + assert not any("omitted" in f for f in facts) + + +def test_summarize_exactly_max_facts_has_no_false_marker() -> None: + # "Keys" + "a: 1" + "b: 2" is exactly 3 facts; no truncation should occur. + facts = summarize({"a": 1, "b": 2}, max_facts=3) + assert facts == ["Keys: a, b", "a: 1", "b: 2"] + + +def test_summarize_plain_list_marks_omitted_items() -> None: + facts = summarize(list(range(10)), max_facts=4) + assert facts[0] == "List of 10 items" + assert "more facts omitted" in facts[-1] + + # ── Token counting ───────────────────────────────────────────────────────────── diff --git a/tests/test_handles.py b/tests/test_handles.py index 66fedbd..e273a73 100644 --- a/tests/test_handles.py +++ b/tests/test_handles.py @@ -7,10 +7,12 @@ import pytest from weaver_kernel import ( + BudgetConfigError, HandleConstraintViolation, HandleExpired, HandleNotFound, HandleStore, + HandleTooLarge, ) from weaver_kernel.models import Handle from weaver_kernel.policy_reasons import DenialReason @@ -344,3 +346,69 @@ def test_expand_fills_provenance_principal(store: HandleStore) -> None: frame = store.expand(handle, query={}, action_id="act-9", principal_id="agent-1") assert frame.provenance.principal_id == "agent-1" assert frame.provenance.action_id == "act-9" + + +# ── Byte-size budgeting (#211) ─────────────────────────────────────────────────── + + +def test_current_bytes_starts_at_zero_and_tracks_stores() -> None: + store = HandleStore() + assert store.current_bytes == 0 + store.store("cap.x", {"v": "x" * 100}) + assert store.current_bytes > 100 + + +def test_max_entry_bytes_rejects_oversized_payload() -> None: + store = HandleStore(max_entry_bytes=100) + with pytest.raises(HandleTooLarge): + store.store("cap.x", {"v": "x" * 1000}) + # Rejected data is never retained. + assert store.current_bytes == 0 + assert len(store._meta) == 0 + + +def test_max_entry_bytes_allows_within_cap() -> None: + store = HandleStore(max_entry_bytes=10_000) + handle = store.store("cap.x", {"v": "x" * 1000}) + assert store.get(handle.handle_id) == {"v": "x" * 1000} + + +def test_max_total_bytes_evicts_oldest_until_within_budget() -> None: + store = HandleStore(max_total_bytes=2500) + h1 = store.store("cap.x", {"v": "a" * 1000}) + h2 = store.store("cap.x", {"v": "b" * 1000}) + h3 = store.store("cap.x", {"v": "c" * 1000}) + # Three ~1009-byte entries exceed 2500; the oldest is evicted, never the + # just-stored one. + assert store.current_bytes <= 2500 + with pytest.raises(HandleNotFound): + store.get(h1.handle_id) + assert store.get(h2.handle_id) == {"v": "b" * 1000} + assert store.get(h3.handle_id) == {"v": "c" * 1000} + + +def test_expanding_byte_evicted_handle_raises_not_found() -> None: + store = HandleStore(max_total_bytes=2500) + h1 = store.store("cap.x", [{"id": i} for i in range(50)], principal_id="agent-1") + evicted_meta = store.get_meta(h1.handle_id) + store.store("cap.x", {"v": "b" * 1500}) + store.store("cap.x", {"v": "c" * 1500}) + with pytest.raises(HandleNotFound): + store.expand(evicted_meta, query={}, principal_id="agent-1") + + +def test_evict_expired_releases_byte_accounting() -> None: + store = HandleStore() + store.store("cap.x", {"v": "x" * 500}, ttl_seconds=-1) + before = store.current_bytes + assert before > 0 + store.evict_expired() + assert store.current_bytes == 0 + assert before > store.current_bytes + + +def test_byte_budget_rejects_invalid_config() -> None: + with pytest.raises(BudgetConfigError): + HandleStore(max_total_bytes=0) + with pytest.raises(BudgetConfigError): + HandleStore(max_entry_bytes=-5) diff --git a/tests/test_size_estimate.py b/tests/test_size_estimate.py new file mode 100644 index 0000000..0732ebe --- /dev/null +++ b/tests/test_size_estimate.py @@ -0,0 +1,71 @@ +"""Tests for the allocation-free size estimator (issue #207).""" + +from __future__ import annotations + +import datetime +import json + +import pytest + +from weaver_kernel.firewall.size_estimate import estimated_size + + +def _json_len(value: object) -> int: + return len(json.dumps(value, default=str)) + + +def test_scalars() -> None: + assert estimated_size(None) == 4 + assert estimated_size(True) == 4 + assert estimated_size(False) == 5 + assert estimated_size("hi") == 4 # 2 chars + 2 quotes + assert estimated_size(12345) == 5 + + +def test_matches_json_dumps_on_simple_containers() -> None: + # For containers of scalars the estimate is exact against json.dumps. + for value in ([1, 2, 3], {"a": 1, "b": 2}, [], {}, ["x", "y"]): + assert estimated_size(value) == _json_len(value), value + + +@pytest.mark.parametrize( + "value", + [ + {"rows": [{"id": i, "name": f"n{i}", "active": i % 2 == 0} for i in range(50)]}, + [{"k": "v" * 100} for _ in range(20)], + {"nested": {"deep": {"deeper": ["a", "b", "c"]}}}, + {"when": datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)}, + "x" * 5000, + ], +) +def test_bounded_error_against_json(value: object) -> None: + # The estimate need not be exact (it does not reproduce JSON escaping), but + # it must track json.dumps closely enough that budget threshold comparisons + # do not flip: stay within ±25% of the real serialised length. + actual = _json_len(value) + estimate = estimated_size(value) + assert 0.75 * actual <= estimate <= 1.25 * actual, (actual, estimate) + + +def test_early_exit_stops_past_limit() -> None: + big = {"payload": "x" * 100_000} + # With a small limit the walk returns as soon as it crosses the threshold, + # so the reported value need only be greater than the limit. + assert estimated_size(big, limit=100) > 100 + + +def test_non_serialisable_falls_back_to_str() -> None: + class Opaque: + def __str__(self) -> str: + return "opaque-value" + + # Mirrors the previous json.dumps(default=str): rendered as a quoted string. + assert estimated_size(Opaque()) == len("opaque-value") + 2 + + +def test_deeply_nested_does_not_recurse() -> None: + value: object = "leaf" + for _ in range(5000): + value = [value] + # Iterative walk: no RecursionError even far past the recursion limit. + assert estimated_size(value) > 5000 diff --git a/tests/test_token_counting_tiktoken.py b/tests/test_token_counting_tiktoken.py new file mode 100644 index 0000000..fbbba73 --- /dev/null +++ b/tests/test_token_counting_tiktoken.py @@ -0,0 +1,90 @@ +"""Tests for the tiktoken-backed token counter (issue #218). + +These exercise the counter's own logic — value handling, encoder caching, and +error remapping — against a *fake* ``tiktoken`` module injected into +``sys.modules``. That keeps the suite deterministic and offline: the real +``tiktoken`` downloads its BPE vocabularies over the network on first use, which +must never be a test dependency (AGENTS.md prefers offline tests). +""" + +from __future__ import annotations + +import sys +import types +from collections.abc import Iterator + +import pytest + +from weaver_kernel import FirewallError +from weaver_kernel.firewall import token_counting_tiktoken as tt + + +class _FakeEncoding: + """Deterministic stand-in: one token per whitespace-separated word.""" + + def encode(self, text: str) -> list[int]: + return list(range(len(text.split()))) + + +@pytest.fixture() +def fake_tiktoken(monkeypatch: pytest.MonkeyPatch) -> Iterator[None]: + module = types.ModuleType("tiktoken") + + def get_encoding(name: str) -> _FakeEncoding: + if name == "unknown-encoding": + raise KeyError(name) + return _FakeEncoding() + + module.get_encoding = get_encoding # type: ignore[attr-defined] + monkeypatch.setitem(sys.modules, "tiktoken", module) + tt._load_encoding.cache_clear() + yield + tt._load_encoding.cache_clear() + + +def test_counts_words_as_tokens(fake_tiktoken: None) -> None: + counter = tt.make_tiktoken_counter() + assert counter("hello world") == 2 + + +def test_none_is_zero(fake_tiktoken: None) -> None: + assert tt.make_tiktoken_counter()(None) == 0 + + +def test_structured_value_counts_serialised_form(fake_tiktoken: None) -> None: + counter = tt.make_tiktoken_counter() + # json.dumps({"id": 1}) == '{"id": 1}' → two whitespace-separated tokens. + assert counter({"id": 1}) == 2 + + +def test_non_serialisable_value_falls_back_to_str(fake_tiktoken: None) -> None: + class Opaque: + def __str__(self) -> str: + return "one two three" + + assert tt.make_tiktoken_counter()(Opaque()) == 3 + + +def test_explicit_encoding_is_accepted(fake_tiktoken: None) -> None: + assert tt.make_tiktoken_counter(encoding="o200k_base")("a b c") == 3 + + +def test_unknown_encoding_raises_firewall_error(fake_tiktoken: None) -> None: + with pytest.raises(FirewallError): + tt.make_tiktoken_counter(encoding="unknown-encoding") + + +def test_missing_extra_raises_helpful_import_error(monkeypatch: pytest.MonkeyPatch) -> None: + # Simulate the extra being absent: importing tiktoken raises ModuleNotFoundError. + monkeypatch.setitem(sys.modules, "tiktoken", None) + tt._load_encoding.cache_clear() + with pytest.raises(ImportError, match=r"weaver-kernel\[tiktoken\]"): + tt.make_tiktoken_counter() + tt._load_encoding.cache_clear() + + +def test_wires_into_budget_manager(fake_tiktoken: None) -> None: + from weaver_kernel.firewall import BudgetManager + + manager = BudgetManager(total_budget=1000, token_counter=tt.make_tiktoken_counter()) + assert manager.count_tokens("hello world") == 2 From 25b26dd5cd029c2cf275c03554c4e2e6b7f25e3c Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 22 Jun 2026 07:58:48 +0000 Subject: [PATCH 2/3] =?UTF-8?q?fix(firewall):=20address=20Copilot=20review?= =?UTF-8?q?=20=E2=80=94=20bound=20summaries,=20cycle-safe=20estimator,=20t?= =?UTF-8?q?otal-budget=20invariant?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - summarize: cap _summarize_dict/_summarize_plain_list iteration with islice and derive the omitted count from known length, so a huge dict/list no longer triggers an O(n) walk on the firewall hot path (defeating max_facts). - size_estimate: count each container at most once (cycle detection) so self-referential inputs terminate instead of hanging; correct the docstring's traversal-order claim (the total is order-independent). - handles: reject a single entry larger than the binding cap (max_entry_bytes or max_total_bytes) so current_bytes can never exceed max_total_bytes; use estimated_size(limit=...) to avoid fully walking an over-cap payload. - Docs/CHANGELOG and tests updated (cyclic-input, total-budget rejection). Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01HjetuNJX1EUJ2eVGfoLyHt --- docs/context_firewall.md | 8 +++-- src/weaver_kernel/errors.py | 16 +++++----- src/weaver_kernel/firewall/size_estimate.py | 18 +++++++++-- src/weaver_kernel/firewall/summarize.py | 29 +++++++++++++----- src/weaver_kernel/handles.py | 34 +++++++++++++++------ tests/test_handles.py | 16 ++++++++++ tests/test_size_estimate.py | 15 +++++++++ 7 files changed, 106 insertions(+), 30 deletions(-) diff --git a/docs/context_firewall.md b/docs/context_firewall.md index c32e91d..5c353d3 100644 --- a/docs/context_firewall.md +++ b/docs/context_firewall.md @@ -76,9 +76,11 @@ store = HandleStore( Sizes are estimated with the same `estimated_size` walk used for budgets. `max_total_bytes` evicts oldest-first after each store (never the just-stored entry); `max_entry_bytes` rejects an over-cap payload with `HandleTooLarge` -rather than truncating it, keeping expansion faithful to the original dataset. -Expanding an evicted handle raises the usual `HandleNotFound`. Tighter budgets -mean more "handle expired/evicted" experiences — tune for your workload. +rather than truncating it, keeping expansion faithful to the original dataset. A +single entry larger than `max_total_bytes` can never fit, so it is rejected the +same way — `current_bytes` therefore never exceeds `max_total_bytes`. Expanding +an evicted handle raises the usual `HandleNotFound`. Tighter budgets mean more +"handle expired/evicted" experiences — tune for your workload. ## Redaction diff --git a/src/weaver_kernel/errors.py b/src/weaver_kernel/errors.py index b6a0209..245299e 100644 --- a/src/weaver_kernel/errors.py +++ b/src/weaver_kernel/errors.py @@ -133,13 +133,15 @@ class HandleExpired(AgentKernelError): class HandleTooLarge(AgentKernelError): - """Raised when a single handle payload exceeds the store's per-entry byte cap. - - Fires from :meth:`~weaver_kernel.handles.HandleStore.store` only when the - store was configured with ``max_entry_bytes`` and the estimated size of the - data to store exceeds it. The data is *not* retained — rejecting an - over-cap payload outright (rather than silently truncating it) keeps handle - expansion faithful to the original dataset and bounds resident raw data. + """Raised when a single handle payload exceeds the store's byte ceiling. + + Fires from :meth:`~weaver_kernel.handles.HandleStore.store` when a byte + budget is configured and the estimated size of the data exceeds the binding + per-store ceiling — ``max_entry_bytes``, or ``max_total_bytes`` (a single + entry larger than the whole budget can never fit). The data is *not* + retained — rejecting an over-cap payload outright (rather than silently + truncating it) keeps handle expansion faithful to the original dataset and + bounds resident raw data. """ diff --git a/src/weaver_kernel/firewall/size_estimate.py b/src/weaver_kernel/firewall/size_estimate.py index b68d63a..51c502f 100644 --- a/src/weaver_kernel/firewall/size_estimate.py +++ b/src/weaver_kernel/firewall/size_estimate.py @@ -8,8 +8,10 @@ serialisation that would double peak memory on large payloads. The estimate is intentionally approximate but **deterministic**: the same input -always yields the same number, and the walk visits container members in their -native (insertion) order. It is used both by +always yields the same number (the total is order-independent). Self-referential +structures are handled gracefully — each container is counted at most once, so a +cycle can never hang the walk (unlike ``json.dumps``, which raises). It is used +both by :mod:`~weaver_kernel.firewall.transform` (raw-mode budget warning, issue #207) and by :class:`~weaver_kernel.handles.HandleStore` (byte-size budgeting, issue #211). @@ -40,6 +42,8 @@ def estimated_size(value: Any, *, limit: int | None = None) -> int: Walks *value* iteratively (so deeply nested structures cannot exhaust the recursion limit) summing approximate JSON character contributions. ``bool`` is handled before ``int`` because ``bool`` is an ``int`` subclass in Python. + Each container is visited at most once, so self-referential inputs terminate + instead of looping forever. Args: value: Any value the firewall might serialise. Non-JSON types fall back @@ -48,7 +52,8 @@ def estimated_size(value: Any, *, limit: int | None = None) -> int: limit: Optional early-exit threshold. When the running total exceeds *limit* the walk stops and returns a value greater than *limit* — use this when only the boolean ``size > limit`` decision is needed, not - the exact size. + the exact size. (Which member tips the total past *limit* is + unspecified, but the returned value is always ``> limit``.) Returns: A non-negative integer approximating ``len(json.dumps(value, @@ -63,6 +68,7 @@ def estimated_size(value: Any, *, limit: int | None = None) -> int: 9 """ total = 0 + seen: set[int] = set() # container ids already counted, to break cycles stack: list[Any] = [value] while stack: cur = stack.pop() @@ -79,12 +85,18 @@ def estimated_size(value: Any, *, limit: int | None = None) -> int: elif isinstance(cur, (int, float)): total += len(str(cur)) elif isinstance(cur, dict): + if id(cur) in seen: + continue + seen.add(id(cur)) n = len(cur) total += _CONTAINER_DELIMS + max(0, n - 1) * _ITEM_SEP for key, val in cur.items(): total += len(str(key)) + _QUOTES + _KV_SEP stack.append(val) elif isinstance(cur, (list, tuple)): + if id(cur) in seen: + continue + seen.add(id(cur)) n = len(cur) total += _CONTAINER_DELIMS + max(0, n - 1) * _ITEM_SEP stack.extend(cur) diff --git a/src/weaver_kernel/firewall/summarize.py b/src/weaver_kernel/firewall/summarize.py index e0e9295..0ce05a3 100644 --- a/src/weaver_kernel/firewall/summarize.py +++ b/src/weaver_kernel/firewall/summarize.py @@ -5,6 +5,7 @@ from __future__ import annotations +from itertools import islice from typing import Any @@ -44,19 +45,27 @@ def _is_number(value: Any) -> bool: return isinstance(value, (int, float)) and not isinstance(value, bool) -def _truncate_facts(facts: list[str], max_facts: int) -> list[str]: +def _truncate_facts(facts: list[str], max_facts: int, *, total: int | None = None) -> list[str]: """Cap *facts* at *max_facts*, surfacing omission explicitly. Silent truncation lets an LLM treat a partial summary as complete. When facts are dropped, the final slot becomes an explicit marker stating how many were omitted; an untruncated list is returned unchanged so it carries no marker (issue #174). + + Args: + facts: The (possibly already-bounded) candidate fact strings. + max_facts: Maximum facts to return. + total: True number of candidate facts when *facts* was pre-capped by the + caller (so it never materialises one fact per item on the hot path). + Defaults to ``len(facts)``. """ if max_facts <= 0: return [] - if len(facts) <= max_facts: - return facts - omitted = len(facts) - (max_facts - 1) + candidate_total = len(facts) if total is None else total + if candidate_total <= max_facts: + return facts[:max_facts] + omitted = candidate_total - (max_facts - 1) return [*facts[: max_facts - 1], f"… ({omitted} more facts omitted; full data via handle)"] @@ -117,7 +126,9 @@ def _summarize_list_of_dicts(rows: list[dict[str, Any]], *, max_facts: int) -> l def _summarize_dict(data: dict[str, Any], *, max_facts: int) -> list[str]: facts: list[str] = [f"Keys: {', '.join(sorted(data.keys())[:20])}"] - for k, v in data.items(): + # Build at most ``max_facts`` value facts — never one per key — so a huge + # dict does not turn a bounded summary into an O(n) walk on the hot path. + for k, v in islice(data.items(), max_facts): if isinstance(v, (int, float)): facts.append(f"{k}: {v}") elif isinstance(v, str): @@ -128,13 +139,15 @@ def _summarize_dict(data: dict[str, Any], *, max_facts: int) -> list[str]: facts.append(f"{k}: dict with keys [{', '.join(list(v.keys())[:5])}]") else: facts.append(f"{k}: {repr(v)[:80]}") - return _truncate_facts(facts, max_facts) + return _truncate_facts(facts, max_facts, total=1 + len(data)) def _summarize_plain_list(data: list[Any], *, max_facts: int) -> list[str]: + # Only repr() up to ``max_facts`` elements; the total count is already + # surfaced by the "List of N items" header and threaded to the marker. facts = [f"List of {len(data)} items"] - facts.extend(repr(item)[:100] for item in data) - return _truncate_facts(facts, max_facts) + facts.extend(repr(item)[:100] for item in islice(data, max_facts)) + return _truncate_facts(facts, max_facts, total=1 + len(data)) def _summarize_string(data: str, *, max_facts: int) -> list[str]: diff --git a/src/weaver_kernel/handles.py b/src/weaver_kernel/handles.py index 408957e..8e4411f 100644 --- a/src/weaver_kernel/handles.py +++ b/src/weaver_kernel/handles.py @@ -28,7 +28,9 @@ class HandleStore: - ``max_entry_bytes`` rejects a single over-cap payload with :class:`~weaver_kernel.errors.HandleTooLarge` (the data is never stored). - ``max_total_bytes`` evicts oldest-first after each store until aggregate - estimated residency is within budget. + estimated residency is within budget. A single entry larger than the total + budget could never fit, so it is rejected with ``HandleTooLarge`` rather + than retained — guaranteeing ``current_bytes`` never exceeds the budget. """ _EVICT_INTERVAL: int = 128 # run evict_expired() every N store() calls @@ -88,15 +90,29 @@ def store( A :class:`Handle` referencing the stored data. Raises: - HandleTooLarge: If ``max_entry_bytes`` is set and the estimated size - of *data* exceeds it. + HandleTooLarge: If a byte budget is configured and the estimated + size of *data* exceeds the binding per-store ceiling — either + ``max_entry_bytes`` or ``max_total_bytes`` (a single entry larger + than the *total* budget can never fit and is rejected too, so the + store never ends up permanently over budget). """ - size = estimated_size(data) - if self._max_entry_bytes is not None and size > self._max_entry_bytes: - raise HandleTooLarge( - f"Handle data for '{capability_id}' is ~{size} bytes, exceeding the " - f"per-entry cap of {self._max_entry_bytes} bytes; not stored." - ) + # Both caps act as a per-store reject ceiling: a single entry larger than + # the total budget could never satisfy it. Early-exit at the ceiling so an + # over-cap payload that will be rejected anyway is not fully walked. + caps = [c for c in (self._max_entry_bytes, self._max_total_bytes) if c is not None] + if caps: + reject_at = min(caps) + size = estimated_size(data, limit=reject_at) + if size > reject_at: + cap_name = ( + "max_entry_bytes" if reject_at == self._max_entry_bytes else "max_total_bytes" + ) + raise HandleTooLarge( + f"Handle data for '{capability_id}' exceeds the {reject_at}-byte " + f"{cap_name} ceiling; not stored." + ) + else: + size = estimated_size(data) ttl = ttl_seconds if ttl_seconds is not None else self._default_ttl now = datetime.datetime.now(tz=datetime.timezone.utc) diff --git a/tests/test_handles.py b/tests/test_handles.py index e273a73..c875621 100644 --- a/tests/test_handles.py +++ b/tests/test_handles.py @@ -407,6 +407,22 @@ def test_evict_expired_releases_byte_accounting() -> None: assert before > store.current_bytes +def test_entry_larger_than_total_budget_is_rejected() -> None: + # A single entry that exceeds the *total* budget can never fit, so it is + # rejected rather than retained — current_bytes never exceeds the budget. + store = HandleStore(max_total_bytes=200) + with pytest.raises(HandleTooLarge): + store.store("cap.x", {"v": "x" * 1000}) + assert store.current_bytes == 0 + + +def test_total_budget_never_exceeded_after_stores() -> None: + store = HandleStore(max_total_bytes=2500) + for marker in ("a", "b", "c", "d", "e"): + store.store("cap.x", {"v": marker * 1000}) + assert store.current_bytes <= 2500 + + def test_byte_budget_rejects_invalid_config() -> None: with pytest.raises(BudgetConfigError): HandleStore(max_total_bytes=0) diff --git a/tests/test_size_estimate.py b/tests/test_size_estimate.py index 0732ebe..c593234 100644 --- a/tests/test_size_estimate.py +++ b/tests/test_size_estimate.py @@ -69,3 +69,18 @@ def test_deeply_nested_does_not_recurse() -> None: value = [value] # Iterative walk: no RecursionError even far past the recursion limit. assert estimated_size(value) > 5000 + + +def test_self_referential_list_terminates() -> None: + cyclic: list[object] = [1, 2] + cyclic.append(cyclic) # a list that contains itself + # json.dumps would raise; the estimator counts each container once and + # returns a finite size instead of hanging. + assert estimated_size(cyclic) > 0 + + +def test_mutually_referential_dicts_terminate() -> None: + a: dict[str, object] = {} + b: dict[str, object] = {"a": a} + a["b"] = b + assert estimated_size(a) > 0 From a9bea94370ebe85bfe651c28f57b4de748a643d0 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 12:16:17 +0000 Subject: [PATCH 3/3] test(firewall): pin raw-mode budget-warning decision at the char boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Audit follow-up on #207. estimated_size() is approximate (±25%), and the only decision it drives on the transform path is the raw-mode "exceeds budget" warning. Add a boundary test asserting the over/under decision holds with margin beyond the error band, and note in size_estimate that the estimate is a lower bound for shared-reference (DAG) inputs. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01EMB1FUdczZi62skaN4HZbS --- src/weaver_kernel/firewall/size_estimate.py | 8 +++++-- tests/test_firewall.py | 23 +++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/weaver_kernel/firewall/size_estimate.py b/src/weaver_kernel/firewall/size_estimate.py index 51c502f..e2debf5 100644 --- a/src/weaver_kernel/firewall/size_estimate.py +++ b/src/weaver_kernel/firewall/size_estimate.py @@ -10,8 +10,12 @@ The estimate is intentionally approximate but **deterministic**: the same input always yields the same number (the total is order-independent). Self-referential structures are handled gracefully — each container is counted at most once, so a -cycle can never hang the walk (unlike ``json.dumps``, which raises). It is used -both by +cycle can never hang the walk (unlike ``json.dumps``, which raises). Counting +each container once also means a structure that legitimately reuses the same +container in several positions (a DAG, not a cycle) is counted once, so the +estimate is a *lower bound* for shared-reference inputs; firewall and handle +payloads are deserialised JSON without shared references, where this does not +arise. It is used by :mod:`~weaver_kernel.firewall.transform` (raw-mode budget warning, issue #207) and by :class:`~weaver_kernel.handles.HandleStore` (byte-size budgeting, issue #211). diff --git a/tests/test_firewall.py b/tests/test_firewall.py index eda9257..32d8984 100644 --- a/tests/test_firewall.py +++ b/tests/test_firewall.py @@ -179,6 +179,29 @@ def test_raw_mode_oversized_data_adds_warning() -> None: assert frame.raw_data == large_data # type: ignore[union-attr] +def test_raw_mode_warning_decision_holds_across_budget_boundary() -> None: + # #207: estimated_size() replaced len(json.dumps(...)) for the raw-mode + # budget warning. The estimate is approximate (±25%, see test_size_estimate), + # so the over/under *decision* — the only thing this size drives — is pinned + # with margin beyond the error band: a payload well under max_chars carries + # no warning, one well over does. + budgets = Budgets(max_chars=1000) + under = _transform( + {"payload": "x" * 200}, + "raw", + principal_roles=["admin"], + budgets=budgets, + ) + over = _transform( + {"payload": "x" * 5000}, + "raw", + principal_roles=["admin"], + budgets=budgets, + ) + assert not any("exceeds budget" in w for w in under.warnings) # type: ignore[union-attr] + assert any("exceeds budget" in w for w in over.warnings) # type: ignore[union-attr] + + # ── Table mode with non-list data ──────────────────────────────────────────────