From f5f507074293b1c301fa51bdd83a8e2e31aa88f5 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 22 Jun 2026 07:47:20 +0000
Subject: [PATCH 1/3] feat(firewall): payload sizing, byte budgets, tiktoken
 counter, honest summaries

Group the four context-firewall issues that share a single implementation path
(payload measurement and the bounded Frame) into one change:

- #207: add allocation-free firewall.estimated_size and use it for the raw-mode
  budget check instead of len(json.dumps(...)), so large payloads are no longer
  fully serialised just to be measured.
- #211: add opt-in byte budgets to HandleStore (max_total_bytes evicts
  oldest-first; max_entry_bytes rejects an over-cap payload with the new
  HandleTooLarge error). Defaults stay None so behaviour is unchanged.
- #218: ship firewall.make_tiktoken_counter implementing the TokenCounter seam
  with a real tokenizer (lazy import, cached encoder, configurable encoding),
  wired via BudgetManager(token_counter=...).
- #174: stop averaging boolean columns in summaries (report true/false counts)
  and surface truncation with an explicit "N more facts omitted" marker.

To stay within the shrink-only module-size ratchet, the HandleStore.expand
query pipeline moves verbatim into a new handle_query module (also reduces the
handles.py ceiling debt tracked by #188). Docs and CHANGELOG updated.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01HjetuNJX1EUJ2eVGfoLyHt
---
 CHANGELOG.md                                  |  19 ++
 docs/context_firewall.md                      |  64 ++++-
 pyproject.toml                                |   4 +
 src/weaver_kernel/__init__.py                 |   8 +-
 src/weaver_kernel/errors.py                   |  21 +-
 src/weaver_kernel/firewall/__init__.py        |   4 +
 src/weaver_kernel/firewall/size_estimate.py   |  96 +++++++
 src/weaver_kernel/firewall/summarize.py       |  61 ++--
 .../firewall/token_counting_tiktoken.py       | 101 +++++++
 src/weaver_kernel/firewall/transform.py       |   4 +-
 src/weaver_kernel/handle_query.py             | 232 ++++++++++++++++
 src/weaver_kernel/handles.py                  | 261 ++++++------------
 tests/test_architecture.py                    |   1 -
 tests/test_doctests.py                        |   1 +
 tests/test_firewall.py                        |  42 +++
 tests/test_handles.py                         |  68 +++++
 tests/test_size_estimate.py                   |  71 +++++
 tests/test_token_counting_tiktoken.py         |  90 ++++++
 18 files changed, 927 insertions(+), 221 deletions(-)
 create mode 100644 src/weaver_kernel/firewall/size_estimate.py
 create mode 100644 src/weaver_kernel/firewall/token_counting_tiktoken.py
 create mode 100644 src/weaver_kernel/handle_query.py
 create mode 100644 tests/test_size_estimate.py
 create mode 100644 tests/test_token_counting_tiktoken.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4fe2310..2fa1d80 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 
 ### Added
+- **Context-firewall sizing, budgeting & summary fidelity.** A grouped pass over
+  how the firewall measures, bounds, and represents payloads:
+  - **Allocation-free size estimation (#207).** `firewall.estimated_size` walks
+    a value to approximate its serialised length instead of `json.dumps`-ing the
+    whole payload just to measure it; the raw-mode budget check now uses it, so
+    multi-MB results are no longer fully serialised for sizing.
+  - **Byte-aware handle budgeting (#211).** `HandleStore` accepts optional
+    `max_total_bytes` (evict oldest-first) and `max_entry_bytes` (reject an
+    over-cap payload with the new `HandleTooLarge` error). Both default to
+    `None`, leaving existing behaviour unchanged; `current_bytes` exposes
+    resident usage.
+  - **tiktoken token counter (#218).** `firewall.make_tiktoken_counter()`
+    implements the `TokenCounter` seam with a real tokenizer (configurable
+    encoding, default `cl100k_base`), wired via `BudgetManager(token_counter=…)`.
+    `tiktoken` is imported lazily so the base install stays dependency-light;
+    install the existing `weaver-kernel[tiktoken]` extra to use it.
+  - **Honest summaries (#174).** Boolean columns are summarised as true/false
+    counts instead of being averaged as numbers, and truncated fact lists carry
+    an explicit "N more facts omitted" marker.
 - **CI / supply-chain hardening.** A focused pass over the build pipeline and
   repository automation:
   - **Bare-install CI job (#208).** Installs `weaver-kernel` with no extras,
diff --git a/docs/context_firewall.md b/docs/context_firewall.md
index af65471..c32e91d 100644
--- a/docs/context_firewall.md
+++ b/docs/context_firewall.md
@@ -19,6 +19,12 @@ Budgets(
 )
 ```
 
+The character size used for budget comparisons is computed by an allocation-free
+estimator (`weaver_kernel.firewall.estimated_size`) that walks the structure
+rather than serialising it with `json.dumps` — so a multi-MB raw result is never
+fully serialised just to measure it. The estimate is deterministic and tracks
+the serialised length closely; only threshold comparisons depend on it.
+
 ## Response modes
 
 | Mode | What you get | When to use |
@@ -51,6 +57,29 @@ expanded = kernel.expand(handle, query={"fields": ["id", "name"]}, principal=pri
 expanded = kernel.expand(handle, query={"filter": {"status": "unpaid"}}, principal=principal)
 ```
 
+### Bounding handle memory by size
+
+The store holds *raw, pre-firewall* datasets, and entry count is a poor proxy
+for memory — one deployment's 10k entries are kilobytes, another's are
+gigabytes. `HandleStore` accepts two optional byte budgets (both `None` =
+disabled, so default behaviour is unchanged):
+
+```python
+from weaver_kernel import HandleStore
+
+store = HandleStore(
+    max_total_bytes=512 * 1024 * 1024,  # evict oldest-first until within budget
+    max_entry_bytes=64 * 1024 * 1024,   # reject a single over-cap payload
+)
+```
+
+Sizes are estimated with the same `estimated_size` walk used for budgets.
+`max_total_bytes` evicts oldest-first after each store (never the just-stored
+entry); `max_entry_bytes` rejects an over-cap payload with `HandleTooLarge`
+rather than truncating it, keeping expansion faithful to the original dataset.
+Expanding an evicted handle raises the usual `HandleNotFound`. Tighter budgets
+mean more "handle expired/evicted" experiences — tune for your workload.
+
 ## Redaction
 
 When a capability has `SensitivityTag.PII` or `SensitivityTag.PCI`:
@@ -84,11 +113,18 @@ never becomes a sensitive-data sink (see `docs/security.md`).
 ## Summarization
 
 Summaries are produced deterministically:
-- **list of dicts** → row count + top keys + numeric stats + categorical distributions
+- **list of dicts** → row count + top keys + numeric stats + categorical/boolean
+  distributions
 - **dict** → key list + per-value type/value
 - **string** → truncated to 500 chars
 - **other** → repr() truncated to 200 chars
 
+Boolean columns are reported as `True`/`False` counts, never averaged (a `bool`
+is an `int` subclass in Python, so "mean of `is_active` = 0.7" is nonsense). When
+the fact list is capped by `max_facts`, the final fact is an explicit omission
+marker (`… (N more facts omitted; full data via handle)`) so a truncated summary
+is never mistaken for a complete one.
+
 ## Cross-invocation budgets
 
 The per-invocation `Budgets` above cap a single Frame. A separate
@@ -129,21 +165,27 @@ remaining drops *below* 5% does `handle_only` take over.
 `budget_remaining` in the returned `DryRunResult`, so callers can preview
 what their next live invocation would actually return.
 
-Plug a different token counter (for example, a `tiktoken`-based one) via the
-`TokenCounter` protocol:
+The default counter (`default_token_counter`) is a character-based
+`len(json.dumps(value)) // 4` approximation with no extra dependencies. For real
+token counts, install the `tiktoken` extra and use the shipped factory:
 
 ```python
-import tiktoken                         # pip install weaver-kernel[tiktoken]
-enc = tiktoken.encoding_for_model("gpt-4o")
+from weaver_kernel.firewall import BudgetManager, make_tiktoken_counter
 
-def tiktoken_counter(value):
-    return len(enc.encode(str(value)))
-
-manager = BudgetManager(total_budget=128_000, token_counter=tiktoken_counter)
+# pip install weaver-kernel[tiktoken]
+manager = BudgetManager(
+    total_budget=128_000,
+    token_counter=make_tiktoken_counter(),              # default cl100k_base
+    # token_counter=make_tiktoken_counter("o200k_base"),  # GPT-4o / o-series
+)
 ```
 
-The default counter (`default_token_counter`) is a character-based
-`len(json.dumps(value)) // 4` approximation with no extra dependencies.
+`make_tiktoken_counter` resolves and caches the encoder eagerly, so a missing
+extra (`ImportError`) or an unknown encoding name (`FirewallError`) fails at
+construction rather than mid-budgeting. The encoding is explicit because models
+tokenize differently — name the one you budget against. `tiktoken` is imported
+lazily, so `import weaver_kernel` never pulls the heavyweight dependency. Any
+callable matching the `TokenCounter` protocol works too.
 
 ## Streaming
 
diff --git a/pyproject.toml b/pyproject.toml
index 00facb1..b9e84a8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -137,3 +137,7 @@ ignore_missing_imports = true
 [[tool.mypy.overrides]]
 module = "weaver_contracts.*"
 ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "tiktoken.*"
+ignore_missing_imports = true
diff --git a/src/weaver_kernel/__init__.py b/src/weaver_kernel/__init__.py
index f51c38d..94a3b39 100644
--- a/src/weaver_kernel/__init__.py
+++ b/src/weaver_kernel/__init__.py
@@ -40,9 +40,7 @@
     from weaver_kernel import SQLiteTraceStore, JsonlTraceStore
     from weaver_kernel import SQLiteRevocationStore, InMemoryRevocationStore
     from weaver_kernel import verify_chain, ChainVerificationResult, TraceRecord
-    from weaver_kernel import (
-        TraceStoreProtocol, RevocationStoreProtocol, HandleStoreProtocol,
-    )
+    from weaver_kernel import TraceStoreProtocol, RevocationStoreProtocol, HandleStoreProtocol
 
 LLM tool-format adapters::
 
@@ -62,7 +60,7 @@
         DriverError, FirewallError, AdapterParseError,
         BudgetExhausted, BudgetConfigError,
         CapabilityNotFound, CapabilityAlreadyRegistered,
-        HandleNotFound, HandleExpired, HandleConstraintViolation,
+        HandleNotFound, HandleExpired, HandleTooLarge, HandleConstraintViolation,
         NamespaceNotFound, FederationError, ManifestError, ManifestSignatureError,
         TrustPolicyError, DiscoveryError,
     )
@@ -91,6 +89,7 @@
     HandleConstraintViolation,
     HandleExpired,
     HandleNotFound,
+    HandleTooLarge,
     ManifestError,
     ManifestSignatureError,
     NamespaceNotFound,
@@ -246,6 +245,7 @@
     "HandleConstraintViolation",
     "HandleExpired",
     "HandleNotFound",
+    "HandleTooLarge",
     "ManifestError",
     "ManifestSignatureError",
     "NamespaceNotFound",
diff --git a/src/weaver_kernel/errors.py b/src/weaver_kernel/errors.py
index be7aa2b..b6a0209 100644
--- a/src/weaver_kernel/errors.py
+++ b/src/weaver_kernel/errors.py
@@ -78,9 +78,13 @@ class BudgetExhausted(AgentKernelError):
 
 
 class BudgetConfigError(AgentKernelError):
-    """Raised when a :class:`~weaver_kernel.firewall.budget_manager.BudgetManager` is
-    constructed with invalid parameters, or asked to allocate/record/release
-    a negative amount.
+    """Raised when a budget is constructed with invalid parameters.
+
+    Covers the :class:`~weaver_kernel.firewall.budget_manager.BudgetManager`
+    (non-positive ``total_budget``/``default_request``, or a negative
+    allocate/record/release amount) and the
+    :class:`~weaver_kernel.handles.HandleStore` byte budgets (non-positive
+    ``max_total_bytes``/``max_entry_bytes``).
 
     Used in place of bare :class:`ValueError` so callers can catch budget
     configuration mistakes without swallowing unrelated stdlib errors.
@@ -128,6 +132,17 @@ class HandleExpired(AgentKernelError):
     """Raised when a handle's TTL has elapsed."""
 
 
+class HandleTooLarge(AgentKernelError):
+    """Raised when a single handle payload exceeds the store's per-entry byte cap.
+
+    Fires from :meth:`~weaver_kernel.handles.HandleStore.store` only when the
+    store was configured with ``max_entry_bytes`` and the estimated size of the
+    data to store exceeds it. The data is *not* retained — rejecting an
+    over-cap payload outright (rather than silently truncating it) keeps handle
+    expansion faithful to the original dataset and bounds resident raw data.
+    """
+
+
 class HandleConstraintViolation(AgentKernelError):
     """Raised when a handle expansion request violates the grant's constraints.
 
diff --git a/src/weaver_kernel/firewall/__init__.py b/src/weaver_kernel/firewall/__init__.py
index 17a45d4..98a7ebc 100644
--- a/src/weaver_kernel/firewall/__init__.py
+++ b/src/weaver_kernel/firewall/__init__.py
@@ -3,8 +3,10 @@
 from .budget_manager import BudgetManager
 from .budgets import Budgets
 from .redaction import redact
+from .size_estimate import estimated_size
 from .summarize import summarize
 from .token_counting import TokenCounter, default_token_counter
+from .token_counting_tiktoken import make_tiktoken_counter
 from .transform import Firewall
 
 __all__ = [
@@ -13,6 +15,8 @@
     "Firewall",
     "TokenCounter",
     "default_token_counter",
+    "estimated_size",
+    "make_tiktoken_counter",
     "redact",
     "summarize",
 ]
diff --git a/src/weaver_kernel/firewall/size_estimate.py b/src/weaver_kernel/firewall/size_estimate.py
new file mode 100644
index 0000000..b68d63a
--- /dev/null
+++ b/src/weaver_kernel/firewall/size_estimate.py
@@ -0,0 +1,96 @@
+"""Allocation-free payload size estimation for the firewall hot path.
+
+``estimated_size`` approximates ``len(json.dumps(value, default=str))`` by
+walking the structure and summing approximate character contributions, *without*
+serialising the value to an intermediate string. The firewall runs on every
+egress path and only needs the size to compare against budget thresholds, so a
+single allocation-free pass (with an optional early exit) replaces a full JSON
+serialisation that would double peak memory on large payloads.
+
+The estimate is intentionally approximate but **deterministic**: the same input
+always yields the same number, and the walk visits container members in their
+native (insertion) order. It is used both by
+:mod:`~weaver_kernel.firewall.transform` (raw-mode budget warning, issue #207)
+and by :class:`~weaver_kernel.handles.HandleStore` (byte-size budgeting,
+issue #211).
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+# Approximate JSON literal widths and structural overhead. json.dumps with the
+# default separators emits ``", "`` between items and ``": "`` between a key and
+# its value; object/array delimiters add two characters per container. These
+# constants keep the estimate close to the real serialised length without
+# reproducing json's exact escaping rules (bounded error is acceptable because
+# only threshold comparisons depend on the result).
+_NULL_LEN = 4  # "null"
+_TRUE_LEN = 4  # "true"
+_FALSE_LEN = 5  # "false"
+_QUOTES = 2  # surrounding "" on a string
+_CONTAINER_DELIMS = 2  # {} or []
+_ITEM_SEP = 2  # ", "
+_KV_SEP = 2  # ": "
+
+
+def estimated_size(value: Any, *, limit: int | None = None) -> int:
+    """Approximate the serialised character size of *value* without serialising.
+
+    Walks *value* iteratively (so deeply nested structures cannot exhaust the
+    recursion limit) summing approximate JSON character contributions. ``bool``
+    is handled before ``int`` because ``bool`` is an ``int`` subclass in Python.
+
+    Args:
+        value: Any value the firewall might serialise. Non-JSON types fall back
+            to ``len(str(value))`` plus string quoting, mirroring the
+            ``default=str`` behaviour of the previous ``json.dumps`` measurement.
+        limit: Optional early-exit threshold. When the running total exceeds
+            *limit* the walk stops and returns a value greater than *limit* — use
+            this when only the boolean ``size > limit`` decision is needed, not
+            the exact size.
+
+    Returns:
+        A non-negative integer approximating ``len(json.dumps(value,
+        default=str))``.
+
+    Example:
+        >>> estimated_size(None)
+        4
+        >>> estimated_size("hi")
+        4
+        >>> estimated_size([1, 2, 3])
+        9
+    """
+    total = 0
+    stack: list[Any] = [value]
+    while stack:
+        cur = stack.pop()
+        if cur is None:
+            total += _NULL_LEN
+        elif cur is True:
+            total += _TRUE_LEN
+        elif cur is False:
+            total += _FALSE_LEN
+        elif isinstance(cur, str):
+            total += len(cur) + _QUOTES
+        elif isinstance(cur, bool):  # pragma: no cover - True/False caught above
+            total += _TRUE_LEN
+        elif isinstance(cur, (int, float)):
+            total += len(str(cur))
+        elif isinstance(cur, dict):
+            n = len(cur)
+            total += _CONTAINER_DELIMS + max(0, n - 1) * _ITEM_SEP
+            for key, val in cur.items():
+                total += len(str(key)) + _QUOTES + _KV_SEP
+                stack.append(val)
+        elif isinstance(cur, (list, tuple)):
+            n = len(cur)
+            total += _CONTAINER_DELIMS + max(0, n - 1) * _ITEM_SEP
+            stack.extend(cur)
+        else:
+            # Mirrors json.dumps(default=str): rendered as a quoted string.
+            total += len(str(cur)) + _QUOTES
+        if limit is not None and total > limit:
+            return total
+    return total
diff --git a/src/weaver_kernel/firewall/summarize.py b/src/weaver_kernel/firewall/summarize.py
index a9f1274..e0e9295 100644
--- a/src/weaver_kernel/firewall/summarize.py
+++ b/src/weaver_kernel/firewall/summarize.py
@@ -36,6 +36,30 @@ def summarize(data: Any, *, max_facts: int = 20) -> list[str]:
     return [repr(data)[:200]]
 
 
+# ── Helpers ─────────────────────────────────────────────────────────────────
+
+
+def _is_number(value: Any) -> bool:
+    """True for real numbers, excluding ``bool`` (an ``int`` subclass)."""
+    return isinstance(value, (int, float)) and not isinstance(value, bool)
+
+
+def _truncate_facts(facts: list[str], max_facts: int) -> list[str]:
+    """Cap *facts* at *max_facts*, surfacing omission explicitly.
+
+    Silent truncation lets an LLM treat a partial summary as complete. When
+    facts are dropped, the final slot becomes an explicit marker stating how
+    many were omitted; an untruncated list is returned unchanged so it carries
+    no marker (issue #174).
+    """
+    if max_facts <= 0:
+        return []
+    if len(facts) <= max_facts:
+        return facts
+    omitted = len(facts) - (max_facts - 1)
+    return [*facts[: max_facts - 1], f"… ({omitted} more facts omitted; full data via handle)"]
+
+
 # ── Specialised handlers ──────────────────────────────────────────────────────
 
 
@@ -51,9 +75,15 @@ def _summarize_list_of_dicts(rows: list[dict[str, Any]], *, max_facts: int) -> l
     top_keys = sorted(key_counts, key=lambda k: -key_counts[k])[:10]
     facts.append(f"Top keys: {', '.join(top_keys)}")
 
-    # Numeric stats
+    # Numeric stats. ``bool`` is a subclass of ``int`` in Python, so a boolean
+    # column would otherwise be "averaged" into a meaningless mean (e.g. the
+    # mean of ``is_active`` = 0.7); exclude bools here and report them as
+    # true/false counts below instead (issue #174).
     numeric_keys = [
-        k for k in top_keys if all(isinstance(r.get(k), (int, float)) for r in rows if k in r)
+        k
+        for k in top_keys
+        if all(_is_number(r.get(k)) for r in rows if k in r)
+        and any(_is_number(r.get(k)) for r in rows if k in r)
     ]
     for k in numeric_keys[:5]:
         values = [float(r[k]) for r in rows if k in r]
@@ -62,14 +92,18 @@ def _summarize_list_of_dicts(rows: list[dict[str, Any]], *, max_facts: int) -> l
                 f"{k}: min={min(values):.2f}, max={max(values):.2f}, "
                 f"avg={sum(values) / len(values):.2f}"
             )
-        if len(facts) >= max_facts:
-            break
 
-    # Status / categorical counts (string fields with few distinct values)
+    # Categorical / boolean counts for the remaining string-or-bool columns.
     for k in top_keys[:5]:
         if k in numeric_keys:
             continue
-        values_str = [str(r[k]) for r in rows if k in r and isinstance(r[k], str)]
+        present = [r[k] for r in rows if k in r]
+        bool_vals = [v for v in present if isinstance(v, bool)]
+        if bool_vals and len(bool_vals) == len(present):
+            true_count = sum(1 for v in bool_vals if v)
+            facts.append(f"{k}: True={true_count}, False={len(bool_vals) - true_count}")
+            continue
+        values_str = [str(v) for v in present if isinstance(v, str)]
         if not values_str:
             continue
         distinct = sorted(set(values_str))
@@ -77,15 +111,13 @@ def _summarize_list_of_dicts(rows: list[dict[str, Any]], *, max_facts: int) -> l
             counts = {v: values_str.count(v) for v in distinct}
             summary = ", ".join(f"{v}={counts[v]}" for v in sorted(counts))
             facts.append(f"{k} distribution: {summary}")
-        if len(facts) >= max_facts:
-            break
 
-    return facts[:max_facts]
+    return _truncate_facts(facts, max_facts)
 
 
 def _summarize_dict(data: dict[str, Any], *, max_facts: int) -> list[str]:
     facts: list[str] = [f"Keys: {', '.join(sorted(data.keys())[:20])}"]
-    for k, v in list(data.items())[: max_facts - 1]:
+    for k, v in data.items():
         if isinstance(v, (int, float)):
             facts.append(f"{k}: {v}")
         elif isinstance(v, str):
@@ -96,16 +128,13 @@ def _summarize_dict(data: dict[str, Any], *, max_facts: int) -> list[str]:
             facts.append(f"{k}: dict with keys [{', '.join(list(v.keys())[:5])}]")
         else:
             facts.append(f"{k}: {repr(v)[:80]}")
-        if len(facts) >= max_facts:
-            break
-    return facts[:max_facts]
+    return _truncate_facts(facts, max_facts)
 
 
 def _summarize_plain_list(data: list[Any], *, max_facts: int) -> list[str]:
     facts = [f"List of {len(data)} items"]
-    for item in data[: max_facts - 1]:
-        facts.append(repr(item)[:100])
-    return facts[:max_facts]
+    facts.extend(repr(item)[:100] for item in data)
+    return _truncate_facts(facts, max_facts)
 
 
 def _summarize_string(data: str, *, max_facts: int) -> list[str]:
diff --git a/src/weaver_kernel/firewall/token_counting_tiktoken.py b/src/weaver_kernel/firewall/token_counting_tiktoken.py
new file mode 100644
index 0000000..15e15fa
--- /dev/null
+++ b/src/weaver_kernel/firewall/token_counting_tiktoken.py
@@ -0,0 +1,101 @@
+"""tiktoken-backed token counter for the context firewall (issue #218).
+
+Implements the :class:`~weaver_kernel.firewall.token_counting.TokenCounter`
+seam with a real tokenizer instead of the ``chars // 4`` heuristic, so
+cross-invocation budgets count the same tokens an OpenAI-family model would.
+Install the optional dependency with ``pip install weaver-kernel[tiktoken]``
+and attach the counter to a :class:`~weaver_kernel.firewall.BudgetManager`::
+
+    from weaver_kernel.firewall import BudgetManager, make_tiktoken_counter
+
+    manager = BudgetManager(token_counter=make_tiktoken_counter())
+
+``tiktoken`` is imported lazily inside the factory, never at module import
+time, so importing :mod:`weaver_kernel` without the extra stays free of the
+heavyweight dependency.
+"""
+
+from __future__ import annotations
+
+import json
+from functools import lru_cache
+from typing import Any
+
+from ..errors import FirewallError
+from .token_counting import TokenCounter
+
+DEFAULT_ENCODING = "cl100k_base"
+"""Default tiktoken encoding.
+
+``cl100k_base`` covers GPT-4 / GPT-3.5-turbo and the ``text-embedding-3`` models
+and is the broadest-compatibility choice. Pass ``encoding="o200k_base"`` for the
+GPT-4o / o-series family. Anthropic models tokenize differently — name the
+encoding explicitly for the model you budget against rather than relying on this
+default.
+"""
+
+
+@lru_cache(maxsize=8)
+def _load_encoding(name: str) -> Any:
+    """Load and memoise a tiktoken encoding (constructing one is expensive).
+
+    Args:
+        name: A tiktoken encoding name, e.g. ``"cl100k_base"``.
+
+    Returns:
+        The tiktoken ``Encoding`` instance.
+
+    Raises:
+        ImportError: If the ``tiktoken`` extra is not installed.
+        FirewallError: If *name* is not a known tiktoken encoding.
+    """
+    try:
+        import tiktoken
+    except ImportError as exc:
+        raise ImportError(
+            "tiktoken-based token counting requires the optional dependency "
+            "'tiktoken>=0.6'. Install it with: pip install 'weaver-kernel[tiktoken]'"
+        ) from exc
+    try:
+        return tiktoken.get_encoding(name)
+    except (ValueError, KeyError) as exc:
+        raise FirewallError(
+            f"Unknown tiktoken encoding {name!r}; use a name such as "
+            f"'cl100k_base' or 'o200k_base'."
+        ) from exc
+
+
+def make_tiktoken_counter(encoding: str = DEFAULT_ENCODING) -> TokenCounter:
+    """Build a :class:`TokenCounter` that counts real tokens via tiktoken.
+
+    The encoder is resolved (and cached) eagerly so a missing extra or an
+    unknown encoding name fails at construction rather than on the first count.
+    Counting is deterministic: strings are encoded directly; other values are
+    rendered with ``json.dumps(..., default=str)`` first, mirroring the default
+    counter's treatment of structured data.
+
+    Args:
+        encoding: tiktoken encoding name (see :data:`DEFAULT_ENCODING`).
+
+    Returns:
+        A callable implementing the :class:`TokenCounter` protocol.
+
+    Raises:
+        ImportError: If the ``tiktoken`` extra is not installed.
+        FirewallError: If *encoding* is not a known tiktoken encoding.
+    """
+    _load_encoding(encoding)
+
+    def count(value: Any) -> int:
+        if value is None:
+            return 0
+        if isinstance(value, str):
+            text = value
+        else:
+            try:
+                text = json.dumps(value, default=str)
+            except (TypeError, ValueError):
+                text = str(value)
+        return len(_load_encoding(encoding).encode(text))
+
+    return count
diff --git a/src/weaver_kernel/firewall/transform.py b/src/weaver_kernel/firewall/transform.py
index 70c89a6..252457a 100644
--- a/src/weaver_kernel/firewall/transform.py
+++ b/src/weaver_kernel/firewall/transform.py
@@ -3,7 +3,6 @@
 from __future__ import annotations
 
 import datetime
-import json
 import logging
 from collections.abc import AsyncIterator
 from dataclasses import replace
@@ -18,6 +17,7 @@
 )
 from .budgets import Budgets
 from .redaction import StreamRedactor, redact
+from .size_estimate import estimated_size
 from .summarize import summarize
 
 logger = logging.getLogger(__name__)
@@ -132,7 +132,7 @@ def transform(
                     },
                 )
             else:
-                raw_size = len(json.dumps(data, default=str))
+                raw_size = estimated_size(data)
                 if raw_size > self._budgets.max_chars:
                     warnings.append(
                         f"raw output ({raw_size} chars) exceeds budget "
diff --git a/src/weaver_kernel/handle_query.py b/src/weaver_kernel/handle_query.py
new file mode 100644
index 0000000..fa12870
--- /dev/null
+++ b/src/weaver_kernel/handle_query.py
@@ -0,0 +1,232 @@
+"""Handle expansion query logic, factored out of :mod:`weaver_kernel.handles`.
+
+:func:`expand_handle` holds the pagination / field-selection / filtering /
+redaction pipeline that turns a stored raw dataset into a bounded
+:class:`~weaver_kernel.models.Frame`. It lives here (rather than as a method on
+:class:`~weaver_kernel.handles.HandleStore`) so the store module stays within
+the module-size budget while the query logic remains independently testable.
+
+The data fetch is injected as a ``fetch_data`` callable so this module never
+imports :class:`~weaver_kernel.handles.HandleStore` — the store stays the single
+owner of TTL/eviction state and passes ``lambda: self.get(handle_id)``.
+"""
+
+from __future__ import annotations
+
+import datetime
+from collections.abc import Callable
+from typing import Any
+
+from .errors import HandleConstraintViolation
+from .firewall.redaction import redact
+from .models import Frame, Handle, Provenance, ResponseMode
+from .policy_reasons import DenialReason
+
+
+def expand_handle(
+    handle: Handle,
+    *,
+    fetch_data: Callable[[], Any],
+    query: dict[str, Any],
+    action_id: str = "",
+    response_mode: ResponseMode = "table",
+    principal_id: str = "",
+    max_depth: int | None = None,
+) -> Frame:
+    """Expand *handle* with optional pagination, field selection, and filtering.
+
+    Args:
+        handle: The handle to expand.
+        fetch_data: Zero-argument callable returning the stored dataset. Raising
+            :class:`~weaver_kernel.errors.HandleExpired` /
+            :class:`~weaver_kernel.errors.HandleNotFound` from here propagates
+            unchanged.
+        query: Query parameters controlling the expansion (``offset``, ``limit``,
+            ``fields``, ``filter``).
+        action_id: Audit action ID to embed in the returned Frame.
+        response_mode: Response mode for the returned Frame.
+        principal_id: Principal performing the expansion. When the handle was
+            created with a non-empty ``principal_id``, this must match.
+        max_depth: Redaction recursion cap for the projected rows.
+
+    Returns:
+        A :class:`Frame` containing the slice of data.
+
+    Raises:
+        HandleConstraintViolation: If the requested expansion exceeds the grant's
+            persisted constraints or is requested by a different principal.
+    """
+    # ── Principal binding ─────────────────────────────────────────────────
+    # A handle bound to a principal is not a bearer credential. If the
+    # handle has a non-empty principal_id, the caller MUST present a
+    # matching principal_id — omitting it counts as a mismatch (otherwise
+    # any caller in the same process could expand by passing "").
+    if handle.principal_id and handle.principal_id != principal_id:
+        raise HandleConstraintViolation(
+            f"Handle '{handle.handle_id}' was granted to principal "
+            f"'{handle.principal_id}' and cannot be expanded by "
+            f"'{principal_id or '<unspecified>'}'.",
+            reason_code=DenialReason.HANDLE_PRINCIPAL_MISMATCH,
+        )
+
+    # ── Query input validation ────────────────────────────────────────────
+    # Validate user-supplied query types up front so callers see a stable
+    # HandleConstraintViolation (with INVALID_CONSTRAINT) instead of a
+    # bare TypeError/ValueError from inside the expansion logic. Public
+    # interfaces must not leak stdlib exceptions (see AGENTS.md).
+    raw_filter = query.get("filter")
+    if raw_filter is not None and not isinstance(raw_filter, dict):
+        raise HandleConstraintViolation(
+            f"Handle expand 'filter' must be a dict, got {type(raw_filter).__name__}.",
+            reason_code=DenialReason.INVALID_CONSTRAINT,
+        )
+    raw_fields = query.get("fields")
+    if raw_fields is not None and not isinstance(raw_fields, list | tuple):
+        raise HandleConstraintViolation(
+            f"Handle expand 'fields' must be a list, got {type(raw_fields).__name__}.",
+            reason_code=DenialReason.INVALID_CONSTRAINT,
+        )
+
+    # ── Grant-constraint rechecks ─────────────────────────────────────────
+    granted_max_rows = handle.constraints.get("max_rows")
+    granted_fields = handle.constraints.get("allowed_fields") or []
+    granted_scope = handle.constraints.get("scope") or {}
+
+    requested_fields: list[str] = list(raw_fields or [])
+    if granted_fields and requested_fields:
+        disallowed = [f for f in requested_fields if f not in granted_fields]
+        if disallowed:
+            raise HandleConstraintViolation(
+                f"Handle '{handle.handle_id}' grant restricts fields to "
+                f"{sorted(granted_fields)}; request asked for "
+                f"{sorted(disallowed)}.",
+                reason_code=DenialReason.HANDLE_CONSTRAINT_VIOLATION,
+            )
+
+    filter_in: dict[str, Any] = raw_filter or {}
+    if granted_scope:
+        for sk, sv in granted_scope.items():
+            if sk in filter_in and filter_in[sk] != sv:
+                raise HandleConstraintViolation(
+                    f"Handle '{handle.handle_id}' is scoped to "
+                    f"{sk}={sv!r}; request filter "
+                    f"{sk}={filter_in[sk]!r} is outside that scope.",
+                    reason_code=DenialReason.HANDLE_CONSTRAINT_VIOLATION,
+                )
+
+    data = fetch_data()
+    rows: list[Any] = data if isinstance(data, list) else [data]
+
+    # ── Filtering ──────────────────────────────────────────────────────────
+    # Grant scope is AND-merged into the request filter so the caller
+    # cannot bypass it by omitting the scope key.
+    filter_spec: dict[str, Any] = dict(filter_in)
+    for sk, sv in granted_scope.items():
+        filter_spec.setdefault(sk, sv)
+    if filter_spec:
+        rows = [
+            r
+            for r in rows
+            if isinstance(r, dict) and all(r.get(k) == v for k, v in filter_spec.items())
+        ]
+
+    # ── Pagination ────────────────────────────────────────────────────────
+    try:
+        offset = int(query.get("offset", 0))
+    except (TypeError, ValueError) as exc:
+        raise HandleConstraintViolation(
+            f"Handle expand 'offset' must be an integer, got {query.get('offset')!r}.",
+            reason_code=DenialReason.INVALID_CONSTRAINT,
+        ) from exc
+    if offset < 0:
+        raise HandleConstraintViolation(
+            f"Handle expand 'offset' must be non-negative, got {offset}.",
+            reason_code=DenialReason.INVALID_CONSTRAINT,
+        )
+    requested_limit_raw = query.get("limit")
+    requested_limit: int | None
+    if requested_limit_raw is None:
+        requested_limit = None
+    else:
+        try:
+            requested_limit = int(requested_limit_raw)
+        except (TypeError, ValueError) as exc:
+            raise HandleConstraintViolation(
+                f"Handle expand 'limit' must be an integer, got {requested_limit_raw!r}.",
+                reason_code=DenialReason.INVALID_CONSTRAINT,
+            ) from exc
+        # A negative limit would slice as rows[offset:offset+limit] and could
+        # return rows in excess of a (possibly zero) grant cap — reject it
+        # rather than silently bypassing max_rows.
+        if requested_limit < 0:
+            raise HandleConstraintViolation(
+                f"Handle expand 'limit' must be non-negative, got {requested_limit}.",
+                reason_code=DenialReason.INVALID_CONSTRAINT,
+            )
+    limit = len(rows) if requested_limit is None else requested_limit
+
+    if isinstance(granted_max_rows, int) and granted_max_rows >= 0:
+        if requested_limit is not None and requested_limit > granted_max_rows:
+            raise HandleConstraintViolation(
+                f"Handle '{handle.handle_id}' grant caps rows at "
+                f"{granted_max_rows}; request asked for "
+                f"limit={requested_limit}.",
+                reason_code=DenialReason.HANDLE_CONSTRAINT_VIOLATION,
+            )
+        limit = min(limit, granted_max_rows)
+
+    rows = rows[offset : offset + limit]
+
+    # ── Field selection ───────────────────────────────────────────────────
+    # If the grant restricts fields and the caller did not ask for any,
+    # apply the grant's allowed_fields as the default projection so
+    # disallowed fields cannot leak through an unscoped expand call.
+    effective_fields: list[str]
+    if requested_fields:
+        effective_fields = requested_fields
+    elif granted_fields:
+        effective_fields = list(granted_fields)
+    else:
+        effective_fields = []
+    if effective_fields:
+        rows = [
+            {k: v for k, v in r.items() if k in effective_fields} if isinstance(r, dict) else r
+            for r in rows
+        ]
+
+    if not rows:
+        table_preview: list[Any] = []
+    elif isinstance(rows[0], dict):
+        table_preview = rows
+    else:
+        table_preview = [{"value": r} for r in rows]
+
+    # ── Redaction ───────────────────────────────────────────────────────────
+    # expand() builds its Frame directly from the raw stored dataset, which
+    # is persisted pre-firewall. Field-level grant constraints
+    # (allowed_fields / scope) are already enforced by the projection above,
+    # so allowed_fields is intentionally not re-passed here; but a permitted
+    # field can still carry inline secrets (e.g. a Bearer token in a `note`
+    # value). Route the projected rows through the same redactor the
+    # Firewall applies on first invocation — using the firewall's configured
+    # max_depth when the caller threads it — so the I-01 boundary holds on
+    # the expansion path too (see docs/agent-context/invariants.md).
+    if max_depth is None:
+        redacted_preview, warnings = redact(table_preview)
+    else:
+        redacted_preview, warnings = redact(table_preview, max_depth=max_depth)
+
+    return Frame(
+        action_id=action_id,
+        capability_id=handle.capability_id,
+        response_mode=response_mode,
+        table_preview=redacted_preview,
+        warnings=warnings,
+        handle=handle,
+        provenance=Provenance(
+            capability_id=handle.capability_id,
+            principal_id=principal_id,
+            invoked_at=datetime.datetime.now(tz=datetime.timezone.utc),
+            action_id=action_id,
+        ),
+    )
diff --git a/src/weaver_kernel/handles.py b/src/weaver_kernel/handles.py
index 9049e8d..408957e 100644
--- a/src/weaver_kernel/handles.py
+++ b/src/weaver_kernel/handles.py
@@ -6,10 +6,10 @@
 import uuid
 from typing import Any
 
-from .errors import HandleConstraintViolation, HandleExpired, HandleNotFound
-from .firewall.redaction import redact
-from .models import Frame, Handle, Provenance, ResponseMode
-from .policy_reasons import DenialReason
+from .errors import BudgetConfigError, HandleExpired, HandleNotFound, HandleTooLarge
+from .firewall.size_estimate import estimated_size
+from .handle_query import expand_handle
+from .models import Frame, Handle, ResponseMode
 
 
 class HandleStore:
@@ -19,6 +19,16 @@ class HandleStore:
     or explicitly via :meth:`evict_expired`.  A *max_entries* cap prevents
     unbounded memory growth in long-lived processes — when the cap is exceeded
     the oldest entries are dropped after expired ones are cleared.
+
+    Two optional **byte budgets** bound memory by size rather than entry count,
+    since one deployment's 10k entries are kilobytes and another's are gigabytes
+    (issue #211). Both default to ``None`` (disabled), leaving behaviour
+    unchanged until configured:
+
+    - ``max_entry_bytes`` rejects a single over-cap payload with
+      :class:`~weaver_kernel.errors.HandleTooLarge` (the data is never stored).
+    - ``max_total_bytes`` evicts oldest-first after each store until aggregate
+      estimated residency is within budget.
     """
 
     _EVICT_INTERVAL: int = 128  # run evict_expired() every N store() calls
@@ -28,12 +38,27 @@ def __init__(
         default_ttl_seconds: int = 3600,
         *,
         max_entries: int = 10_000,
+        max_total_bytes: int | None = None,
+        max_entry_bytes: int | None = None,
     ) -> None:
+        if max_total_bytes is not None and max_total_bytes <= 0:
+            raise BudgetConfigError("max_total_bytes must be positive when set")
+        if max_entry_bytes is not None and max_entry_bytes <= 0:
+            raise BudgetConfigError("max_entry_bytes must be positive when set")
         self._default_ttl = default_ttl_seconds
         self._max_entries = max_entries
+        self._max_total_bytes = max_total_bytes
+        self._max_entry_bytes = max_entry_bytes
         self._store_count = 0
         self._data: dict[str, Any] = {}
         self._meta: dict[str, Handle] = {}
+        self._sizes: dict[str, int] = {}
+        self._total_bytes = 0
+
+    @property
+    def current_bytes(self) -> int:
+        """Estimated total bytes of data currently resident in the store."""
+        return self._total_bytes
 
     # ── Storage ───────────────────────────────────────────────────────────────
 
@@ -61,7 +86,18 @@ def store(
 
         Returns:
             A :class:`Handle` referencing the stored data.
+
+        Raises:
+            HandleTooLarge: If ``max_entry_bytes`` is set and the estimated size
+                of *data* exceeds it.
         """
+        size = estimated_size(data)
+        if self._max_entry_bytes is not None and size > self._max_entry_bytes:
+            raise HandleTooLarge(
+                f"Handle data for '{capability_id}' is ~{size} bytes, exceeding the "
+                f"per-entry cap of {self._max_entry_bytes} bytes; not stored."
+            )
+
         ttl = ttl_seconds if ttl_seconds is not None else self._default_ttl
         now = datetime.datetime.now(tz=datetime.timezone.utc)
         handle = Handle(
@@ -75,21 +111,32 @@ def store(
         )
         self._data[handle.handle_id] = data
         self._meta[handle.handle_id] = handle
+        self._sizes[handle.handle_id] = size
+        self._total_bytes += size
 
         # Periodic eviction of expired entries
         self._store_count += 1
         if self._store_count % self._EVICT_INTERVAL == 0:
             self.evict_expired()
 
-        # Cap enforcement: evict oldest entries when over the limit
+        # Cap enforcement: evict oldest entries when over the entry limit
         if len(self._meta) > self._max_entries:
             self.evict_expired()  # clear expired first
             overflow = len(self._meta) - self._max_entries
             if overflow > 0:
-                oldest = sorted(self._meta, key=lambda hid: self._meta[hid].created_at)
-                for hid in oldest[:overflow]:
-                    self._data.pop(hid, None)
-                    self._meta.pop(hid, None)
+                for hid in self._oldest_ids()[:overflow]:
+                    self._drop(hid)
+
+        # Byte-budget enforcement: evict oldest until within the total budget,
+        # never evicting the entry we just stored (the caller holds its handle).
+        if self._max_total_bytes is not None and self._total_bytes > self._max_total_bytes:
+            self.evict_expired()
+            for hid in self._oldest_ids():
+                if self._total_bytes <= self._max_total_bytes:
+                    break
+                if hid == handle.handle_id:
+                    continue  # never evict the entry we just stored
+                self._drop(hid)
 
         return handle
 
@@ -114,8 +161,7 @@ def get(self, handle_id: str) -> Any:
         now = datetime.datetime.now(tz=datetime.timezone.utc)
         if handle.expires_at <= now:
             # Lazy eviction
-            del self._data[handle_id]
-            del self._meta[handle_id]
+            self._drop(handle_id)
             raise HandleExpired(
                 f"Handle '{handle_id}' expired at {handle.expires_at.isoformat()}."
             )
@@ -158,6 +204,9 @@ def expand(
             - ``fields`` (list[str]): Only include these fields.
             - ``filter`` (dict[str, Any]): Basic equality filter (all conditions AND-ed).
 
+        The query pipeline lives in :func:`weaver_kernel.handle_query.expand_handle`;
+        this method binds it to the store's TTL-checked data fetch.
+
         Args:
             handle: The handle to expand.
             query: Query parameters controlling the expansion.
@@ -180,179 +229,14 @@ def expand(
                 grant's persisted constraints (``max_rows``, ``allowed_fields``,
                 ``scope``) or is requested by a different principal.
         """
-        # ── Principal binding ─────────────────────────────────────────────────
-        # A handle bound to a principal is not a bearer credential. If the
-        # handle has a non-empty principal_id, the caller MUST present a
-        # matching principal_id — omitting it counts as a mismatch (otherwise
-        # any caller in the same process could expand by passing "").
-        if handle.principal_id and handle.principal_id != principal_id:
-            raise HandleConstraintViolation(
-                f"Handle '{handle.handle_id}' was granted to principal "
-                f"'{handle.principal_id}' and cannot be expanded by "
-                f"'{principal_id or '<unspecified>'}'.",
-                reason_code=DenialReason.HANDLE_PRINCIPAL_MISMATCH,
-            )
-
-        # ── Query input validation ────────────────────────────────────────────
-        # Validate user-supplied query types up front so callers see a stable
-        # HandleConstraintViolation (with INVALID_CONSTRAINT) instead of a
-        # bare TypeError/ValueError from inside the expansion logic. Public
-        # interfaces must not leak stdlib exceptions (see AGENTS.md).
-        raw_filter = query.get("filter")
-        if raw_filter is not None and not isinstance(raw_filter, dict):
-            raise HandleConstraintViolation(
-                f"Handle expand 'filter' must be a dict, got {type(raw_filter).__name__}.",
-                reason_code=DenialReason.INVALID_CONSTRAINT,
-            )
-        raw_fields = query.get("fields")
-        if raw_fields is not None and not isinstance(raw_fields, list | tuple):
-            raise HandleConstraintViolation(
-                f"Handle expand 'fields' must be a list, got {type(raw_fields).__name__}.",
-                reason_code=DenialReason.INVALID_CONSTRAINT,
-            )
-
-        # ── Grant-constraint rechecks ─────────────────────────────────────────
-        granted_max_rows = handle.constraints.get("max_rows")
-        granted_fields = handle.constraints.get("allowed_fields") or []
-        granted_scope = handle.constraints.get("scope") or {}
-
-        requested_fields: list[str] = list(raw_fields or [])
-        if granted_fields and requested_fields:
-            disallowed = [f for f in requested_fields if f not in granted_fields]
-            if disallowed:
-                raise HandleConstraintViolation(
-                    f"Handle '{handle.handle_id}' grant restricts fields to "
-                    f"{sorted(granted_fields)}; request asked for "
-                    f"{sorted(disallowed)}.",
-                    reason_code=DenialReason.HANDLE_CONSTRAINT_VIOLATION,
-                )
-
-        filter_in: dict[str, Any] = raw_filter or {}
-        if granted_scope:
-            for sk, sv in granted_scope.items():
-                if sk in filter_in and filter_in[sk] != sv:
-                    raise HandleConstraintViolation(
-                        f"Handle '{handle.handle_id}' is scoped to "
-                        f"{sk}={sv!r}; request filter "
-                        f"{sk}={filter_in[sk]!r} is outside that scope.",
-                        reason_code=DenialReason.HANDLE_CONSTRAINT_VIOLATION,
-                    )
-
-        data = self.get(handle.handle_id)
-        rows: list[Any] = data if isinstance(data, list) else [data]
-
-        # ── Filtering ──────────────────────────────────────────────────────────
-        # Grant scope is AND-merged into the request filter so the caller
-        # cannot bypass it by omitting the scope key.
-        filter_spec: dict[str, Any] = dict(filter_in)
-        for sk, sv in granted_scope.items():
-            filter_spec.setdefault(sk, sv)
-        if filter_spec:
-            rows = [
-                r
-                for r in rows
-                if isinstance(r, dict) and all(r.get(k) == v for k, v in filter_spec.items())
-            ]
-
-        # ── Pagination ────────────────────────────────────────────────────────
-        try:
-            offset = int(query.get("offset", 0))
-        except (TypeError, ValueError) as exc:
-            raise HandleConstraintViolation(
-                f"Handle expand 'offset' must be an integer, got {query.get('offset')!r}.",
-                reason_code=DenialReason.INVALID_CONSTRAINT,
-            ) from exc
-        if offset < 0:
-            raise HandleConstraintViolation(
-                f"Handle expand 'offset' must be non-negative, got {offset}.",
-                reason_code=DenialReason.INVALID_CONSTRAINT,
-            )
-        requested_limit_raw = query.get("limit")
-        requested_limit: int | None
-        if requested_limit_raw is None:
-            requested_limit = None
-        else:
-            try:
-                requested_limit = int(requested_limit_raw)
-            except (TypeError, ValueError) as exc:
-                raise HandleConstraintViolation(
-                    f"Handle expand 'limit' must be an integer, got {requested_limit_raw!r}.",
-                    reason_code=DenialReason.INVALID_CONSTRAINT,
-                ) from exc
-            # A negative limit would slice as rows[offset:offset+limit] and could
-            # return rows in excess of a (possibly zero) grant cap — reject it
-            # rather than silently bypassing max_rows.
-            if requested_limit < 0:
-                raise HandleConstraintViolation(
-                    f"Handle expand 'limit' must be non-negative, got {requested_limit}.",
-                    reason_code=DenialReason.INVALID_CONSTRAINT,
-                )
-        limit = len(rows) if requested_limit is None else requested_limit
-
-        if isinstance(granted_max_rows, int) and granted_max_rows >= 0:
-            if requested_limit is not None and requested_limit > granted_max_rows:
-                raise HandleConstraintViolation(
-                    f"Handle '{handle.handle_id}' grant caps rows at "
-                    f"{granted_max_rows}; request asked for "
-                    f"limit={requested_limit}.",
-                    reason_code=DenialReason.HANDLE_CONSTRAINT_VIOLATION,
-                )
-            limit = min(limit, granted_max_rows)
-
-        rows = rows[offset : offset + limit]
-
-        # ── Field selection ───────────────────────────────────────────────────
-        # If the grant restricts fields and the caller did not ask for any,
-        # apply the grant's allowed_fields as the default projection so
-        # disallowed fields cannot leak through an unscoped expand call.
-        effective_fields: list[str]
-        if requested_fields:
-            effective_fields = requested_fields
-        elif granted_fields:
-            effective_fields = list(granted_fields)
-        else:
-            effective_fields = []
-        if effective_fields:
-            rows = [
-                {k: v for k, v in r.items() if k in effective_fields} if isinstance(r, dict) else r
-                for r in rows
-            ]
-
-        if not rows:
-            table_preview: list[Any] = []
-        elif isinstance(rows[0], dict):
-            table_preview = rows
-        else:
-            table_preview = [{"value": r} for r in rows]
-
-        # ── Redaction ───────────────────────────────────────────────────────────
-        # expand() builds its Frame directly from the raw stored dataset, which
-        # is persisted pre-firewall. Field-level grant constraints
-        # (allowed_fields / scope) are already enforced by the projection above,
-        # so allowed_fields is intentionally not re-passed here; but a permitted
-        # field can still carry inline secrets (e.g. a Bearer token in a `note`
-        # value). Route the projected rows through the same redactor the
-        # Firewall applies on first invocation — using the firewall's configured
-        # max_depth when the caller threads it — so the I-01 boundary holds on
-        # the expansion path too (see docs/agent-context/invariants.md).
-        if max_depth is None:
-            redacted_preview, warnings = redact(table_preview)
-        else:
-            redacted_preview, warnings = redact(table_preview, max_depth=max_depth)
-
-        return Frame(
+        return expand_handle(
+            handle,
+            fetch_data=lambda: self.get(handle.handle_id),
+            query=query,
             action_id=action_id,
-            capability_id=handle.capability_id,
             response_mode=response_mode,
-            table_preview=redacted_preview,
-            warnings=warnings,
-            handle=handle,
-            provenance=Provenance(
-                capability_id=handle.capability_id,
-                principal_id=principal_id,
-                invoked_at=datetime.datetime.now(tz=datetime.timezone.utc),
-                action_id=action_id,
-            ),
+            principal_id=principal_id,
+            max_depth=max_depth,
         )
 
     # ── Maintenance ───────────────────────────────────────────────────────────
@@ -366,6 +250,15 @@ def evict_expired(self) -> int:
         now = datetime.datetime.now(tz=datetime.timezone.utc)
         expired = [hid for hid, h in self._meta.items() if h.expires_at <= now]
         for hid in expired:
-            self._data.pop(hid, None)
-            self._meta.pop(hid, None)
+            self._drop(hid)
         return len(expired)
+
+    def _oldest_ids(self) -> list[str]:
+        """Handle IDs ordered oldest-first by creation time (deterministic)."""
+        return sorted(self._meta, key=lambda hid: self._meta[hid].created_at)
+
+    def _drop(self, handle_id: str) -> None:
+        """Remove a handle's data, metadata, and size accounting."""
+        self._data.pop(handle_id, None)
+        self._meta.pop(handle_id, None)
+        self._total_bytes -= self._sizes.pop(handle_id, 0)
diff --git a/tests/test_architecture.py b/tests/test_architecture.py
index 8aa9642..5af2c4d 100644
--- a/tests/test_architecture.py
+++ b/tests/test_architecture.py
@@ -54,7 +54,6 @@
     "adapters/_base.py": 459,
     "kernel/_invoke.py": 400,
     "firewall/transform.py": 377,
-    "handles.py": 371,
     "adapters/openai.py": 358,
     "stores/sqlite.py": 350,
     "tokens.py": 336,
diff --git a/tests/test_doctests.py b/tests/test_doctests.py
index 7d9f440..f1e3815 100644
--- a/tests/test_doctests.py
+++ b/tests/test_doctests.py
@@ -18,6 +18,7 @@
 # false failures.
 _DOCTESTED_MODULES = [
     "weaver_kernel.firewall.token_counting",
+    "weaver_kernel.firewall.size_estimate",
 ]
 
 
diff --git a/tests/test_firewall.py b/tests/test_firewall.py
index 5471030..eda9257 100644
--- a/tests/test_firewall.py
+++ b/tests/test_firewall.py
@@ -297,6 +297,48 @@ def test_summarize_dict_max_facts() -> None:
     assert len(facts) <= 2
 
 
+# ── summarize() bool handling + truncation marker (#174) ────────────────────────
+
+
+def test_summarize_bool_column_is_not_averaged() -> None:
+    rows = [{"active": True}, {"active": True}, {"active": False}]
+    facts = summarize(rows)
+    # A bool column must never be reported as a numeric mean/min/max.
+    assert not any("avg=" in f for f in facts)
+    assert any(f == "active: True=2, False=1" for f in facts)
+
+
+def test_summarize_numeric_column_still_averaged() -> None:
+    rows = [{"score": 10}, {"score": 20}, {"score": 30}]
+    facts = summarize(rows)
+    assert any(f == "score: min=10.00, max=30.00, avg=20.00" for f in facts)
+
+
+def test_summarize_truncation_marker_when_facts_omitted() -> None:
+    rows = [{"a": i, "b": i, "c": i, "d": i, "e": i} for i in range(5)]
+    facts = summarize(rows, max_facts=3)
+    assert len(facts) == 3
+    assert "more facts omitted" in facts[-1]
+
+
+def test_summarize_no_marker_when_all_facts_fit() -> None:
+    rows = [{"score": i} for i in range(3)]
+    facts = summarize(rows, max_facts=20)
+    assert not any("omitted" in f for f in facts)
+
+
+def test_summarize_exactly_max_facts_has_no_false_marker() -> None:
+    # "Keys" + "a: 1" + "b: 2" is exactly 3 facts; no truncation should occur.
+    facts = summarize({"a": 1, "b": 2}, max_facts=3)
+    assert facts == ["Keys: a, b", "a: 1", "b: 2"]
+
+
+def test_summarize_plain_list_marks_omitted_items() -> None:
+    facts = summarize(list(range(10)), max_facts=4)
+    assert facts[0] == "List of 10 items"
+    assert "more facts omitted" in facts[-1]
+
+
 # ── Token counting ─────────────────────────────────────────────────────────────
 
 
diff --git a/tests/test_handles.py b/tests/test_handles.py
index 66fedbd..e273a73 100644
--- a/tests/test_handles.py
+++ b/tests/test_handles.py
@@ -7,10 +7,12 @@
 import pytest
 
 from weaver_kernel import (
+    BudgetConfigError,
     HandleConstraintViolation,
     HandleExpired,
     HandleNotFound,
     HandleStore,
+    HandleTooLarge,
 )
 from weaver_kernel.models import Handle
 from weaver_kernel.policy_reasons import DenialReason
@@ -344,3 +346,69 @@ def test_expand_fills_provenance_principal(store: HandleStore) -> None:
     frame = store.expand(handle, query={}, action_id="act-9", principal_id="agent-1")
     assert frame.provenance.principal_id == "agent-1"
     assert frame.provenance.action_id == "act-9"
+
+
+# ── Byte-size budgeting (#211) ───────────────────────────────────────────────────
+
+
+def test_current_bytes_starts_at_zero_and_tracks_stores() -> None:
+    store = HandleStore()
+    assert store.current_bytes == 0
+    store.store("cap.x", {"v": "x" * 100})
+    assert store.current_bytes > 100
+
+
+def test_max_entry_bytes_rejects_oversized_payload() -> None:
+    store = HandleStore(max_entry_bytes=100)
+    with pytest.raises(HandleTooLarge):
+        store.store("cap.x", {"v": "x" * 1000})
+    # Rejected data is never retained.
+    assert store.current_bytes == 0
+    assert len(store._meta) == 0
+
+
+def test_max_entry_bytes_allows_within_cap() -> None:
+    store = HandleStore(max_entry_bytes=10_000)
+    handle = store.store("cap.x", {"v": "x" * 1000})
+    assert store.get(handle.handle_id) == {"v": "x" * 1000}
+
+
+def test_max_total_bytes_evicts_oldest_until_within_budget() -> None:
+    store = HandleStore(max_total_bytes=2500)
+    h1 = store.store("cap.x", {"v": "a" * 1000})
+    h2 = store.store("cap.x", {"v": "b" * 1000})
+    h3 = store.store("cap.x", {"v": "c" * 1000})
+    # Three ~1009-byte entries exceed 2500; the oldest is evicted, never the
+    # just-stored one.
+    assert store.current_bytes <= 2500
+    with pytest.raises(HandleNotFound):
+        store.get(h1.handle_id)
+    assert store.get(h2.handle_id) == {"v": "b" * 1000}
+    assert store.get(h3.handle_id) == {"v": "c" * 1000}
+
+
+def test_expanding_byte_evicted_handle_raises_not_found() -> None:
+    store = HandleStore(max_total_bytes=2500)
+    h1 = store.store("cap.x", [{"id": i} for i in range(50)], principal_id="agent-1")
+    evicted_meta = store.get_meta(h1.handle_id)
+    store.store("cap.x", {"v": "b" * 1500})
+    store.store("cap.x", {"v": "c" * 1500})
+    with pytest.raises(HandleNotFound):
+        store.expand(evicted_meta, query={}, principal_id="agent-1")
+
+
+def test_evict_expired_releases_byte_accounting() -> None:
+    store = HandleStore()
+    store.store("cap.x", {"v": "x" * 500}, ttl_seconds=-1)
+    before = store.current_bytes
+    assert before > 0
+    store.evict_expired()
+    assert store.current_bytes == 0
+    assert before > store.current_bytes
+
+
+def test_byte_budget_rejects_invalid_config() -> None:
+    with pytest.raises(BudgetConfigError):
+        HandleStore(max_total_bytes=0)
+    with pytest.raises(BudgetConfigError):
+        HandleStore(max_entry_bytes=-5)
diff --git a/tests/test_size_estimate.py b/tests/test_size_estimate.py
new file mode 100644
index 0000000..0732ebe
--- /dev/null
+++ b/tests/test_size_estimate.py
@@ -0,0 +1,71 @@
+"""Tests for the allocation-free size estimator (issue #207)."""
+
+from __future__ import annotations
+
+import datetime
+import json
+
+import pytest
+
+from weaver_kernel.firewall.size_estimate import estimated_size
+
+
+def _json_len(value: object) -> int:
+    return len(json.dumps(value, default=str))
+
+
+def test_scalars() -> None:
+    assert estimated_size(None) == 4
+    assert estimated_size(True) == 4
+    assert estimated_size(False) == 5
+    assert estimated_size("hi") == 4  # 2 chars + 2 quotes
+    assert estimated_size(12345) == 5
+
+
+def test_matches_json_dumps_on_simple_containers() -> None:
+    # For containers of scalars the estimate is exact against json.dumps.
+    for value in ([1, 2, 3], {"a": 1, "b": 2}, [], {}, ["x", "y"]):
+        assert estimated_size(value) == _json_len(value), value
+
+
+@pytest.mark.parametrize(
+    "value",
+    [
+        {"rows": [{"id": i, "name": f"n{i}", "active": i % 2 == 0} for i in range(50)]},
+        [{"k": "v" * 100} for _ in range(20)],
+        {"nested": {"deep": {"deeper": ["a", "b", "c"]}}},
+        {"when": datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)},
+        "x" * 5000,
+    ],
+)
+def test_bounded_error_against_json(value: object) -> None:
+    # The estimate need not be exact (it does not reproduce JSON escaping), but
+    # it must track json.dumps closely enough that budget threshold comparisons
+    # do not flip: stay within ±25% of the real serialised length.
+    actual = _json_len(value)
+    estimate = estimated_size(value)
+    assert 0.75 * actual <= estimate <= 1.25 * actual, (actual, estimate)
+
+
+def test_early_exit_stops_past_limit() -> None:
+    big = {"payload": "x" * 100_000}
+    # With a small limit the walk returns as soon as it crosses the threshold,
+    # so the reported value need only be greater than the limit.
+    assert estimated_size(big, limit=100) > 100
+
+
+def test_non_serialisable_falls_back_to_str() -> None:
+    class Opaque:
+        def __str__(self) -> str:
+            return "opaque-value"
+
+    # Mirrors the previous json.dumps(default=str): rendered as a quoted string.
+    assert estimated_size(Opaque()) == len("opaque-value") + 2
+
+
+def test_deeply_nested_does_not_recurse() -> None:
+    value: object = "leaf"
+    for _ in range(5000):
+        value = [value]
+    # Iterative walk: no RecursionError even far past the recursion limit.
+    assert estimated_size(value) > 5000
diff --git a/tests/test_token_counting_tiktoken.py b/tests/test_token_counting_tiktoken.py
new file mode 100644
index 0000000..fbbba73
--- /dev/null
+++ b/tests/test_token_counting_tiktoken.py
@@ -0,0 +1,90 @@
+"""Tests for the tiktoken-backed token counter (issue #218).
+
+These exercise the counter's own logic — value handling, encoder caching, and
+error remapping — against a *fake* ``tiktoken`` module injected into
+``sys.modules``. That keeps the suite deterministic and offline: the real
+``tiktoken`` downloads its BPE vocabularies over the network on first use, which
+must never be a test dependency (AGENTS.md prefers offline tests).
+"""
+
+from __future__ import annotations
+
+import sys
+import types
+from collections.abc import Iterator
+
+import pytest
+
+from weaver_kernel import FirewallError
+from weaver_kernel.firewall import token_counting_tiktoken as tt
+
+
+class _FakeEncoding:
+    """Deterministic stand-in: one token per whitespace-separated word."""
+
+    def encode(self, text: str) -> list[int]:
+        return list(range(len(text.split())))
+
+
+@pytest.fixture()
+def fake_tiktoken(monkeypatch: pytest.MonkeyPatch) -> Iterator[None]:
+    module = types.ModuleType("tiktoken")
+
+    def get_encoding(name: str) -> _FakeEncoding:
+        if name == "unknown-encoding":
+            raise KeyError(name)
+        return _FakeEncoding()
+
+    module.get_encoding = get_encoding  # type: ignore[attr-defined]
+    monkeypatch.setitem(sys.modules, "tiktoken", module)
+    tt._load_encoding.cache_clear()
+    yield
+    tt._load_encoding.cache_clear()
+
+
+def test_counts_words_as_tokens(fake_tiktoken: None) -> None:
+    counter = tt.make_tiktoken_counter()
+    assert counter("hello world") == 2
+
+
+def test_none_is_zero(fake_tiktoken: None) -> None:
+    assert tt.make_tiktoken_counter()(None) == 0
+
+
+def test_structured_value_counts_serialised_form(fake_tiktoken: None) -> None:
+    counter = tt.make_tiktoken_counter()
+    # json.dumps({"id": 1}) == '{"id": 1}' → two whitespace-separated tokens.
+    assert counter({"id": 1}) == 2
+
+
+def test_non_serialisable_value_falls_back_to_str(fake_tiktoken: None) -> None:
+    class Opaque:
+        def __str__(self) -> str:
+            return "one two three"
+
+    assert tt.make_tiktoken_counter()(Opaque()) == 3
+
+
+def test_explicit_encoding_is_accepted(fake_tiktoken: None) -> None:
+    assert tt.make_tiktoken_counter(encoding="o200k_base")("a b c") == 3
+
+
+def test_unknown_encoding_raises_firewall_error(fake_tiktoken: None) -> None:
+    with pytest.raises(FirewallError):
+        tt.make_tiktoken_counter(encoding="unknown-encoding")
+
+
+def test_missing_extra_raises_helpful_import_error(monkeypatch: pytest.MonkeyPatch) -> None:
+    # Simulate the extra being absent: importing tiktoken raises ModuleNotFoundError.
+    monkeypatch.setitem(sys.modules, "tiktoken", None)
+    tt._load_encoding.cache_clear()
+    with pytest.raises(ImportError, match=r"weaver-kernel\[tiktoken\]"):
+        tt.make_tiktoken_counter()
+    tt._load_encoding.cache_clear()
+
+
+def test_wires_into_budget_manager(fake_tiktoken: None) -> None:
+    from weaver_kernel.firewall import BudgetManager
+
+    manager = BudgetManager(total_budget=1000, token_counter=tt.make_tiktoken_counter())
+    assert manager.count_tokens("hello world") == 2

From 25b26dd5cd029c2cf275c03554c4e2e6b7f25e3c Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 22 Jun 2026 07:58:48 +0000
Subject: [PATCH 2/3] =?UTF-8?q?fix(firewall):=20address=20Copilot=20review?=
 =?UTF-8?q?=20=E2=80=94=20bound=20summaries,=20cycle-safe=20estimator,=20t?=
 =?UTF-8?q?otal-budget=20invariant?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- summarize: cap _summarize_dict/_summarize_plain_list iteration with islice and
  derive the omitted count from known length, so a huge dict/list no longer
  triggers an O(n) walk on the firewall hot path (defeating max_facts).
- size_estimate: count each container at most once (cycle detection) so
  self-referential inputs terminate instead of hanging; correct the docstring's
  traversal-order claim (the total is order-independent).
- handles: reject a single entry larger than the binding cap (max_entry_bytes or
  max_total_bytes) so current_bytes can never exceed max_total_bytes; use
  estimated_size(limit=...) to avoid fully walking an over-cap payload.
- Docs/CHANGELOG and tests updated (cyclic-input, total-budget rejection).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01HjetuNJX1EUJ2eVGfoLyHt
---
 docs/context_firewall.md                    |  8 +++--
 src/weaver_kernel/errors.py                 | 16 +++++-----
 src/weaver_kernel/firewall/size_estimate.py | 18 +++++++++--
 src/weaver_kernel/firewall/summarize.py     | 29 +++++++++++++-----
 src/weaver_kernel/handles.py                | 34 +++++++++++++++------
 tests/test_handles.py                       | 16 ++++++++++
 tests/test_size_estimate.py                 | 15 +++++++++
 7 files changed, 106 insertions(+), 30 deletions(-)

diff --git a/docs/context_firewall.md b/docs/context_firewall.md
index c32e91d..5c353d3 100644
--- a/docs/context_firewall.md
+++ b/docs/context_firewall.md
@@ -76,9 +76,11 @@ store = HandleStore(
 Sizes are estimated with the same `estimated_size` walk used for budgets.
 `max_total_bytes` evicts oldest-first after each store (never the just-stored
 entry); `max_entry_bytes` rejects an over-cap payload with `HandleTooLarge`
-rather than truncating it, keeping expansion faithful to the original dataset.
-Expanding an evicted handle raises the usual `HandleNotFound`. Tighter budgets
-mean more "handle expired/evicted" experiences — tune for your workload.
+rather than truncating it, keeping expansion faithful to the original dataset. A
+single entry larger than `max_total_bytes` can never fit, so it is rejected the
+same way — `current_bytes` therefore never exceeds `max_total_bytes`. Expanding
+an evicted handle raises the usual `HandleNotFound`. Tighter budgets mean more
+"handle expired/evicted" experiences — tune for your workload.
 
 ## Redaction
 
diff --git a/src/weaver_kernel/errors.py b/src/weaver_kernel/errors.py
index b6a0209..245299e 100644
--- a/src/weaver_kernel/errors.py
+++ b/src/weaver_kernel/errors.py
@@ -133,13 +133,15 @@ class HandleExpired(AgentKernelError):
 
 
 class HandleTooLarge(AgentKernelError):
-    """Raised when a single handle payload exceeds the store's per-entry byte cap.
-
-    Fires from :meth:`~weaver_kernel.handles.HandleStore.store` only when the
-    store was configured with ``max_entry_bytes`` and the estimated size of the
-    data to store exceeds it. The data is *not* retained — rejecting an
-    over-cap payload outright (rather than silently truncating it) keeps handle
-    expansion faithful to the original dataset and bounds resident raw data.
+    """Raised when a single handle payload exceeds the store's byte ceiling.
+
+    Fires from :meth:`~weaver_kernel.handles.HandleStore.store` when a byte
+    budget is configured and the estimated size of the data exceeds the binding
+    per-store ceiling — ``max_entry_bytes``, or ``max_total_bytes`` (a single
+    entry larger than the whole budget can never fit). The data is *not*
+    retained — rejecting an over-cap payload outright (rather than silently
+    truncating it) keeps handle expansion faithful to the original dataset and
+    bounds resident raw data.
     """
 
 
diff --git a/src/weaver_kernel/firewall/size_estimate.py b/src/weaver_kernel/firewall/size_estimate.py
index b68d63a..51c502f 100644
--- a/src/weaver_kernel/firewall/size_estimate.py
+++ b/src/weaver_kernel/firewall/size_estimate.py
@@ -8,8 +8,10 @@
 serialisation that would double peak memory on large payloads.
 
 The estimate is intentionally approximate but **deterministic**: the same input
-always yields the same number, and the walk visits container members in their
-native (insertion) order. It is used both by
+always yields the same number (the total is order-independent). Self-referential
+structures are handled gracefully — each container is counted at most once, so a
+cycle can never hang the walk (unlike ``json.dumps``, which raises). It is used
+both by
 :mod:`~weaver_kernel.firewall.transform` (raw-mode budget warning, issue #207)
 and by :class:`~weaver_kernel.handles.HandleStore` (byte-size budgeting,
 issue #211).
@@ -40,6 +42,8 @@ def estimated_size(value: Any, *, limit: int | None = None) -> int:
     Walks *value* iteratively (so deeply nested structures cannot exhaust the
     recursion limit) summing approximate JSON character contributions. ``bool``
     is handled before ``int`` because ``bool`` is an ``int`` subclass in Python.
+    Each container is visited at most once, so self-referential inputs terminate
+    instead of looping forever.
 
     Args:
         value: Any value the firewall might serialise. Non-JSON types fall back
@@ -48,7 +52,8 @@ def estimated_size(value: Any, *, limit: int | None = None) -> int:
         limit: Optional early-exit threshold. When the running total exceeds
             *limit* the walk stops and returns a value greater than *limit* — use
             this when only the boolean ``size > limit`` decision is needed, not
-            the exact size.
+            the exact size. (Which member tips the total past *limit* is
+            unspecified, but the returned value is always ``> limit``.)
 
     Returns:
         A non-negative integer approximating ``len(json.dumps(value,
@@ -63,6 +68,7 @@ def estimated_size(value: Any, *, limit: int | None = None) -> int:
         9
     """
     total = 0
+    seen: set[int] = set()  # container ids already counted, to break cycles
     stack: list[Any] = [value]
     while stack:
         cur = stack.pop()
@@ -79,12 +85,18 @@ def estimated_size(value: Any, *, limit: int | None = None) -> int:
         elif isinstance(cur, (int, float)):
             total += len(str(cur))
         elif isinstance(cur, dict):
+            if id(cur) in seen:
+                continue
+            seen.add(id(cur))
             n = len(cur)
             total += _CONTAINER_DELIMS + max(0, n - 1) * _ITEM_SEP
             for key, val in cur.items():
                 total += len(str(key)) + _QUOTES + _KV_SEP
                 stack.append(val)
         elif isinstance(cur, (list, tuple)):
+            if id(cur) in seen:
+                continue
+            seen.add(id(cur))
             n = len(cur)
             total += _CONTAINER_DELIMS + max(0, n - 1) * _ITEM_SEP
             stack.extend(cur)
diff --git a/src/weaver_kernel/firewall/summarize.py b/src/weaver_kernel/firewall/summarize.py
index e0e9295..0ce05a3 100644
--- a/src/weaver_kernel/firewall/summarize.py
+++ b/src/weaver_kernel/firewall/summarize.py
@@ -5,6 +5,7 @@
 
 from __future__ import annotations
 
+from itertools import islice
 from typing import Any
 
 
@@ -44,19 +45,27 @@ def _is_number(value: Any) -> bool:
     return isinstance(value, (int, float)) and not isinstance(value, bool)
 
 
-def _truncate_facts(facts: list[str], max_facts: int) -> list[str]:
+def _truncate_facts(facts: list[str], max_facts: int, *, total: int | None = None) -> list[str]:
     """Cap *facts* at *max_facts*, surfacing omission explicitly.
 
     Silent truncation lets an LLM treat a partial summary as complete. When
     facts are dropped, the final slot becomes an explicit marker stating how
     many were omitted; an untruncated list is returned unchanged so it carries
     no marker (issue #174).
+
+    Args:
+        facts: The (possibly already-bounded) candidate fact strings.
+        max_facts: Maximum facts to return.
+        total: True number of candidate facts when *facts* was pre-capped by the
+            caller (so it never materialises one fact per item on the hot path).
+            Defaults to ``len(facts)``.
     """
     if max_facts <= 0:
         return []
-    if len(facts) <= max_facts:
-        return facts
-    omitted = len(facts) - (max_facts - 1)
+    candidate_total = len(facts) if total is None else total
+    if candidate_total <= max_facts:
+        return facts[:max_facts]
+    omitted = candidate_total - (max_facts - 1)
     return [*facts[: max_facts - 1], f"… ({omitted} more facts omitted; full data via handle)"]
 
 
@@ -117,7 +126,9 @@ def _summarize_list_of_dicts(rows: list[dict[str, Any]], *, max_facts: int) -> l
 
 def _summarize_dict(data: dict[str, Any], *, max_facts: int) -> list[str]:
     facts: list[str] = [f"Keys: {', '.join(sorted(data.keys())[:20])}"]
-    for k, v in data.items():
+    # Build at most ``max_facts`` value facts — never one per key — so a huge
+    # dict does not turn a bounded summary into an O(n) walk on the hot path.
+    for k, v in islice(data.items(), max_facts):
         if isinstance(v, (int, float)):
             facts.append(f"{k}: {v}")
         elif isinstance(v, str):
@@ -128,13 +139,15 @@ def _summarize_dict(data: dict[str, Any], *, max_facts: int) -> list[str]:
             facts.append(f"{k}: dict with keys [{', '.join(list(v.keys())[:5])}]")
         else:
             facts.append(f"{k}: {repr(v)[:80]}")
-    return _truncate_facts(facts, max_facts)
+    return _truncate_facts(facts, max_facts, total=1 + len(data))
 
 
 def _summarize_plain_list(data: list[Any], *, max_facts: int) -> list[str]:
+    # Only repr() up to ``max_facts`` elements; the total count is already
+    # surfaced by the "List of N items" header and threaded to the marker.
     facts = [f"List of {len(data)} items"]
-    facts.extend(repr(item)[:100] for item in data)
-    return _truncate_facts(facts, max_facts)
+    facts.extend(repr(item)[:100] for item in islice(data, max_facts))
+    return _truncate_facts(facts, max_facts, total=1 + len(data))
 
 
 def _summarize_string(data: str, *, max_facts: int) -> list[str]:
diff --git a/src/weaver_kernel/handles.py b/src/weaver_kernel/handles.py
index 408957e..8e4411f 100644
--- a/src/weaver_kernel/handles.py
+++ b/src/weaver_kernel/handles.py
@@ -28,7 +28,9 @@ class HandleStore:
     - ``max_entry_bytes`` rejects a single over-cap payload with
       :class:`~weaver_kernel.errors.HandleTooLarge` (the data is never stored).
     - ``max_total_bytes`` evicts oldest-first after each store until aggregate
-      estimated residency is within budget.
+      estimated residency is within budget. A single entry larger than the total
+      budget could never fit, so it is rejected with ``HandleTooLarge`` rather
+      than retained — guaranteeing ``current_bytes`` never exceeds the budget.
     """
 
     _EVICT_INTERVAL: int = 128  # run evict_expired() every N store() calls
@@ -88,15 +90,29 @@ def store(
             A :class:`Handle` referencing the stored data.
 
         Raises:
-            HandleTooLarge: If ``max_entry_bytes`` is set and the estimated size
-                of *data* exceeds it.
+            HandleTooLarge: If a byte budget is configured and the estimated
+                size of *data* exceeds the binding per-store ceiling — either
+                ``max_entry_bytes`` or ``max_total_bytes`` (a single entry larger
+                than the *total* budget can never fit and is rejected too, so the
+                store never ends up permanently over budget).
         """
-        size = estimated_size(data)
-        if self._max_entry_bytes is not None and size > self._max_entry_bytes:
-            raise HandleTooLarge(
-                f"Handle data for '{capability_id}' is ~{size} bytes, exceeding the "
-                f"per-entry cap of {self._max_entry_bytes} bytes; not stored."
-            )
+        # Both caps act as a per-store reject ceiling: a single entry larger than
+        # the total budget could never satisfy it. Early-exit at the ceiling so an
+        # over-cap payload that will be rejected anyway is not fully walked.
+        caps = [c for c in (self._max_entry_bytes, self._max_total_bytes) if c is not None]
+        if caps:
+            reject_at = min(caps)
+            size = estimated_size(data, limit=reject_at)
+            if size > reject_at:
+                cap_name = (
+                    "max_entry_bytes" if reject_at == self._max_entry_bytes else "max_total_bytes"
+                )
+                raise HandleTooLarge(
+                    f"Handle data for '{capability_id}' exceeds the {reject_at}-byte "
+                    f"{cap_name} ceiling; not stored."
+                )
+        else:
+            size = estimated_size(data)
 
         ttl = ttl_seconds if ttl_seconds is not None else self._default_ttl
         now = datetime.datetime.now(tz=datetime.timezone.utc)
diff --git a/tests/test_handles.py b/tests/test_handles.py
index e273a73..c875621 100644
--- a/tests/test_handles.py
+++ b/tests/test_handles.py
@@ -407,6 +407,22 @@ def test_evict_expired_releases_byte_accounting() -> None:
     assert before > store.current_bytes
 
 
+def test_entry_larger_than_total_budget_is_rejected() -> None:
+    # A single entry that exceeds the *total* budget can never fit, so it is
+    # rejected rather than retained — current_bytes never exceeds the budget.
+    store = HandleStore(max_total_bytes=200)
+    with pytest.raises(HandleTooLarge):
+        store.store("cap.x", {"v": "x" * 1000})
+    assert store.current_bytes == 0
+
+
+def test_total_budget_never_exceeded_after_stores() -> None:
+    store = HandleStore(max_total_bytes=2500)
+    for marker in ("a", "b", "c", "d", "e"):
+        store.store("cap.x", {"v": marker * 1000})
+        assert store.current_bytes <= 2500
+
+
 def test_byte_budget_rejects_invalid_config() -> None:
     with pytest.raises(BudgetConfigError):
         HandleStore(max_total_bytes=0)
diff --git a/tests/test_size_estimate.py b/tests/test_size_estimate.py
index 0732ebe..c593234 100644
--- a/tests/test_size_estimate.py
+++ b/tests/test_size_estimate.py
@@ -69,3 +69,18 @@ def test_deeply_nested_does_not_recurse() -> None:
         value = [value]
     # Iterative walk: no RecursionError even far past the recursion limit.
     assert estimated_size(value) > 5000
+
+
+def test_self_referential_list_terminates() -> None:
+    cyclic: list[object] = [1, 2]
+    cyclic.append(cyclic)  # a list that contains itself
+    # json.dumps would raise; the estimator counts each container once and
+    # returns a finite size instead of hanging.
+    assert estimated_size(cyclic) > 0
+
+
+def test_mutually_referential_dicts_terminate() -> None:
+    a: dict[str, object] = {}
+    b: dict[str, object] = {"a": a}
+    a["b"] = b
+    assert estimated_size(a) > 0

From a9bea94370ebe85bfe651c28f57b4de748a643d0 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 24 Jun 2026 12:16:17 +0000
Subject: [PATCH 3/3] test(firewall): pin raw-mode budget-warning decision at
 the char boundary
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Audit follow-up on #207. estimated_size() is approximate (±25%), and the
only decision it drives on the transform path is the raw-mode "exceeds
budget" warning. Add a boundary test asserting the over/under decision
holds with margin beyond the error band, and note in size_estimate that
the estimate is a lower bound for shared-reference (DAG) inputs.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01EMB1FUdczZi62skaN4HZbS
---
 src/weaver_kernel/firewall/size_estimate.py |  8 +++++--
 tests/test_firewall.py                      | 23 +++++++++++++++++++++
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/src/weaver_kernel/firewall/size_estimate.py b/src/weaver_kernel/firewall/size_estimate.py
index 51c502f..e2debf5 100644
--- a/src/weaver_kernel/firewall/size_estimate.py
+++ b/src/weaver_kernel/firewall/size_estimate.py
@@ -10,8 +10,12 @@
 The estimate is intentionally approximate but **deterministic**: the same input
 always yields the same number (the total is order-independent). Self-referential
 structures are handled gracefully — each container is counted at most once, so a
-cycle can never hang the walk (unlike ``json.dumps``, which raises). It is used
-both by
+cycle can never hang the walk (unlike ``json.dumps``, which raises). Counting
+each container once also means a structure that legitimately reuses the same
+container in several positions (a DAG, not a cycle) is counted once, so the
+estimate is a *lower bound* for shared-reference inputs; firewall and handle
+payloads are deserialised JSON without shared references, where this does not
+arise. It is used by
 :mod:`~weaver_kernel.firewall.transform` (raw-mode budget warning, issue #207)
 and by :class:`~weaver_kernel.handles.HandleStore` (byte-size budgeting,
 issue #211).
diff --git a/tests/test_firewall.py b/tests/test_firewall.py
index eda9257..32d8984 100644
--- a/tests/test_firewall.py
+++ b/tests/test_firewall.py
@@ -179,6 +179,29 @@ def test_raw_mode_oversized_data_adds_warning() -> None:
     assert frame.raw_data == large_data  # type: ignore[union-attr]
 
 
+def test_raw_mode_warning_decision_holds_across_budget_boundary() -> None:
+    # #207: estimated_size() replaced len(json.dumps(...)) for the raw-mode
+    # budget warning. The estimate is approximate (±25%, see test_size_estimate),
+    # so the over/under *decision* — the only thing this size drives — is pinned
+    # with margin beyond the error band: a payload well under max_chars carries
+    # no warning, one well over does.
+    budgets = Budgets(max_chars=1000)
+    under = _transform(
+        {"payload": "x" * 200},
+        "raw",
+        principal_roles=["admin"],
+        budgets=budgets,
+    )
+    over = _transform(
+        {"payload": "x" * 5000},
+        "raw",
+        principal_roles=["admin"],
+        budgets=budgets,
+    )
+    assert not any("exceeds budget" in w for w in under.warnings)  # type: ignore[union-attr]
+    assert any("exceeds budget" in w for w in over.warnings)  # type: ignore[union-attr]
+
+
 # ── Table mode with non-list data ──────────────────────────────────────────────