From a9e53791d591276fefea9a4f063a53f0056ccd74 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 20 Jun 2026 14:07:20 +0000 Subject: [PATCH 1/5] test: coverage gate, docstring/doctest + architecture gates, weaver-spec conformance adapter Adds the locally-verifiable CI gates from the hardening group: - #141: branch-coverage config in pyproject ([tool.coverage.run/report]) with a ratchet floor of fail_under=90 (measured 93.9%); `make test` now runs --cov-branch --cov-report=term-missing so the local gate matches CI. - #195: tests/test_docstrings.py enforces a docstring (+ Args: for functions with params) on every __all__ symbol; tests/test_doctests.py runs curated inline doctests (default_token_counter). Filled the few thin docstrings (merge_sensitivity, traces_to_ocsf, record_decision). - #202: tests/test_architecture.py enforces import boundaries (firewall/ drivers/router/models stay within their allowed leaf imports) and a module-size ratchet (current over-budget files pinned; new files capped at 300 lines), stdlib ast only. - #225: weaver_kernel/conformance.py maps Frame/ActionTrace/CapabilityToken to the published weaver-contracts dataclasses (lazy optional import); a new `conformance` extra pins weaver-contracts; tests/test_conformance.py validates the mappings (skipped when the extra is absent). make ci passes: 730 passed, 1 skipped, 93.9% branch coverage. 
Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01AGeBGThsRwnZuREUV8GoKH --- Makefile | 2 +- pyproject.toml | 27 ++++ src/weaver_kernel/conformance.py | 147 +++++++++++++++++++ src/weaver_kernel/federation.py | 7 + src/weaver_kernel/firewall/token_counting.py | 6 + src/weaver_kernel/ocsf.py | 6 + src/weaver_kernel/replay.py | 11 ++ tests/test_architecture.py | 143 ++++++++++++++++++ tests/test_conformance.py | 108 ++++++++++++++ tests/test_docstrings.py | 66 +++++++++ tests/test_doctests.py | 32 ++++ 11 files changed, 554 insertions(+), 1 deletion(-) create mode 100644 src/weaver_kernel/conformance.py create mode 100644 tests/test_architecture.py create mode 100644 tests/test_conformance.py create mode 100644 tests/test_docstrings.py create mode 100644 tests/test_doctests.py diff --git a/Makefile b/Makefile index 4c93992..ee4f189 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ type: python -m mypy src/ test: - python -m pytest -q --cov=weaver_kernel + python -m pytest -q --cov=weaver_kernel --cov-branch --cov-report=term-missing example: python examples/basic_cli.py diff --git a/pyproject.toml b/pyproject.toml index e2e5e27..00facb1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,6 +62,7 @@ dev = [ "pyyaml>=6.0", "tomli>=2.0; python_version<'3.11'", "types-PyYAML>=6.0", + "weaver-contracts>=0.7,<0.8", ] mcp = ["mcp>=1.6"] otel = ["opentelemetry-api>=1.20"] @@ -70,6 +71,10 @@ policy = [ "tomli>=2.0; python_version<'3.11'", ] tiktoken = ["tiktoken>=0.6"] +# Weaver-spec contract conformance: validate kernel objects against the +# published weaver-contracts dataclasses. Optional so `import weaver_kernel` +# never requires it; CI installs it (via [dev]) to run tests/test_conformance.py. 
+conformance = ["weaver-contracts>=0.7,<0.8"] [tool.hatch.build.targets.wheel] packages = ["src/weaver_kernel"] @@ -78,6 +83,24 @@ packages = ["src/weaver_kernel"] asyncio_mode = "auto" testpaths = ["tests"] +[tool.coverage.run] +branch = true +source = ["weaver_kernel"] + +[tool.coverage.report] +# Ratchet floor: kept slightly below the measured branch coverage so the gate +# protects against regressions without forcing aspirational targets. Raise it +# (never lower it) as coverage climbs — see CONTRIBUTING.md. +fail_under = 90 +show_missing = true +exclude_also = [ + "if TYPE_CHECKING:", + "raise NotImplementedError", + "\\.\\.\\.", + "if __name__ == .__main__.:", + "@(abc\\.)?abstractmethod", +] + [tool.ruff] line-length = 99 target-version = "py310" @@ -110,3 +133,7 @@ ignore_missing_imports = true [[tool.mypy.overrides]] module = "opentelemetry.*" ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "weaver_contracts.*" +ignore_missing_imports = true diff --git a/src/weaver_kernel/conformance.py b/src/weaver_kernel/conformance.py new file mode 100644 index 0000000..cfca7ba --- /dev/null +++ b/src/weaver_kernel/conformance.py @@ -0,0 +1,147 @@ +"""weaver-spec conformance adapter (issue #225). + +Maps the kernel's runtime objects (:class:`~weaver_kernel.models.Frame`, +:class:`~weaver_kernel.trace.ActionTrace`, +:class:`~weaver_kernel.tokens.CapabilityToken`) onto the published +``weaver-contracts`` dataclasses, so CI can assert the kernel emits +spec-conformant payloads instead of echoing a placeholder. + +``weaver-contracts`` is an optional dependency (the ``conformance`` extra): +``import weaver_kernel`` never requires it. Each adapter imports it lazily and +raises a clear :class:`ImportError` with an install hint when it is absent, +mirroring the optional-extra seam used by the MCP and OTel integrations. 
+ +The adapters intentionally translate vocabulary where the contract and the +kernel differ (notably trace ``event_type``); the mapping policy lives here so +contract gaps surface in one place and can be filed upstream rather than by +bending kernel types. +""" + +from __future__ import annotations + +import datetime +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: # pragma: no cover - typing only + from .models import ActionTrace, Frame + from .tokens import CapabilityToken + +# Kernel trace event_type -> weaver-contracts TraceEvent.event_type. The +# contract speaks a richer lifecycle vocabulary; these are the documented +# equivalences for the three audited kernel events. +_EVENT_TYPE_MAP = { + "invoke": "capability_executed", + "expand": "handle_resolved", + "deny": "capability_denied", +} + + +def _require_contracts() -> Any: + """Import and return the ``weaver_contracts`` package or raise a clear error.""" + try: + import weaver_contracts + except ModuleNotFoundError as exc: # pragma: no cover - exercised via extra absence + raise ImportError( + "Conformance mapping requires the optional dependency " + "'weaver-contracts'. Install it with: pip install 'weaver-kernel[conformance]'" + ) from exc + return weaver_contracts + + +def contract_version() -> str: + """Return the installed ``weaver-contracts`` CONTRACT_VERSION string. + + Returns: + The semantic version of the contract package the kernel is mapped to. + """ + return str(_require_contracts().CONTRACT_VERSION) + + +def frame_to_contract(frame: Frame, *, created_at: datetime.datetime | None = None) -> Any: + """Map a kernel :class:`Frame` to a ``weaver_contracts.Frame``. + + Args: + frame: The firewalled frame produced at the kernel boundary. + created_at: Timestamp for the contract frame. Defaults to the current + UTC time, since the kernel ``Frame`` carries no creation timestamp. 
+ + Returns: + A validated ``weaver_contracts.Frame`` (its ``__post_init__`` enforces + the contract invariants, e.g. a non-empty summary). + """ + wc = _require_contracts() + summary = "; ".join(frame.facts) if frame.facts else f"{frame.response_mode} result" + handle_refs = [frame.handle.handle_id] if frame.handle is not None else [] + return wc.Frame( + frame_id=frame.action_id, + capability_id=frame.capability_id, + summary=summary, + created_at=created_at or datetime.datetime.now(tz=datetime.timezone.utc), + structured_data={"table_preview": frame.table_preview} if frame.table_preview else None, + handle_refs=handle_refs, + redaction_notes="; ".join(frame.warnings) if frame.warnings else None, + metadata={"response_mode": frame.response_mode, "is_final": frame.is_final}, + ) + + +def trace_to_contract(trace: ActionTrace) -> Any: + """Map a kernel :class:`ActionTrace` to a ``weaver_contracts.TraceEvent``. + + Args: + trace: The audit record for an invoke/expand/deny event. + + Returns: + A validated ``weaver_contracts.TraceEvent`` with kernel-specific detail + (driver, sensitivity, reason code) carried in ``metadata``. + """ + wc = _require_contracts() + is_deny = trace.event_type == "deny" + outcome = "failure" if (is_deny or trace.error is not None) else "success" + return wc.TraceEvent( + event_id=trace.action_id, + event_type=_EVENT_TYPE_MAP[trace.event_type], + timestamp=trace.invoked_at, + capability_id=trace.capability_id, + principal=trace.principal_id, + frame_id=None if is_deny else trace.action_id, + handle_id=trace.handle_id, + outcome=outcome, + error_message=trace.error, + metadata={ + "reason_code": trace.reason_code, + "sensitivity": str(trace.sensitivity.value), + "driver_id": trace.driver_id, + "response_mode": trace.response_mode, + }, + ) + + +def token_to_contract(token: CapabilityToken) -> Any: + """Map a kernel :class:`CapabilityToken` to a ``weaver_contracts.CapabilityToken``. + + Args: + token: An issued, signed capability token. 
+ + Returns: + A validated ``weaver_contracts.CapabilityToken`` whose ``scope`` is the + single capability the kernel token authorises. + """ + wc = _require_contracts() + return wc.CapabilityToken( + token_id=token.token_id, + principal=token.principal_id, + scope=[token.capability_id], + issued_at=token.issued_at, + expires_at=token.expires_at, + single_use=False, + issuer=None, + metadata={"audit_id": token.audit_id, "constraints": token.constraints}, + ) + + +__all__ = [ + "contract_version", + "frame_to_contract", + "trace_to_contract", + "token_to_contract", +] diff --git a/src/weaver_kernel/federation.py b/src/weaver_kernel/federation.py index 9ca998c..8c7291d 100644 --- a/src/weaver_kernel/federation.py +++ b/src/weaver_kernel/federation.py @@ -259,6 +259,13 @@ def merge_sensitivity(local: SensitivityTag, remote: SensitivityTag) -> Sensitiv Exposed for callers that maintain their own capability records outside the registry and want the canonical ``most_restrictive`` union rule. + + Args: + local: The locally-known sensitivity tag. + remote: The sensitivity tag advertised by a remote/federated source. + + Returns: + The stricter (more restrictive) of the two tags. """ return _stricter(local, remote) diff --git a/src/weaver_kernel/firewall/token_counting.py b/src/weaver_kernel/firewall/token_counting.py index 35b5179..04d9c05 100644 --- a/src/weaver_kernel/firewall/token_counting.py +++ b/src/weaver_kernel/firewall/token_counting.py @@ -31,6 +31,12 @@ def default_token_counter(value: Any) -> int: Returns: A non-negative integer approximating the token count. 
+ + Example: + >>> default_token_counter(None) + 0 + >>> default_token_counter("hello world") + 3 """ if value is None: return 0 diff --git a/src/weaver_kernel/ocsf.py b/src/weaver_kernel/ocsf.py index 0d852b8..b1b42a9 100644 --- a/src/weaver_kernel/ocsf.py +++ b/src/weaver_kernel/ocsf.py @@ -121,6 +121,12 @@ def traces_to_ocsf(traces: Iterable[ActionTrace]) -> list[dict[str, object]]: Order is preserved from *traces* (typically ``Kernel.list_traces()`` / ``Kernel.query_traces(...)``), so the caller controls ordering. + + Args: + traces: Traces to map, in the desired output order. + + Returns: + One OCSF API Activity event dict per trace, order-preserved. """ return [trace_to_ocsf(trace) for trace in traces] diff --git a/src/weaver_kernel/replay.py b/src/weaver_kernel/replay.py index f67c4f8..204b19a 100644 --- a/src/weaver_kernel/replay.py +++ b/src/weaver_kernel/replay.py @@ -117,6 +117,17 @@ def record_decision( Convenience for building a replay corpus from a known-good engine, so a later :func:`replay` against the same engine yields an empty diff. + + Args: + engine: The policy engine evaluated to capture the baseline outcome. + request: The capability request to evaluate. + capability: The capability the request targets. + principal: The principal on whose behalf the request is made. + justification: Optional free-text justification passed to the engine. + + Returns: + A :class:`DecisionRecord` carrying the baseline allow/deny outcome and + reason code, ready to feed :func:`replay`. """ allowed, reason_code = _evaluate( engine, diff --git a/tests/test_architecture.py b/tests/test_architecture.py new file mode 100644 index 0000000..442a93d --- /dev/null +++ b/tests/test_architecture.py @@ -0,0 +1,143 @@ +"""Automated architectural conformance checks (issue #202). + +Two cheap, stdlib-only guards that preserve the documented layering without a +heavyweight architecture tool: + +1. **Import boundaries.** Lower layers must not depend on higher ones. 
The + rules below are derived from the *current* clean structure (see AGENTS.md + "Architectural conformance"); they pass today and fail the moment a + forbidden edge is introduced. +2. **Module-size ratchet.** Modules must stay within the documented 300-line + budget (AGENTS.md). The files already over budget are listed explicitly + with their current size as a ceiling, so they may only shrink — and any new + or regrown module that crosses 300 lines fails immediately. + +Both walk ``src/weaver_kernel`` with :mod:`ast`; no third-party dependency. +``TYPE_CHECKING`` and inside-function (lazy) imports are intentionally allowed — +the lazy seam is how optional extras stay optional. +""" + +from __future__ import annotations + +import ast +from pathlib import Path + +_SRC = Path(__file__).resolve().parent.parent / "src" / "weaver_kernel" + +# Leaf modules a given layer is allowed to import from within the package. +# Key = top-level module/sub-package under weaver_kernel; value = the only +# intra-package top-level names it may import. A layer absent from this map is +# unconstrained (e.g. ``kernel`` is the orchestrator and may import anything). +_ALLOWED_INTRA_IMPORTS: dict[str, set[str]] = { + # The firewall transforms RawResult -> Frame using only the data contracts; + # it must never reach back into execution/policy/registry layers. + "firewall": {"models", "errors", "enums"}, + # Drivers execute capabilities and depend only on the data contracts. + "drivers": {"models", "errors", "enums"}, + # The router is a stateless, import-light dispatch table. + "router": {"models"}, + # Data contracts are leaves built only on enums + error types. + "models": {"enums", "errors"}, + # Foundational leaves import nothing else in the package. + "enums": set(), + "errors": set(), +} + +# Modules currently over the 300-line budget, with their present length as the +# ceiling (ratchet: these may shrink, never grow past this). 
Shrinking a module +# below 300 should remove it from this map. New modules are not allowed here. +_SIZE_RATCHET: dict[str, int] = { + "__init__.py": 341, + "models.py": 753, + "policy.py": 652, + "kernel/__init__.py": 541, + "adapters/_base.py": 459, + "kernel/_invoke.py": 400, + "firewall/transform.py": 377, + "handles.py": 371, + "adapters/openai.py": 358, + "stores/sqlite.py": 350, + "tokens.py": 336, + "federation_discovery.py": 306, +} + +_LINE_BUDGET = 300 + + +def _rel(path: Path) -> str: + return path.relative_to(_SRC).as_posix() + + +def _layer_of(rel_path: str) -> str: + """Top-level module or sub-package name for a file under weaver_kernel.""" + head = rel_path.split("/", 1)[0] + return head if "/" in rel_path else head.removesuffix(".py") + + +def _intra_imports(tree: ast.Module, rel_path: str) -> set[str]: + """Return top-level ``weaver_kernel`` names imported at *rel_path*'s scope. + + Relative imports are resolved against the importing file's package depth + (``from .models`` in ``router.py`` and ``from ..models`` in + ``firewall/transform.py`` both resolve to top-level ``models``), as are + absolute ``weaver_kernel.`` imports. ``TYPE_CHECKING`` blocks and imports + nested inside functions/classes are ignored — that lazy seam is how optional + extras stay optional. + """ + # Package parts of the importing module, e.g. firewall/transform.py -> ["firewall"]. + package_parts = rel_path.split("/")[:-1] + base = ["weaver_kernel", *package_parts] + + names: set[str] = set() + for node in tree.body: + if isinstance(node, ast.ImportFrom): + if node.level > 0: + # Drop (level - 1) trailing components to reach the target package. 
+ anchor = base[: len(base) - (node.level - 1)] + resolved = [*anchor, *(node.module.split(".") if node.module else [])] + elif node.module and node.module.startswith("weaver_kernel."): + resolved = node.module.split(".") + else: + continue + if len(resolved) >= 2 and resolved[0] == "weaver_kernel": + names.add(resolved[1]) + elif isinstance(node, ast.Import): + for alias in node.names: + if alias.name.startswith("weaver_kernel."): + names.add(alias.name.split(".")[1]) + return names + + +def test_import_boundaries_hold() -> None: + """No module imports outside its layer's allowed intra-package set.""" + violations: list[str] = [] + for path in sorted(_SRC.rglob("*.py")): + rel = _rel(path) + layer = _layer_of(rel) + allowed = _ALLOWED_INTRA_IMPORTS.get(layer) + if allowed is None: + continue + tree = ast.parse(path.read_text(encoding="utf-8")) + imported = _intra_imports(tree, rel) + # A layer may always import within itself (e.g. firewall.transform -> + # firewall.redaction shows up as the layer's own name or not at all). 
+ forbidden = {name for name in imported if name != layer and name not in allowed} + if forbidden: + violations.append(f"{rel} imports {sorted(forbidden)} (allowed: {sorted(allowed)})") + assert violations == [], "Architectural import-boundary violations:\n" + "\n".join(violations) + + +def test_module_size_budget() -> None: + """Every module is within 300 lines, or within its ratcheted ceiling.""" + offenders: list[str] = [] + for path in sorted(_SRC.rglob("*.py")): + rel = _rel(path) + lines = len(path.read_text(encoding="utf-8").splitlines()) + ceiling = _SIZE_RATCHET.get(rel, _LINE_BUDGET) + if lines > ceiling: + limit = "300-line budget" if rel not in _SIZE_RATCHET else f"ratchet ceiling {ceiling}" + offenders.append(f"{rel}: {lines} lines exceeds {limit}") + assert offenders == [], ( + "Module-size budget exceeded (split the module, or if you legitimately " + "shrank an over-budget file, lower its ceiling in _SIZE_RATCHET):\n" + "\n".join(offenders) + ) diff --git a/tests/test_conformance.py b/tests/test_conformance.py new file mode 100644 index 0000000..9bab9cb --- /dev/null +++ b/tests/test_conformance.py @@ -0,0 +1,108 @@ +"""weaver-spec conformance mapping tests (issue #225). + +Builds real kernel objects, maps them through ``weaver_kernel.conformance``, +and asserts the results validate against the published ``weaver-contracts`` +dataclasses (their ``__post_init__`` enforces the contract invariants — these +are real assertions, not an echo). Skipped when the optional ``conformance`` +extra is not installed, so the gate is non-blocking until the extra is present. 
+""" + +from __future__ import annotations + +import datetime + +import pytest + +from weaver_kernel import CapabilityToken, Frame, SensitivityTag +from weaver_kernel.models import ActionTrace + +wc = pytest.importorskip("weaver_contracts", reason="install the 'conformance' extra") + +from weaver_kernel import conformance # noqa: E402 (after importorskip by design) + +_T = datetime.datetime(2026, 1, 2, 3, 4, 5, tzinfo=datetime.timezone.utc) + + +def _frame() -> Frame: + return Frame( + action_id="act-1", + capability_id="billing.list_invoices", + response_mode="summary", + facts=["2 invoices found", "total 100 USD"], + ) + + +def _trace(*, event_type: str = "invoke", error: str | None = None) -> ActionTrace: + return ActionTrace( + action_id="act-1", + capability_id="billing.list_invoices", + principal_id="alice", + token_id="tok-1", + invoked_at=_T, + args={"operation": "billing.list_invoices"}, + response_mode="summary", + driver_id="billing", + sensitivity=SensitivityTag.PII, + event_type=event_type, # type: ignore[arg-type] + error=error, + ) + + +def _token() -> CapabilityToken: + return CapabilityToken( + token_id="tok-1", + capability_id="billing.list_invoices", + principal_id="alice", + issued_at=_T, + expires_at=_T + datetime.timedelta(hours=1), + ) + + +def test_contract_version_is_reported() -> None: + assert conformance.contract_version() == str(wc.CONTRACT_VERSION) + + +def test_frame_maps_to_valid_contract_frame() -> None: + contract_frame = conformance.frame_to_contract(_frame(), created_at=_T) + assert isinstance(contract_frame, wc.Frame) + assert contract_frame.frame_id == "act-1" + assert contract_frame.capability_id == "billing.list_invoices" + assert contract_frame.summary # contract requires a non-empty summary + assert contract_frame.created_at == _T + + +def test_empty_frame_still_yields_non_empty_summary() -> None: + # The contract rejects an empty summary; the adapter must always supply one. 
+ bare = Frame(action_id="a", capability_id="cap.x", response_mode="handle_only") + contract_frame = conformance.frame_to_contract(bare, created_at=_T) + assert contract_frame.summary + + +def test_invoke_trace_maps_to_executed_success() -> None: + event = conformance.trace_to_contract(_trace()) + assert isinstance(event, wc.TraceEvent) + assert event.event_type == "capability_executed" + assert event.outcome == "success" + assert event.principal == "alice" + assert event.frame_id == "act-1" + + +def test_deny_trace_maps_to_denied_failure() -> None: + event = conformance.trace_to_contract(_trace(event_type="deny", error="missing role")) + assert event.event_type == "capability_denied" + assert event.outcome == "failure" + assert event.frame_id is None # a denied request produced no frame + + +def test_failed_invoke_maps_to_failure() -> None: + event = conformance.trace_to_contract(_trace(error="driver exploded")) + assert event.outcome == "failure" + assert event.error_message == "driver exploded" + + +def test_token_maps_to_valid_contract_token() -> None: + token = conformance.token_to_contract(_token()) + assert isinstance(token, wc.CapabilityToken) + assert token.principal == "alice" + assert token.scope == ["billing.list_invoices"] + assert token.expires_at == _T + datetime.timedelta(hours=1) diff --git a/tests/test_docstrings.py b/tests/test_docstrings.py new file mode 100644 index 0000000..53a1217 --- /dev/null +++ b/tests/test_docstrings.py @@ -0,0 +1,66 @@ +"""Docstring-coverage gate for the public API (issue #195). + +Pins the contract that every class and function exported via +``weaver_kernel.__all__`` carries a docstring meeting the documented shape: +a non-empty summary line, plus an ``Args:`` section for callables that take +parameters. A library whose generated docs site and IDE/copilot hints are +built from docstrings cannot afford an undocumented public symbol. + +Type aliases (e.g. 
``ResponseMode = Literal[...]``) and module-level +constants (``__version__``, ``OCSF_VERSION``, ...) cannot carry a meaningful +``__doc__`` and are documented in their module docstring instead, so they are +exempt from this gate. +""" + +from __future__ import annotations + +import inspect + +import weaver_kernel + + +def _documentable_symbols() -> list[tuple[str, object]]: + """Return ``(name, obj)`` for every class/function exported in ``__all__``.""" + out: list[tuple[str, object]] = [] + for name in weaver_kernel.__all__: + obj = getattr(weaver_kernel, name) + if inspect.isclass(obj) or inspect.isroutine(obj): + out.append((name, obj)) + return out + + +def test_public_classes_and_functions_have_docstrings() -> None: + """Every exported class/function has a non-empty docstring with a summary.""" + missing = [ + name for name, obj in _documentable_symbols() if not (inspect.getdoc(obj) or "").strip() + ] + assert missing == [], ( + f"Public symbols exported in __all__ without a docstring: {missing}. " + f"Add a Google-style docstring in src/weaver_kernel/ so the generated " + f"docs (#134) and IDE hints stay complete." + ) + + +def test_public_functions_document_their_parameters() -> None: + """Exported functions that take parameters include an ``Args:`` section.""" + offenders: list[str] = [] + for name, obj in _documentable_symbols(): + if not inspect.isroutine(obj): + continue + try: + sig = inspect.signature(obj) + except (TypeError, ValueError): + continue + params = [ + p + for p_name, p in sig.parameters.items() + if p_name not in ("self", "cls") and p.kind not in (p.VAR_POSITIONAL, p.VAR_KEYWORD) + ] + doc = inspect.getdoc(obj) or "" + if params and "Args:" not in doc: + offenders.append(name) + assert offenders == [], ( + f"Exported functions with parameters but no 'Args:' section: {offenders}. " + f"Document each parameter (Google-style) so the public contract is " + f"self-describing." 
+ ) diff --git a/tests/test_doctests.py b/tests/test_doctests.py new file mode 100644 index 0000000..7d9f440 --- /dev/null +++ b/tests/test_doctests.py @@ -0,0 +1,32 @@ +"""Executable docstring examples for core public helpers (issue #195). + +Inline doctests give the highest-traffic *synchronous* helpers runnable, +verified usage examples. The asynchronous core verbs (``request_capabilities``, +``grant_capability``, ``invoke``, ``expand``, ``explain``) are exercised end to +end by the example-based suite (``test_readme_quickstart``, +``test_chainweaver_flow``, ...) instead — async doctests with full kernel setup +would be unreadable, which the issue explicitly anticipates. +""" + +from __future__ import annotations + +import doctest +import importlib + +# Modules whose docstrings carry runnable ``>>>`` examples. Keep this list +# curated: pointing doctest at modules with prose-only docstrings produces +# false failures. +_DOCTESTED_MODULES = [ + "weaver_kernel.firewall.token_counting", +] + + +def test_public_doctests_pass() -> None: + """Every curated module's doctests run and pass.""" + attempted = 0 + for module_name in _DOCTESTED_MODULES: + module = importlib.import_module(module_name) + result = doctest.testmod(module, verbose=False) + assert result.failed == 0, f"{module_name}: {result.failed} doctest failure(s)" + attempted += result.attempted + assert attempted > 0, "No doctests were collected — the gate is not exercising anything." From d2236cb4cecba6d55eb4f96bff3bfe918c867bd2 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 20 Jun 2026 14:11:40 +0000 Subject: [PATCH 2/5] ci: harden and de-drift CI, add bare-install/audit/conformance jobs and scanning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - #209: SHA-pin every action in ci.yml (copied from publish.yml) and declare workflow-level least-privilege `permissions: contents: read`. No floating action tags remain in any workflow. 
- #210: ci.yml test job now invokes Makefile targets (fmt-check/lint/type/test/ example) so the local gate and CI cannot drift; all 13 examples run via `make example` (previously only 8 ran inline). - #232: pip caching (cache: pip, keyed on pyproject.toml) and a top-level concurrency group with cancel-in-progress (ci.yml only — publish is never cancelled). - #208: new bare-install job — installs with no extras, imports the full public API, runs the README quickstart, asserts optional extras are genuinely absent, and asserts the MCP-extra-missing ImportError is actionable. - #205: new security-audit job (pip-audit over the runtime tree), CodeQL workflow (python, security-and-quality, PR + weekly), and Dependabot for pip (grouped, range-respecting) + github-actions. - #225: conformance_stub placeholder replaced by a real `conformance` job that installs the conformance extra and runs the mapping tests (no echo). - #141: per-matrix coverage HTML report uploaded as an artifact. - #212: publish.yml generates a CycloneDX SBOM of the published runtime tree (separate sbom/ artifact, attached to the release) and enables PEP 740 PyPI attestations on the Trusted-Publisher upload. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01AGeBGThsRwnZuREUV8GoKH --- .github/dependabot.yml | 28 ++++++ .github/workflows/ci.yml | 177 +++++++++++++++++++++++++++------- .github/workflows/codeql.yml | 40 ++++++++ .github/workflows/publish.yml | 36 ++++++- 4 files changed, 245 insertions(+), 36 deletions(-) create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/codeql.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..3efeceb --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,28 @@ +version: 2 +updates: + # Python dependencies. 
The library declares *ranged* requirements + # (httpx>=0.27, pydantic>=2) on purpose; Dependabot only proposes a change + # when a dependency drifts out of the declared range, so it never forces a + # pin on the library itself. Minor/patch bumps are grouped to limit noise. + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "weekly" + open-pull-requests-limit: 5 + groups: + python-minor-and-patch: + update-types: + - "minor" + - "patch" + + # GitHub Actions. Keeps the SHA-pinned actions in ci.yml / publish.yml / + # codeql.yml fresh (it updates the pin and the version comment together). + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + open-pull-requests-limit: 5 + groups: + github-actions: + patterns: + - "*" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a00b03a..e0f9e7d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,5 +1,11 @@ name: CI +# Contract: this workflow runs the same gate as `make ci` +# (fmt-check -> lint -> type -> test -> example). Each step below invokes a +# Makefile target so the local gate and CI cannot drift (see Makefile and +# docs/agent-context/workflows.md). Change the steps here only by changing the +# Makefile. + on: push: branches: ["main", "copilot/**"] @@ -7,6 +13,17 @@ on: branches: ["main"] workflow_call: +# Least-privilege by default; jobs needing more declare it explicitly. +permissions: + contents: read + +# Cancel superseded runs on the same ref so a new push to a PR stops the +# previous run instead of burning runner time. Keyed on the ref so distinct +# branches/PRs stay independent. 
+concurrency: + group: ci-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: name: "Python ${{ matrix.python-version }}" @@ -18,64 +35,154 @@ jobs: python-version: ["3.10", "3.11", "3.12"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 with: python-version: ${{ matrix.python-version }} + cache: pip + cache-dependency-path: pyproject.toml - name: Install dependencies run: pip install -e ".[dev]" - - name: Lint (ruff check) - run: ruff check src/ tests/ examples/ + # Each step is a Makefile target — see the contract note at the top. + - name: Format check + run: make fmt-check - - name: Format check (ruff format) - run: ruff format --check src/ tests/ examples/ + - name: Lint + run: make lint - - name: Type check (mypy) - run: mypy src/ + - name: Type check + run: make type - - name: Test (pytest) - run: python -m pytest -q --cov=weaver_kernel --cov-report=term-missing + - name: Test + run: make test - name: Examples - run: | - python examples/basic_cli.py - python examples/billing_demo.py - python examples/http_driver_demo.py - python examples/tutorial.py - python examples/readme_quickstart.py - python examples/trace_export_demo.py - python examples/ocsf_export_demo.py - python examples/trace_replay_demo.py - - conformance_stub: - name: "Weaver Spec Conformance Stub (v0.1.0)" + run: make example + + - name: Coverage HTML report + if: always() + run: python -m coverage html + + - name: Upload coverage HTML + if: always() + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: coverage-html-${{ matrix.python-version }} + path: htmlcov/ + if-no-files-found: ignore + + bare-install: + name: "Bare install (no extras)" runs-on: ubuntu-latest needs: test permissions: contents: 
read steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 with: python-version: "3.12" + cache: pip + cache-dependency-path: pyproject.toml - - name: Install dependencies - run: pip install -e ".[dev]" + # No extras: proves the "minimal deps (httpx + pydantic)" claim holds. + - name: Install (no extras) + run: pip install . + + - name: Import the full public API + run: | + python -c "import weaver_kernel as w; \ + missing = [n for n in w.__all__ if not hasattr(w, n)]; \ + assert not missing, f'missing public symbols: {missing}'; \ + print(f'imported {len(w.__all__)} public symbols')" + + - name: Run the README quickstart + run: python examples/readme_quickstart.py + + - name: Assert optional extras are genuinely absent + run: | + for mod in mcp yaml opentelemetry tiktoken weaver_contracts; do + if python -c "import $mod" 2>/dev/null; then + echo "::error::optional dependency '$mod' is importable in a bare install" + exit 1 + fi + done + echo "no optional extras leaked into the base install" + + - name: Assert the MCP-extra-missing error is helpful + run: | + python - <<'PY' + from weaver_kernel.drivers.mcp_support import import_optional + try: + import_optional("mcp.client.session") + except ImportError as exc: + assert "weaver-kernel[mcp]" in str(exc), f"unhelpful error: {exc}" + print("MCP-extra-missing error is documented and actionable") + else: + raise AssertionError("expected ImportError without the mcp extra") + PY + + security-audit: + name: "Dependency audit (pip-audit)" + runs-on: ubuntu-latest + permissions: + contents: read - # Placeholder: activate once dgenio/weaver-spec#4 ships the conformance runner. - # weaver-spec and weaver-contracts are published on PyPI. - # weaver_contracts.conformance does not yet exist (dgenio/weaver-spec#4). 
- # Replace this step with: - # pip install weaver-contracts # PyPI dist name uses a hyphen - # python -m weaver_contracts.conformance --target weaver_kernel - - name: weaver-spec conformance suite (stub) + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + + - name: Set up Python + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + with: + python-version: "3.12" + cache: pip + cache-dependency-path: pyproject.toml + + # Audit the *runtime* dependency tree (install without extras) so the gate + # tracks what adopters actually ship. Policy: fail on any known + # vulnerability; document false positives via pip-audit's --ignore-vuln + # allowlist (see README "Security automation"). + - name: Install runtime deps + pip-audit run: | - echo "weaver-contracts 0.2.0 is on PyPI; weaver_contracts.conformance runner not yet available (dgenio/weaver-spec#4)." - echo "Stub passes. Activate when dgenio/weaver-spec#4 ships." + pip install . + pip install pip-audit + + - name: Audit dependencies + run: pip-audit --strict --desc + + conformance: + name: "Weaver-spec conformance" + runs-on: ubuntu-latest + needs: test + permissions: + contents: read + + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + + - name: Set up Python + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + with: + python-version: "3.12" + cache: pip + cache-dependency-path: pyproject.toml + + # Real validation (no echo): map kernel Frame/ActionTrace/token onto the + # published weaver-contracts dataclasses and assert they validate. When + # dgenio/weaver-spec#4 ships weaver_contracts.conformance, add its runner + # here as an additional step. 
+ - name: Install conformance extra + run: pip install ".[conformance]" pytest pytest-asyncio + + - name: Report contract version + run: python -c "from weaver_kernel.conformance import contract_version; print('weaver-contracts', contract_version())" + + - name: Run conformance mapping tests + run: python -m pytest tests/test_conformance.py -q diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..394b10e --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,40 @@ +name: CodeQL + +on: + push: + branches: ["main"] + pull_request: + branches: ["main"] + schedule: + # Weekly, so newly-published advisories are caught even without a push. + - cron: "27 3 * * 1" + +permissions: + contents: read + +concurrency: + group: codeql-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + analyze: + name: "Analyze (python)" + runs-on: ubuntu-latest + permissions: + contents: read + security-events: write # required to upload CodeQL results to the Security tab + + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + + - name: Initialize CodeQL + uses: github/codeql-action/init@8272c299f21ca24af15dfe9ac0971ba969e5e0d5 # v3.36.2 + with: + languages: python + queries: security-and-quality + + # Python is interpreted — no build step is needed, so autobuild is omitted. 
+ - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@8272c299f21ca24af15dfe9ac0971ba969e5e0d5 # v3.36.2 + with: + category: "/language:python" diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 3c8c62d..baecc44 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -29,12 +29,33 @@ jobs: - name: Build sdist and wheel run: python -m build + # Describe the *published package's* runtime tree (not the build env): a + # clean venv with only the built wheel installed, introspected by + # cyclonedx-py from a separate tool install so the tool's own deps do not + # pollute the SBOM. Written to sbom/ (NOT dist/) so it is never uploaded + # to PyPI as a distribution. + - name: Generate SBOM (CycloneDX) + run: | + python -m pip install cyclonedx-bom + python -m venv /tmp/wheelenv + /tmp/wheelenv/bin/pip install dist/*.whl + mkdir -p sbom + cyclonedx-py environment /tmp/wheelenv/bin/python \ + --of JSON --mc-type library --pyproject pyproject.toml \ + -o sbom/weaver-kernel.cdx.json + - name: Upload dist artifacts uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: dist path: dist/ + - name: Upload SBOM artifact + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: sbom + path: sbom/ + release: name: "GitHub Release" needs: build @@ -48,12 +69,20 @@ jobs: name: dist path: dist/ + - name: Download SBOM artifact + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + with: + name: sbom + path: sbom/ + - name: Create GitHub Release uses: softprops/action-gh-release@a06a81a03ee405af7f2048a818ed3f03bbf83c7b # v2.5.0 with: generate_release_notes: true fail_on_unmatched_files: true - files: dist/* + files: | + dist/* + sbom/* publish: name: "Publish to PyPI" @@ -72,3 +101,8 @@ jobs: - name: Publish to PyPI uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 + with: + 
# PEP 740 attestations, signed via the Trusted Publisher OIDC identity + # (the id-token: write permission above). Verifiable on the PyPI + # project page and with the `pypi-attestations` CLI — see RELEASE.md. + attestations: true From 0304d86d399cdb723e0bca229a6fd0bfd5d04852 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 20 Jun 2026 14:16:22 +0000 Subject: [PATCH 3/5] docs: document CI hardening, gates, SBOM/attestations, and conformance - README: add CodeQL + coverage-floor badges, the minimal-install guarantee, and a supply-chain/security-automation note (#141, #205, #208). - CONTRIBUTING: document the coverage ratchet, docstring/doctest gate, and architecture-conformance gate; note CI invokes the same make targets (#141, #195, #202, #210). - AGENTS.md: add the "Architectural conformance" layering table and note the docstring/coverage/no-extras gates in the quality bar (#202, #195, #141, #208). - RELEASE.md: document the SBOM and PEP 740 attestation artifacts and how consumers verify them (#212). - docs/agent-context/workflows.md: reflect CI-calls-make and the new bare-install / security-audit / conformance / CodeQL jobs (#210, #205, #225). - CHANGELOG: record the full CI / supply-chain hardening pass under [Unreleased]. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01AGeBGThsRwnZuREUV8GoKH --- AGENTS.md | 30 ++++++++++++++++++++++++-- CHANGELOG.md | 38 +++++++++++++++++++++++++++++++++ CONTRIBUTING.md | 24 ++++++++++++++++++++- README.md | 20 +++++++++++++++++ RELEASE.md | 30 +++++++++++++++++++++++--- docs/agent-context/workflows.md | 24 +++++++++++++-------- 6 files changed, 151 insertions(+), 15 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index d484fc4..fdb4caf 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -52,9 +52,35 @@ Use these terms consistently. Never substitute synonyms: - All public interfaces need type hints and docstrings. - Never raise bare `ValueError` or `KeyError` to callers. 
Use custom exceptions from `errors.py`. Catching stdlib exceptions internally to remap them is fine. - Error messages are part of the contract — tests must assert both exception type and message. -- Keep modules ≤ 300 lines. Split if needed. +- Keep modules ≤ 300 lines. Split if needed. Enforced by + `tests/test_architecture.py` (over-budget files are pinned with a shrink-only + ceiling; new files must be ≤ 300). +- All `__all__` exports need a Google-style docstring (`Args:` for functions + with parameters). Enforced by `tests/test_docstrings.py`. +- Branch coverage must stay at or above the `fail_under` floor in + `pyproject.toml` (`make test` fails otherwise). The floor is a ratchet — only + raise it. - No randomness in matching, routing, or summarization. Deterministic outputs always. -- No new dependencies without justification. The dep list is intentionally minimal (`httpx`, `pydantic`). +- No new dependencies without justification. The runtime dep list is + intentionally minimal (`httpx`, `pydantic`); a CI job installs with no extras + to prove it. Optional features live behind extras (`mcp`, `otel`, `policy`, + `tiktoken`, `conformance`). + +## Architectural conformance + +`tests/test_architecture.py` mechanically enforces the layering below (stdlib +`ast` only — no architecture tool). Module-scope imports are checked; lazy +imports inside functions and `TYPE_CHECKING` blocks are exempt (that is the +optional-extra seam). 
+ +| Layer | May import (within the package) | +|-------|----------------------------------| +| `firewall/` | `models`, `errors`, `enums` only — never execution/policy/registry | +| `drivers/` | `models`, `errors`, `enums` only | +| `router` | `models` only (stateless dispatch) | +| `models` | `enums`, `errors` only | +| `enums`, `errors` | nothing else in the package (leaves) | +| `kernel/` and everything else | unconstrained (the kernel is the orchestrator) | ## Security rules diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bde971..4fe2310 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,37 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- **CI / supply-chain hardening.** A focused pass over the build pipeline and + repository automation: + - **Bare-install CI job (#208).** Installs `weaver-kernel` with no extras, + imports the entire public API, runs the README quickstart, asserts optional + extras (`mcp`/`yaml`/`opentelemetry`/`tiktoken`) are genuinely absent, and + asserts the MCP-extra-missing `ImportError` stays actionable — so the + "minimal deps: httpx + pydantic" claim is tested, not just stated. + - **Dependency + code scanning (#205).** `pip-audit` over the runtime tree, + a CodeQL workflow (`security-and-quality`, on PRs + weekly), and Dependabot + for the `pip` and `github-actions` ecosystems (grouped, range-respecting). + - **Coverage gate + badge (#141).** Branch coverage is configured in + `pyproject.toml` with a ratchet floor (`fail_under`); `make test` fails + below it, CI uploads an HTML coverage artifact, and the README carries a + floor badge. + - **Docstring + doctest gate (#195).** `tests/test_docstrings.py` requires a + Google-style docstring (and `Args:` for functions with parameters) on every + `weaver_kernel.__all__` symbol; `tests/test_doctests.py` runs curated inline + doctests. 
+ - **Architecture conformance (#202).** `tests/test_architecture.py` enforces + import boundaries (`firewall`/`drivers`/`router`/`models` leaf-import rules) + and the 300-line module budget (over-budget files pinned with a shrink-only + ceiling), stdlib `ast` only. Rules are documented in AGENTS.md. + - **weaver-spec conformance, activated (#225).** The placeholder + `conformance_stub` job is replaced by a real `conformance` job. New + `weaver_kernel.conformance` adapter maps `Frame`/`ActionTrace`/ + `CapabilityToken` onto the published `weaver-contracts` dataclasses and is + validated by `tests/test_conformance.py` (new `conformance` extra). + - **Release SBOM + attestations (#212).** `publish.yml` generates a CycloneDX + SBOM of the published runtime tree (attached to the GitHub Release) and + enables PEP 740 PyPI attestations via the existing Trusted Publisher. + Verification steps are documented in RELEASE.md. - **Handle expansions and policy denials are now first-class audit records (#175).** `ActionTrace` gained an additive `event_type` (`invoke`/`expand`/`deny`) and `reason_code`. A successful `Kernel.expand()` @@ -40,6 +71,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Companion: `examples/trace_replay_demo.py`. ### Changed +- **CI aligned with `make ci` and hardened (#209, #210, #232).** The `ci.yml` + test job now invokes the Makefile targets (`fmt-check`/`lint`/`type`/`test`/ + `example`) instead of re-implementing them, so the local gate and CI cannot + drift, and all 13 example scripts now run in CI (previously 8). Every action + is pinned to a commit SHA (matching `publish.yml`) with explicit + least-privilege `permissions:` blocks, and the workflow adds pip caching plus + a `concurrency` group with `cancel-in-progress`. 
- **Bounded memory for in-memory audit and revocation state (#182).** `TraceStore` now caps at `max_entries` (default 10 000) with oldest-first eviction, a one-time warning, and an observable `evicted_count`. The revocation diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3927738..ecbc105 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -19,10 +19,32 @@ make fmt # auto-format with ruff (not run by `make ci`) make fmt-check # verify formatting with `ruff format --check` (no mutation) make lint # lint with ruff make type # type-check with mypy -make test # run pytest with coverage +make test # run pytest with branch coverage (fails under the coverage floor) make ci # fmt-check + lint + type + test + example ``` +`.github/workflows/ci.yml` invokes these same Makefile targets, so a green +`make ci` locally means a green CI run — the two cannot drift. + +## Quality gates enforced by `make test` + +These run as ordinary pytest checks (no extra commands): + +- **Coverage floor (ratchet).** `[tool.coverage.report] fail_under` in + `pyproject.toml` is the enforced minimum branch coverage. The rule is a + **ratchet: only ever raise it, never lower it.** If a change pushes coverage + comfortably above the floor, bump `fail_under` up to lock the gain in. +- **Docstring gate** (`tests/test_docstrings.py`). Every symbol exported from + `weaver_kernel.__all__` must have a Google-style docstring; functions that + take parameters must include an `Args:` section. Type aliases and constants + are exempt. Highest-traffic helpers also carry runnable doctests + (`tests/test_doctests.py`). +- **Architecture conformance** (`tests/test_architecture.py`). Import + boundaries (`firewall`/`drivers`/`router`/`models` stay within their allowed + leaf imports) and the 300-line module budget are enforced; over-budget files + are pinned with a shrink-only ceiling. See AGENTS.md → "Architectural + conformance". + ## Pull request guidelines 1. Keep PRs focused — one logical change per PR. 
diff --git a/README.md b/README.md index ae550e5..401a130 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,16 @@ # agent-kernel [![CI](https://github.com/dgenio/agent-kernel/actions/workflows/ci.yml/badge.svg)](https://github.com/dgenio/agent-kernel/actions/workflows/ci.yml) +[![CodeQL](https://github.com/dgenio/agent-kernel/actions/workflows/codeql.yml/badge.svg)](https://github.com/dgenio/agent-kernel/actions/workflows/codeql.yml) +[![Coverage ≥90%](https://img.shields.io/badge/coverage-%E2%89%A590%25-brightgreen.svg)](https://github.com/dgenio/agent-kernel/actions/workflows/ci.yml) [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/) [![License: Apache 2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) + + + **Least-privilege, revocable, principal-scoped authorization for agent tool calls — with a tamper-evident audit of everything that ran.** A capability-based security kernel for AI agents operating in large tool ecosystems (MCP, A2A, 1000+ tools). @@ -74,6 +81,19 @@ contracts, not through tight coupling. A deeper, per-project comparison — including *when not* to reach for `agent-kernel` — is in [How this relates to neighboring projects](#how-this-relates-to-neighboring-projects). +The minimal-install guarantee is enforced in CI: a dedicated job installs the +package with **no extras** (`pip install weaver-kernel`), imports the entire +public API, and runs the quickstart — so an accidental hard dependency on an +optional extra (`mcp`, `yaml`, `opentelemetry`, `tiktoken`) fails the build. + +**Supply-chain & security automation.** CI runs [`pip-audit`](https://pypi.org/project/pip-audit/) +over the runtime dependency tree and [CodeQL](https://codeql.github.com/) +(`security-and-quality`) on every PR and weekly; Dependabot keeps pinned +GitHub Actions and Python dependencies fresh. Releases carry a CycloneDX SBOM +and PEP 740 PyPI attestations (see [RELEASE.md](RELEASE.md)). 
A `pip-audit` +false positive can be allow-listed with `pip-audit --ignore-vuln <ID>` plus a +justifying comment in the workflow. + ## Quickstart ```bash diff --git a/RELEASE.md b/RELEASE.md index 3bcddc6..8c279ba 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -53,8 +53,10 @@ Pushing the `v*` tag triggers `.github/workflows/publish.yml`, which: 1. Runs the full CI suite (`make ci` equivalent) as a gate. 2. Builds the sdist and wheel with `python -m build`. -3. Creates a GitHub Release with auto-generated notes and the built artifacts attached. -4. Publishes to PyPI using Trusted Publisher (OIDC — no API tokens stored). +3. Generates a CycloneDX SBOM of the published runtime tree (`weaver-kernel.cdx.json`). +4. Creates a GitHub Release with auto-generated notes, the built artifacts, and the SBOM attached. +5. Publishes to PyPI using Trusted Publisher (OIDC — no API tokens stored) with + PEP 740 attestations generated and uploaded automatically. Monitor the workflow run at: @@ -62,7 +64,29 @@ Monitor the workflow run at: ### 5. Verify ```bash -pip install weaver-kernel==0.3.0 +pip install "weaver-kernel==<version>" +``` + +### Supply-chain artifacts (SBOM + attestations) + +Each release ships verifiable provenance: + +- **SBOM** — a CycloneDX 1.6 JSON describing the published package's runtime + dependency tree, attached to the GitHub Release as `weaver-kernel.cdx.json`. + It is generated from a clean install of the built wheel, so it reflects what + adopters actually receive (not the build environment). +- **PyPI attestations** — PEP 740 digital attestations signed via the Trusted + Publisher OIDC identity, shown under "Provenance" on the + [PyPI project page](https://pypi.org/project/weaver-kernel/). 
+ +Consumers can verify the attestations with the +[`pypi-attestations`](https://pypi.org/project/pypi-attestations/) CLI: + +```bash +pip download --no-deps "weaver-kernel==<version>" +python -m pypi_attestations verify pypi \ + --repository https://github.com/dgenio/agent-kernel \ + weaver_kernel-<version>-py3-none-any.whl ``` ## Trusted Publisher Setup diff --git a/docs/agent-context/workflows.md b/docs/agent-context/workflows.md index a2f9208..88c4dee 100644 --- a/docs/agent-context/workflows.md +++ b/docs/agent-context/workflows.md @@ -16,15 +16,21 @@ | `make example` | Run all example scripts | After changing examples or core APIs | `make ci` is the **single authoritative pre-push command**. It runs all five targets -in sequence and mirrors the checks in the `test` job of `.github/workflows/ci.yml`: the -format step is the non-mutating `fmt-check` (equivalent to CI's `ruff format --check`), -and lint/type/test/example run the same tools CI does. (CI's separate `conformance_stub` -job is a no-op placeholder and is not part of the local gate.) The Makefile -additionally invokes every tool via `python -m <tool>` — a local hardening over CI -that uses the active interpreter's site-packages, preventing spurious failures when -`ruff` or `mypy` are provided by isolated installers such as `uv tool` or `pipx`. If -`make ci` passes locally, the same checks will pass in CI. Use `make fmt` (the -mutating target) when you want to auto-fix formatting before re-running `make ci`. +in sequence, and the `test` job of `.github/workflows/ci.yml` now **invokes those same +Makefile targets** (`make fmt-check lint type test example`) rather than re-implementing +them inline — so the local gate and CI cannot drift. The format step is the non-mutating +`fmt-check` (equivalent to CI's `ruff format --check`). 
The Makefile invokes every tool +via `python -m <tool>` — a local hardening that uses the active interpreter's +site-packages, preventing spurious failures when `ruff` or `mypy` are provided by +isolated installers such as `uv tool` or `pipx`. If `make ci` passes locally, the same +checks will pass in CI. Use `make fmt` (the mutating target) when you want to auto-fix +formatting before re-running `make ci`. + +CI runs additional jobs that are **not** part of the local `make ci` gate (they need a +clean environment or network): `bare-install` (no-extras smoke test), `security-audit` +(`pip-audit`), `conformance` (weaver-spec mapping against the `conformance` extra), and +the separate `codeql.yml` analysis. The coverage floor, docstring gate, and architecture +conformance checks *are* part of `make test`, so they run both locally and in CI. ## PR conventions From 00ca6e63f5df3c14f1bb4e6cecd6feca718fe4f7 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 20 Jun 2026 14:29:46 +0000 Subject: [PATCH 4/5] fix: address Copilot review on conformance adapter and architecture gate - conformance.py: correct the docstring cross-reference (ActionTrace lives in weaver_kernel.models, not .trace) and raise AgentKernelError instead of a bare KeyError when ActionTrace.event_type is unknown (repo invariant: no bare KeyError to callers). - tests/test_architecture.py: _intra_imports now descends into module-scope if/try/with blocks so a conditional import cannot bypass the boundary rules, while still skipping TYPE_CHECKING bodies and function/class scope (the lazy optional-extra seam). - ci.yml: the security-audit job resolves weaver-kernel's runtime dependency tree in an isolated venv and audits that requirements set, so pip-audit's own dependencies are no longer part of the audited environment. make ci: 730 passed, 1 skipped, 93.87% branch coverage. 
Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01AGeBGThsRwnZuREUV8GoKH --- .github/workflows/ci.yml | 28 +++++++++++++------- src/weaver_kernel/conformance.py | 17 ++++++++++-- tests/test_architecture.py | 45 +++++++++++++++++++++++++++++--- 3 files changed, 74 insertions(+), 16 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e0f9e7d..46bff00 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -145,17 +145,25 @@ jobs: cache: pip cache-dependency-path: pyproject.toml - # Audit the *runtime* dependency tree (install without extras) so the gate - # tracks what adopters actually ship. Policy: fail on any known - # vulnerability; document false positives via pip-audit's --ignore-vuln - # allowlist (see README "Security automation"). - - name: Install runtime deps + pip-audit - run: | - pip install . - pip install pip-audit + - name: Install pip-audit + run: pip install pip-audit - - name: Audit dependencies - run: pip-audit --strict --desc + # Resolve weaver-kernel's *runtime* dependency tree in an isolated venv + # (no extras, no pip-audit) and audit exactly that, so pip-audit's own + # dependencies can never cause a failure unrelated to what adopters ship. + - name: Resolve runtime dependency tree + run: | + python -m venv /tmp/runtime + /tmp/runtime/bin/pip install . + /tmp/runtime/bin/pip freeze --exclude-editable \ + | grep -viE '^(weaver-kernel|pip|setuptools)([=@ ]|$)' > runtime-requirements.txt + echo "Auditing:"; cat runtime-requirements.txt + + # Policy: fail on any known vulnerability in the runtime tree. Document a + # false positive by appending `--ignore-vuln <ID>` here with a comment + # (see README "Security automation"). 
+ - name: Audit runtime dependencies + run: pip-audit --strict --desc --requirement runtime-requirements.txt conformance: name: "Weaver-spec conformance" diff --git a/src/weaver_kernel/conformance.py b/src/weaver_kernel/conformance.py index cfca7ba..5fcc090 100644 --- a/src/weaver_kernel/conformance.py +++ b/src/weaver_kernel/conformance.py @@ -1,7 +1,7 @@ """weaver-spec conformance adapter (issue #225). Maps the kernel's runtime objects (:class:`~weaver_kernel.models.Frame`, -:class:`~weaver_kernel.trace.ActionTrace`, +:class:`~weaver_kernel.models.ActionTrace`, :class:`~weaver_kernel.tokens.CapabilityToken`) onto the published ``weaver-contracts`` dataclasses, so CI can assert the kernel emits spec-conformant payloads instead of echoing a placeholder. @@ -22,6 +22,8 @@ import datetime from typing import TYPE_CHECKING, Any +from .errors import AgentKernelError + if TYPE_CHECKING: # pragma: no cover - typing only from .models import ActionTrace, Frame from .tokens import CapabilityToken @@ -93,13 +95,24 @@ def trace_to_contract(trace: ActionTrace) -> Any: Returns: A validated ``weaver_contracts.TraceEvent`` with kernel-specific detail (driver, sensitivity, reason code) carried in ``metadata``. + + Raises: + AgentKernelError: If ``trace.event_type`` is not a known kernel event + type (e.g. a malformed or future value from a deserialised trace). """ wc = _require_contracts() + try: + contract_event_type = _EVENT_TYPE_MAP[trace.event_type] + except KeyError as exc: + raise AgentKernelError( + f"Cannot map unknown ActionTrace.event_type {trace.event_type!r} to a " + f"weaver-contracts TraceEvent; expected one of {sorted(_EVENT_TYPE_MAP)}." 
+ ) from exc is_deny = trace.event_type == "deny" outcome = "failure" if (is_deny or trace.error is not None) else "success" return wc.TraceEvent( event_id=trace.action_id, - event_type=_EVENT_TYPE_MAP[trace.event_type], + event_type=contract_event_type, timestamp=trace.invoked_at, capability_id=trace.capability_id, principal=trace.principal_id, diff --git a/tests/test_architecture.py b/tests/test_architecture.py index 442a93d..8aa9642 100644 --- a/tests/test_architecture.py +++ b/tests/test_architecture.py @@ -74,22 +74,59 @@ def _layer_of(rel_path: str) -> str: return head if "/" in rel_path else head.removesuffix(".py") +def _is_type_checking_guard(test: ast.expr) -> bool: + """True for ``if TYPE_CHECKING:`` / ``if typing.TYPE_CHECKING:`` guards.""" + return (isinstance(test, ast.Name) and test.id == "TYPE_CHECKING") or ( + isinstance(test, ast.Attribute) and test.attr == "TYPE_CHECKING" + ) + + +def _module_scope_imports(body: list[ast.stmt]) -> list[ast.Import | ast.ImportFrom]: + """Collect import statements at module scope. + + Descends into module-scope ``if`` / ``try`` / ``with`` blocks (so a + conditional import cannot bypass the boundary check) but never into + function or class bodies — those lazy imports are the optional-extra seam. + The body of an ``if TYPE_CHECKING:`` guard is skipped (typing-only, never + executed); its ``else`` branch *is* checked because it runs at runtime. 
+ """ + found: list[ast.Import | ast.ImportFrom] = [] + for node in body: + if isinstance(node, (ast.Import, ast.ImportFrom)): + found.append(node) + elif isinstance(node, ast.If): + if not _is_type_checking_guard(node.test): + found.extend(_module_scope_imports(node.body)) + found.extend(_module_scope_imports(node.orelse)) + elif isinstance(node, ast.Try): + found.extend(_module_scope_imports(node.body)) + for handler in node.handlers: + found.extend(_module_scope_imports(handler.body)) + found.extend(_module_scope_imports(node.orelse)) + found.extend(_module_scope_imports(node.finalbody)) + elif isinstance(node, ast.With): + found.extend(_module_scope_imports(node.body)) + # FunctionDef / AsyncFunctionDef / ClassDef: not descended (lazy seam). + return found + + def _intra_imports(tree: ast.Module, rel_path: str) -> set[str]: """Return top-level ``weaver_kernel`` names imported at *rel_path*'s scope. Relative imports are resolved against the importing file's package depth (``from .models`` in ``router.py`` and ``from ..models`` in ``firewall/transform.py`` both resolve to top-level ``models``), as are - absolute ``weaver_kernel.`` imports. ``TYPE_CHECKING`` blocks and imports - nested inside functions/classes are ignored — that lazy seam is how optional - extras stay optional. + absolute ``weaver_kernel.`` imports. Module-scope conditional imports + (inside ``if`` / ``try`` / ``with``) are included; ``TYPE_CHECKING`` blocks + and imports nested inside functions/classes are ignored — that lazy seam is + how optional extras stay optional. """ # Package parts of the importing module, e.g. firewall/transform.py -> ["firewall"]. package_parts = rel_path.split("/")[:-1] base = ["weaver_kernel", *package_parts] names: set[str] = set() - for node in tree.body: + for node in _module_scope_imports(tree.body): if isinstance(node, ast.ImportFrom): if node.level > 0: # Drop (level - 1) trailing components to reach the target package. 
From dd46e5a828822f979b65018c60a0ac7a755f90ca Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 20 Jun 2026 21:44:30 +0000 Subject: [PATCH 5/5] test: cover trace_to_contract unknown event_type error path Adds a test asserting trace_to_contract() raises AgentKernelError (naming the supported event types) when given a trace with an unmapped event_type, locking in the bare-KeyError fix and covering the previously-untested error branch (branch coverage 93.87% -> 93.91%). Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01231d7jz6KPg14KciqbZLsJ --- tests/test_conformance.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test_conformance.py b/tests/test_conformance.py index 9bab9cb..8548960 100644 --- a/tests/test_conformance.py +++ b/tests/test_conformance.py @@ -14,6 +14,7 @@ import pytest from weaver_kernel import CapabilityToken, Frame, SensitivityTag +from weaver_kernel.errors import AgentKernelError from weaver_kernel.models import ActionTrace wc = pytest.importorskip("weaver_contracts", reason="install the 'conformance' extra") @@ -100,6 +101,19 @@ def test_failed_invoke_maps_to_failure() -> None: assert event.error_message == "driver exploded" +def test_unknown_event_type_raises_kernel_error() -> None: + # An unmapped event_type (e.g. from a deserialised or future trace) must + # fail with a clear kernel error, not a bare KeyError leaking the lookup. + bad = _trace(event_type="teleport") + with pytest.raises(AgentKernelError) as exc_info: + conformance.trace_to_contract(bad) + message = str(exc_info.value) + assert "teleport" in message + # The message names the supported event types so the caller can self-correct. + for known in ("invoke", "expand", "deny"): + assert known in message + + def test_token_maps_to_valid_contract_token() -> None: token = conformance.token_to_contract(_token()) assert isinstance(token, wc.CapabilityToken)