From bcb11813c7607f6fdb2d5cebc88b527569ef1316 Mon Sep 17 00:00:00 2001 From: mmercuri Date: Sun, 26 Apr 2026 00:47:49 -0700 Subject: [PATCH] feat(instrument): Port Ollama LLM provider adapter (M3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Self-contained M3 fan-out PR that lands the LayerLens Ollama provider adapter together with the minimum base infrastructure required to host it. Sister provider PRs (OpenAI, Anthropic, Azure, Bedrock, Vertex, LiteLLM, Cohere, Mistral) land in parallel. Source (~259 LOC): ateam/stratix/sdk/python/adapters/llm_providers/ ollama_adapter.py. Adapter - src/layerlens/instrument/adapters/providers/ollama_adapter.py wraps ollama.Client.{chat,generate,embeddings}, emits model.invoke + cost.record events. api_cost_usd is always 0.0 (Ollama is local / self-hosted); optional infra_cost_usd derived from prompt_eval_duration + eval_duration when cost_per_second is set. - providers/__init__.py exposes OllamaAdapter via lazy __getattr__ — importing the package does NOT load the ollama SDK. Pricing - _base/pricing.py adds explicit zero-cost entries for the canonical Ollama model tags (llama3.x, mistral, mixtral, phi3, qwen2.5, gemma2, deepseek-r1, codellama, nomic-embed-text, mxbai-embed-large, all-minilm). Comment documents that 0.0 is intentional (self-hosted) and distinct from the hosted Bedrock/Together rates listed above. Tests - tests/instrument/adapters/providers/test_ollama_adapter.py — 13 respx-based HTTP-fixture tests covering the chat/generate/embeddings paths, error path (HTTP 500 -> ResponseError + policy.violation), cost_per_second infra-cost math, endpoint detection from OLLAMA_HOST, lazy-import contract, and disconnect lifecycle. - Lazy-import + default-install + resolved-dep-tree guards all green. 
Sample - samples/instrument/ollama/{__init__.py,main.py,README.md} — runnable in mocked mode (default, respx-backed) or live mode (LAYERLENS_OLLAMA_LIVE=1) against a real ollama serve daemon. Sample pulls the model first, then runs a chat round-trip. Doc - docs/adapters/providers/ollama.md — install, quickstart, ollama serve setup for macOS/Linux/Windows/Docker, GPU notes (CUDA / ROCm / Metal / CPU), env-var reference. pyproject - providers-ollama = ["ollama>=0.2"] extra. httpx is already a core dep so the extra surface is documented but minimal. - Per-file ruff override for src/layerlens/instrument/adapters/ providers/**.py (ARG002 — wrapped SDK callbacks). - pyright executionEnvironment relaxation for providers (matches the existing cli relaxation). Acceptance - uv run pytest tests/instrument/adapters/providers/test_ollama_adapter.py: 13 passed - uv run pytest tests/instrument/test_lazy_imports.py: 3 passed - uv run pytest tests/instrument/test_default_install.py: 3 passed - uv run mypy --strict src/layerlens/instrument/adapters/providers: clean - uv run ruff check src/ tests/: clean --- docs/adapters/providers/ollama.md | 240 ++++++++ pyproject.toml | 20 +- samples/instrument/ollama/README.md | 135 +++++ samples/instrument/ollama/__init__.py | 1 + samples/instrument/ollama/main.py | 225 ++++++++ src/layerlens/_compat/__init__.py | 8 + src/layerlens/_compat/pydantic.py | 121 ++++ src/layerlens/instrument/__init__.py | 49 ++ .../instrument/_vendored/__init__.py | 26 + src/layerlens/instrument/_vendored/events.py | 90 +++ .../_vendored/events_cross_cutting.py | 309 +++++++++++ .../instrument/_vendored/events_l1_io.py | 114 ++++ .../instrument/_vendored/events_l3_model.py | 105 ++++ .../_vendored/events_l4_environment.py | 149 +++++ .../instrument/_vendored/events_l5_tools.py | 200 +++++++ .../instrument/_vendored/events_protocol.py | 506 +++++++++++++++++ .../instrument/_vendored/memory_models.py | 95 ++++ src/layerlens/instrument/adapters/__init__.py | 42 ++ 
.../instrument/adapters/_base/__init__.py | 49 ++ .../instrument/adapters/_base/adapter.py | 523 ++++++++++++++++++ .../instrument/adapters/_base/capture.py | 281 ++++++++++ .../adapters/_base/pydantic_compat.py | 122 ++++ .../instrument/adapters/_base/registry.py | 266 +++++++++ .../instrument/adapters/_base/sinks.py | 277 ++++++++++ .../adapters/_base/trace_container.py | 81 +++ .../instrument/adapters/providers/__init__.py | 46 ++ .../adapters/providers/_base/__init__.py | 21 + .../adapters/providers/_base/pricing.py | 184 ++++++ .../adapters/providers/_base/provider.py | 406 ++++++++++++++ .../adapters/providers/_base/tokens.py | 80 +++ .../adapters/providers/ollama_adapter.py | 261 +++++++++ tests/instrument/__init__.py | 0 .../_baselines/default_dependencies.txt | 22 + .../_baselines/resolved_dependencies.txt | 40 ++ tests/instrument/adapters/__init__.py | 0 .../instrument/adapters/providers/__init__.py | 0 .../adapters/providers/test_ollama_adapter.py | 392 +++++++++++++ tests/instrument/test_default_install.py | 182 ++++++ tests/instrument/test_lazy_imports.py | 104 ++++ tests/instrument/test_resolved_dep_tree.py | 202 +++++++ 40 files changed, 5973 insertions(+), 1 deletion(-) create mode 100644 docs/adapters/providers/ollama.md create mode 100644 samples/instrument/ollama/README.md create mode 100644 samples/instrument/ollama/__init__.py create mode 100644 samples/instrument/ollama/main.py create mode 100644 src/layerlens/_compat/__init__.py create mode 100644 src/layerlens/_compat/pydantic.py create mode 100644 src/layerlens/instrument/__init__.py create mode 100644 src/layerlens/instrument/_vendored/__init__.py create mode 100644 src/layerlens/instrument/_vendored/events.py create mode 100644 src/layerlens/instrument/_vendored/events_cross_cutting.py create mode 100644 src/layerlens/instrument/_vendored/events_l1_io.py create mode 100644 src/layerlens/instrument/_vendored/events_l3_model.py create mode 100644 
src/layerlens/instrument/_vendored/events_l4_environment.py create mode 100644 src/layerlens/instrument/_vendored/events_l5_tools.py create mode 100644 src/layerlens/instrument/_vendored/events_protocol.py create mode 100644 src/layerlens/instrument/_vendored/memory_models.py create mode 100644 src/layerlens/instrument/adapters/__init__.py create mode 100644 src/layerlens/instrument/adapters/_base/__init__.py create mode 100644 src/layerlens/instrument/adapters/_base/adapter.py create mode 100644 src/layerlens/instrument/adapters/_base/capture.py create mode 100644 src/layerlens/instrument/adapters/_base/pydantic_compat.py create mode 100644 src/layerlens/instrument/adapters/_base/registry.py create mode 100644 src/layerlens/instrument/adapters/_base/sinks.py create mode 100644 src/layerlens/instrument/adapters/_base/trace_container.py create mode 100644 src/layerlens/instrument/adapters/providers/__init__.py create mode 100644 src/layerlens/instrument/adapters/providers/_base/__init__.py create mode 100644 src/layerlens/instrument/adapters/providers/_base/pricing.py create mode 100644 src/layerlens/instrument/adapters/providers/_base/provider.py create mode 100644 src/layerlens/instrument/adapters/providers/_base/tokens.py create mode 100644 src/layerlens/instrument/adapters/providers/ollama_adapter.py create mode 100644 tests/instrument/__init__.py create mode 100644 tests/instrument/_baselines/default_dependencies.txt create mode 100644 tests/instrument/_baselines/resolved_dependencies.txt create mode 100644 tests/instrument/adapters/__init__.py create mode 100644 tests/instrument/adapters/providers/__init__.py create mode 100644 tests/instrument/adapters/providers/test_ollama_adapter.py create mode 100644 tests/instrument/test_default_install.py create mode 100644 tests/instrument/test_lazy_imports.py create mode 100644 tests/instrument/test_resolved_dep_tree.py diff --git a/docs/adapters/providers/ollama.md b/docs/adapters/providers/ollama.md new file mode 
100644 index 0000000..a0ae6b7 --- /dev/null +++ b/docs/adapters/providers/ollama.md @@ -0,0 +1,240 @@ +# Ollama provider adapter + +`layerlens.instrument.adapters.providers.ollama_adapter.OllamaAdapter` +instruments the [Ollama Python SDK](https://github.com/ollama/ollama-python) +for local LLM inference. Ollama is a self-hosted runtime (no cloud API) so +the adapter records all token usage but reports `api_cost_usd: 0.0` — +the only billable resource is the operator's own compute. + +## Status + +| Field | Value | +|---------------|--------------------------------------------| +| Adapter type | LLM provider | +| Framework | `ollama` | +| SDK pin | `ollama>=0.2` | +| Adapter ver. | `0.1.0` | +| Local-only | Yes — default endpoint `http://localhost:11434` | +| Pricing | All models recorded as `0.0` USD/token (self-hosted) | +| GA milestone | M3 (LLM provider fan-out) | + +## Install + +```bash +pip install 'layerlens[providers-ollama]' +``` + +The `providers-ollama` extra pulls `ollama>=0.2`. The default +`pip install layerlens` does NOT pull `ollama` — adapter modules and +their vendor SDKs are loaded lazily on first use. + +## `ollama serve` setup + +The adapter wraps an in-process `ollama.Client`, but the client itself +talks HTTP to a daemon (`ollama serve`) running locally or remotely. +You need to install + start the daemon BEFORE running any +instrumented code. + +### macOS + +```bash +brew install ollama +brew services start ollama # background service +# or, foreground for debugging: +ollama serve +``` + +### Linux + +```bash +curl -fsSL https://ollama.com/install.sh | sh +sudo systemctl enable --now ollama # systemd service on most distros +# or, foreground: +ollama serve +``` + +### Windows + +Download the installer from [ollama.com/download](https://ollama.com/download) and +launch the **Ollama** app from the Start menu — the installer registers +a Windows service that runs the daemon in the background.
Verify with: + +```powershell +Get-Service Ollama +curl.exe http://localhost:11434/api/version +``` + +### Docker / Compose + +```yaml +services: + ollama: + image: ollama/ollama:latest + ports: + - "11434:11434" + volumes: + - ollama-models:/root/.ollama + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] +volumes: + ollama-models: +``` + +### Verify the daemon is up + +```bash +curl http://localhost:11434/api/version +# {"version":"0.6.0"} +``` + +### Pull at least one model + +The first call to `client.chat(model=X)` blocks while Ollama downloads +`X` (multiple GB). Pre-pull models with: + +```bash +ollama pull llama3.2:3b # ~2 GB, fast on CPU, fits on 4 GB GPU +ollama pull llama3.1:8b # ~5 GB, recommended baseline +ollama pull qwen2.5:7b # ~4 GB, strong open model +ollama pull nomic-embed-text # ~270 MB, embeddings only +``` + +## Quick start + +```python +from ollama import Client +from layerlens.instrument.adapters._base import CaptureConfig +from layerlens.instrument.adapters.providers.ollama_adapter import OllamaAdapter + +adapter = OllamaAdapter( + capture_config=CaptureConfig.standard(), + cost_per_second=0.005, # optional infra-cost rate +) +adapter.connect() + +client = Client() # reads OLLAMA_HOST env var, defaults to http://localhost:11434 +adapter.connect_client(client) + +response = client.chat( + model="llama3.1", + messages=[{"role": "user", "content": "Hi"}], +) +print(response.message.content) +``` + +Each instrumented call emits two events: + +1. `model.invoke` — request and response, with `method` (chat / generate / + embeddings), `endpoint`, `latency_ms`, token counts, and the assistant + output (when `capture_content=True`). +2. `cost.record` — `api_cost_usd: 0.0` always, plus `infra_cost_usd` if + `cost_per_second` was set. + +A failed call additionally emits a third event: + +3. `policy.violation` — `provider: ollama`, `error: {message}`, + `violation_type: safety`.
The original exception is re-raised after + the events fire. + +## Ollama-specific behaviour + +- **`api_cost_usd: 0.0`** is always emitted because Ollama runs locally — + there is no API to bill for. The pricing table includes explicit + zero-cost entries for `llama3.x`, `mistral`, `mixtral`, `phi3`, + `qwen2.5`, `gemma2`, `deepseek-r1`, `codellama`, `nomic-embed-text`, + `mxbai-embed-large`, and `all-minilm` so `calculate_cost` returns + `0.0` (a real number) rather than `None` (pricing-unavailable). +- **Optional `infra_cost_usd`**: pass `cost_per_second` to the + constructor to attribute compute cost. The adapter sums + `prompt_eval_duration + eval_duration` (both in nanoseconds) and + computes `total_seconds * cost_per_second`. Useful for charging back + GPU rental cost to specific calls. +- **Endpoint capture**: the `OLLAMA_HOST` env var (or + `http://localhost:11434`) is recorded in every event so you can + identify which Ollama instance handled a request when running + multi-host fleets. +- **Three methods wrapped**: `chat`, `generate`, and `embeddings`. The + `method` field in `model.invoke` distinguishes them. Other SDK + methods (`pull`, `push`, `list`, `show`, etc.) are NOT instrumented + because they don't represent inference workload. + +## Token extraction + +Ollama responses (dict or `ChatResponse`-object form) expose +`prompt_eval_count` and `eval_count` — these map to `prompt_tokens` and +`completion_tokens` in `NormalizedTokenUsage`. `total_tokens` is the +sum. + +Embeddings responses don't carry token counts; the adapter falls back +to zeros for the `cost.record` payload. 
+ +## GPU notes + +Ollama auto-detects available accelerators at daemon start time: + +| Platform | Backend | +|--------------------------------|-------------------------------| +| NVIDIA GPU (CUDA 11.8 / 12.x) | CUDA — preferred when present | +| AMD GPU (ROCm 5.7+) | ROCm | +| Apple Silicon (M1/M2/M3/M4) | Metal | +| Intel / no GPU | CPU | + +The adapter is GPU-agnostic — it only sees the JSON the daemon returns. +However, the `eval_duration` value the adapter uses for +`infra_cost_usd` is wall-clock time on the daemon, so swapping backends +will change reported infra cost without any adapter change. + +### Force CPU-only + +```bash +OLLAMA_NUM_GPU=0 ollama serve +``` + +### Inspect what's loaded + +```bash +ollama ps +# NAME ID SIZE PROCESSOR UNTIL +# llama3.1:latest abc... 5.0GB 100% GPU 4 minutes from now +``` + +### NVIDIA driver requirements + +| Ollama version | CUDA runtime | Min driver | +|----------------|-------------|-------------| +| 0.5+ | CUDA 12.x | 525.60.13 | +| 0.4 and older | CUDA 11.8 | 450.80.02 | + +If `ollama ps` reports `100% CPU` despite a CUDA card being present, +check `nvidia-smi` for driver presence and re-run `ollama serve` with +`OLLAMA_DEBUG=1` to see why CUDA was rejected. + +## Configuration + +| Env var | Default | Effect | +|-----------------------|-------------------------------|-----------------------------------------------| +| `OLLAMA_HOST` | `http://localhost:11434` | Daemon endpoint the SDK + adapter point at | +| `OLLAMA_NUM_GPU` | auto | Layers to offload to GPU (0 = CPU-only) | +| `OLLAMA_KEEP_ALIVE` | `5m` | How long the daemon keeps a model resident | +| `OLLAMA_DEBUG` | unset | Verbose daemon logging | + +## Sample + +A runnable end-to-end sample lives at +[`samples/instrument/ollama/`](../../../samples/instrument/ollama/) — runs +mocked-by-default (no daemon required), or live against a real +`ollama serve` with `LAYERLENS_OLLAMA_LIVE=1`. 
+ +## Test fixtures + +The provider's pytest suite at +`tests/instrument/adapters/providers/test_ollama_adapter.py` uses +`respx` to mock the daemon's HTTP endpoints. This is the recommended +pattern when writing your own integration tests against the adapter — +it gives you coverage of the real httpx → adapter event-emission path +without requiring an Ollama daemon in CI. diff --git a/pyproject.toml b/pyproject.toml index ae6d1dc..2b12f2b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,17 @@ classifiers = [ [project.optional-dependencies] cli = ["click>=8.0.0"] +# --- Instrument layer: LLM provider adapters --- +# Adding any extra below MUST keep the default `pip install layerlens` +# install set unchanged. Verified by `tests/instrument/test_default_install.py`. +# +# Note: `httpx` is already a core dependency, so the Ollama extra is +# functionally empty when only `httpx` would be required. We keep +# `ollama>=0.2` here as the documented public surface so that +# `pip install 'layerlens[providers-ollama]'` pulls the upstream SDK +# users actually call into. +providers-ollama = ["ollama>=0.2"] + [project.urls] Homepage = "https://github.com/LayerLens/stratix-python" Repository = "https://github.com/LayerLens/stratix-python" @@ -139,14 +150,21 @@ known-first-party = ["openai", "tests"] "tests/**.py" = ["T201", "T203", "ARG", "B007"] "examples/**.py" = ["T201", "T203"] "src/layerlens/cli/**" = ["T201", "T203"] +# Provider adapters wrap third-party SDKs whose callback signatures are +# fixed by upstream; unused arguments are part of the contract. +"src/layerlens/instrument/adapters/providers/**.py" = ["ARG002"] [tool.pyright] include = ["src", "tests"] exclude = ["**/__pycache__"] reportMissingTypeStubs = false -# Less strict settings for tests and cli +# Less strict settings for tests, cli, and the dynamic-monkey-patching +# provider-adapter code. 
mypy --strict stays strict for these dirs; +# pyright is relaxed here because it can't follow runtime attribute +# mutation that the provider instrumentation relies on. executionEnvironments = [ { root = "src/layerlens/cli", reportMissingImports = false, reportFunctionMemberAccess = false, reportCallIssue = false, reportArgumentType = false, reportAttributeAccessIssue = false }, + { root = "src/layerlens/instrument/adapters/providers", reportPossiblyUnbound = false, reportPossiblyUnboundVariable = false, reportCallIssue = false, reportAttributeAccessIssue = false, reportArgumentType = false, reportMissingImports = false, reportFunctionMemberAccess = false }, { root = "tests", reportGeneralTypeIssues = false, reportOptionalSubscript = false, reportOptionalMemberAccess = false, reportUntypedFunctionDecorator = false, reportUnknownArgumentType = false, reportUnknownMemberType = false, reportUnknownVariableType = false, reportUnnecessaryIsInstance = false, reportUnnecessaryComparison = false, reportArgumentType = false, reportCallIssue = false }, ] diff --git a/samples/instrument/ollama/README.md b/samples/instrument/ollama/README.md new file mode 100644 index 0000000..dbebc27 --- /dev/null +++ b/samples/instrument/ollama/README.md @@ -0,0 +1,135 @@ +# Ollama adapter sample + +This sample demonstrates the LayerLens Ollama provider adapter wrapping a real +`ollama.Client`. Every `chat`, `generate`, and `embeddings` call is intercepted +and turned into telemetry events. Ollama is a local-only inference engine, so: + +- `api_cost_usd` is **always 0.0** — there's no API to bill for. +- `infra_cost_usd` is computed from `prompt_eval_duration + eval_duration` + if you pass `cost_per_second` to the adapter constructor (handy for + attributing GPU rental cost to specific calls). 
+ +## What you'll see + +Running `python -m samples.instrument.ollama.main` (mocked mode, default) +produces two events for a single chat completion: + +- `model.invoke` (L3) — request and response, with parameters, tokens, + latency, the `method` (chat / generate / embeddings) and the + `endpoint` Ollama was reached at. +- `cost.record` (cross-cutting) — `api_cost_usd: 0.0` always, plus + `infra_cost_usd` if `cost_per_second` was set. + +## Install + +```bash +pip install 'layerlens[providers-ollama]' +``` + +The `providers-ollama` extra installs `ollama>=0.2`. The default +`pip install layerlens` does NOT pull `ollama` — that's the lazy-import +guarantee tested by `tests/instrument/test_lazy_imports.py`. + +## Run mocked (no daemon required) + +```bash +pip install 'layerlens[providers-ollama]' respx +python -m samples.instrument.ollama.main +``` + +The sample uses `respx` to mock the local Ollama HTTP endpoints +(`POST /api/pull`, `POST /api/chat`) so it runs cleanly in CI or on a +fresh checkout without an Ollama daemon. This is exactly the same +fixture pattern used by +`tests/instrument/adapters/providers/test_ollama_adapter.py`. + +## Run live against a real `ollama serve` daemon + +### macOS / Linux + +```bash +# 1. Install Ollama +curl -fsSL https://ollama.com/install.sh | sh + +# 2. Start the daemon (binds 127.0.0.1:11434 by default) +ollama serve & + +# 3. Pull the model the sample calls (~2 GB on disk) +ollama pull llama3.2:3b + +# 4. Run the instrumented sample +LAYERLENS_OLLAMA_LIVE=1 python -m samples.instrument.ollama.main +``` + +### Windows + +Download the installer from [ollama.com/download](https://ollama.com/download) and +launch the `Ollama` app from the Start menu — it runs the daemon in the +background.
Then in PowerShell: + +```powershell +ollama pull llama3.2:3b +$env:LAYERLENS_OLLAMA_LIVE = "1" +python -m samples.instrument.ollama.main +``` + +### Choose a different model + +Set `LAYERLENS_OLLAMA_MODEL` to any tag from the +[Ollama library](https://ollama.com/library): + +```bash +LAYERLENS_OLLAMA_MODEL=mistral:7b LAYERLENS_OLLAMA_LIVE=1 \ + python -m samples.instrument.ollama.main +``` + +### Attribute GPU rental cost + +If you're running Ollama on a paid GPU instance, pass your effective +per-second rate (e.g. `$0.005`/s for a small inference instance) and +the adapter computes `infra_cost_usd = total_compute_seconds * rate` +on every event: + +```bash +LAYERLENS_OLLAMA_COST_PER_SECOND=0.005 LAYERLENS_OLLAMA_LIVE=1 \ + python -m samples.instrument.ollama.main +``` + +## GPU notes + +Ollama auto-detects available accelerators at daemon start time: + +| Platform | Backend | +|--------------------------------|-------------------------------| +| NVIDIA GPU (CUDA 11.8+, 12.x) | CUDA — preferred when present | +| AMD GPU (ROCm 5.7+) | ROCm | +| Apple Silicon (M1/M2/M3/M4) | Metal | +| Intel / Apple Intel / no GPU | CPU | + +Use `ollama ps` to see which backend a loaded model is using and how +much GPU VRAM it's consuming. The adapter records `eval_duration` and +`prompt_eval_duration` from the daemon — those values are the source +of truth for "how long did this token cost", regardless of which +backend ran them. + +To force CPU-only inference (e.g. 
to free GPU memory for other work): + +```bash +OLLAMA_NUM_GPU=0 ollama serve +``` + +## Verify telemetry shape + +The mocked-mode output looks like: + +```text +[event 1] model.invoke: provider='ollama' model='llama3.2:3b' method='chat' + endpoint='http://localhost:11434' prompt_tokens=18 completion_tokens=7 + total_tokens=25 latency_ms=0.0 finish_reason='stop' +[event 2] cost.record: provider='ollama' model='llama3.2:3b' prompt_tokens=18 + completion_tokens=7 total_tokens=25 api_cost_usd=0.0 + infra_cost_usd=0.00175 +``` + +Notice: `api_cost_usd=0.0` (Ollama is local) and `infra_cost_usd=0.00175` +(0.35 s of compute @ \$0.005/s). diff --git a/samples/instrument/ollama/__init__.py b/samples/instrument/ollama/__init__.py new file mode 100644 index 0000000..0ffeafb --- /dev/null +++ b/samples/instrument/ollama/__init__.py @@ -0,0 +1 @@ +"""Sample: instrument the Ollama Python SDK with the LayerLens adapter.""" diff --git a/samples/instrument/ollama/main.py b/samples/instrument/ollama/main.py new file mode 100644 index 0000000..741c790 --- /dev/null +++ b/samples/instrument/ollama/main.py @@ -0,0 +1,225 @@ +"""Sample: instrument the real Ollama client with the LayerLens adapter. + +Runs a single ``chat`` round-trip through ``OllamaAdapter``. Every +event the adapter emits (``model.invoke``, ``cost.record``, optional +``policy.violation`` on errors) is printed to stdout via the bundled +:class:`_StdoutSink`. Swap the sink for ``HttpEventSink`` (lands with +the M2 transport PR) to ship telemetry to atlas-app. + +Two execution modes: + +1. **Live** — set ``LAYERLENS_OLLAMA_LIVE=1`` and have ``ollama serve`` + running locally with the requested model pulled. The sample first + pulls the model (no-op if already cached) then runs a chat request. + +2. **Mocked** (default) — uses :mod:`respx` to fake the local Ollama + HTTP endpoints so the sample is runnable in CI / a fresh checkout + without an Ollama daemon. + +Required to run live:: + + # 1. 
Install + start the daemon (Linux/macOS) + curl -fsSL https://ollama.com/install.sh | sh + ollama serve & + + # 2. Pull the model you'll call (3B parameters, ~2GB) + ollama pull llama3.2:3b + + # 3. Install the adapter extra and run + pip install 'layerlens[providers-ollama]' + LAYERLENS_OLLAMA_LIVE=1 python -m samples.instrument.ollama.main + +Run mocked:: + + pip install 'layerlens[providers-ollama]' respx + python -m samples.instrument.ollama.main +""" + +from __future__ import annotations + +import os +import sys +from typing import Any, Dict + + +class _StdoutSink: + """Trivial event sink that prints each emitted event to stdout.""" + + def __init__(self) -> None: + self.count = 0 + + def emit(self, *args: Any, **_kwargs: Any) -> None: + if len(args) == 2 and isinstance(args[0], str): + event_type, payload = args + self.count += 1 + print(f"[event {self.count:>2}] {event_type}: {_summarise(payload)}") + + +def _summarise(payload: Dict[str, Any]) -> str: + """Pretty-print a few key fields from the payload.""" + if not isinstance(payload, dict): + return repr(payload) + keys = ( + "provider", + "model", + "method", + "endpoint", + "prompt_tokens", + "completion_tokens", + "total_tokens", + "api_cost_usd", + "infra_cost_usd", + "latency_ms", + "finish_reason", + "error", + ) + return " ".join( + f"{k}={payload[k]!r}" for k in keys if k in payload and payload[k] is not None + ) + + +def _run_live() -> int: + """Hit a real ``ollama serve`` daemon.""" + try: + from ollama import Client, ResponseError + except ImportError: + print( + "ollama package not installed.
Install with:\n" + " pip install 'layerlens[providers-ollama]'", + file=sys.stderr, + ) + return 2 + + from layerlens.instrument.adapters._base import CaptureConfig + from layerlens.instrument.adapters.providers.ollama_adapter import OllamaAdapter + + model = os.environ.get("LAYERLENS_OLLAMA_MODEL", "llama3.2:3b") + + sink = _StdoutSink() + adapter = OllamaAdapter( + stratix=sink, + capture_config=CaptureConfig.standard(), + # Optional: $0.005 / GPU-second to attribute infra cost. + cost_per_second=float(os.environ.get("LAYERLENS_OLLAMA_COST_PER_SECOND", "0")) or None, + ) + adapter.connect() + + client = Client() + adapter.connect_client(client) + + try: + # Step 1: pull the model. This is a no-op if the model is + # already cached locally; otherwise it streams the layers down. + # (Pull is NOT instrumented — only chat / generate / embeddings + # are wrapped.) + print(f"Pulling model {model!r} (no-op if already cached)...") + try: + client.pull(model) + except ResponseError as exc: + print(f"Pull failed ({exc}); proceeding to chat anyway", file=sys.stderr) + + # Step 2: run a chat. This call IS instrumented. 
+ print(f"Chatting with {model!r}...") + response = client.chat( + model=model, + messages=[ + {"role": "system", "content": "You are a concise assistant."}, + {"role": "user", "content": "What is 2 + 2?"}, + ], + ) + print(f"Response: {response.message.content}") + if response.eval_count is not None: + print( + f"Tokens — prompt: {response.prompt_eval_count}, " + f"completion: {response.eval_count}" + ) + finally: + adapter.disconnect() + + print(f"\nEmitted {sink.count} events.") + return 0 + + +def _run_mocked() -> int: + """Run against a respx-mocked Ollama HTTP endpoint.""" + try: + import httpx + import respx + from ollama import Client + except ImportError as exc: + print(f"Missing dependency: {exc}", file=sys.stderr) + print( + "Install with:\n" + " pip install 'layerlens[providers-ollama]' respx", + file=sys.stderr, + ) + return 2 + + from layerlens.instrument.adapters._base import CaptureConfig + from layerlens.instrument.adapters.providers.ollama_adapter import OllamaAdapter + + chat_body = { + "model": "llama3.2:3b", + "created_at": "2026-04-25T00:00:00Z", + "message": {"role": "assistant", "content": "2 + 2 = 4"}, + "done": True, + "done_reason": "stop", + "total_duration": 350_000_000, + "load_duration": 0, + "prompt_eval_count": 18, + "prompt_eval_duration": 100_000_000, + "eval_count": 7, + "eval_duration": 250_000_000, + } + + sink = _StdoutSink() + adapter = OllamaAdapter( + stratix=sink, + capture_config=CaptureConfig.standard(), + cost_per_second=0.005, # Demo: $0.005/sec attributed GPU rental. + ) + adapter.connect() + + print("Running mocked Ollama chat round-trip...") + with respx.mock(base_url="http://127.0.0.1:11434") as router: + # Pull is mocked as a single line of NDJSON; the SDK ignores + # streaming progress events when no callback is registered. 
+ router.post("/api/pull").mock( + return_value=httpx.Response(200, content=b'{"status":"success"}\n') + ) + router.post("/api/chat").mock(return_value=httpx.Response(200, json=chat_body)) + + client = Client() + adapter.connect_client(client) + + try: + print("Pulling model 'llama3.2:3b' (mocked)...") + try: + client.pull("llama3.2:3b") + except Exception as exc: # noqa: BLE001 + print(f"Mocked pull surfaced: {exc}", file=sys.stderr) + + print("Chatting with mocked daemon...") + response = client.chat( + model="llama3.2:3b", + messages=[ + {"role": "system", "content": "You are a concise assistant."}, + {"role": "user", "content": "What is 2 + 2?"}, + ], + ) + print(f"Response: {response.message.content}") + finally: + adapter.disconnect() + + print(f"\nEmitted {sink.count} events.") + return 0 + + +def main() -> int: + if os.environ.get("LAYERLENS_OLLAMA_LIVE") == "1": + return _run_live() + return _run_mocked() + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/layerlens/_compat/__init__.py b/src/layerlens/_compat/__init__.py new file mode 100644 index 0000000..49bf6a9 --- /dev/null +++ b/src/layerlens/_compat/__init__.py @@ -0,0 +1,8 @@ +"""Compatibility shims for Python and library version differences. + +The instrument layer must run on Python 3.8+ and Pydantic 1.9+ or 2.x. +Modules in this package centralize the conditional imports and polyfills +so adapter code can be written against a single, stable surface. +""" + +from __future__ import annotations diff --git a/src/layerlens/_compat/pydantic.py b/src/layerlens/_compat/pydantic.py new file mode 100644 index 0000000..ea74a10 --- /dev/null +++ b/src/layerlens/_compat/pydantic.py @@ -0,0 +1,121 @@ +"""Pydantic v1/v2 dual-compatibility shim. + +`stratix-python` pins ``pydantic>=1.9.0, <3``. The instrument layer must +work under both v1 and v2 because frameworks we adapt (LangChain, CrewAI, +Pydantic-AI, etc.) span both versions in customer environments. 
+ +This shim exposes a single set of names — ``BaseModel``, ``Field``, +``model_dump``, ``field_validator``, ``model_validator`` — that behave +identically under both versions. Callers must use these instead of +importing from ``pydantic`` directly so the v1/v2 boundary lives in +exactly one place. +""" + +from __future__ import annotations + +from typing import Any, Dict, Callable + +import pydantic + +PYDANTIC_V2: bool = pydantic.VERSION.startswith("2.") + +# Re-exported public names. Adapter code imports from here, never from +# ``pydantic`` directly, so a future v3 (or rollback to v1) is a one-file change. +BaseModel = pydantic.BaseModel +Field = pydantic.Field + + +def model_dump(model: Any) -> Dict[str, Any]: + """Return a dict representation of a Pydantic model under v1 or v2. + + v2 exposes ``model.model_dump()``; v1 exposes ``model.dict()``. Callers + can also pass a plain ``dict`` (returned unchanged) or any other object + (converted via ``str``) — matching the defensive pattern used by + ``BaseAdapter`` when serializing event payloads of unknown shape. + """ + if isinstance(model, dict): + return model + if PYDANTIC_V2 and hasattr(model, "model_dump"): + result = model.model_dump() + if isinstance(result, dict): + return result + return {"value": result} + if hasattr(model, "dict"): + result = model.dict() + if isinstance(result, dict): + return result + return {"value": result} + return {"raw": str(model)} + + +# Cast pydantic to Any inside the shim so we can call differently-shaped +# v1 and v2 entry points without the type checker objecting to the dead +# branch under whichever version is currently installed. +_pyd: Any = pydantic + + +def field_validator(*fields: str, mode: str = "after") -> Callable[..., Any]: + """Cross-version field validator decorator. + + Under Pydantic v2, delegates to the real ``field_validator``. 
Under + v1, delegates to ``pydantic.validator`` translating + ``mode="before"`` to ``pre=True`` and ``mode="after"`` to + ``pre=False``. + + Usage:: + + from layerlens._compat.pydantic import BaseModel, field_validator + + class M(BaseModel): + x: int + + @field_validator("x") + @classmethod + def _check_x(cls, v: int) -> int: + ... + """ + if PYDANTIC_V2: + result = _pyd.field_validator(*fields, mode=mode) + return result # type: ignore[no-any-return] + + pre = mode == "before" + + def _decorator(fn: Callable[..., Any]) -> Callable[..., Any]: + decorated: Callable[..., Any] = _pyd.validator( + *fields, pre=pre, allow_reuse=True + )(fn) + return decorated + + return _decorator + + +def model_validator(mode: str = "after") -> Callable[..., Any]: + """Cross-version model validator decorator. + + Under Pydantic v2, delegates to the real ``model_validator``. Under + v1, delegates to ``pydantic.root_validator`` with the appropriate + ``pre`` kwarg. + """ + if PYDANTIC_V2: + result = _pyd.model_validator(mode=mode) + return result # type: ignore[no-any-return] + + pre = mode == "before" + + def _decorator(fn: Callable[..., Any]) -> Callable[..., Any]: + decorated: Callable[..., Any] = _pyd.root_validator( + pre=pre, allow_reuse=True + )(fn) + return decorated + + return _decorator + + +__all__ = [ + "BaseModel", + "Field", + "PYDANTIC_V2", + "field_validator", + "model_dump", + "model_validator", +] diff --git a/src/layerlens/instrument/__init__.py b/src/layerlens/instrument/__init__.py new file mode 100644 index 0000000..aec3c8c --- /dev/null +++ b/src/layerlens/instrument/__init__.py @@ -0,0 +1,49 @@ +"""LayerLens Instrument layer. + +The ``instrument`` package houses framework, protocol, and LLM provider +adapters plus their shared base classes, registry, capture configuration, +and event-sink abstractions. Adapter code lives under +``layerlens.instrument.adapters``. 
+ +Importing ``layerlens.instrument`` MUST NOT import any optional adapter +dependency (langchain, crewai, anthropic, etc.). Adapter modules are +lazy-loaded from the registry the first time their framework is requested. + +Convenience re-exports of the most commonly used base-layer types are +provided here so the typical adapter user can write:: + + from layerlens.instrument import ( + BaseAdapter, + AdapterRegistry, + CaptureConfig, + ) + +These are pure Python classes with only ``pydantic`` (already required) +as a dependency. +""" + +from __future__ import annotations + +from layerlens.instrument.adapters._base import ( + EventSink, + AdapterInfo, + BaseAdapter, + AdapterHealth, + AdapterStatus, + CaptureConfig, + AdapterRegistry, + ReplayableTrace, + AdapterCapability, +) + +__all__ = [ + "AdapterCapability", + "AdapterHealth", + "AdapterInfo", + "AdapterRegistry", + "AdapterStatus", + "BaseAdapter", + "CaptureConfig", + "EventSink", + "ReplayableTrace", +] diff --git a/src/layerlens/instrument/_vendored/__init__.py b/src/layerlens/instrument/_vendored/__init__.py new file mode 100644 index 0000000..975267d --- /dev/null +++ b/src/layerlens/instrument/_vendored/__init__.py @@ -0,0 +1,26 @@ +"""Vendored snapshots of types from the ateam ``stratix`` package. + +These modules are deliberately *frozen* copies of select types from the +``stratix`` package (see ``A:/github/layerlens/ateam``) so that the +LayerLens instrumentation layer can reference them without taking a +runtime dependency on ateam. + +Each module records the source SHA at the top. To refresh a vendored +module: + +1. Re-copy the file from + ``A:/github/layerlens/ateam/stratix/``. +2. Apply the Python 3.9 / Pydantic 2 compatibility shims described in + the comment header of each file. +3. Update the ``Source SHA`` line. +4. Re-run ``pytest tests/instrument`` and ``mypy --strict + src/layerlens/instrument/_vendored/``. 
+ +Do **not** modify these files to add new fields — vendored types must +match ateam's wire shape exactly. New behavior belongs in the adapters +that consume them. +""" + +from __future__ import annotations + +__all__: list[str] = [] diff --git a/src/layerlens/instrument/_vendored/events.py b/src/layerlens/instrument/_vendored/events.py new file mode 100644 index 0000000..f5d9ca8 --- /dev/null +++ b/src/layerlens/instrument/_vendored/events.py @@ -0,0 +1,90 @@ +"""Aggregated re-exports of vendored ``stratix.core.events`` types. + +Source: ``A:/github/layerlens/ateam/stratix/core/events/__init__.py`` +Source SHA: 7359c0e38d74e02aa1b27c34daef7a958abbd002 + +Mirrors the surface that the langgraph and langchain framework adapters +import from ``stratix.core.events`` directly. Only the names that those +adapters actually reference at runtime are re-exported here — anything +else lives in the per-module vendored files. + +Updates require re-vendoring — see ``__init__.py`` for the workflow. +""" + +from __future__ import annotations + +from layerlens.instrument._vendored.events_l1_io import ( + MessageRole, + AgentInputEvent, + AgentOutputEvent, +) +from layerlens.instrument._vendored.events_l3_model import ModelInvokeEvent +from layerlens.instrument._vendored.events_l5_tools import ( + ToolCallEvent, + ToolLogicEvent, + IntegrationType, + ToolEnvironmentEvent, +) +from layerlens.instrument._vendored.events_protocol import ( + SkillInfo, + AgentCardInfo, + AgentCardEvent, + AsyncTaskEvent, + TaskCompletedEvent, + TaskSubmittedEvent, + ProtocolStreamEvent, + McpAppInvocationEvent, + ElicitationRequestEvent, + ElicitationResponseEvent, + StructuredToolOutputEvent, +) +from layerlens.instrument._vendored.events_cross_cutting import ( + StateType, + ViolationType, + CostRecordEvent, + AgentHandoffEvent, + PolicyViolationEvent, + AgentStateChangeEvent, +) +from layerlens.instrument._vendored.events_l4_environment import ( + EnvironmentType, + EnvironmentConfigEvent, + 
EnvironmentMetricsEvent, +) + +__all__ = [ + # L1 + "AgentInputEvent", + "AgentOutputEvent", + "MessageRole", + # L3 + "ModelInvokeEvent", + # L4 + "EnvironmentConfigEvent", + "EnvironmentMetricsEvent", + "EnvironmentType", + # L5 + "ToolCallEvent", + "ToolLogicEvent", + "ToolEnvironmentEvent", + "IntegrationType", + # Cross-cutting + "AgentStateChangeEvent", + "CostRecordEvent", + "PolicyViolationEvent", + "AgentHandoffEvent", + "StateType", + "ViolationType", + # Protocol + "AgentCardEvent", + "AgentCardInfo", + "SkillInfo", + "TaskSubmittedEvent", + "TaskCompletedEvent", + "ProtocolStreamEvent", + "ElicitationRequestEvent", + "ElicitationResponseEvent", + "StructuredToolOutputEvent", + "McpAppInvocationEvent", + "AsyncTaskEvent", +] diff --git a/src/layerlens/instrument/_vendored/events_cross_cutting.py b/src/layerlens/instrument/_vendored/events_cross_cutting.py new file mode 100644 index 0000000..6cfd405 --- /dev/null +++ b/src/layerlens/instrument/_vendored/events_cross_cutting.py @@ -0,0 +1,309 @@ +"""Vendored snapshot of ``stratix.core.events.cross_cutting``. + +Source: ``A:/github/layerlens/ateam/stratix/core/events/cross_cutting.py`` +Source SHA: 7359c0e38d74e02aa1b27c34daef7a958abbd002 + +Compatibility shims applied for Python 3.9 + Pydantic 2: +- ``enum.StrEnum`` (added in Python 3.11) replaced with + ``(str, Enum)`` mixin so the vendored enums behave identically on + Python 3.9. +- PEP-604 union syntax (``X | None``) on Pydantic field annotations + rewritten as ``Optional[X]`` and ``Union[...]`` (Pydantic 2 evaluates + field type hints via ``typing.get_type_hints``, which fails on + Python 3.9 even with ``from __future__ import annotations``). + +Updates require re-vendoring — see ``__init__.py`` for the workflow. 
+""" + +# STRATIX Cross-Cutting Events +# +# From Step 1 specification: +# +# State Change Event: +# { +# "event_type": "agent.state.change", +# "state": { +# "type": "internal | ephemeral", +# "before_hash": "sha256", +# "after_hash": "sha256" +# } +# } +# +# Cost Event: +# { +# "event_type": "cost.record", +# "cost": { +# "tokens": 1423, +# "api_cost_usd": 0.031, +# "infra_cost_usd": "unavailable" +# } +# } +# +# Policy Violation Event: +# { +# "event_type": "policy.violation", +# "violation": { +# "type": "privacy | compliance | safety", +# "root_cause": "string", +# "remediation": "string", +# "failed_layer": "L3", +# "failed_sequence_id": 17 +# } +# } +# +# Multi-Agent Handoff Event: +# { +# "event_type": "agent.handoff", +# "from_agent": "agent_A", +# "to_agent": "agent_B", +# "handoff_context_hash": "sha256" +# } + +from __future__ import annotations + +from enum import Enum +from typing import Any, Union, Optional + +from pydantic import Field, BaseModel, field_validator + + +class StateType(str, Enum): + """Type of agent state.""" + + INTERNAL = "internal" + EPHEMERAL = "ephemeral" + + +class StateInfo(BaseModel): + """State information for state change events.""" + + type: StateType = Field(description="Type of state (internal or ephemeral)") + before_hash: str = Field(description="SHA-256 hash of state before change") + after_hash: str = Field(description="SHA-256 hash of state after change") + + @field_validator("before_hash", "after_hash") + @classmethod + def validate_hash(cls, v: str) -> str: + """Validate hash format.""" + if not v.startswith("sha256:"): + raise ValueError("Hash must start with 'sha256:'") + hex_part = v[7:] + if len(hex_part) != 64: + raise ValueError("Hash must be sha256: followed by 64 hex characters") + return v + + +class AgentStateChangeEvent(BaseModel): + """Cross-Cutting Event: Agent State Change. + + Represents a mutation to agent state. 
+ + NORMATIVE: + - State changes must hash before/after (even if state is redacted) + - Emit on state mutation boundaries + """ + + event_type: str = Field(default="agent.state.change", description="Event type identifier") + state: StateInfo = Field(description="State change information") + + @classmethod + def create( + cls, + state_type: StateType, + before_hash: str, + after_hash: str, + ) -> AgentStateChangeEvent: + """Create a state change event. + + Args: + state_type: Type of state. + before_hash: Hash of state before change. + after_hash: Hash of state after change. + + Returns: + AgentStateChangeEvent instance. + """ + return cls( + state=StateInfo( + type=state_type, + before_hash=before_hash, + after_hash=after_hash, + ) + ) + + +class CostInfo(BaseModel): + """Cost information for cost record events.""" + + tokens: Optional[int] = Field(default=None, ge=0, description="Number of tokens consumed") + prompt_tokens: Optional[int] = Field( + default=None, ge=0, description="Number of prompt tokens" + ) + completion_tokens: Optional[int] = Field( + default=None, ge=0, description="Number of completion tokens" + ) + api_cost_usd: Optional[Union[float, str]] = Field( + default=None, description="API cost in USD (or 'unavailable')" + ) + infra_cost_usd: Optional[Union[float, str]] = Field( + default=None, description="Infrastructure cost in USD (or 'unavailable')" + ) + tool_calls: Optional[int] = Field(default=None, ge=0, description="Number of tool calls") + + +class CostRecordEvent(BaseModel): + """Cross-Cutting Event: Cost Record. + + Represents cost/usage tracking data. 
+ + NORMATIVE: + - Costs must mark unavailable (never omit silently) + - Emit on known cost/usage updates + """ + + event_type: str = Field(default="cost.record", description="Event type identifier") + cost: CostInfo = Field(description="Cost information") + + @classmethod + def create( + cls, + tokens: Optional[int] = None, + prompt_tokens: Optional[int] = None, + completion_tokens: Optional[int] = None, + api_cost_usd: Optional[Union[float, str]] = None, + infra_cost_usd: Optional[Union[float, str]] = None, + tool_calls: Optional[int] = None, + ) -> CostRecordEvent: + """Create a cost record event.""" + return cls( + cost=CostInfo( + tokens=tokens, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + api_cost_usd=api_cost_usd, + infra_cost_usd=infra_cost_usd, + tool_calls=tool_calls, + ) + ) + + +class ViolationType(str, Enum): + """Type of policy violation.""" + + PRIVACY = "privacy" + COMPLIANCE = "compliance" + SAFETY = "safety" + CAPTURE = "capture" # Missing required layer/event + POLICY_CONSTRAINT = "policy_constraint" # Pre-check/policy constraint violation + + +class ViolationInfo(BaseModel): + """Violation information for policy violation events.""" + + type: ViolationType = Field(description="Type of violation") + root_cause: str = Field(description="Root cause of the violation") + remediation: str = Field(description="Suggested remediation action") + failed_layer: Optional[str] = Field(default=None, description="Layer where violation occurred") + failed_sequence_id: Optional[int] = Field( + default=None, description="Sequence ID where violation occurred" + ) + details: dict[str, Any] = Field( + default_factory=dict, description="Additional violation details" + ) + + +class PolicyViolationEvent(BaseModel): + """Cross-Cutting Event: Policy Violation. + + Represents a policy violation that terminates evaluation. 
+ + NORMATIVE: + - Evaluation terminates immediately + - No further hashing occurs after violation + - Must include root_cause, remediation, failed_layer, failed_sequence_id + """ + + event_type: str = Field(default="policy.violation", description="Event type identifier") + violation: ViolationInfo = Field(description="Violation information") + + @classmethod + def create( + cls, + violation_type: ViolationType, + root_cause: str, + remediation: str, + failed_layer: Optional[str] = None, + failed_sequence_id: Optional[int] = None, + details: Optional[dict[str, Any]] = None, + ) -> PolicyViolationEvent: + """Create a policy violation event.""" + return cls( + violation=ViolationInfo( + type=violation_type, + root_cause=root_cause, + remediation=remediation, + failed_layer=failed_layer, + failed_sequence_id=failed_sequence_id, + details=details or {}, + ) + ) + + +class AgentHandoffEvent(BaseModel): + """Cross-Cutting Event: Agent Handoff. + + Represents delegation from one agent to another. 
+ + NORMATIVE: + - Emit when delegating to another agent + - Include context hash/external reference + - Propagate trace context to receiving agent + """ + + event_type: str = Field(default="agent.handoff", description="Event type identifier") + from_agent: str = Field(description="Agent initiating the handoff") + to_agent: str = Field(description="Agent receiving the handoff") + handoff_context_hash: str = Field(description="SHA-256 hash of the handoff context") + context_privacy_level: str = Field( + default="cleartext", description="Privacy level of the handoff context" + ) + + @field_validator("handoff_context_hash") + @classmethod + def validate_hash(cls, v: str) -> str: + """Validate hash format.""" + if not v.startswith("sha256:"): + raise ValueError("Hash must start with 'sha256:'") + hex_part = v[7:] + if len(hex_part) != 64: + raise ValueError("Hash must be sha256: followed by 64 hex characters") + return v + + @classmethod + def create( + cls, + from_agent: str, + to_agent: str, + handoff_context_hash: str, + context_privacy_level: str = "cleartext", + ) -> AgentHandoffEvent: + """Create an agent handoff event.""" + return cls( + from_agent=from_agent, + to_agent=to_agent, + handoff_context_hash=handoff_context_hash, + context_privacy_level=context_privacy_level, + ) + + +__all__ = [ + "StateType", + "StateInfo", + "AgentStateChangeEvent", + "CostInfo", + "CostRecordEvent", + "ViolationType", + "ViolationInfo", + "PolicyViolationEvent", + "AgentHandoffEvent", +] diff --git a/src/layerlens/instrument/_vendored/events_l1_io.py b/src/layerlens/instrument/_vendored/events_l1_io.py new file mode 100644 index 0000000..626b002 --- /dev/null +++ b/src/layerlens/instrument/_vendored/events_l1_io.py @@ -0,0 +1,114 @@ +"""Vendored snapshot of ``stratix.core.events.l1_io``. 
+ +Source: ``A:/github/layerlens/ateam/stratix/core/events/l1_io.py`` +Source SHA: 7359c0e38d74e02aa1b27c34daef7a958abbd002 + +Compatibility shims applied for Python 3.9 + Pydantic 2: +- ``enum.StrEnum`` (added in Python 3.11) replaced with + ``(str, Enum)`` mixin. +- PEP-604 union syntax (``X | None``) on Pydantic field annotations + rewritten as ``Optional[X]``. + +Updates require re-vendoring — see ``__init__.py`` for the workflow. +""" + +# STRATIX Layer 1 Events - Agent Inputs & Outputs +# +# { +# "event_type": "agent.input | agent.output", +# "layer": "L1", +# "content": { +# "role": "human | system | agent", +# "message": "string" +# } +# } + +from __future__ import annotations + +from enum import Enum +from typing import Any, Optional + +from pydantic import Field, BaseModel + + +class MessageRole(str, Enum): + """Role of the message sender.""" + + HUMAN = "human" + SYSTEM = "system" + AGENT = "agent" + + +class MessageContent(BaseModel): + """Content structure for L1 events.""" + + role: MessageRole = Field(description="Role of the message sender") + message: str = Field(description="The message content") + metadata: Optional[dict[str, Any]] = Field( + default=None, description="Optional metadata about the message" + ) + + +class AgentInputEvent(BaseModel): + """Layer 1 Event: Agent Input. + + Represents an inbound message to the agent (from human or system). + + NORMATIVE: Must be emitted for every inbound human/system message. 
+ """ + + event_type: str = Field(default="agent.input", description="Event type identifier") + layer: str = Field(default="L1", description="Layer identifier") + content: MessageContent = Field(description="Message content") + + @classmethod + def create( + cls, + message: str, + role: MessageRole = MessageRole.HUMAN, + metadata: Optional[dict[str, Any]] = None, + ) -> AgentInputEvent: + """Create an agent input event.""" + return cls( + content=MessageContent( + role=role, + message=message, + metadata=metadata, + ) + ) + + +class AgentOutputEvent(BaseModel): + """Layer 1 Event: Agent Output. + + Represents an outbound message from the agent. + + NORMATIVE: Must be emitted for every outbound agent message. + """ + + event_type: str = Field(default="agent.output", description="Event type identifier") + layer: str = Field(default="L1", description="Layer identifier") + content: MessageContent = Field(description="Message content") + + @classmethod + def create( + cls, + message: str, + metadata: Optional[dict[str, Any]] = None, + ) -> AgentOutputEvent: + """Create an agent output event.""" + return cls( + content=MessageContent( + role=MessageRole.AGENT, + message=message, + metadata=metadata, + ) + ) + + +__all__ = [ + "MessageRole", + "MessageContent", + "AgentInputEvent", + "AgentOutputEvent", +] diff --git a/src/layerlens/instrument/_vendored/events_l3_model.py b/src/layerlens/instrument/_vendored/events_l3_model.py new file mode 100644 index 0000000..cfb73f8 --- /dev/null +++ b/src/layerlens/instrument/_vendored/events_l3_model.py @@ -0,0 +1,105 @@ +"""Vendored snapshot of ``stratix.core.events.l3_model``. + +Source: ``A:/github/layerlens/ateam/stratix/core/events/l3_model.py`` +Source SHA: 7359c0e38d74e02aa1b27c34daef7a958abbd002 + +Compatibility shims applied for Python 3.9 + Pydantic 2: +- PEP-604 union syntax (``X | None``) on Pydantic field annotations + rewritten as ``Optional[X]``. + +Updates require re-vendoring — see ``__init__.py`` for the workflow. 
+""" + +# STRATIX Layer 3 Events - Model Metadata +# +# { +# "event_type": "model.invoke", +# "layer": "L3", +# "model": { +# "provider": "string", +# "name": "string", +# "version": "string", +# "parameters": { "temperature": 0.2 } +# } +# } + +from __future__ import annotations + +from typing import Any, Optional + +from pydantic import Field, BaseModel + + +class ModelInfo(BaseModel): + """Model information for L3 events.""" + + provider: str = Field(description="Model provider (e.g., 'openai', 'anthropic')") + name: str = Field(description="Model name (e.g., 'gpt-4', 'claude-3-opus')") + version: str = Field(description="Model version or checkpoint (or 'unavailable')") + parameters: dict[str, Any] = Field( + default_factory=dict, description="Model parameters (temperature, max_tokens, etc.)" + ) + + +class ModelInvokeEvent(BaseModel): + """Layer 3 Event: Model Invoke. + + Represents an LLM model invocation. + + NORMATIVE: + - Must be emitted for every LLM invocation + - One model.invoke per request (no hidden provider calls) + - Model version required (or explicitly 'unavailable') + """ + + event_type: str = Field(default="model.invoke", description="Event type identifier") + layer: str = Field(default="L3", description="Layer identifier") + model: ModelInfo = Field(description="Model information") + prompt_tokens: Optional[int] = Field(default=None, description="Number of prompt tokens") + completion_tokens: Optional[int] = Field( + default=None, description="Number of completion tokens" + ) + total_tokens: Optional[int] = Field(default=None, description="Total number of tokens") + latency_ms: Optional[float] = Field(default=None, description="Latency in milliseconds") + input_messages: Optional[list[dict[str, str]]] = Field( + default=None, description="Input messages sent to the model (opt-in via capture_content)" + ) + output_message: Optional[dict[str, str]] = Field( + default=None, description="Output message from the model (opt-in via capture_content)" +
) + + @classmethod + def create( + cls, + provider: str, + name: str, + version: str = "unavailable", + parameters: Optional[dict[str, Any]] = None, + prompt_tokens: Optional[int] = None, + completion_tokens: Optional[int] = None, + total_tokens: Optional[int] = None, + latency_ms: Optional[float] = None, + input_messages: Optional[list[dict[str, str]]] = None, + output_message: Optional[dict[str, str]] = None, + ) -> ModelInvokeEvent: + """Create a model invoke event.""" + return cls( + model=ModelInfo( + provider=provider, + name=name, + version=version, + parameters=parameters or {}, + ), + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=total_tokens, + latency_ms=latency_ms, + input_messages=input_messages, + output_message=output_message, + ) + + +__all__ = [ + "ModelInfo", + "ModelInvokeEvent", +] diff --git a/src/layerlens/instrument/_vendored/events_l4_environment.py b/src/layerlens/instrument/_vendored/events_l4_environment.py new file mode 100644 index 0000000..b730609 --- /dev/null +++ b/src/layerlens/instrument/_vendored/events_l4_environment.py @@ -0,0 +1,149 @@ +"""Vendored snapshot of ``stratix.core.events.l4_environment``. + +Source: ``A:/github/layerlens/ateam/stratix/core/events/l4_environment.py`` +Source SHA: 7359c0e38d74e02aa1b27c34daef7a958abbd002 + +Compatibility shims applied for Python 3.9 + Pydantic 2: +- ``enum.StrEnum`` (added in Python 3.11) replaced with + ``(str, Enum)`` mixin. +- PEP-604 union syntax (``X | None``) on Pydantic field annotations + rewritten as ``Optional[X]``. + +Updates require re-vendoring — see ``__init__.py`` for the workflow. 
+""" + +# STRATIX Layer 4 Events - Environment Configuration & Metrics +# +# Layer 4a - Environment Configuration: +# { +# "event_type": "environment.config", +# "layer": "L4a", +# "environment": { +# "type": "cloud | on_prem | simulated", +# "region": "string", +# "attributes": { } +# } +# } +# +# Layer 4b - Environment Metrics: +# { +# "event_type": "environment.metrics", +# "layer": "L4b", +# "metrics": { +# "cpu_pct": 42.1, +# "gpu_pct": 77.0, +# "latency_ms": 812 +# } +# } + +from __future__ import annotations + +from enum import Enum +from typing import Any, Optional + +from pydantic import Field, BaseModel + + +class EnvironmentType(str, Enum): + """Type of execution environment.""" + + CLOUD = "cloud" + ON_PREM = "on_prem" + SIMULATED = "simulated" + + +class EnvironmentInfo(BaseModel): + """Environment information for L4a events.""" + + type: EnvironmentType = Field(description="Type of environment") + region: Optional[str] = Field(default=None, description="Geographic region") + attributes: dict[str, Any] = Field( + default_factory=dict, description="Additional environment attributes" + ) + + +class EnvironmentConfigEvent(BaseModel): + """Layer 4a Event: Environment Configuration. + + Represents the execution environment configuration. + + NORMATIVE: Must be emitted at trial start or on runtime change. 
+ """ + + event_type: str = Field(default="environment.config", description="Event type identifier") + layer: str = Field(default="L4a", description="Layer identifier") + environment: EnvironmentInfo = Field(description="Environment configuration") + + @classmethod + def create( + cls, + env_type: EnvironmentType, + region: Optional[str] = None, + attributes: Optional[dict[str, Any]] = None, + ) -> EnvironmentConfigEvent: + """Create an environment configuration event.""" + return cls( + environment=EnvironmentInfo( + type=env_type, + region=region, + attributes=attributes or {}, + ) + ) + + +class EnvironmentMetrics(BaseModel): + """Environment metrics for L4b events.""" + + cpu_pct: Optional[float] = Field( + default=None, ge=0, le=100, description="CPU utilization percentage" + ) + gpu_pct: Optional[float] = Field( + default=None, ge=0, le=100, description="GPU utilization percentage" + ) + memory_pct: Optional[float] = Field( + default=None, ge=0, le=100, description="Memory utilization percentage" + ) + latency_ms: Optional[float] = Field(default=None, ge=0, description="Latency in milliseconds") + additional_metrics: dict[str, float] = Field( + default_factory=dict, description="Additional custom metrics" + ) + + +class EnvironmentMetricsEvent(BaseModel): + """Layer 4b Event: Environment Metrics. + + Represents environment resource metrics during execution. 
+ """ + + event_type: str = Field(default="environment.metrics", description="Event type identifier") + layer: str = Field(default="L4b", description="Layer identifier") + metrics: EnvironmentMetrics = Field(description="Environment metrics") + + @classmethod + def create( + cls, + cpu_pct: Optional[float] = None, + gpu_pct: Optional[float] = None, + memory_pct: Optional[float] = None, + latency_ms: Optional[float] = None, + additional_metrics: Optional[dict[str, float]] = None, + ) -> EnvironmentMetricsEvent: + """Create an environment metrics event.""" + return cls( + metrics=EnvironmentMetrics( + cpu_pct=cpu_pct, + gpu_pct=gpu_pct, + memory_pct=memory_pct, + latency_ms=latency_ms, + additional_metrics=additional_metrics or {}, + ) + ) + + +__all__ = [ + "EnvironmentType", + "EnvironmentInfo", + "EnvironmentConfigEvent", + "EnvironmentMetrics", + "EnvironmentMetricsEvent", +] diff --git a/src/layerlens/instrument/_vendored/events_l5_tools.py b/src/layerlens/instrument/_vendored/events_l5_tools.py new file mode 100644 index 0000000..8d1da61 --- /dev/null +++ b/src/layerlens/instrument/_vendored/events_l5_tools.py @@ -0,0 +1,200 @@ +"""Vendored snapshot of ``stratix.core.events.l5_tools``. + +Source: ``A:/github/layerlens/ateam/stratix/core/events/l5_tools.py`` +Source SHA: 7359c0e38d74e02aa1b27c34daef7a958abbd002 + +Compatibility shims applied for Python 3.9 + Pydantic 2: +- ``enum.StrEnum`` (added in Python 3.11) replaced with + ``(str, Enum)`` mixin. +- PEP-604 union syntax (``X | None``) on Pydantic field annotations + rewritten as ``Optional[X]``. + +Updates require re-vendoring — see ``__init__.py`` for the workflow. 
+""" + +# STRATIX Layer 5 Events - Tool/Action Execution +# +# Layer 5a - Tool/Action Execution: +# { +# "event_type": "tool.call", +# "layer": "L5a", +# "tool": { +# "name": "string", +# "version": "string", +# "integration": "library | service | agent" +# }, +# "input": { }, +# "output": { } +# } +# +# Layer 5b - Tool Business Logic: +# { +# "event_type": "tool.logic", +# "layer": "L5b", +# "logic": { +# "description": "string", +# "rules": ["rule1", "rule2"] +# } +# } +# +# Layer 5c - Tool Environment: +# { +# "event_type": "tool.environment", +# "layer": "L5c", +# "environment": { +# "api": "uri", +# "permissions": ["scope1"] +# } +# } + +from __future__ import annotations + +from enum import Enum +from typing import Any, Optional + +from pydantic import Field, BaseModel + + +class IntegrationType(str, Enum): + """Type of tool integration.""" + + LIBRARY = "library" + SCRIPT = "script" + SERVICE = "service" + AGENT = "agent" + + +class ToolInfo(BaseModel): + """Tool information for L5a events.""" + + name: str = Field(description="Tool name") + version: str = Field(description="Tool version (or 'unavailable')") + integration: IntegrationType = Field(description="Type of integration") + + +class ToolCallEvent(BaseModel): + """Layer 5a Event: Tool Call. + + Represents a tool/action invocation. 
+ + NORMATIVE: + - Must be emitted for every tool/action invocation + - tool.call must include integration type + - tool version required (or explicitly 'unavailable') + """ + + event_type: str = Field(default="tool.call", description="Event type identifier") + layer: str = Field(default="L5a", description="Layer identifier") + tool: ToolInfo = Field(description="Tool information") + input: dict[str, Any] = Field(default_factory=dict, description="Tool input parameters") + output: Optional[dict[str, Any]] = Field( + default=None, description="Tool output (null if error/pending)" + ) + error: Optional[str] = Field(default=None, description="Error message if tool failed") + latency_ms: Optional[float] = Field( + default=None, ge=0, description="Execution latency in milliseconds" + ) + + @classmethod + def create( + cls, + name: str, + version: str = "unavailable", + integration: IntegrationType = IntegrationType.LIBRARY, + input_data: Optional[dict[str, Any]] = None, + output_data: Optional[dict[str, Any]] = None, + error: Optional[str] = None, + latency_ms: Optional[float] = None, + ) -> ToolCallEvent: + """Create a tool call event.""" + return cls( + tool=ToolInfo( + name=name, + version=version, + integration=integration, + ), + input=input_data or {}, + output=output_data, + error=error, + latency_ms=latency_ms, + ) + + +class ToolLogicInfo(BaseModel): + """Tool business logic information for L5b events.""" + + description: str = Field(description="Description of the business logic") + rules: list[str] = Field(default_factory=list, description="Business rules applied") + + +class ToolLogicEvent(BaseModel): + """Layer 5b Event: Tool Business Logic. + + Represents the business logic applied during tool execution. 
+ """ + + event_type: str = Field(default="tool.logic", description="Event type identifier") + layer: str = Field(default="L5b", description="Layer identifier") + logic: ToolLogicInfo = Field(description="Business logic information") + + @classmethod + def create( + cls, + description: str, + rules: Optional[list[str]] = None, + ) -> ToolLogicEvent: + """Create a tool logic event.""" + return cls( + logic=ToolLogicInfo( + description=description, + rules=rules or [], + ) + ) + + +class ToolEnvironmentInfo(BaseModel): + """Tool environment information for L5c events.""" + + api: Optional[str] = Field(default=None, description="API endpoint URI") + permissions: list[str] = Field(default_factory=list, description="Required permissions/scopes") + config: dict[str, Any] = Field( + default_factory=dict, description="Additional environment configuration" + ) + + +class ToolEnvironmentEvent(BaseModel): + """Layer 5c Event: Tool Environment. + + Represents the execution environment for a tool. + """ + + event_type: str = Field(default="tool.environment", description="Event type identifier") + layer: str = Field(default="L5c", description="Layer identifier") + environment: ToolEnvironmentInfo = Field(description="Tool environment information") + + @classmethod + def create( + cls, + api: Optional[str] = None, + permissions: Optional[list[str]] = None, + config: Optional[dict[str, Any]] = None, + ) -> ToolEnvironmentEvent: + """Create a tool environment event.""" + return cls( + environment=ToolEnvironmentInfo( + api=api, + permissions=permissions or [], + config=config or {}, + ) + ) + + +__all__ = [ + "IntegrationType", + "ToolInfo", + "ToolCallEvent", + "ToolLogicInfo", + "ToolLogicEvent", + "ToolEnvironmentInfo", + "ToolEnvironmentEvent", +] diff --git a/src/layerlens/instrument/_vendored/events_protocol.py b/src/layerlens/instrument/_vendored/events_protocol.py new file mode 100644 index 0000000..d56af16 --- /dev/null +++ 
b/src/layerlens/instrument/_vendored/events_protocol.py @@ -0,0 +1,506 @@ +"""Vendored snapshot of ``stratix.core.events.protocol``. + +Source: ``A:/github/layerlens/ateam/stratix/core/events/protocol.py`` +Source SHA: 7359c0e38d74e02aa1b27c34daef7a958abbd002 + +Compatibility shims applied for Python 3.9 + Pydantic 2: +- PEP-604 union syntax (``X | None``) on Pydantic field annotations + rewritten as ``Optional[X]`` (Pydantic 2 evaluates field type hints + via ``typing.get_type_hints``, which fails on Python 3.9 even with + ``from __future__ import annotations``). + +Updates require re-vendoring — see ``__init__.py`` for the workflow. +""" + +# STRATIX Protocol Events — Schema v1.2.0 +# +# Nine new event types for agentic protocol standards: +# +# Protocol Discovery (L6a): +# - protocol.agent_card: A2A Agent Card discovery and registration +# +# Protocol Streams (L6b): +# - protocol.stream.event: AG-UI/A2A streaming event +# +# Protocol Lifecycle (L6c): +# - protocol.task.submitted: A2A task submitted (cross-cutting, always enabled) +# - protocol.task.completed: A2A task completed (cross-cutting, always enabled) +# - protocol.async_task: MCP/A2A async task lifecycle (cross-cutting, always enabled) +# +# Tool-Layer Protocol Events (L5a): +# - protocol.elicitation.request: MCP Elicitation server-initiated user input +# - protocol.elicitation.response: MCP Elicitation user response +# - protocol.tool.structured_output: MCP structured tool output +# - protocol.mcp_app.invocation: MCP App interactive UI component + +from __future__ import annotations + +from typing import Any, Optional + +from pydantic import Field, BaseModel + +# --------------------------------------------------------------------------- +# Sub-models +# --------------------------------------------------------------------------- + + +class SkillInfo(BaseModel): + """A skill declared in an A2A Agent Card.""" + + id: str = Field(description="Skill identifier") + name: str = 
Field(description="Human-readable skill name") + description: Optional[str] = Field(default=None, description="Skill description") + tags: list[str] = Field(default_factory=list, description="Skill tags") + examples: list[str] = Field(default_factory=list, description="Example inputs") + + +class AgentCardInfo(BaseModel): + """Parsed content of an A2A Agent Card.""" + + agent_id: str = Field(description="Matches identity envelope agent_id") + name: str = Field(description="Human-readable agent name from the card") + description: Optional[str] = Field(default=None, description="Agent description") + url: str = Field(description="Base URL of the A2A endpoint") + version: str = Field(description="Protocol version declared in the card") + capabilities: dict[str, Any] = Field( + default_factory=dict, + description="Capability flags (streaming, pushNotifications, etc.)", + ) + skills: list[SkillInfo] = Field(default_factory=list, description="Declared skills") + auth_scheme: Optional[str] = Field( + default=None, + description="Authentication scheme: none | bearer | oauth2 | apiKey", + ) + source: str = Field( + default="discovery", + description="How the card was obtained: discovery | registration | refresh", + ) + + +# --------------------------------------------------------------------------- +# L6a — Protocol Discovery +# --------------------------------------------------------------------------- + + +class AgentCardEvent(BaseModel): + """L6a: Emitted when an A2A Agent Card is discovered or registered. + + Captures the full capability advertisement of an A2A-compliant agent. 
+ """ + + event_type: str = Field( + default="protocol.agent_card", + description="Event type identifier", + ) + layer: str = Field(default="L6a", description="Layer identifier") + card: AgentCardInfo = Field(description="Parsed Agent Card content") + + @classmethod + def create( + cls, + agent_id: str, + name: str, + url: str, + version: str, + *, + description: Optional[str] = None, + capabilities: Optional[dict[str, Any]] = None, + skills: Optional[list[SkillInfo]] = None, + auth_scheme: Optional[str] = None, + source: str = "discovery", + ) -> AgentCardEvent: + return cls( + card=AgentCardInfo( + agent_id=agent_id, + name=name, + description=description, + url=url, + version=version, + capabilities=capabilities or {}, + skills=skills or [], + auth_scheme=auth_scheme, + source=source, + ) + ) + + +# --------------------------------------------------------------------------- +# L6c — Protocol Lifecycle (cross-cutting, always enabled) +# --------------------------------------------------------------------------- + + +class TaskSubmittedEvent(BaseModel): + """Cross-cutting: Emitted when an A2A task is submitted. + + Always enabled — task lifecycle events are infrastructure signals. 
+ """ + + event_type: str = Field( + default="protocol.task.submitted", + description="Event type identifier", + ) + task_id: str = Field(description="A2A task identifier") + task_type: Optional[str] = Field( + default=None, + description="Semantic task type (from skill definition)", + ) + submitter_agent_id: Optional[str] = Field( + default=None, + description="Agent submitting the task", + ) + receiver_agent_url: str = Field( + description="A2A endpoint that received the task", + ) + protocol_origin: str = Field( + default="a2a", + description="Protocol origin: a2a | acp", + ) + message_role: str = Field( + default="user", + description="Message role: user | agent", + ) + + @classmethod + def create( + cls, + task_id: str, + receiver_agent_url: str, + *, + task_type: Optional[str] = None, + submitter_agent_id: Optional[str] = None, + protocol_origin: str = "a2a", + message_role: str = "user", + ) -> TaskSubmittedEvent: + return cls( + task_id=task_id, + task_type=task_type, + submitter_agent_id=submitter_agent_id, + receiver_agent_url=receiver_agent_url, + protocol_origin=protocol_origin, + message_role=message_role, + ) + + +class TaskCompletedEvent(BaseModel): + """Cross-cutting: Emitted when an A2A task reaches a terminal state.""" + + event_type: str = Field( + default="protocol.task.completed", + description="Event type identifier", + ) + task_id: str = Field(description="A2A task identifier") + final_status: str = Field( + description="Terminal status: completed | failed | cancelled", + ) + artifact_count: int = Field(default=0, description="Number of artifacts returned") + artifact_hashes: list[str] = Field( + default_factory=list, + description="sha256: per artifact", + ) + error_code: Optional[str] = Field(default=None, description="A2A error code if failed") + error_message: Optional[str] = Field(default=None, description="Error message if failed") + duration_ms: Optional[float] = Field( + default=None, + description="Wall time from submitted to 
completed", + ) + + @classmethod + def create( + cls, + task_id: str, + final_status: str, + *, + artifact_count: int = 0, + artifact_hashes: Optional[list[str]] = None, + error_code: Optional[str] = None, + error_message: Optional[str] = None, + duration_ms: Optional[float] = None, + ) -> TaskCompletedEvent: + return cls( + task_id=task_id, + final_status=final_status, + artifact_count=artifact_count, + artifact_hashes=artifact_hashes or [], + error_code=error_code, + error_message=error_message, + duration_ms=duration_ms, + ) + + +class AsyncTaskEvent(BaseModel): + """Cross-cutting: Emitted for MCP/A2A async task lifecycle transitions. + + Always enabled — async task tracking is critical infrastructure. + """ + + event_type: str = Field( + default="protocol.async_task", + description="Event type identifier", + ) + async_task_id: str = Field(description="Async task identifier") + originating_tool_call_span_id: Optional[str] = Field( + default=None, + description="Links to the originating tool.call span", + ) + status: str = Field( + description="Status: created | running | completed | failed | timeout", + ) + protocol: str = Field(description="Protocol: mcp | a2a") + progress_pct: Optional[float] = Field( + default=None, + description="0.0-100.0 progress if reported", + ) + timeout_ms: Optional[int] = Field(default=None, description="Configured timeout") + elapsed_ms: Optional[float] = Field(default=None, description="Time since creation") + + @classmethod + def create( + cls, + async_task_id: str, + status: str, + protocol: str, + *, + originating_tool_call_span_id: Optional[str] = None, + progress_pct: Optional[float] = None, + timeout_ms: Optional[int] = None, + elapsed_ms: Optional[float] = None, + ) -> AsyncTaskEvent: + return cls( + async_task_id=async_task_id, + status=status, + protocol=protocol, + originating_tool_call_span_id=originating_tool_call_span_id, + progress_pct=progress_pct, + timeout_ms=timeout_ms, + elapsed_ms=elapsed_ms, + ) + + +# 
--------------------------------------------------------------------------- +# L6b — Protocol Streams +# --------------------------------------------------------------------------- + + +class ProtocolStreamEvent(BaseModel): + """L6b: Emitted for each event in an SSE protocol stream. + + High-frequency: gated by CaptureConfig.l6b_protocol_streams. + """ + + event_type: str = Field( + default="protocol.stream.event", + description="Event type identifier", + ) + layer: str = Field(default="L6b", description="Layer identifier") + protocol: str = Field(description="Protocol: agui | a2a") + agui_event_type: Optional[str] = Field( + default=None, + description="AG-UI event type (e.g. TEXT_MESSAGE_CONTENT)", + ) + sequence_in_stream: int = Field( + description="Position within the SSE stream", + ) + payload_summary: Optional[str] = Field( + default=None, + description="Truncated payload for low-verbosity capture", + ) + payload_hash: str = Field(description="sha256 of full payload") + + @classmethod + def create( + cls, + protocol: str, + sequence_in_stream: int, + payload_hash: str, + *, + agui_event_type: Optional[str] = None, + payload_summary: Optional[str] = None, + ) -> ProtocolStreamEvent: + return cls( + protocol=protocol, + agui_event_type=agui_event_type, + sequence_in_stream=sequence_in_stream, + payload_summary=payload_summary, + payload_hash=payload_hash, + ) + + +# --------------------------------------------------------------------------- +# L5a — MCP Extension Events (tool layer) +# --------------------------------------------------------------------------- + + +class ElicitationRequestEvent(BaseModel): + """L5a: Emitted when an MCP server initiates a user input request.""" + + event_type: str = Field( + default="protocol.elicitation.request", + description="Event type identifier", + ) + layer: str = Field(default="L5a", description="Layer identifier") + elicitation_id: str = Field(description="Unique elicitation identifier") + server_name: str = 
Field(description="MCP server that issued the request") + request_title: Optional[str] = Field( + default=None, + description="Human-readable request title", + ) + schema_ref: Optional[str] = Field( + default=None, + description="JSON Schema $id for the requested input", + ) + schema_hash: str = Field(description="sha256 of the request schema") + + @classmethod + def create( + cls, + elicitation_id: str, + server_name: str, + schema_hash: str, + *, + request_title: Optional[str] = None, + schema_ref: Optional[str] = None, + ) -> ElicitationRequestEvent: + return cls( + elicitation_id=elicitation_id, + server_name=server_name, + request_title=request_title, + schema_ref=schema_ref, + schema_hash=schema_hash, + ) + + +class ElicitationResponseEvent(BaseModel): + """L5a: Emitted when a user responds to an MCP elicitation request.""" + + event_type: str = Field( + default="protocol.elicitation.response", + description="Event type identifier", + ) + layer: str = Field(default="L5a", description="Layer identifier") + elicitation_id: str = Field(description="Links to protocol.elicitation.request") + action: str = Field(description="User action: submit | cancel") + response_hash: str = Field( + description="sha256 of the user's response (never cleartext)", + ) + latency_ms: Optional[float] = Field( + default=None, + description="Time from request to response", + ) + + @classmethod + def create( + cls, + elicitation_id: str, + action: str, + response_hash: str, + *, + latency_ms: Optional[float] = None, + ) -> ElicitationResponseEvent: + return cls( + elicitation_id=elicitation_id, + action=action, + response_hash=response_hash, + latency_ms=latency_ms, + ) + + +class StructuredToolOutputEvent(BaseModel): + """L5a: Emitted when an MCP tool returns a structured output. + + Extends tool.call — both events are emitted for structured MCP tool calls. 
+ """ + + event_type: str = Field( + default="protocol.tool.structured_output", + description="Event type identifier", + ) + layer: str = Field(default="L5a", description="Layer identifier") + tool_name: str = Field(description="MCP tool name") + schema_id: Optional[str] = Field( + default=None, + description="JSON Schema $id reference", + ) + schema_hash: str = Field(description="sha256 of the output schema") + validation_passed: bool = Field( + description="Whether output validated against schema", + ) + validation_errors: list[str] = Field( + default_factory=list, + description="Schema validation error messages", + ) + output_hash: str = Field(description="sha256 of the structured output value") + + @classmethod + def create( + cls, + tool_name: str, + schema_hash: str, + validation_passed: bool, + output_hash: str, + *, + schema_id: Optional[str] = None, + validation_errors: Optional[list[str]] = None, + ) -> StructuredToolOutputEvent: + return cls( + tool_name=tool_name, + schema_id=schema_id, + schema_hash=schema_hash, + validation_passed=validation_passed, + validation_errors=validation_errors or [], + output_hash=output_hash, + ) + + +class McpAppInvocationEvent(BaseModel): + """L5a: Emitted when an MCP App (interactive UI component) is invoked.""" + + event_type: str = Field( + default="protocol.mcp_app.invocation", + description="Event type identifier", + ) + layer: str = Field(default="L5a", description="Layer identifier") + app_id: str = Field(description="MCP App identifier") + component_type: str = Field( + description="Component type: form | confirmation | picker | custom", + ) + interaction_result: str = Field( + description="Result: submitted | cancelled | timeout", + ) + parameters_hash: str = Field(description="sha256 of invocation parameters") + result_hash: Optional[str] = Field( + default=None, + description="sha256 of user interaction result", + ) + + @classmethod + def create( + cls, + app_id: str, + component_type: str, + 
interaction_result: str, + parameters_hash: str, + *, + result_hash: Optional[str] = None, + ) -> McpAppInvocationEvent: + return cls( + app_id=app_id, + component_type=component_type, + interaction_result=interaction_result, + parameters_hash=parameters_hash, + result_hash=result_hash, + ) + + +__all__ = [ + "SkillInfo", + "AgentCardInfo", + "AgentCardEvent", + "TaskSubmittedEvent", + "TaskCompletedEvent", + "AsyncTaskEvent", + "ProtocolStreamEvent", + "ElicitationRequestEvent", + "ElicitationResponseEvent", + "StructuredToolOutputEvent", + "McpAppInvocationEvent", +] diff --git a/src/layerlens/instrument/_vendored/memory_models.py b/src/layerlens/instrument/_vendored/memory_models.py new file mode 100644 index 0000000..06ff615 --- /dev/null +++ b/src/layerlens/instrument/_vendored/memory_models.py @@ -0,0 +1,95 @@ +"""Vendored snapshot of ``stratix.memory.models``. + +Source: ``A:/github/layerlens/ateam/stratix/memory/models.py`` +Source SHA: 7359c0e38d74e02aa1b27c34daef7a958abbd002 + +Compatibility shims applied for Python 3.9 + Pydantic 2: +- ``datetime.UTC`` (added in Python 3.11) replaced with the + ``timezone.utc`` alias so ``datetime.now(UTC)`` keeps working. +- PEP-604 union syntax (``X | None``) on Pydantic field annotations + rewritten as ``Optional[X]``. + +Updates require re-vendoring — see ``__init__.py`` for the workflow. +""" + +# STRATIX Agent Memory — Pydantic Models +# +# Data models for persistent long-term agent memory: entries, queries, +# consolidation results, and usage statistics. + +from __future__ import annotations + +from uuid import uuid4 +from typing import Any, Literal, Optional +from datetime import datetime, timezone + +from pydantic import Field, BaseModel + +UTC = timezone.utc # Python 3.11+ has datetime.UTC; alias for 3.9/3.10 compat. 
+ + +class MemoryEntry(BaseModel): + """A single memory record stored for an agent.""" + + id: str = Field(default_factory=lambda: str(uuid4())) + org_id: str + agent_id: str + memory_type: Literal["episodic", "semantic", "procedural", "working"] + namespace: str = "default" + key: str + content: str + embedding_hash: Optional[str] = None + metadata: dict[str, Any] = Field(default_factory=dict) + importance: float = Field(default=0.5, ge=0.0, le=1.0) + access_count: int = 0 + last_accessed_at: Optional[str] = None + expires_at: Optional[str] = None + created_at: str = Field(default_factory=lambda: datetime.now(UTC).isoformat()) + updated_at: str = Field(default_factory=lambda: datetime.now(UTC).isoformat()) + + +class MemoryQuery(BaseModel): + """Query parameters for memory retrieval.""" + + org_id: str + agent_id: str + namespace: str = "default" + memory_type: Optional[str] = None + key_prefix: Optional[str] = None + min_importance: float = 0.0 + limit: int = Field(default=20, le=100) + include_expired: bool = False + + +class MemoryConsolidation(BaseModel): + """Result of memory consolidation (summarization of old memories).""" + + id: str = Field(default_factory=lambda: str(uuid4())) + org_id: str + agent_id: str + source_memory_ids: list[str] + consolidated_content: str + consolidation_method: str + created_at: str = Field(default_factory=lambda: datetime.now(UTC).isoformat()) + + +class MemoryStats(BaseModel): + """Usage statistics for agent memory.""" + + org_id: str + agent_id: str + total_entries: int + by_type: dict[str, int] + by_namespace: dict[str, int] + avg_importance: float + oldest_entry: Optional[str] + newest_entry: Optional[str] + storage_bytes: int + + +__all__ = [ + "MemoryEntry", + "MemoryQuery", + "MemoryConsolidation", + "MemoryStats", +] diff --git a/src/layerlens/instrument/adapters/__init__.py b/src/layerlens/instrument/adapters/__init__.py new file mode 100644 index 0000000..560b3fb --- /dev/null +++ 
b/src/layerlens/instrument/adapters/__init__.py @@ -0,0 +1,42 @@ +"""Adapter implementations and the shared base layer. + +The ``_base`` subpackage contains the abstract :class:`BaseAdapter`, +:class:`AdapterRegistry`, :class:`CaptureConfig`, and :class:`EventSink` +classes that every concrete adapter depends on. Concrete adapters live +under ``frameworks/`` (LangChain, LangGraph, etc.), ``protocols/`` (A2A, +AGUI, MCP, etc.), and ``providers/`` (OpenAI, Anthropic, etc.). + +The base layer has no optional dependencies — it works with only the +SDK's core ``pydantic`` requirement. Concrete adapters declare their own +optional ``[project.optional-dependencies]`` groups in ``pyproject.toml``. +""" + +from __future__ import annotations + +from layerlens.instrument.adapters._base import ( + EventSink, + AdapterInfo, + BaseAdapter, + AdapterHealth, + AdapterStatus, + CaptureConfig, + TraceStoreSink, + AdapterRegistry, + ReplayableTrace, + AdapterCapability, + IngestionPipelineSink, +) + +__all__ = [ + "AdapterCapability", + "AdapterHealth", + "AdapterInfo", + "AdapterRegistry", + "AdapterStatus", + "BaseAdapter", + "CaptureConfig", + "EventSink", + "IngestionPipelineSink", + "ReplayableTrace", + "TraceStoreSink", +] diff --git a/src/layerlens/instrument/adapters/_base/__init__.py b/src/layerlens/instrument/adapters/_base/__init__.py new file mode 100644 index 0000000..e1008fe --- /dev/null +++ b/src/layerlens/instrument/adapters/_base/__init__.py @@ -0,0 +1,49 @@ +"""Shared base layer for all LayerLens adapters. 
+ +Re-exports the public surface so adapter modules and external callers +import from a single, stable path:: + + from layerlens.instrument.adapters._base import BaseAdapter, CaptureConfig +""" + +from __future__ import annotations + +from layerlens.instrument.adapters._base.sinks import ( + EventSink, + TraceStoreSink, + IngestionPipelineSink, +) +from layerlens.instrument.adapters._base.adapter import ( + AdapterInfo, + BaseAdapter, + AdapterHealth, + AdapterStatus, + ReplayableTrace, + AdapterCapability, +) +from layerlens.instrument.adapters._base.capture import ( + ALWAYS_ENABLED_EVENT_TYPES, + CaptureConfig, +) +from layerlens.instrument.adapters._base.registry import AdapterRegistry +from layerlens.instrument.adapters._base.pydantic_compat import ( + PydanticCompat, + requires_pydantic, +) + +__all__ = [ + "ALWAYS_ENABLED_EVENT_TYPES", + "AdapterCapability", + "AdapterHealth", + "AdapterInfo", + "AdapterRegistry", + "AdapterStatus", + "BaseAdapter", + "CaptureConfig", + "EventSink", + "IngestionPipelineSink", + "PydanticCompat", + "ReplayableTrace", + "TraceStoreSink", + "requires_pydantic", +] diff --git a/src/layerlens/instrument/adapters/_base/adapter.py b/src/layerlens/instrument/adapters/_base/adapter.py new file mode 100644 index 0000000..9fcebe8 --- /dev/null +++ b/src/layerlens/instrument/adapters/_base/adapter.py @@ -0,0 +1,523 @@ +"""LayerLens Base Adapter. + +Provides the abstract :class:`BaseAdapter` class that all framework +adapters must extend. Implements circuit-breaker-protected event +emission, :class:`CaptureConfig` filtering, lifecycle management, and +replay serialization. + +Ported from ``ateam/stratix/sdk/python/adapters/base.py`` with the +following adaptations for the ``stratix-python`` SDK: + +* ``StrEnum`` (3.11+) replaced with ``(str, Enum)`` mixin (3.8+ compat). +* Pydantic imports routed through ``layerlens._compat.pydantic`` so v1 + and v2 are both supported. 
+* Payload serialization uses ``layerlens._compat.pydantic.model_dump`` + (handles v1 ``.dict()`` vs v2 ``.model_dump()``). +""" + +from __future__ import annotations + +import time +import logging +import threading +from abc import ABC, abstractmethod +from enum import Enum +from typing import TYPE_CHECKING, Any, Dict, List, Optional + +if TYPE_CHECKING: + from layerlens.instrument.adapters._base.sinks import EventSink + +from layerlens._compat.pydantic import Field, BaseModel, model_dump +from layerlens.instrument.adapters._base.capture import ( + ALWAYS_ENABLED_EVENT_TYPES, + CaptureConfig, +) +from layerlens.instrument.adapters._base.pydantic_compat import PydanticCompat + +# Forward reference: EventSink is defined in sinks.py. To avoid circular +# imports through the package's _base/__init__.py, this module never imports +# sinks.py at runtime: EventSink is imported only under TYPE_CHECKING (see +# above) and is referenced through string annotations in the BaseAdapter +# constructor and the public sink methods (add_sink, remove_sink, sinks). 
+ +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Enums & Models +# --------------------------------------------------------------------------- + + +class AdapterStatus(str, Enum): + """Health status of an adapter.""" + + HEALTHY = "healthy" + DEGRADED = "degraded" + DISCONNECTED = "disconnected" + ERROR = "error" + + +class AdapterCapability(str, Enum): + """Capabilities an adapter may declare.""" + + TRACE_TOOLS = "trace_tools" + TRACE_MODELS = "trace_models" + TRACE_STATE = "trace_state" + TRACE_HANDOFFS = "trace_handoffs" + TRACE_PROTOCOL_EVENTS = "trace_protocol_events" + REPLAY = "replay" + STREAMING = "streaming" + + +class AdapterHealth(BaseModel): + """Snapshot of adapter health.""" + + status: AdapterStatus = Field(description="Current status") + framework_name: str = Field(description="Framework this adapter targets") + framework_version: Optional[str] = Field(default=None, description="Detected framework version") + adapter_version: str = Field(description="Adapter version string") + message: Optional[str] = Field(default=None, description="Human-readable status detail") + error_count: int = Field(default=0, description="Consecutive error count") + circuit_open: bool = Field(default=False, description="True if circuit breaker is open") + + +class AdapterInfo(BaseModel): + """Metadata describing an adapter.""" + + name: str = Field(description="Adapter name") + version: str = Field(description="Adapter version") + framework: str = Field(description="Target framework name") + framework_version: Optional[str] = Field(default=None, description="Detected framework version") + capabilities: List[AdapterCapability] = Field(default_factory=list) + author: str = Field(default="LayerLens") + description: str = Field(default="") + requires_pydantic: PydanticCompat = Field( + default=PydanticCompat.V1_OR_V2, + description=( + "Declared Pydantic major-version compatibility. 
Surfaced in the " + "manifest so the atlas-app catalog UI can warn users before they " + "pin an incompatible runtime." + ), + ) + + +class ReplayableTrace(BaseModel): + """A trace serialized for replay. + + Contains enough information to re-execute the original agent run + with identical or modified inputs. + """ + + adapter_name: str = Field(description="Adapter that produced the trace") + framework: str = Field(description="Framework used") + trace_id: str = Field(description="Original trace ID") + events: List[Dict[str, Any]] = Field(default_factory=list, description="Ordered event dicts") + state_snapshots: List[Dict[str, Any]] = Field( + default_factory=list, + description="Checkpoint state snapshots", + ) + config: Dict[str, Any] = Field( + default_factory=dict, + description="Adapter/framework config at time of trace", + ) + metadata: Dict[str, Any] = Field(default_factory=dict) + + +# --------------------------------------------------------------------------- +# Null-object sentinel +# --------------------------------------------------------------------------- + + +class _NullStratix: + """Null-object sentinel used when an adapter is constructed without a + LayerLens client instance. + + Silently discards all calls so adapters can still be used stand-alone + or in tests. Evaluates to falsy so ``if self._stratix:`` guards work + correctly. 
+ """ + + def __bool__(self) -> bool: + return False + + def emit(self, *args: Any, **kwargs: Any) -> None: + pass + + def _emit_event(self, *args: Any, **kwargs: Any) -> None: + pass + + @property + def agent_id(self) -> str: + return "null" + + @property + def framework(self) -> Optional[str]: + return None + + @property + def is_policy_violated(self) -> bool: + return False + + +_NULL_STRATIX = _NullStratix() + + +# --------------------------------------------------------------------------- +# Circuit breaker constants +# --------------------------------------------------------------------------- + +_CIRCUIT_BREAKER_THRESHOLD = 10 # consecutive errors before opening +_CIRCUIT_BREAKER_COOLDOWN_S = 60.0 # seconds before attempting recovery + + +# --------------------------------------------------------------------------- +# BaseAdapter ABC +# --------------------------------------------------------------------------- + + +class BaseAdapter(ABC): + """Abstract base class for all LayerLens framework adapters. + + Provides: + + * Circuit-breaker-protected :meth:`emit_event`. + * :class:`CaptureConfig` filtering. + * Lifecycle management (:meth:`connect` / :meth:`disconnect` / :meth:`health_check`). + * Replay serialization hook (:meth:`serialize_for_replay`). + """ + + # Subclasses MUST set these. + FRAMEWORK: str = "" + VERSION: str = "0.0.0" + + # Per-adapter Pydantic v1/v2 compatibility declaration (Round-2 item 20). + # Subclasses MUST set this explicitly to one of the three + # :class:`PydanticCompat` values — the lint test in + # ``tests/instrument/adapters/test_pydantic_compat.py`` enforces that + # no framework adapter relies on the V1_OR_V2 default by accident. 
+ requires_pydantic: PydanticCompat = PydanticCompat.V1_OR_V2 + + def __init__( + self, + stratix: Any = None, + capture_config: Optional[CaptureConfig] = None, + event_sinks: Optional[List["EventSink"]] = None, + ) -> None: + self._stratix = stratix or _NULL_STRATIX + self._capture_config = capture_config or CaptureConfig() + self._connected = False + self._status: AdapterStatus = AdapterStatus.DISCONNECTED + + # Circuit breaker state (protected by _lock). + self._lock = threading.Lock() + self._error_count = 0 + self._circuit_open = False + self._circuit_opened_at: float = 0.0 + + # Collected events for replay serialization. + self._trace_events: List[Dict[str, Any]] = [] + + # Pluggable event sinks for persistence / export. Use add_sink / + # remove_sink to mutate; direct list manipulation is not part of + # the public API and may change in v2. + self._event_sinks: List["EventSink"] = list(event_sinks) if event_sinks else [] + + # --- Sink management (public API) --- + + def add_sink(self, sink: "EventSink") -> None: + """Register an :class:`EventSink` to receive emitted events. + + Sinks are dispatched in registration order. A sink that raises + from ``send`` / ``flush`` / ``close`` is logged at DEBUG and + does not affect other sinks or the adapter's emission path. + """ + self._event_sinks.append(sink) + + def remove_sink(self, sink: "EventSink") -> bool: + """Remove a previously-registered sink. + + Returns ``True`` if the sink was present, ``False`` otherwise. 
+ """ + try: + self._event_sinks.remove(sink) + return True + except ValueError: + return False + + @property + def sinks(self) -> List["EventSink"]: + """Snapshot of currently-registered sinks (defensive copy).""" + return list(self._event_sinks) + + # --- Properties --- + + @property + def is_connected(self) -> bool: + """True when the adapter has a live connection to its framework.""" + return self._connected + + @property + def status(self) -> AdapterStatus: + return self._status + + @property + def capture_config(self) -> CaptureConfig: + return self._capture_config + + @property + def has_stratix(self) -> bool: + """True when a real (non-null) client instance is attached.""" + return bool(self._stratix) + + # --- Abstract lifecycle methods --- + + @abstractmethod + def connect(self) -> None: + """Verify framework availability and prepare the adapter. + + Implementations should import the framework, validate the + version, and set ``self._connected = True`` / + ``self._status = AdapterStatus.HEALTHY``. + """ + + @abstractmethod + def disconnect(self) -> None: + """Flush pending events and release resources. + + Implementations should set ``self._connected = False`` and + ``self._status = AdapterStatus.DISCONNECTED``. + """ + + @abstractmethod + def health_check(self) -> AdapterHealth: + """Return a health snapshot.""" + + @abstractmethod + def get_adapter_info(self) -> AdapterInfo: + """Return metadata about this adapter.""" + + def info(self) -> AdapterInfo: + """Return :class:`AdapterInfo` with the class-level compat decl applied. + + Subclasses populate the bulk of :class:`AdapterInfo` via + :meth:`get_adapter_info`. This wrapper guarantees the + ``requires_pydantic`` field reflects the subclass class attribute + even when the subclass omits it from its constructor call — + avoiding the need to repeat the value at every site. Used by + :meth:`AdapterRegistry.info` and the manifest emitter. 
+ """ + base_info = self.get_adapter_info() + if base_info.requires_pydantic != self.requires_pydantic: + try: + # Pydantic v2 path: copy with overrides. + base_info = base_info.model_copy(update={"requires_pydantic": self.requires_pydantic}) + except AttributeError: + # Pydantic v1 path. + base_info = base_info.copy(update={"requires_pydantic": self.requires_pydantic}) + return base_info + + @abstractmethod + def serialize_for_replay(self) -> ReplayableTrace: + """Serialize the current trace data for replay.""" + + # --- Replay execution hook --- + + async def execute_replay( + self, + inputs: Dict[str, Any], + original_trace: Any, + request: Any, + replay_trace_id: str, + ) -> Any: + """Re-execute through this adapter's framework. + + Subclasses override this to provide actual re-execution. The + default raises :class:`NotImplementedError` (synthetic replay + used instead). + + Args: + inputs: Reconstructed inputs for the replay. + original_trace: The original SerializedTrace. + request: The ReplayRequest. + replay_trace_id: ID for the new replay trace. + + Returns: + A SerializedTrace from the replay execution. + + Raises: + NotImplementedError: If the adapter does not support replay. + """ + raise NotImplementedError(f"{self.__class__.__name__} does not support execute_replay()") + + # --- Concrete event emission --- + + def emit_event( + self, + payload: Any, + privacy_level: Any = None, + ) -> None: + """Emit a typed event payload through the LayerLens pipeline. + + This method: + + 1. Checks the circuit breaker — drops events if open (unless + cooldown expired). + 2. Checks :class:`CaptureConfig` — silently drops events whose + layer is disabled (cross-cutting events are never dropped). + 3. Delegates to ``self._stratix.emit(payload, privacy_level)`` + with error counting for circuit-breaker state management. + + Args: + payload: A Pydantic event payload (e.g., + ``ToolCallEvent.create(...)``). + privacy_level: Optional ``PrivacyLevel`` override. 
+ """ + event_type = getattr(payload, "event_type", None) + + if not self._pre_emit_check(event_type): + return + + try: + if privacy_level is not None: + self._stratix.emit(payload, privacy_level) + else: + self._stratix.emit(payload) + + self._post_emit_success(event_type, payload) + except Exception: + self._post_emit_failure() + + def emit_dict_event( + self, + event_type: str, + payload: Dict[str, Any], + ) -> None: + """Emit a dict-based event through the LayerLens pipeline. + + Provides the same circuit-breaker and CaptureConfig gating as + :meth:`emit_event` but accepts raw ``(event_type, dict)`` pairs + used by the legacy adapter emission path. This avoids bypassing + the BaseAdapter protections. + + Args: + event_type: Event type string (e.g., ``"model.invoke"``). + payload: Raw event payload dict. + """ + if not self._pre_emit_check(event_type): + return + + try: + self._stratix.emit(event_type, payload) + self._post_emit_success(event_type, payload) + except Exception: + self._post_emit_failure() + + # --- Circuit breaker internals --- + + def _pre_emit_check(self, event_type: Optional[str]) -> bool: + """Run circuit-breaker and CaptureConfig checks. + + Returns ``True`` to proceed with emission. + """ + with self._lock: + if self._circuit_open and not self._attempt_recovery(): + return False + + if event_type and event_type not in ALWAYS_ENABLED_EVENT_TYPES: + # ``is_layer_enabled`` itself handles cross-cutting layer + # families (commerce.* etc.) via prefix bypass — see + # capture.py. The early-out above only catches exact + # matches in the freeze-listed set. 
+ if not self._capture_config.is_layer_enabled(event_type): + return False + + return True + + def _post_emit_success(self, event_type: Optional[str], payload: Any) -> None: + """Handle successful emission: reset errors, record for replay.""" + with self._lock: + if self._error_count > 0: + self._error_count = 0 + if self._status == AdapterStatus.DEGRADED: + self._status = AdapterStatus.HEALTHY + + if event_type: + try: + payload_data = model_dump(payload) + except Exception: + payload_data = {"raw": str(payload)} + timestamp_ns = time.time_ns() + self._trace_events.append( + { + "event_type": event_type, + "payload": payload_data, + "timestamp_ns": timestamp_ns, + } + ) + + # Dispatch to pluggable event sinks. + if self._event_sinks: + for sink in self._event_sinks: + try: + sink.send(event_type, payload_data, timestamp_ns) + except Exception: + logger.debug( + "EventSink %s.send() failed", + type(sink).__name__, + exc_info=True, + ) + + def _post_emit_failure(self) -> None: + """Handle emission failure: increment errors, maybe open circuit.""" + with self._lock: + self._error_count += 1 + logger.debug( + "Adapter %s emit error #%d", + self.FRAMEWORK, + self._error_count, + exc_info=True, + ) + if self._error_count >= _CIRCUIT_BREAKER_THRESHOLD: + self._circuit_open = True + self._circuit_opened_at = time.monotonic() + self._status = AdapterStatus.ERROR + logger.warning( + "Adapter %s circuit breaker OPEN after %d consecutive errors", + self.FRAMEWORK, + self._error_count, + ) + elif self._error_count >= _CIRCUIT_BREAKER_THRESHOLD // 2: + self._status = AdapterStatus.DEGRADED + + def _attempt_recovery(self) -> bool: + """Check if the circuit-breaker cooldown has elapsed. + + Caller MUST hold ``self._lock``. + + Returns: + ``True`` if the circuit is now closed (ready to emit). + ``False`` if still open. 
+        """
+        elapsed = time.monotonic() - self._circuit_opened_at
+        if elapsed >= _CIRCUIT_BREAKER_COOLDOWN_S:
+            self._circuit_open = False
+            self._error_count = 0
+            self._status = AdapterStatus.DEGRADED
+            logger.info("Adapter %s circuit breaker attempting recovery", self.FRAMEWORK)
+            return True
+        return False
+
+    # --- Event sink lifecycle ---
+
+    def _close_sinks(self) -> None:
+        """Flush and close every attached sink; ``close`` runs even if ``flush`` fails."""
+        for sink in self._event_sinks:
+            try:
+                try:
+                    sink.flush()
+                finally:
+                    # close() must run even when flush() raised; any Exception is
+                    # logged at DEBUG below and never propagates to the caller.
+                    sink.close()
+            except Exception:
+                logger.debug("EventSink %s close failed", type(sink).__name__, exc_info=True)
diff --git a/src/layerlens/instrument/adapters/_base/capture.py b/src/layerlens/instrument/adapters/_base/capture.py
new file mode 100644
index 0000000..51defd2
--- /dev/null
+++ b/src/layerlens/instrument/adapters/_base/capture.py
@@ -0,0 +1,281 @@
+"""LayerLens Capture Configuration.
+
+Defines the :class:`CaptureConfig` model that controls which telemetry
+layers are active for a given adapter instance.
+
+Layer Mapping:
+    L1: Agent I/O (agent.input, agent.output)
+    L2: Agent Code (agent.code)
+    L3: Model Metadata (model.invoke)
+    L4a: Environment Configuration (environment.config)
+    L4b: Environment Metrics (environment.metrics)
+    L5a: Tool/Action Execution (tool.call)
+    L5b: Tool Business Logic (tool.logic)
+    L5c: Tool Environment (tool.environment)
+    L6a: Protocol Discovery (A2A Agent Cards)
+    L6b: Protocol Streams (AGUI chunks, A2A SSE)
+    L6c: Protocol Lifecycle (A2A tasks, async tasks)
+
+Cross-cutting events (``agent.state.change``, ``cost.record``,
+``policy.violation``, ``agent.handoff``) are always enabled and cannot
+be disabled.
+
+Ported from ``ateam/stratix/sdk/python/adapters/capture.py``.
+"""
+
+from __future__ import annotations
+
+import os
+
+from layerlens._compat.pydantic import Field, BaseModel
+
+# Layers that cannot be disabled.
+_CROSS_CUTTING_LAYERS = frozenset( + { + "cross_cutting_state", + "cross_cutting_cost", + "cross_cutting_policy", + "cross_cutting_handoff", + } +) + +# Event types that are always emitted regardless of config. +# +# Commerce-namespace events (``commerce.payment.*``, ``commerce.ui.*``, +# ``commerce.supplier.*``) emitted by the AP2 / A2UI / UCP protocol +# adapters are added here because they are cross-cutting integrity / +# compliance signals (payment auth, mandate creation, supplier callback +# events) that customers would not expect to be silently dropped by a +# default ``CaptureConfig``. See coverage-deepening report 2026-04-25 — +# the protocol-coverage agent surfaced this gap when test fixtures +# revealed events were vanishing before reaching ``Stratix.emit``. +ALWAYS_ENABLED_EVENT_TYPES = frozenset( + { + "agent.state.change", + "cost.record", + "policy.violation", + "agent.handoff", + "evaluation.result", + "protocol.task.submitted", + "protocol.task.completed", + "protocol.async_task", + # Commerce-namespace events from AP2 / A2UI / UCP. The frozenset + # only contains exact event-type strings, so we list the family + # heads here — adapters that emit nested types still must use + # one of these head names or call ``emit_dict_event`` with the + # commerce-prefix variant (which the layer-gate will pass via + # the prefix check below). + "commerce.payment.created", + "commerce.payment.authorized", + "commerce.payment.failed", + "commerce.intent.created", + "commerce.mandate.created", + "commerce.mandate.revoked", + "commerce.ui.action", + "commerce.ui.element", + "commerce.supplier.event", + "commerce.supplier.callback", + } +) + +# Event-type prefixes that bypass the layer gate. Used in addition to +# ``ALWAYS_ENABLED_EVENT_TYPES`` for commerce events whose subtypes +# proliferate beyond the explicit set above. +_ALWAYS_ENABLED_PREFIXES = ("commerce.",) + + +class CaptureConfig(BaseModel): + """Controls which telemetry layers are active. 
+ + Each boolean flag corresponds to a LayerLens capture layer. When a + flag is False, the adapter's :meth:`BaseAdapter.emit_event` silently + drops events for that layer instead of forwarding them to the + LayerLens pipeline. + + Cross-cutting events (state changes, cost records, policy violations, + handoffs) are always enabled and cannot be gated. + """ + + l1_agent_io: bool = Field( + default=True, + description="L1: Agent input/output messages", + ) + l2_agent_code: bool = Field( + default=False, + description="L2: Agent code artifacts and hashes", + ) + l3_model_metadata: bool = Field( + default=True, + description="L3: Model invocation metadata", + ) + l4a_environment_config: bool = Field( + default=True, + description="L4a: Environment configuration snapshots", + ) + l4b_environment_metrics: bool = Field( + default=False, + description="L4b: Environment runtime metrics", + ) + l5a_tool_calls: bool = Field( + default=True, + description="L5a: Tool/action call input/output", + ) + l5b_tool_logic: bool = Field( + default=False, + description="L5b: Tool business logic details", + ) + l5c_tool_environment: bool = Field( + default=False, + description="L5c: Tool environment details", + ) + l6a_protocol_discovery: bool = Field( + default=True, + description="L6a: Protocol discovery events (A2A Agent Cards).", + ) + l6b_protocol_streams: bool = Field( + default=True, + description=( + "L6b: Protocol stream events (AG-UI chunks, A2A SSE). " + "Set to False to capture only stream start/end events." + ), + ) + l6c_protocol_lifecycle: bool = Field( + default=True, + description="L6c: Protocol lifecycle events (A2A tasks, async tasks).", + ) + capture_content: bool = Field( + default=True, + description="Capture LLM message content on model.invoke events", + ) + + @property + def otel_capture_content(self) -> bool: + """Check if OTel content capture is enabled via env var. 
+
+        Content appears in OTel spans only when BOTH ``capture_content``
+        AND the ``OTEL_GENAI_CAPTURE_MESSAGE_CONTENT`` env var are true.
+        """
+        env_val = os.environ.get("OTEL_GENAI_CAPTURE_MESSAGE_CONTENT", "").lower()
+        return self.capture_content and env_val == "true"
+
+    def is_layer_enabled(self, layer: str) -> bool:
+        """Check whether a given layer is enabled.
+
+        Cross-cutting events always return True.
+
+        Args:
+            layer: Layer identifier. Accepted formats:
+
+                * Boolean attribute names: ``"l1_agent_io"``, ``"l3_model_metadata"``, ...
+                * Short labels: ``"L1"``, ``"L3"``, ``"L5a"``, ...
+                * Event types: ``"agent.input"``, ``"model.invoke"``, ...
+
+        Returns:
+            ``True`` if the layer is enabled or is a cross-cutting event.
+        """
+        if layer in _CROSS_CUTTING_LAYERS or layer in ALWAYS_ENABLED_EVENT_TYPES:
+            return True
+        # Prefix bypass for commerce.* and similar cross-cutting families.
+        if layer.startswith(_ALWAYS_ENABLED_PREFIXES):
+            return True
+
+        flag = getattr(self, layer, None)
+        if isinstance(flag, bool):  # methods/classmethods (e.g. ``full``) are not flags
+            return flag
+
+        label_map = {
+            "L1": "l1_agent_io",
+            "L2": "l2_agent_code",
+            "L3": "l3_model_metadata",
+            "L4a": "l4a_environment_config",
+            "L4b": "l4b_environment_metrics",
+            "L5a": "l5a_tool_calls",
+            "L5b": "l5b_tool_logic",
+            "L5c": "l5c_tool_environment",
+            "L6a": "l6a_protocol_discovery",
+            "L6b": "l6b_protocol_streams",
+            "L6c": "l6c_protocol_lifecycle",
+        }
+        if layer in label_map:
+            return bool(getattr(self, label_map[layer]))
+
+        event_type_map = {
+            "agent.input": "l1_agent_io",
+            "agent.output": "l1_agent_io",
+            "agent.lifecycle": "l1_agent_io",
+            "agent.identity": "l1_agent_io",
+            "agent.interaction": "l1_agent_io",
+            "agent.code": "l2_agent_code",
+            "model.invoke": "l3_model_metadata",
+            "environment.config": "l4a_environment_config",
+            "environment.metrics": "l4b_environment_metrics",
+            "tool.call": "l5a_tool_calls",
+            "tool.logic": "l5b_tool_logic",
+            "tool.environment": "l5c_tool_environment",
+            "protocol.agent_card":
"l6a_protocol_discovery", + "protocol.stream.event": "l6b_protocol_streams", + "protocol.elicitation.request": "l5a_tool_calls", + "protocol.elicitation.response": "l5a_tool_calls", + "protocol.tool.structured_output": "l5a_tool_calls", + "protocol.mcp_app.invocation": "l5a_tool_calls", + # Embedding & Vector Store adapters + "embedding.create": "l3_model_metadata", + "retrieval.query": "l5a_tool_calls", + } + if layer in event_type_map: + return bool(getattr(self, event_type_map[layer])) + + # Unknown layers default to disabled (safe-by-default). + return False + + @classmethod + def minimal(cls) -> "CaptureConfig": + """L1 only — lightweight production telemetry.""" + return cls( + l1_agent_io=True, + l2_agent_code=False, + l3_model_metadata=False, + l4a_environment_config=False, + l4b_environment_metrics=False, + l5a_tool_calls=False, + l5b_tool_logic=False, + l5c_tool_environment=False, + l6a_protocol_discovery=True, + l6b_protocol_streams=False, + l6c_protocol_lifecycle=True, + capture_content=False, + ) + + @classmethod + def standard(cls) -> "CaptureConfig": + """L1 + L3 + L4a + L5a + L6 — recommended for most deployments.""" + return cls( + l1_agent_io=True, + l2_agent_code=False, + l3_model_metadata=True, + l4a_environment_config=True, + l4b_environment_metrics=False, + l5a_tool_calls=True, + l5b_tool_logic=False, + l5c_tool_environment=False, + l6a_protocol_discovery=True, + l6b_protocol_streams=True, + l6c_protocol_lifecycle=True, + ) + + @classmethod + def full(cls) -> "CaptureConfig": + """All layers enabled — development/debugging.""" + return cls( + l1_agent_io=True, + l2_agent_code=True, + l3_model_metadata=True, + l4a_environment_config=True, + l4b_environment_metrics=True, + l5a_tool_calls=True, + l5b_tool_logic=True, + l5c_tool_environment=True, + l6a_protocol_discovery=True, + l6b_protocol_streams=True, + l6c_protocol_lifecycle=True, + ) diff --git a/src/layerlens/instrument/adapters/_base/pydantic_compat.py 
b/src/layerlens/instrument/adapters/_base/pydantic_compat.py new file mode 100644 index 0000000..638748c --- /dev/null +++ b/src/layerlens/instrument/adapters/_base/pydantic_compat.py @@ -0,0 +1,122 @@ +"""Per-adapter Pydantic version compatibility declarations. + +Round-2 deliberation item 20: surface each adapter's Pydantic v1 / v2 / +both compatibility so that importing a v2-only adapter under a v1-pinned +runtime fails fast with a clear message instead of producing a confusing +``ImportError`` deep inside the framework SDK. + +Three values exist: + +* :attr:`PydanticCompat.V1_ONLY` — adapter or its underlying framework + uses Pydantic v1 idioms (``@root_validator``, ``model.dict()``, + ``Config`` inner class) that break under v2. +* :attr:`PydanticCompat.V2_ONLY` — adapter or its underlying framework + uses v2-only API surface (``@field_validator``, ``@model_validator``, + ``model.model_dump()``, ``Annotated`` constraints, etc.). Pinning a v1 + Pydantic with this adapter raises at import. +* :attr:`PydanticCompat.V1_OR_V2` — adapter is Pydantic-version-agnostic. + Either it imports nothing from ``pydantic`` directly, or it routes all + Pydantic access through :mod:`layerlens._compat.pydantic`. + +The :func:`requires_pydantic` helper is meant to be called at adapter +module import time after the version constant is declared:: + + from layerlens.instrument.adapters._base.pydantic_compat import ( + PydanticCompat, + requires_pydantic, + ) + + requires_pydantic(PydanticCompat.V2_ONLY) + +If the runtime pydantic does not satisfy the declaration, the call +raises :class:`RuntimeError` with a message naming the adapter, the +required version, and the installed version. 
+""" + +from __future__ import annotations + +import inspect +from enum import Enum +from typing import Optional + +import pydantic + +from layerlens._compat.pydantic import PYDANTIC_V2 + + +class PydanticCompat(str, Enum): + """Adapter declaration of which Pydantic major versions it supports.""" + + V1_ONLY = "v1_only" + V2_ONLY = "v2_only" + V1_OR_V2 = "v1_or_v2" + + +def _runtime_pydantic_version() -> str: + """Return the installed pydantic version string (e.g. ``"2.11.7"``).""" + return str(getattr(pydantic, "VERSION", "unknown")) + + +def _caller_module_name() -> Optional[str]: + """Best-effort lookup of the importing adapter's module name. + + Walks two frames up (past :func:`requires_pydantic`) and returns the + ``__name__`` of the calling module. Used purely to make the + :class:`RuntimeError` message actionable; never load-bearing. + """ + frame = inspect.currentframe() + if frame is None: + return None + try: + outer = frame.f_back + if outer is None: + return None + caller = outer.f_back + if caller is None: + return None + return caller.f_globals.get("__name__") + finally: + del frame + + +def requires_pydantic(version: PydanticCompat) -> None: + """Validate that the runtime Pydantic matches an adapter's declaration. + + Call from an adapter module's import path immediately after declaring + its compatibility constant. Raises :class:`RuntimeError` with a clear, + user-actionable message if the runtime Pydantic does not match. + + Args: + version: The adapter's :class:`PydanticCompat` declaration. + + Raises: + RuntimeError: If the runtime Pydantic version is incompatible + with the declaration. The message identifies the calling + adapter module so users can pin the correct extra. 
+    """
+    if version is PydanticCompat.V1_OR_V2:
+        return
+
+    if version is PydanticCompat.V2_ONLY and not PYDANTIC_V2:
+        caller = _caller_module_name() or "This adapter"  # frame lookup is best-effort
+        raise RuntimeError(
+            f"{caller} requires Pydantic v2 (declared {version.value}); "
+            f"runtime is pydantic {_runtime_pydantic_version()}. "
+            "Upgrade with `pip install 'pydantic>=2,<3'` or remove the "
+            "adapter extra from your install set."
+        )
+
+    if version is PydanticCompat.V1_ONLY and PYDANTIC_V2:
+        caller = _caller_module_name() or "This adapter"  # frame lookup is best-effort
+        raise RuntimeError(
+            f"{caller} requires Pydantic v1 (declared {version.value}); "
+            f"runtime is pydantic {_runtime_pydantic_version()}. "
+            "Pin with `pip install 'pydantic>=1.9,<2'` or remove the "
+            "adapter extra from your install set."
+        )
+
+
+__all__ = [
+    "PydanticCompat",
+    "requires_pydantic",
+]
diff --git a/src/layerlens/instrument/adapters/_base/registry.py b/src/layerlens/instrument/adapters/_base/registry.py
new file mode 100644
index 0000000..bb20c4b
--- /dev/null
+++ b/src/layerlens/instrument/adapters/_base/registry.py
@@ -0,0 +1,266 @@
+"""LayerLens Adapter Registry.
+
+Singleton registry that maps framework names to adapter classes,
+supports auto-detection of installed frameworks, and provides lazy
+instantiation.
+
+Ported from ``ateam/stratix/sdk/python/adapters/registry.py``. Module
+paths are remapped from ``stratix.sdk.python.adapters.*`` to
+``layerlens.instrument.adapters.*``. Lazy loading still uses
+``importlib.import_module`` so unused adapter modules do not pull their
+optional framework dependencies until first use.
+""" + +from __future__ import annotations + +import logging +import importlib +import threading +from typing import Any, Dict, List, Type, Optional + +from layerlens.instrument.adapters._base.adapter import AdapterInfo, BaseAdapter +from layerlens.instrument.adapters._base.capture import CaptureConfig +from layerlens.instrument.adapters._base.pydantic_compat import PydanticCompat + +logger = logging.getLogger(__name__) + + +# Module path for each framework adapter package. +# +# These point at the ``stratix-python`` SDK locations after the port. +# A module is registered here if its ``__init__.py`` (or the explicit +# leaf module named below) defines an ``ADAPTER_CLASS`` attribute that +# subclasses :class:`BaseAdapter`. Importing a module that requires an +# unavailable optional dependency raises :class:`ImportError`, which +# :meth:`AdapterRegistry._lazy_load` swallows and logs. +_ADAPTER_MODULES: Dict[str, str] = { + # Framework adapters + "langgraph": "layerlens.instrument.adapters.frameworks.langgraph", + "langchain": "layerlens.instrument.adapters.frameworks.langchain", + "crewai": "layerlens.instrument.adapters.frameworks.crewai", + "autogen": "layerlens.instrument.adapters.frameworks.autogen", + "semantic_kernel": "layerlens.instrument.adapters.frameworks.semantic_kernel", + "langfuse": "layerlens.instrument.adapters.frameworks.langfuse", + "openai_agents": "layerlens.instrument.adapters.frameworks.openai_agents", + "google_adk": "layerlens.instrument.adapters.frameworks.google_adk", + "bedrock_agents": "layerlens.instrument.adapters.frameworks.bedrock_agents", + "pydantic_ai": "layerlens.instrument.adapters.frameworks.pydantic_ai", + "llama_index": "layerlens.instrument.adapters.frameworks.llama_index", + "smolagents": "layerlens.instrument.adapters.frameworks.smolagents", + "agno": "layerlens.instrument.adapters.frameworks.agno", + "strands": "layerlens.instrument.adapters.frameworks.strands", + "ms_agent_framework": 
"layerlens.instrument.adapters.frameworks.ms_agent_framework", + "salesforce_agentforce": "layerlens.instrument.adapters.frameworks.agentforce", + "embedding": "layerlens.instrument.adapters.frameworks.embedding", + "browser_use": "layerlens.instrument.adapters.frameworks.browser_use", + "benchmark_import": "layerlens.instrument.adapters.frameworks.benchmark_import", + # LLM provider adapters + "openai": "layerlens.instrument.adapters.providers.openai_adapter", + "anthropic": "layerlens.instrument.adapters.providers.anthropic_adapter", + "azure_openai": "layerlens.instrument.adapters.providers.azure_openai_adapter", + "google_vertex": "layerlens.instrument.adapters.providers.google_vertex_adapter", + "aws_bedrock": "layerlens.instrument.adapters.providers.bedrock_adapter", + "ollama": "layerlens.instrument.adapters.providers.ollama_adapter", + "litellm": "layerlens.instrument.adapters.providers.litellm_adapter", + "cohere": "layerlens.instrument.adapters.providers.cohere_adapter", + "mistral": "layerlens.instrument.adapters.providers.mistral_adapter", + # Protocol adapters + "a2a": "layerlens.instrument.adapters.protocols.a2a", + "agui": "layerlens.instrument.adapters.protocols.agui", + "mcp_extensions": "layerlens.instrument.adapters.protocols.mcp", + "ap2": "layerlens.instrument.adapters.protocols.ap2", + "a2ui": "layerlens.instrument.adapters.protocols.a2ui", + "ucp": "layerlens.instrument.adapters.protocols.ucp", +} + +# Pip-installable package name used to probe whether the framework is +# available in the current environment. Used by :meth:`auto_detect`. 
+_FRAMEWORK_PACKAGES: Dict[str, str] = { + "langgraph": "langgraph", + "langchain": "langchain", + "crewai": "crewai", + "autogen": "autogen", + "openai": "openai", + "anthropic": "anthropic", + "azure_openai": "openai", + "google_vertex": "google.cloud.aiplatform", + "aws_bedrock": "boto3", + "ollama": "ollama", + "litellm": "litellm", + "cohere": "cohere", + "mistral": "mistralai", + "semantic_kernel": "semantic_kernel", + "openai_agents": "agents", + "google_adk": "google.adk", + "bedrock_agents": "boto3", + "pydantic_ai": "pydantic_ai", + "llama_index": "llama_index", + "smolagents": "smolagents", + "agno": "agno", + "strands": "strands", + "ms_agent_framework": "semantic_kernel", + "salesforce_agentforce": "requests", + "embedding": "layerlens.instrument.adapters.frameworks.embedding", + "browser_use": "browser_use", + "benchmark_import": "layerlens.instrument.adapters.frameworks.benchmark_import", + "langfuse": "layerlens.instrument.adapters.frameworks.langfuse", + "a2a": "layerlens.instrument.adapters.protocols.a2a", + "agui": "ag_ui", + "mcp_extensions": "mcp", + "ap2": "layerlens.instrument.adapters.protocols.ap2", + "a2ui": "layerlens.instrument.adapters.protocols.a2ui", + "ucp": "layerlens.instrument.adapters.protocols.ucp", +} + + +class AdapterRegistry: + """Singleton registry of LayerLens framework adapters. + + Usage:: + + registry = AdapterRegistry() + registry.register(MyCustomAdapter) + adapter = registry.get("langgraph", stratix=client) + """ + + _instance: Optional["AdapterRegistry"] = None + _lock: threading.Lock = threading.Lock() + _registry: Dict[str, Type[BaseAdapter]] + + def __new__(cls) -> "AdapterRegistry": + if cls._instance is None: + with cls._lock: + # Double-check after acquiring lock. + if cls._instance is None: + inst = super().__new__(cls) + inst._registry = {} + cls._instance = inst + return cls._instance + + # --- Public API --- + + def register(self, adapter_class: Type[BaseAdapter]) -> None: + """Register an adapter class. 
+ + The class must define a ``FRAMEWORK`` class attribute. + + Args: + adapter_class: A subclass of :class:`BaseAdapter`. + + Raises: + ValueError: If the class does not define ``FRAMEWORK``. + """ + framework = getattr(adapter_class, "FRAMEWORK", None) + if not framework: + raise ValueError( + f"{adapter_class.__name__} does not define a FRAMEWORK class attribute" + ) + self._registry[framework] = adapter_class + logger.debug( + "Registered adapter %s for framework '%s'", + adapter_class.__name__, + framework, + ) + + def auto_detect(self) -> List[str]: + """Return a list of frameworks whose packages are importable.""" + available: List[str] = [] + for framework, package in _FRAMEWORK_PACKAGES.items(): + try: + importlib.import_module(package) + available.append(framework) + except ImportError: + pass + return available + + def get( + self, + framework: str, + stratix: Any = None, + capture_config: Optional[CaptureConfig] = None, + ) -> BaseAdapter: + """Retrieve, instantiate, and connect an adapter. + + Lazy-loads the adapter module on first use so framework + dependencies are never imported by ``import layerlens`` alone. + + Args: + framework: Framework name (e.g., ``"langgraph"``, + ``"langchain"``). + stratix: LayerLens client instance. + capture_config: :class:`CaptureConfig` to use. + + Returns: + Connected :class:`BaseAdapter` instance. + + Raises: + KeyError: If the framework has no registered adapter and + cannot be lazy-loaded. + """ + if framework not in self._registry: + self._lazy_load(framework) + + adapter_cls = self._registry.get(framework) + if adapter_cls is None: + raise KeyError( + f"No adapter registered for framework '{framework}'. " + f"Available: {list(self._registry.keys())}" + ) + + adapter = adapter_cls(stratix=stratix, capture_config=capture_config) + adapter.connect() + return adapter + + def list_available(self) -> List[AdapterInfo]: + """Return :class:`AdapterInfo` for every registered adapter. 
+ + Uses :meth:`BaseAdapter.info` so the class-level + ``requires_pydantic`` declaration is applied even if the subclass + omits it from its :meth:`get_adapter_info` constructor call. + """ + results: List[AdapterInfo] = [] + for framework in list(self._registry.keys()): + cls = self._registry[framework] + try: + tmp = cls() + results.append(tmp.info()) + except Exception: + results.append( + AdapterInfo( + name=cls.__name__, + version=getattr(cls, "VERSION", "0.0.0"), + framework=framework, + requires_pydantic=getattr(cls, "requires_pydantic", PydanticCompat.V1_OR_V2), + ) + ) + return results + + # --- Internal --- + + def _lazy_load(self, framework: str) -> None: + """Import the adapter module for *framework* and pull ``ADAPTER_CLASS``.""" + module_path = _ADAPTER_MODULES.get(framework) + if module_path is None: + return + + try: + mod = importlib.import_module(module_path) + except ImportError: + logger.debug("Could not import adapter module %s", module_path) + return + + adapter_cls = getattr(mod, "ADAPTER_CLASS", None) + if adapter_cls is not None and issubclass(adapter_cls, BaseAdapter): + self._registry[framework] = adapter_cls + logger.debug( + "Lazy-loaded adapter %s from %s", + adapter_cls.__name__, + module_path, + ) + + @classmethod + def reset(cls) -> None: + """Reset the singleton — primarily for test isolation.""" + if cls._instance is not None: + cls._instance._registry.clear() + cls._instance = None diff --git a/src/layerlens/instrument/adapters/_base/sinks.py b/src/layerlens/instrument/adapters/_base/sinks.py new file mode 100644 index 0000000..4c762d1 --- /dev/null +++ b/src/layerlens/instrument/adapters/_base/sinks.py @@ -0,0 +1,277 @@ +"""LayerLens Event Sinks. + +Pluggable sinks that receive events from :class:`BaseAdapter` after +successful emission. Each sink bridges the adapter's in-memory event +stream to a persistence or export backend. 
+ +The ``ateam`` source provided concrete :class:`TraceStoreSink` and +:class:`IngestionPipelineSink` implementations that depended on +``stratix.storage.traces.TraceStore`` and ``stratix.ingest.pipeline``. +Those server-side modules do not exist in the ``stratix-python`` SDK; +the sinks here are kept as protocol-conformant duck-typed bridges that +accept any object exposing ``store_trace`` / ``store_event`` (for +:class:`TraceStoreSink`) or ``ingest`` (for :class:`IngestionPipelineSink`). + +Typical SDK usage routes events to an HTTP sink that POSTs to atlas-app +``/api/v1/telemetry/spans``; that sink lives in +``layerlens.instrument.transport`` and is added in a later milestone. + +Ported from ``ateam/stratix/sdk/python/adapters/sinks.py``. +""" + +from __future__ import annotations + +import uuid +import logging +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional +from datetime import datetime, timezone + +# Python 3.11+ exposes ``datetime.UTC``; for 3.8+ compat we alias the +# existing ``timezone.utc`` constant. Keeping both names available means +# adapter code can use ``UTC`` regardless of interpreter version. +UTC = timezone.utc + +logger = logging.getLogger(__name__) + + +class EventSink(ABC): + """Abstract base for event sinks. + + Sinks receive ``(event_type, payload, timestamp_ns)`` triples from + :meth:`BaseAdapter._post_emit_success` and persist or forward them. + """ + + @abstractmethod + def send(self, event_type: str, payload: Dict[str, Any], timestamp_ns: int) -> None: + """Accept a single event. + + Args: + event_type: Event type string (e.g., ``"model.invoke"``). + payload: Serialized event payload dict. + timestamp_ns: Nanosecond-precision Unix timestamp. + """ + + @abstractmethod + def flush(self) -> None: + """Flush any buffered events to the backend.""" + + @abstractmethod + def close(self) -> None: + """Finalize the sink (e.g. 
mark trace as completed).""" + + +class TraceStoreSink(EventSink): + """Sink that writes events directly to a duck-typed trace store. + + The store object must expose: + + * ``store_trace(record)`` — accepts a record-like object with the + fields the store understands (``trace_id``, ``status``, + ``start_time``, ``end_time``, etc.). + * ``store_event(record)`` — accepts a record-like object with + ``event_id``, ``event_type``, ``trace_id``, ``span_id``, + ``sequence_id``, ``timestamp``, ``payload``. + * ``get_trace(trace_id)`` and ``update_trace_status(trace_id, status)`` + for finalization. + + The factory callables for trace and event records can be injected via + ``trace_record_factory`` and ``event_record_factory``; if omitted, the + sink uses simple dicts. This decouples the sink from the + ``stratix.storage.traces`` module that lives only in the framework + repo. + + Auto-generates ``trace_id`` (or accepts one), ``event_id``, ``span_id``, + and auto-increments ``sequence_id``. On :meth:`close` the trace is + marked ``"completed"``. 
+ """ + + def __init__( + self, + store: Any, + trace_id: Optional[str] = None, + trial_id: str = "default", + agent_id: Optional[str] = None, + metadata: Optional[Dict[str, Any]] = None, + trace_record_factory: Optional[Any] = None, + event_record_factory: Optional[Any] = None, + ) -> None: + self._store = store + self._trace_id = trace_id or str(uuid.uuid4()) + self._trial_id = trial_id + self._sequence_id = 0 + self._closed = False + self._start_time = datetime.now(UTC) + self._trace_record_factory = trace_record_factory or self._default_trace_record + self._event_record_factory = event_record_factory or self._default_event_record + + self._store.store_trace( + self._trace_record_factory( + trace_id=self._trace_id, + trial_id=self._trial_id, + agent_id=agent_id, + start_time=self._start_time, + end_time=self._start_time, + status="active", + metadata=metadata or {}, + ) + ) + + @staticmethod + def _default_trace_record(**kwargs: Any) -> Dict[str, Any]: + return dict(kwargs) + + @staticmethod + def _default_event_record(**kwargs: Any) -> Dict[str, Any]: + return dict(kwargs) + + @property + def trace_id(self) -> str: + return self._trace_id + + def send(self, event_type: str, payload: Dict[str, Any], timestamp_ns: int) -> None: + if self._closed: + return + + self._sequence_id += 1 + ts = datetime.fromtimestamp(timestamp_ns / 1e9, tz=UTC) + + record = self._event_record_factory( + event_id=str(uuid.uuid4()), + event_type=event_type, + trace_id=self._trace_id, + span_id=str(uuid.uuid4()), + sequence_id=self._sequence_id, + timestamp=ts, + payload=payload if isinstance(payload, dict) else {"raw": str(payload)}, + ) + + try: + self._store.store_event(record) + except Exception: + logger.debug( + "TraceStoreSink.send() failed for event %s", + event_type, + exc_info=True, + ) + + def flush(self) -> None: + # TraceStoreSink writes synchronously — nothing to flush. 
+ pass + + def close(self) -> None: + if self._closed: + return + self._closed = True + try: + existing = None + if hasattr(self._store, "get_trace"): + existing = self._store.get_trace(self._trace_id) + if existing is not None: + if hasattr(existing, "status"): + existing.status = "completed" + existing.end_time = datetime.now(UTC) + existing.event_count = self._sequence_id + self._store.store_trace(existing) + elif isinstance(existing, dict): + existing["status"] = "completed" + existing["end_time"] = datetime.now(UTC) + existing["event_count"] = self._sequence_id + self._store.store_trace(existing) + elif hasattr(self._store, "update_trace_status"): + self._store.update_trace_status(self._trace_id, "completed") + except Exception: + logger.debug( + "TraceStoreSink.close() failed to finalize trace %s", + self._trace_id, + exc_info=True, + ) + + +class IngestionPipelineSink(EventSink): + """Sink that feeds events into a duck-typed ingestion pipeline. + + The pipeline object must expose + ``ingest(events: list[dict], tenant_id: str)``. + + Supports two modes: + + * **immediate** (default): each event is ingested as a single-item batch. + * **buffered**: events are collected and ingested on + :meth:`flush` / :meth:`close`. 
def flush(self) -> None:
    """Send every buffered event to the pipeline in one ``ingest`` call.

    Does nothing when the buffer is empty. The pipeline receives a copy
    of the buffer. The buffer is emptied afterwards whether or not
    ``ingest`` succeeded: a failing batch is logged at DEBUG level and
    discarded rather than retried.
    """
    batch = list(self._buffer)
    if not batch:
        return

    try:
        self._pipeline.ingest(batch, tenant_id=self._tenant_id)
    except Exception:
        logger.debug(
            "IngestionPipelineSink.flush() failed for %d events",
            len(self._buffer),
            exc_info=True,
        )

    # Best-effort delivery: drop the batch even when ingest raised.
    self._buffer.clear()
class SerializedTrace(BaseModel):
    """Portable snapshot of one complete trace.

    Bundles the ordered event dicts with checkpoint snapshots, free-form
    metadata, a flag recording whether the hash chain was verified when
    the snapshot was taken, and a schema version string.
    """

    # --- identity -------------------------------------------------------
    trace_id: str = Field(description="Trace ID (UUID)")
    evaluation_id: Optional[str] = Field(default=None, description="Evaluation ID")
    trial_id: Optional[str] = Field(default=None, description="Trial ID")

    # --- content --------------------------------------------------------
    events: list[dict[str, Any]] = Field(
        default_factory=list,
        description="Ordered event records (dicts)",
    )
    checkpoints: list[dict[str, Any]] = Field(
        default_factory=list,
        description="Checkpoint snapshots collected during the trace",
    )
    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="Arbitrary metadata (adapter name, framework, etc.)",
    )

    # --- integrity / versioning -----------------------------------------
    hash_chain_verified: bool = Field(
        default=False,
        description="True if the hash chain was verified at serialization time",
    )
    schema_version: str = Field(
        default="1.2.0",
        description="Schema version for forward compatibility",
    )

    @classmethod
    def from_event_records(
        cls,
        events: list[dict[str, Any]],
        trace_id: str,
        evaluation_id: str | None = None,
        trial_id: str | None = None,
        checkpoints: list[dict[str, Any]] | None = None,
        metadata: dict[str, Any] | None = None,
        hash_chain_verified: bool = False,
    ) -> SerializedTrace:
        """Alternate constructor taking raw event dicts.

        Args:
            events: Event dicts, already in trace order.
            trace_id: ID of the trace the events belong to.
            evaluation_id: Evaluation ID, if any.
            trial_id: Trial ID, if any.
            checkpoints: Checkpoint snapshots; ``None`` or an empty list
                yields an empty list.
            metadata: Free-form metadata; ``None`` or an empty dict
                yields an empty dict.
            hash_chain_verified: Whether the caller verified the hash chain.

        Returns:
            A new ``SerializedTrace``; ``schema_version`` keeps its default.
        """
        fields: dict[str, Any] = {
            "trace_id": trace_id,
            "evaluation_id": evaluation_id,
            "trial_id": trial_id,
            "events": events,
            "checkpoints": checkpoints if checkpoints else [],
            "metadata": metadata if metadata else {},
            "hash_chain_verified": hash_chain_verified,
        }
        return cls(**fields)
+ + Allows ``from layerlens.instrument.adapters.providers import OllamaAdapter`` + without importing the underlying vendor SDK at package-import time. + The actual import (and any vendor-SDK side effects) is deferred + until the symbol is first referenced. + """ + if name == "OllamaAdapter": + from layerlens.instrument.adapters.providers.ollama_adapter import ( + OllamaAdapter, + ) + + return OllamaAdapter + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/src/layerlens/instrument/adapters/providers/_base/__init__.py b/src/layerlens/instrument/adapters/providers/_base/__init__.py new file mode 100644 index 0000000..84aa7da --- /dev/null +++ b/src/layerlens/instrument/adapters/providers/_base/__init__.py @@ -0,0 +1,21 @@ +"""Shared base layer for LLM provider adapters.""" + +from __future__ import annotations + +from layerlens.instrument.adapters.providers._base.tokens import NormalizedTokenUsage +from layerlens.instrument.adapters.providers._base.pricing import ( + PRICING, + AZURE_PRICING, + BEDROCK_PRICING, + calculate_cost, +) +from layerlens.instrument.adapters.providers._base.provider import LLMProviderAdapter + +__all__ = [ + "AZURE_PRICING", + "BEDROCK_PRICING", + "LLMProviderAdapter", + "NormalizedTokenUsage", + "PRICING", + "calculate_cost", +] diff --git a/src/layerlens/instrument/adapters/providers/_base/pricing.py b/src/layerlens/instrument/adapters/providers/_base/pricing.py new file mode 100644 index 0000000..cac71d9 --- /dev/null +++ b/src/layerlens/instrument/adapters/providers/_base/pricing.py @@ -0,0 +1,184 @@ +"""LLM Model Pricing. + +Maintains pricing tables (per-1K-token rates) for all supported models +and provides cost calculation with cached-token adjustments. + +Ported verbatim from ``ateam/stratix/sdk/python/adapters/llm_providers/pricing.py``. +The pricing JSON is the canonical platform-wide source-of-truth and is +hash-checked between ``ateam`` and ``stratix-python`` in CI to prevent +drift. 
+""" + +from __future__ import annotations + +from typing import Dict, Optional + +from layerlens.instrument.adapters.providers._base.tokens import NormalizedTokenUsage + +# --------------------------------------------------------------------------- +# Pricing tables (per-1K-token rates, USD) +# --------------------------------------------------------------------------- + +PRICING: Dict[str, Dict[str, float]] = { + # OpenAI + "gpt-4o": {"input": 0.0025, "output": 0.0100}, + "gpt-4o-mini": {"input": 0.00015, "output": 0.0006}, + "gpt-4o-2024-11-20": {"input": 0.0025, "output": 0.0100}, + "gpt-4.1": {"input": 0.002, "output": 0.008}, + "gpt-4.1-mini": {"input": 0.0004, "output": 0.0016}, + "gpt-4.1-nano": {"input": 0.0001, "output": 0.0004}, + "gpt-4-turbo": {"input": 0.01, "output": 0.03}, + "gpt-4": {"input": 0.03, "output": 0.06}, + "gpt-3.5-turbo": {"input": 0.0005, "output": 0.0015}, + "o1": {"input": 0.015, "output": 0.060}, + "o1-mini": {"input": 0.003, "output": 0.012}, + "o3": {"input": 0.010, "output": 0.040}, + "o3-mini": {"input": 0.0011, "output": 0.0044}, + "o4-mini": {"input": 0.0011, "output": 0.0044}, + # Anthropic + "claude-sonnet-4-5-20250929": {"input": 0.003, "output": 0.015}, + "claude-opus-4-20250115": {"input": 0.015, "output": 0.075}, + "claude-opus-4-6": {"input": 0.015, "output": 0.075}, + "claude-haiku-4-5-20251001": {"input": 0.0008, "output": 0.004}, + "claude-haiku-3-5-20241022": {"input": 0.0008, "output": 0.004}, + "claude-3-5-sonnet-20241022": {"input": 0.003, "output": 0.015}, + "claude-3-opus-20240229": {"input": 0.015, "output": 0.075}, + "claude-3-haiku-20240307": {"input": 0.00025, "output": 0.00125}, + # Google + "gemini-2.5-pro": {"input": 0.00125, "output": 0.01}, + "gemini-2.5-flash": {"input": 0.000075, "output": 0.0003}, + "gemini-2.0-flash": {"input": 0.0001, "output": 0.0004}, + "gemini-1.5-pro": {"input": 0.00125, "output": 0.005}, + "gemini-1.5-flash": {"input": 0.000075, "output": 0.0003}, + # Meta (hosted: Bedrock / 
Together / Replicate). For LOCAL Ollama + # inference of these same model families, see the explicit zero-cost + # entries below — Ollama's `model` parameter is matched verbatim and + # bypasses the hosted rates. + "llama-3.3-70b": {"input": 0.00099, "output": 0.00099}, + "llama-3.1-70b": {"input": 0.00099, "output": 0.00099}, + "llama-3.1-8b": {"input": 0.00022, "output": 0.00022}, + # ----- Ollama (local / self-hosted) ----- + # Ollama runs the model on the operator's own hardware, so the + # platform never charges for tokens — `api_cost_usd` is always + # exactly 0.0. These entries exist so that `calculate_cost` returns + # 0.0 (a real number) rather than None (pricing-unavailable) when + # the canonical Ollama model tag is supplied. The OllamaAdapter + # additionally emits an optional `infra_cost_usd` derived from + # compute duration when `cost_per_second` is configured by the + # caller — that path bypasses this table entirely. + "llama3.3": {"input": 0.0, "output": 0.0}, + "llama3.2": {"input": 0.0, "output": 0.0}, + "llama3.2:1b": {"input": 0.0, "output": 0.0}, + "llama3.2:3b": {"input": 0.0, "output": 0.0}, + "llama3.1": {"input": 0.0, "output": 0.0}, + "llama3.1:8b": {"input": 0.0, "output": 0.0}, + "llama3.1:70b": {"input": 0.0, "output": 0.0}, + "llama3.1:405b": {"input": 0.0, "output": 0.0}, + "llama3": {"input": 0.0, "output": 0.0}, + "llama2": {"input": 0.0, "output": 0.0}, + "mistral": {"input": 0.0, "output": 0.0}, + "mistral-nemo": {"input": 0.0, "output": 0.0}, + "mixtral": {"input": 0.0, "output": 0.0}, + "phi3": {"input": 0.0, "output": 0.0}, + "phi3.5": {"input": 0.0, "output": 0.0}, + "qwen2.5": {"input": 0.0, "output": 0.0}, + "qwen2.5-coder": {"input": 0.0, "output": 0.0}, + "gemma2": {"input": 0.0, "output": 0.0}, + "gemma2:2b": {"input": 0.0, "output": 0.0}, + "deepseek-r1": {"input": 0.0, "output": 0.0}, + "deepseek-coder": {"input": 0.0, "output": 0.0}, + "codellama": {"input": 0.0, "output": 0.0}, + "nomic-embed-text": {"input": 0.0, 
def _cached_token_discount(model: str) -> float:
    """Return the fraction of the input rate charged for cached prompt tokens.

    * Anthropic (Claude) models: 0.1 (90% discount).
    * Google (Gemini) models: 0.25 (75% discount).
    * Everything else, including OpenAI: 0.5 (50% discount).

    The family name is matched as a substring, not a prefix, so that
    provider-qualified ids such as the Bedrock key
    ``"anthropic.claude-3-5-sonnet-20241022-v2:0"`` resolve to the same
    discount as the bare model name.
    """
    lower = model.lower()
    if "claude" in lower:
        return 0.1
    if "gemini" in lower:
        return 0.25
    return 0.5


def calculate_cost(
    model: str,
    usage: NormalizedTokenUsage,
    pricing_table: Optional[Dict[str, Dict[str, float]]] = None,
) -> Optional[float]:
    """Calculate the API cost in USD for a model invocation.

    Rates in the pricing table are per 1,000 tokens. Cached prompt
    tokens are billed at the input rate multiplied by
    :func:`_cached_token_discount`; the remaining prompt tokens are
    billed at the full input rate.

    Args:
        model: Model name, looked up verbatim in the pricing table
            (e.g. ``"gpt-4o"``, ``"claude-sonnet-4-5-20250929"``).
        usage: Normalized token usage from the provider response.
        pricing_table: Override pricing table (for Azure / Bedrock).
            ``None`` selects :data:`PRICING`. An explicitly supplied
            table is always honoured, even when it is empty.

    Returns:
        Cost in USD rounded to 8 decimal places, or ``None`` if the
        model is not in the pricing table.
    """
    # Only fall back when no table was given; ``pricing_table or PRICING``
    # would silently swap an empty override for the default table.
    table = PRICING if pricing_table is None else pricing_table
    rates = table.get(model)
    if rates is None:
        return None

    input_rate = rates.get("input", 0.0)
    output_rate = rates.get("output", 0.0)

    prompt_tokens = usage.prompt_tokens
    cached = usage.cached_tokens or 0

    # Clamp at zero in case a provider reports more cached than prompt tokens.
    non_cached = max(prompt_tokens - cached, 0)
    cached_rate = input_rate * _cached_token_discount(model)

    cost = (
        (non_cached * input_rate / 1000)
        + (cached * cached_rate / 1000)
        + (usage.completion_tokens * output_rate / 1000)
    )

    return round(cost, 8)
Extends +:class:`BaseAdapter` with provider-specific emit helpers for +``model.invoke``, ``cost.record``, ``tool.call``, and +``policy.violation`` events. + +Supports W3C Trace Context propagation (``traceparent`` / +``tracestate``) for correlating spans across adapter boundaries. + +Ported from ``ateam/stratix/sdk/python/adapters/llm_providers/base_provider.py``. +""" + +from __future__ import annotations + +import time +import uuid +import logging +from abc import abstractmethod +from typing import Any, Dict, List, Optional + +from layerlens._compat.pydantic import model_dump +from layerlens.instrument.adapters._base.adapter import ( + AdapterInfo, + BaseAdapter, + AdapterHealth, + AdapterStatus, + ReplayableTrace, + AdapterCapability, +) +from layerlens.instrument.adapters._base.capture import CaptureConfig +from layerlens.instrument.adapters.providers._base.tokens import NormalizedTokenUsage +from layerlens.instrument.adapters.providers._base.pricing import calculate_cost + +# W3C Trace Context header names. +_TRACEPARENT_HEADER = "traceparent" +_TRACESTATE_HEADER = "tracestate" + +logger = logging.getLogger(__name__) + + +class LLMProviderAdapter(BaseAdapter): + """Abstract base class for all LLM provider adapters. + + Provides concrete implementations for: + + * Event emission helpers (:meth:`_emit_model_invoke`, + :meth:`_emit_cost_record`, :meth:`_emit_tool_calls`, + :meth:`_emit_provider_error`). + * Lifecycle methods (:meth:`health_check`, + :meth:`get_adapter_info`, :meth:`serialize_for_replay`). + * Client reference management (``_client``, ``_originals``). + + Subclasses must implement: + + * :meth:`connect` — import framework, set HEALTHY. + * :meth:`disconnect` — restore originals, set DISCONNECTED. + * :meth:`connect_client` — wrap the provider client. 
def disconnect(self) -> None:
    """Undo client patching and mark the adapter as disconnected.

    ``_restore_originals`` runs first, while the client reference and
    the saved originals are still in place; only then are both dropped
    and the connection state reset.
    """
    self._restore_originals()

    # Forget the patched client and the methods saved from it.
    self._originals.clear()
    self._client = None

    self._status = AdapterStatus.DISCONNECTED
    self._connected = False
def serialize_for_replay(self) -> ReplayableTrace:
    """Package the adapter's recorded events for replay.

    Returns:
        A ``ReplayableTrace`` carrying a copy of ``_trace_events``, the
        dumped capture configuration and no state snapshots. The
        ``trace_id`` is a freshly generated UUID on every call.
    """
    adapter_name = type(self).__name__
    recorded = list(self._trace_events)
    replay_config = {"capture_config": model_dump(self._capture_config)}

    return ReplayableTrace(
        adapter_name=adapter_name,
        framework=self.FRAMEWORK,
        trace_id=str(uuid.uuid4()),
        events=recorded,
        state_snapshots=[],
        config=replay_config,
    )
def _extract_trace_context(
    self,
    headers: Dict[str, str],
) -> Dict[str, str]:
    """Extract W3C ``traceparent`` / ``tracestate`` from inbound headers.

    Header names are matched case-insensitively, because HTTP field
    names carry no case and many servers deliver them title-cased.
    Leading and trailing whitespace around the values is ignored.

    The ``traceparent`` value is split on ``"-"``; when it has at least
    four segments, segments 1-3 are returned as ``trace_id``,
    ``parent_span_id`` and ``trace_flags``. No further validation of
    the segments is performed.

    Args:
        headers: Inbound headers mapping (not modified).

    Returns:
        Dict with ``trace_id``, ``parent_span_id``, ``trace_flags``,
        and optionally ``tracestate``; empty when neither header is
        present or ``traceparent`` has fewer than four segments.
    """
    # Normalise names once so "Traceparent" and "TRACEPARENT" are found too.
    lowered = {str(name).lower(): value for name, value in headers.items()}
    result: Dict[str, str] = {}

    traceparent = (lowered.get(_TRACEPARENT_HEADER) or "").strip()
    if traceparent:
        parts = traceparent.split("-")
        if len(parts) >= 4:
            result["trace_id"] = parts[1]
            result["parent_span_id"] = parts[2]
            result["trace_flags"] = parts[3]

    tracestate = (lowered.get(_TRACESTATE_HEADER) or "").strip()
    if tracestate:
        result["tracestate"] = tracestate

    return result
None: + payload["reasoning_tokens"] = usage.reasoning_tokens + if latency_ms is not None: + payload["latency_ms"] = latency_ms + if error: + payload["error"] = error + if metadata: + for k, v in metadata.items(): + if k not in payload: + payload[k] = v + if self._capture_config.capture_content: + if input_messages: + payload["messages"] = input_messages + if output_message: + payload["output_message"] = output_message + + self.emit_dict_event("model.invoke", payload) + + @staticmethod + def _normalize_messages( + raw_messages: Any, + system: Any = None, + ) -> Optional[List[Dict[str, str]]]: + """Normalize provider-specific message formats to ``[{role, content}]``. + + Args: + raw_messages: Messages from the provider SDK kwargs (list of + dicts, list of objects, or ``None``). + system: Separate system prompt (e.g. Anthropic's ``system`` + kwarg). May be a string or a list of content blocks. + + Returns: + Normalized list, or ``None`` if no messages were found. + """ + if not raw_messages and not system: + return None + + messages: List[Dict[str, str]] = [] + + if system: + if isinstance(system, str): + messages.append({"role": "system", "content": system[:10_000]}) + elif isinstance(system, list): + parts: List[str] = [] + for block in system: + if isinstance(block, str): + parts.append(block) + elif isinstance(block, dict) and "text" in block: + parts.append(str(block["text"])) + if parts: + messages.append({"role": "system", "content": "\n".join(parts)[:10_000]}) + + if raw_messages: + for msg in raw_messages: + role = "" + content = "" + if isinstance(msg, dict): + role = str(msg.get("role", "")) + raw_content = msg.get("content", "") + if isinstance(raw_content, str): + content = raw_content + elif isinstance(raw_content, list): + parts2: List[str] = [] + for part in raw_content: + if isinstance(part, str): + parts2.append(part) + elif isinstance(part, dict): + text = part.get("text") or part.get("content", "") + if text: + parts2.append(str(text)) + content = 
"\n".join(parts2) + else: + content = str(raw_content) if raw_content else "" + elif hasattr(msg, "role") and hasattr(msg, "content"): + role = str(getattr(msg, "role", "")) + raw_content = getattr(msg, "content", "") + if isinstance(raw_content, str): + content = raw_content + elif isinstance(raw_content, list): + parts3: List[str] = [] + for part in raw_content: + if isinstance(part, str): + parts3.append(part) + elif hasattr(part, "text"): + parts3.append(str(part.text)) + elif isinstance(part, dict) and "text" in part: + parts3.append(str(part["text"])) + content = "\n".join(parts3) + else: + content = str(raw_content) if raw_content else "" + else: + continue + + if role: + messages.append({"role": role, "content": content[:10_000]}) + + return messages if messages else None + + def _emit_cost_record( + self, + model: Optional[str], + usage: Optional[NormalizedTokenUsage], + provider: Optional[str] = None, + pricing_table: Optional[Dict[str, Dict[str, float]]] = None, + metadata: Optional[Dict[str, Any]] = None, + ) -> None: + """Emit a ``cost.record`` (cross-cutting) event.""" + payload: Dict[str, Any] = { + "provider": provider or self.FRAMEWORK, + "model": model, + } + + if usage: + payload["prompt_tokens"] = usage.prompt_tokens + payload["completion_tokens"] = usage.completion_tokens + payload["total_tokens"] = usage.total_tokens + + cost = calculate_cost(model or "", usage, pricing_table) + if cost is not None: + payload["api_cost_usd"] = cost + else: + payload["api_cost_usd"] = None + payload["pricing_unavailable"] = True + + if metadata: + for k, v in metadata.items(): + if k not in payload: + payload[k] = v + + self.emit_dict_event("cost.record", payload) + + def _emit_tool_calls( + self, + tool_calls: List[Dict[str, Any]], + parent_model: Optional[str] = None, + ) -> None: + """Emit ``tool.call`` (L5a) events for function / tool calls in a response.""" + for tc in tool_calls: + payload: Dict[str, Any] = { + "tool_name": tc.get("name", "unknown"), + 
"tool_input": tc.get("arguments") or tc.get("input"), + "provider": self.FRAMEWORK, + } + if parent_model: + payload["model"] = parent_model + if "id" in tc: + payload["tool_call_id"] = tc["id"] + + self.emit_dict_event("tool.call", payload) + + def _emit_provider_error( + self, + provider: str, + error: str, + model: Optional[str] = None, + metadata: Optional[Dict[str, Any]] = None, + ) -> None: + """Emit ``policy.violation`` (cross-cutting) for provider errors.""" + payload: Dict[str, Any] = { + "provider": provider, + "error": error, + "violation_type": "safety", + } + if model: + payload["model"] = model + if metadata: + for k, v in metadata.items(): + if k not in payload: + payload[k] = v + + self.emit_dict_event("policy.violation", payload) diff --git a/src/layerlens/instrument/adapters/providers/_base/tokens.py b/src/layerlens/instrument/adapters/providers/_base/tokens.py new file mode 100644 index 0000000..69c7c7c --- /dev/null +++ b/src/layerlens/instrument/adapters/providers/_base/tokens.py @@ -0,0 +1,80 @@ +"""Normalized Token Usage. + +Provides a common data structure for token usage across all LLM +providers. Each provider adapter constructs this from its own response +format. + +Ported from ``ateam/stratix/sdk/python/adapters/llm_providers/token_usage.py``. + +The source uses Pydantic v2's ``model_validator`` and ``model_copy``, +which do not exist in Pydantic v1. The ``stratix-python`` SDK pins +``pydantic>=1.9.0, <3``, so this port avoids both v2-only features: + +* The auto-total behavior is implemented as :meth:`with_auto_total` + classmethod and :meth:`compute_total` instance method that construct + fresh instances rather than relying on a validator hook. +* Callers in this codebase always pass an explicit ``total_tokens``, + so the auto-compute is purely a defensive convenience for external + callers. 
class NormalizedTokenUsage(BaseModel):
    """Provider-independent token counts for a single model call."""

    # Required-in-practice counters; all default to zero.
    prompt_tokens: int = Field(default=0, description="Input tokens (prompt, system, context)")
    completion_tokens: int = Field(default=0, description="Output tokens (response, generation)")
    total_tokens: int = Field(default=0, description="prompt_tokens + completion_tokens")

    # Optional counters; ``None`` means the provider did not report them.
    cached_tokens: Optional[int] = Field(
        default=None,
        description="Cached prompt tokens (OpenAI cached, Anthropic cache_read)",
    )
    reasoning_tokens: Optional[int] = Field(
        default=None,
        description="Reasoning tokens (o1/o3 reasoning, Claude extended thinking)",
    )

    @classmethod
    def with_auto_total(
        cls,
        prompt_tokens: int = 0,
        completion_tokens: int = 0,
        total_tokens: int = 0,
        cached_tokens: Optional[int] = None,
        reasoning_tokens: Optional[int] = None,
    ) -> "NormalizedTokenUsage":
        """Build a usage record, filling in ``total_tokens`` when it is zero.

        If ``total_tokens`` is zero and at least one of ``prompt_tokens``
        / ``completion_tokens`` is non-zero, the total becomes their sum.
        A non-zero ``total_tokens`` is kept as given. Callers that already
        hold a total can instantiate the class directly instead.
        """
        resolved_total = total_tokens
        if not total_tokens and any((prompt_tokens, completion_tokens)):
            resolved_total = prompt_tokens + completion_tokens

        return cls(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=resolved_total,
            cached_tokens=cached_tokens,
            reasoning_tokens=reasoning_tokens,
        )

    def compute_total(self) -> "NormalizedTokenUsage":
        """Return a new record whose ``total_tokens`` is prompt + completion.

        The current instance is left untouched. A fresh instance is
        constructed explicitly (rather than via Pydantic v2's
        ``model_copy``) so the method works under Pydantic v1 and v2.
        """
        usage_cls = type(self)
        summed = self.prompt_tokens + self.completion_tokens
        return usage_cls(
            prompt_tokens=self.prompt_tokens,
            completion_tokens=self.completion_tokens,
            total_tokens=summed,
            cached_tokens=self.cached_tokens,
            reasoning_tokens=self.reasoning_tokens,
        )
+ """ + + FRAMEWORK = "ollama" + VERSION = "0.1.0" + + def __init__( + self, + stratix: Any = None, + capture_config: Optional[CaptureConfig] = None, + cost_per_second: Optional[float] = None, + ) -> None: + super().__init__(stratix=stratix, capture_config=capture_config) + self._cost_per_second = cost_per_second + self._endpoint: Optional[str] = None + + def connect(self) -> None: + """Detect Ollama endpoint and mark as connected.""" + self._endpoint = os.environ.get("OLLAMA_HOST", "http://localhost:11434") + self._framework_version = self._detect_framework_version() + self._connected = True + self._status = AdapterStatus.HEALTHY + + def connect_client(self, client: Any) -> Any: + """Wrap Ollama client / module methods with tracing.""" + self._client = client + + if hasattr(client, "chat"): + original_chat = client.chat + self._originals["chat"] = original_chat + client.chat = self._wrap_call(original_chat, "chat") + + if hasattr(client, "generate"): + original_gen = client.generate + self._originals["generate"] = original_gen + client.generate = self._wrap_call(original_gen, "generate") + + if hasattr(client, "embeddings"): + original_embed = client.embeddings + self._originals["embeddings"] = original_embed + client.embeddings = self._wrap_call(original_embed, "embeddings") + + return client + + def _restore_originals(self) -> None: + if self._client is None: + return + for method_name, original in self._originals.items(): + try: + setattr(self._client, method_name, original) + except Exception: + logger.warning("Could not restore %s", method_name) + + @staticmethod + def _detect_framework_version() -> Optional[str]: + try: + import ollama # type: ignore[import-not-found,unused-ignore] + + version = getattr(ollama, "__version__", None) + return str(version) if version is not None else None + except ImportError: + return None + + def _wrap_call(self, original: Any, method_name: str) -> Any: + adapter = self + + def traced_call(*args: Any, **kwargs: Any) -> Any: + 
model = kwargs.get("model") or (args[0] if args else None) + start_ns = time.time_ns() + + input_messages = None + if method_name == "chat": + input_messages = adapter._normalize_messages(kwargs.get("messages")) + elif method_name == "generate": + prompt = kwargs.get("prompt") + if prompt: + input_messages = [ + {"role": "user", "content": str(prompt)[:10_000]} + ] + + try: + response = original(*args, **kwargs) + except Exception as exc: + elapsed_ms = (time.time_ns() - start_ns) / 1_000_000 + try: + adapter._emit_model_invoke( + provider="ollama", + model=model, + latency_ms=elapsed_ms, + error=str(exc), + metadata={ + "method": method_name, + "endpoint": adapter._endpoint, + }, + input_messages=input_messages, + ) + adapter._emit_provider_error("ollama", str(exc), model=model) + except Exception: + logger.warning("Error emitting Ollama error event", exc_info=True) + raise + + try: + elapsed_ms = (time.time_ns() - start_ns) / 1_000_000 + usage = adapter._extract_usage(response) + infra_cost = adapter._calculate_infra_cost(response) + output_message = adapter._extract_output_message(response, method_name) + + ollama_metadata: Dict[str, Any] = { + "method": method_name, + "endpoint": adapter._endpoint, + } + if isinstance(response, dict): + done_reason = response.get("done_reason") + else: + done_reason = getattr(response, "done_reason", None) + if done_reason is not None: + ollama_metadata["finish_reason"] = done_reason + + adapter._emit_model_invoke( + provider="ollama", + model=model, + usage=usage, + latency_ms=elapsed_ms, + metadata=ollama_metadata, + input_messages=input_messages, + output_message=output_message, + ) + + cost_meta: Dict[str, Any] = {"api_cost_usd": 0.0} + if infra_cost is not None: + cost_meta["infra_cost_usd"] = infra_cost + + adapter.emit_dict_event( + "cost.record", + { + "provider": "ollama", + "model": model, + "prompt_tokens": usage.prompt_tokens if usage else 0, + "completion_tokens": usage.completion_tokens if usage else 0, + 
"total_tokens": usage.total_tokens if usage else 0, + **cost_meta, + }, + ) + except Exception: + logger.warning("Error emitting Ollama trace events", exc_info=True) + + return response + + traced_call._layerlens_original = original # type: ignore[attr-defined] + return traced_call + + @staticmethod + def _extract_usage(response: Any) -> Optional[NormalizedTokenUsage]: + """Extract token usage from an Ollama response.""" + if response is None: + return None + if isinstance(response, dict): + prompt = response.get("prompt_eval_count", 0) or 0 + completion = response.get("eval_count", 0) or 0 + return NormalizedTokenUsage( + prompt_tokens=prompt, + completion_tokens=completion, + total_tokens=prompt + completion, + ) + prompt = getattr(response, "prompt_eval_count", 0) or 0 + completion = getattr(response, "eval_count", 0) or 0 + return NormalizedTokenUsage( + prompt_tokens=prompt, + completion_tokens=completion, + total_tokens=prompt + completion, + ) + + @staticmethod + def _extract_output_message( + response: Any, method_name: str + ) -> Optional[Dict[str, str]]: + """Extract the output message from an Ollama response.""" + try: + if response is None: + return None + if method_name == "chat": + msg = ( + response.get("message", {}) + if isinstance(response, dict) + else getattr(response, "message", None) + ) + if msg: + content = ( + msg.get("content", "") + if isinstance(msg, dict) + else getattr(msg, "content", "") + ) + if content: + return {"role": "assistant", "content": str(content)[:10_000]} + elif method_name == "generate": + text = ( + response.get("response", "") + if isinstance(response, dict) + else getattr(response, "response", "") + ) + if text: + return {"role": "assistant", "content": str(text)[:10_000]} + except Exception: + pass + return None + + def _calculate_infra_cost(self, response: Any) -> Optional[float]: + """Calculate optional infrastructure cost from compute duration.""" + if self._cost_per_second is None: + return None + if response is 
None: + return None + + total_ns = 0 + if isinstance(response, dict): + total_ns = (response.get("eval_duration", 0) or 0) + ( + response.get("prompt_eval_duration", 0) or 0 + ) + else: + total_ns = (getattr(response, "eval_duration", 0) or 0) + ( + getattr(response, "prompt_eval_duration", 0) or 0 + ) + + if total_ns > 0: + total_seconds = total_ns / 1_000_000_000 + return round(total_seconds * self._cost_per_second, 8) + return None + + +# Registry lazy-loading convention. +ADAPTER_CLASS = OllamaAdapter diff --git a/tests/instrument/__init__.py b/tests/instrument/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/instrument/_baselines/default_dependencies.txt b/tests/instrument/_baselines/default_dependencies.txt new file mode 100644 index 0000000..da04e06 --- /dev/null +++ b/tests/instrument/_baselines/default_dependencies.txt @@ -0,0 +1,22 @@ +# Baseline of REQUIRED runtime dependencies for `pip install layerlens`. +# +# Format: one PEP 508 requirement per line, sorted alphabetically by +# package name (PEP 503 normalized). Comments (lines starting with `#`) +# and blank lines are ignored. +# +# This file is consumed by tests/instrument/test_default_install.py to +# guard against accidental dependency additions in the SDK's default +# install set. Adding a line here represents a deliberate, reviewer- +# acknowledged decision to require a new transitive dependency for +# every `pip install layerlens` user. +# +# Adding a new heavy dependency? Put it behind an extra in +# `[project.optional-dependencies]` instead. Only widely-used, +# lightweight, dependency-stable packages belong in the default set. +# +# To regenerate after an intentional change: +# 1. Edit `[project] dependencies` in pyproject.toml. +# 2. Run: python scripts/regen_dep_baselines.py +# 3. Commit both pyproject.toml and this file in the same PR. 
+httpx>=0.23.0, <1 +pydantic>=1.9.0, <3 diff --git a/tests/instrument/_baselines/resolved_dependencies.txt b/tests/instrument/_baselines/resolved_dependencies.txt new file mode 100644 index 0000000..83168d7 --- /dev/null +++ b/tests/instrument/_baselines/resolved_dependencies.txt @@ -0,0 +1,40 @@ +# Baseline of TRANSITIVELY-RESOLVED package names for `pip install layerlens`. +# +# Format: one PEP 503 normalized package name per line, sorted +# alphabetically. Comments (lines starting with `#`) and blank lines +# are ignored. Versions are intentionally OMITTED — version drift in +# transitive deps is a separate concern (handled by the lockfile); +# this guard is purely about install-set BLOAT. +# +# This file is consumed by tests/instrument/test_resolved_dep_tree.py +# and `.github/workflows/dep-tree-guard.yaml` to guard against +# transitive bloat. A direct dep with a permissive lower bound can +# pull in a tree that quintuples install size; this baseline catches +# it. +# +# The CI workflow resolves the dependency tree from a clean +# environment (no extras), normalizes the package names, and diffs +# against this file: +# - ADDITIONS fail the build. +# - REMOVALS pass (transitive deps disappearing is good news). +# +# Adding a transitively-resolved dep here represents an explicit +# acknowledgement that the new transitive bloat is acceptable. +# +# To regenerate after an intentional change (e.g. bumping the floor +# of a direct dep, accepting a new transitive package): +# 1. Edit `[project] dependencies` in pyproject.toml as desired. +# 2. Run: python scripts/regen_dep_baselines.py +# 3. Commit pyproject.toml AND this file in the same PR. 
+annotated-types +anyio +certifi +exceptiongroup +h11 +httpcore +httpx +idna +pydantic +pydantic-core +typing-extensions +typing-inspection diff --git a/tests/instrument/adapters/__init__.py b/tests/instrument/adapters/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/instrument/adapters/providers/__init__.py b/tests/instrument/adapters/providers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/instrument/adapters/providers/test_ollama_adapter.py b/tests/instrument/adapters/providers/test_ollama_adapter.py new file mode 100644 index 0000000..231aba3 --- /dev/null +++ b/tests/instrument/adapters/providers/test_ollama_adapter.py @@ -0,0 +1,392 @@ +"""HTTP-fixture tests for the Ollama LLM provider adapter. + +These tests instrument a real :class:`ollama.Client` and intercept the +underlying HTTP traffic with :mod:`respx`, asserting that the adapter +emits the canonical telemetry events (``model.invoke``, +``cost.record``, ``policy.violation``) and that all Ollama-specific +invariants hold: + +* ``api_cost_usd`` is always exactly ``0.0`` (local inference). +* ``infra_cost_usd`` appears only when ``cost_per_second`` is set. +* The endpoint and method are recorded in event metadata. +* ``connect_client`` works on the real ``ollama.Client`` instance. +* ``disconnect`` restores all originally-bound methods. + +Hitting the real httpx layer (rather than a hand-rolled +``SimpleNamespace`` mock) gives us confidence the adapter integrates +with the actual Ollama Python SDK as shipped on PyPI. 
+""" + +from __future__ import annotations + +from typing import Any, Dict, List + +import httpx +import respx +import pytest + +from layerlens.instrument.adapters._base import AdapterStatus, CaptureConfig +from layerlens.instrument.adapters.providers.ollama_adapter import ( + ADAPTER_CLASS, + OllamaAdapter, +) + +# Ollama's Python SDK normalises ``http://localhost:11434`` to +# ``http://127.0.0.1:11434`` before httpx sees the URL, so respx must +# match against the IP form. +_OLLAMA_BASE = "http://127.0.0.1:11434" + + +class _RecordingStratix: + """Minimal capture sink that records every emitted event.""" + + def __init__(self) -> None: + self.events: List[Dict[str, Any]] = [] + + def emit(self, *args: Any, **kwargs: Any) -> None: + if len(args) == 2 and isinstance(args[0], str): + self.events.append({"event_type": args[0], "payload": args[1]}) + + +def _by_type(events: List[Dict[str, Any]], event_type: str) -> List[Dict[str, Any]]: + return [e for e in events if e["event_type"] == event_type] + + +def _chat_response_body( + *, + content: str = "hello", + prompt_eval_count: int = 10, + eval_count: int = 5, + prompt_eval_duration_ns: int = 1_000_000_000, + eval_duration_ns: int = 2_000_000_000, + done_reason: str = "stop", +) -> Dict[str, Any]: + return { + "model": "llama3.1", + "created_at": "2026-01-01T00:00:00Z", + "message": {"role": "assistant", "content": content}, + "done": True, + "done_reason": done_reason, + "total_duration": prompt_eval_duration_ns + eval_duration_ns, + "load_duration": 0, + "prompt_eval_count": prompt_eval_count, + "prompt_eval_duration": prompt_eval_duration_ns, + "eval_count": eval_count, + "eval_duration": eval_duration_ns, + } + + +def _generate_response_body( + *, + response: str = "why the sky is blue", + prompt_eval_count: int = 5, + eval_count: int = 3, +) -> Dict[str, Any]: + return { + "model": "llama3.1", + "created_at": "2026-01-01T00:00:00Z", + "response": response, + "done": True, + "done_reason": "stop", + 
"total_duration": 1_500_000_000, + "load_duration": 0, + "prompt_eval_count": prompt_eval_count, + "prompt_eval_duration": 500_000_000, + "eval_count": eval_count, + "eval_duration": 1_000_000_000, + } + + +# --------------------------------------------------------------------------- +# Module / registry plumbing +# --------------------------------------------------------------------------- + + +def test_adapter_class_export() -> None: + """The lazy registry contract: ``ADAPTER_CLASS`` is the class itself.""" + assert ADAPTER_CLASS is OllamaAdapter + + +def test_lazy_export_from_providers_package() -> None: + """``from ...providers import OllamaAdapter`` triggers lazy import.""" + from layerlens.instrument.adapters.providers import OllamaAdapter as Lazy + + assert Lazy is OllamaAdapter + + +# --------------------------------------------------------------------------- +# Endpoint detection +# --------------------------------------------------------------------------- + + +def test_connect_default_endpoint(monkeypatch: pytest.MonkeyPatch) -> None: + """Without ``OLLAMA_HOST`` set, the adapter records the local default.""" + monkeypatch.delenv("OLLAMA_HOST", raising=False) + adapter = OllamaAdapter() + adapter.connect() + assert adapter.status == AdapterStatus.HEALTHY + assert adapter._endpoint == "http://localhost:11434" + + +def test_connect_uses_env_endpoint(monkeypatch: pytest.MonkeyPatch) -> None: + """``OLLAMA_HOST`` overrides the default and is stamped on every event.""" + monkeypatch.setenv("OLLAMA_HOST", "http://my-ollama:11434") + adapter = OllamaAdapter() + adapter.connect() + assert adapter._endpoint == "http://my-ollama:11434" + + +# --------------------------------------------------------------------------- +# Chat path — HTTP-fixture round-trip +# --------------------------------------------------------------------------- + + +@respx.mock(base_url=_OLLAMA_BASE) +def test_chat_emits_zero_api_cost(respx_mock: respx.MockRouter) -> None: + """Local 
inference => ``api_cost_usd`` is exactly 0.0.""" + respx_mock.post("/api/chat").mock( + return_value=httpx.Response(200, json=_chat_response_body()) + ) + + from ollama import Client + + sink = _RecordingStratix() + adapter = OllamaAdapter(stratix=sink, capture_config=CaptureConfig.full()) + adapter.connect() + client = Client() + adapter.connect_client(client) + + response = client.chat( + model="llama3.1", + messages=[{"role": "user", "content": "hi"}], + ) + + assert response.message.content == "hello" + + cost_events = _by_type(sink.events, "cost.record") + assert len(cost_events) == 1 + payload = cost_events[0]["payload"] + assert payload["api_cost_usd"] == 0.0 + assert payload["provider"] == "ollama" + assert payload["prompt_tokens"] == 10 + assert payload["completion_tokens"] == 5 + assert payload["total_tokens"] == 15 + # Without cost_per_second, no infra cost should be emitted. + assert "infra_cost_usd" not in payload + + +@respx.mock(base_url=_OLLAMA_BASE) +def test_chat_records_method_and_endpoint(respx_mock: respx.MockRouter) -> None: + """The ``model.invoke`` event records method and endpoint metadata.""" + respx_mock.post("/api/chat").mock( + return_value=httpx.Response(200, json=_chat_response_body()) + ) + + from ollama import Client + + sink = _RecordingStratix() + adapter = OllamaAdapter(stratix=sink, capture_config=CaptureConfig.full()) + adapter.connect() + client = Client() + adapter.connect_client(client) + + client.chat(model="llama3.1", messages=[{"role": "user", "content": "hi"}]) + + invokes = _by_type(sink.events, "model.invoke") + assert len(invokes) == 1 + payload = invokes[0]["payload"] + assert payload["method"] == "chat" + assert payload["endpoint"] == "http://localhost:11434" + assert payload["finish_reason"] == "stop" + assert payload["model"] == "llama3.1" + # Output captured under capture_content=True. + assert payload["output_message"]["content"] == "hello" + # Latency is a non-negative float. 
+ assert isinstance(payload["latency_ms"], float) + assert payload["latency_ms"] >= 0.0 + + +@respx.mock(base_url=_OLLAMA_BASE) +def test_chat_with_cost_per_second_emits_infra_cost( + respx_mock: respx.MockRouter, +) -> None: + """``cost_per_second`` => ``infra_cost_usd = total_seconds * rate``.""" + respx_mock.post("/api/chat").mock( + return_value=httpx.Response( + 200, + # 1s prompt eval + 2s eval = 3s @ $0.01/s => $0.03 + json=_chat_response_body(), + ) + ) + + from ollama import Client + + sink = _RecordingStratix() + adapter = OllamaAdapter( + stratix=sink, + capture_config=CaptureConfig.full(), + cost_per_second=0.01, + ) + adapter.connect() + client = Client() + adapter.connect_client(client) + + client.chat(model="llama3.1", messages=[{"role": "user", "content": "hi"}]) + + cost_events = _by_type(sink.events, "cost.record") + assert cost_events[0]["payload"]["infra_cost_usd"] == pytest.approx(0.03) + # api_cost is still 0 — infra cost is additive, not a substitute. + assert cost_events[0]["payload"]["api_cost_usd"] == 0.0 + + +# --------------------------------------------------------------------------- +# Generate path +# --------------------------------------------------------------------------- + + +@respx.mock(base_url=_OLLAMA_BASE) +def test_generate_captures_prompt_as_input_message( + respx_mock: respx.MockRouter, +) -> None: + """``generate`` synthesises a single user-role message from the prompt.""" + respx_mock.post("/api/generate").mock( + return_value=httpx.Response(200, json=_generate_response_body()) + ) + + from ollama import Client + + sink = _RecordingStratix() + adapter = OllamaAdapter(stratix=sink, capture_config=CaptureConfig.full()) + adapter.connect() + client = Client() + adapter.connect_client(client) + + client.generate(model="llama3.1", prompt="Why is the sky blue?") + + invokes = _by_type(sink.events, "model.invoke") + payload = invokes[0]["payload"] + assert payload["method"] == "generate" + assert payload["messages"] == [ + 
{"role": "user", "content": "Why is the sky blue?"} + ] + assert payload["output_message"]["content"] == "why the sky is blue" + assert payload["prompt_tokens"] == 5 + assert payload["completion_tokens"] == 3 + + +# --------------------------------------------------------------------------- +# Embeddings path +# --------------------------------------------------------------------------- + + +@respx.mock(base_url=_OLLAMA_BASE) +def test_embeddings_emits_model_invoke_with_method( + respx_mock: respx.MockRouter, +) -> None: + """Embeddings are L3 ``model.invoke`` events with ``method=embeddings``.""" + respx_mock.post("/api/embeddings").mock( + return_value=httpx.Response(200, json={"embedding": [0.1, 0.2, 0.3]}) + ) + + from ollama import Client + + sink = _RecordingStratix() + adapter = OllamaAdapter(stratix=sink, capture_config=CaptureConfig.full()) + adapter.connect() + client = Client() + adapter.connect_client(client) + + response = client.embeddings(model="nomic-embed-text", prompt="hello") + assert len(response.embedding) == 3 + + invokes = _by_type(sink.events, "model.invoke") + assert any(e["payload"]["method"] == "embeddings" for e in invokes) + # Embeddings response carries no token counts — the cost payload + # falls back to zeros and api_cost_usd is still 0.0. 
def test_disconnect_restores_originals() -> None:
    """``disconnect`` leaves no traced wrapper reachable from the client.

    Comparing with ``is`` against the pre-connect bound method is not
    possible (Python re-binds the descriptor on every attribute access),
    so a wrapper is recognised by its ``_layerlens_original`` sentinel.
    """
    from ollama import Client

    traced_methods = ("chat", "generate", "embeddings")

    adapter = OllamaAdapter()
    adapter.connect()
    client = Client()
    adapter.connect_client(client)

    # While connected, every traced method carries the sentinel.
    for method_name in traced_methods:
        assert hasattr(getattr(client, method_name), "_layerlens_original")

    adapter.disconnect()

    # After disconnect the callables reachable from the client must be the
    # SDK's own methods again, which do not carry the sentinel.
    # NOTE(review): whether the instance attributes are deleted or re-set to
    # the saved originals depends on the restore path - confirm if it matters.
    for method_name in traced_methods:
        assert not hasattr(getattr(client, method_name), "_layerlens_original")
    assert adapter.status == AdapterStatus.DISCONNECTED
+ This is the load-bearing source of truth — what new SDK releases + actually advertise as required. +2. **Installed metadata Requires-Dist** vs. the same baseline. + Belt-and-suspenders: catches mismatch between source-of-truth and + what the wheel actually ships. + +If you add a new required dependency to ``[project] dependencies`` in +``pyproject.toml`` (rare and intentional), update the baseline file in +the same PR. If you add an extras group, no change is needed — extras +are not in ``Requires-Dist`` until a user opts in. +""" + +from __future__ import annotations + +import re +import sys +from typing import Set, Dict, List, Tuple +from pathlib import Path + +if sys.version_info >= (3, 11): + import tomllib +else: # pragma: no cover - Python 3.9/3.10 fallback + import tomli as tomllib + + +_REPO_ROOT: Path = Path(__file__).resolve().parents[2] +_PYPROJECT: Path = _REPO_ROOT / "pyproject.toml" +_BASELINE_PATH: Path = Path(__file__).resolve().parent / "_baselines" / "default_dependencies.txt" + + +def _normalize(name: str) -> str: + """Normalize a distribution name per PEP 503.""" + return re.sub(r"[-_.]+", "-", name).strip().lower() + + +def _split_name(requirement: str) -> str: + """Extract the bare package name from a PEP 508 requirement line.""" + # PEP 508 grammar: name[extras] specifier ; marker + # We just need the name, which terminates at: whitespace, `[`, `;`, + # `<`, `>`, `=`, `!`, `~`, or end-of-string. + bare = re.split(r"[\s\[;<>=!~]", requirement, maxsplit=1)[0] + return _normalize(bare) + + +def _read_baseline_file() -> Tuple[List[str], Dict[str, str]]: + """Return (raw_lines, name->requirement) from the baseline file. + + Comments and blank lines are stripped from the returned data + structures but the raw list preserves order for diagnostic output. 
def _required_dist_names() -> Set[str]:
    """Return the normalized names of ``layerlens``'s unconditional requirements.

    Reads the installed distribution metadata for ``layerlens`` and drops
    every requirement whose text contains an ``extra ==`` marker, since
    those belong to optional extras rather than the default install set.
    """
    from importlib.metadata import distribution

    declared = distribution("layerlens").requires or []
    return {_split_name(req) for req in declared if "extra ==" not in req}
+ ) + assert not removed, ( + f"Baseline lists dependencies not present in pyproject.toml: " + f"{sorted(removed)}.\n" + f" Baseline file: {_BASELINE_PATH}\n" + f" If the removal is intentional, update the baseline file." + ) + + # Also verify the version specifier matches exactly. A silent bump of + # a lower bound would be a behaviour change worth surfacing. + for name in sorted(pyproject_names): + assert pyproject_deps[name] == baseline_by_name[name], ( + f"Version specifier drift for `{name}`:\n" + f" pyproject.toml: {pyproject_deps[name]!r}\n" + f" baseline: {baseline_by_name[name]!r}\n" + f" Update the baseline file if the bump is intentional." + ) + + +def test_installed_metadata_matches_baseline() -> None: + """Installed wheel ``Requires-Dist`` MUST match the baseline name set.""" + actual = _required_dist_names() + _, baseline_by_name = _read_baseline_file() + expected = set(baseline_by_name) + + extra = actual - expected + missing = expected - actual + + assert not extra, ( + f"Installed `layerlens` advertises required deps not in the baseline: " + f"{sorted(extra)}.\n" + f" This means the built wheel diverged from pyproject.toml — investigate." + ) + assert not missing, ( + f"Installed `layerlens` is missing baseline-required deps: " + f"{sorted(missing)}.\n" + f" Reinstall the package: `pip install -e .`" + ) + + +def test_baseline_file_is_sorted_and_well_formed() -> None: + """The baseline file must be sorted and have one requirement per line.""" + raw, by_name = _read_baseline_file() + + # Filter to the data lines and verify sort order. + data_lines: List[str] = [line.strip() for line in raw if line.strip() and not line.strip().startswith("#")] + sorted_data = sorted(data_lines, key=_split_name) + assert data_lines == sorted_data, ( + "Baseline file must be sorted alphabetically by normalized package name.\n" + f" Expected order: {sorted_data}\n" + f" Actual order: {data_lines}" + ) + + # No duplicate names. 
+ seen: Set[str] = set() + for line in data_lines: + name = _split_name(line) + assert name not in seen, f"Duplicate dependency in baseline: {name}" + seen.add(name) + + # by_name was populated, so the file is non-empty. + assert by_name, "Baseline file must contain at least one dependency." diff --git a/tests/instrument/test_lazy_imports.py b/tests/instrument/test_lazy_imports.py new file mode 100644 index 0000000..8536567 --- /dev/null +++ b/tests/instrument/test_lazy_imports.py @@ -0,0 +1,104 @@ +"""Lazy-import guards for the Instrument layer. + +Importing ``layerlens`` (or ``layerlens.instrument``) MUST NOT import +any optional adapter dependency. Adapter modules that wrap heavy +frameworks (langchain, llama-index, crewai, etc.) are loaded by +:class:`AdapterRegistry` only when the user explicitly requests that +framework — never at SDK import time. + +This is the single load-bearing guarantee the v1.x stable client SDK +makes about install-and-import surface area. Breaking it would mean +that simply running ``import layerlens`` in a process triggers a 30+MB +of optional package imports, which is a regression. +""" + +from __future__ import annotations + +import sys +from typing import Set + +# Modules that MUST NOT be loaded as a side effect of importing layerlens +# or layerlens.instrument. These are the heavy-framework dependencies of +# the adapter extras. 
_FORBIDDEN_PREFIXES: Set[str] = {
    "langchain",
    "langchain_core",
    "langgraph",
    "llama_index",
    "crewai",
    "autogen",
    "pyautogen",
    "semantic_kernel",
    "ag_ui",
    "mcp",
    "smolagents",
    "agno",
    "strands",
    "browser_use",
    "openai",
    "anthropic",
    "boto3",
    "litellm",
    "ollama",
    "google.cloud.aiplatform",
    "pydantic_ai",
    "cohere",
    "mistralai",
}


def _is_forbidden(name: str, prefixes: Set[str]) -> bool:
    """Return whether ``name`` is one of ``prefixes`` or a submodule of one.

    Matching is on whole dotted components, so ``"openai.types"`` matches
    the prefix ``"openai"`` but ``"openai_extras"`` does not.
    """
    return any(name == prefix or name.startswith(prefix + ".") for prefix in prefixes)


def _modules_under(prefixes: Set[str]) -> Set[str]:
    """Return loaded module names matching any forbidden prefix.

    Inspects the *current* interpreter's ``sys.modules``. The tests below
    do not rely on it, because the current process may already have
    imported anything; it is kept for in-process diagnostics.
    """
    # Snapshot the keys: imports on other threads may mutate sys.modules.
    return {name for name in list(sys.modules) if _is_forbidden(name, prefixes)}


def _leaked_by_fresh_import(*module_names: str) -> Set[str]:
    """Import ``module_names`` in a clean child interpreter and report leaks.

    An in-process ``import layerlens`` is a no-op once ``layerlens`` is in
    ``sys.modules`` (which it normally is by the time pytest runs this
    file), so an in-process check passes without testing anything.
    Deleting third-party modules from the shared ``sys.modules`` to
    "isolate" the import also disturbs every other test in the session.
    A fresh interpreter avoids both problems.

    Args:
        *module_names: Dotted module names to import, in order.

    Returns:
        The names of forbidden modules (per ``_FORBIDDEN_PREFIXES``) that
        were loaded in the child after the imports completed.

    Raises:
        AssertionError: If the child interpreter fails to import any of
            ``module_names``.
    """
    import json
    import os
    import subprocess

    child_script = (
        "import importlib\n"
        "import json\n"
        "import sys\n"
        f"for name in {list(module_names)!r}:\n"
        "    importlib.import_module(name)\n"
        "print(json.dumps(sorted(sys.modules)))\n"
    )
    # Hand the parent's import path to the child so source-tree layouts
    # (e.g. a pytest ``pythonpath`` setting) resolve the same way.
    child_env = dict(os.environ)
    child_env["PYTHONPATH"] = os.pathsep.join(entry for entry in sys.path if entry)

    proc = subprocess.run(
        [sys.executable, "-c", child_script],
        capture_output=True,
        text=True,
        check=False,
        env=child_env,
    )
    assert proc.returncode == 0, (
        f"Importing {list(module_names)} in a fresh interpreter failed "
        f"(exit {proc.returncode}):\n{proc.stderr}"
    )
    # The JSON payload is the last stdout line; anything the imported
    # packages print themselves comes before it.
    loaded = json.loads(proc.stdout.strip().splitlines()[-1])
    return {name for name in loaded if _is_forbidden(name, _FORBIDDEN_PREFIXES)}


def test_layerlens_import_does_not_pull_frameworks() -> None:
    """Plain ``import layerlens`` MUST NOT load any framework dep."""
    leaked = _leaked_by_fresh_import("layerlens")
    assert not leaked, (
        f"Importing layerlens leaked framework modules: {sorted(leaked)}. "
        "Ensure adapter modules are NOT imported at SDK init time."
    )


def test_instrument_import_does_not_pull_frameworks() -> None:
    """``import layerlens.instrument`` MUST NOT load any framework dep."""
    leaked = _leaked_by_fresh_import(
        "layerlens.instrument",
        "layerlens.instrument.adapters",
        "layerlens.instrument.adapters._base",
    )
    assert not leaked, (
        f"Importing layerlens.instrument leaked framework modules: {sorted(leaked)}. "
        "The instrument package and its _base layer must not import any adapter module."
    )


def test_adapter_packages_importable_without_framework() -> None:
    """The ``providers`` package must import cleanly and stay lazy.

    Importing the package must succeed without any optional dependency
    installed and must not load a provider SDK as a side effect; concrete
    adapter modules are only loaded on demand. The sister ``frameworks``
    and ``protocols`` packages are landed by their own M-series PRs and
    intentionally omitted here.
    """
    # The helper asserts that the import itself succeeds in the child.
    leaked = _leaked_by_fresh_import("layerlens.instrument.adapters.providers")
    assert not leaked, (
        f"Importing layerlens.instrument.adapters.providers leaked framework "
        f"modules: {sorted(leaked)}. Provider adapters must be resolved lazily."
    )
+ +The CI workflow ``.github/workflows/dep-tree-guard.yaml`` always runs +in online mode. Local runs default to offline so devs without ``uv`` +installed can still iterate on the test suite. +""" + +from __future__ import annotations + +import os +import re +import sys +import shutil +import subprocess +from typing import Set, List +from pathlib import Path + +import pytest + +if sys.version_info >= (3, 11): + import tomllib +else: # pragma: no cover - Python 3.9/3.10 fallback + import tomli as tomllib + + +_REPO_ROOT: Path = Path(__file__).resolve().parents[2] +_PYPROJECT: Path = _REPO_ROOT / "pyproject.toml" +_BASELINE_PATH: Path = Path(__file__).resolve().parent / "_baselines" / "resolved_dependencies.txt" + + +def _normalize(name: str) -> str: + """Normalize a distribution name per PEP 503.""" + return re.sub(r"[-_.]+", "-", name).strip().lower() + + +def _split_name(requirement: str) -> str: + """Extract the bare package name from a PEP 508 requirement line.""" + bare = re.split(r"[\s\[;<>=!~]", requirement, maxsplit=1)[0] + return _normalize(bare) + + +def _read_baseline_names() -> List[str]: + """Return the sorted list of normalized names in the baseline file.""" + raw = _BASELINE_PATH.read_text(encoding="utf-8").splitlines() + out: List[str] = [] + for line in raw: + stripped = line.strip() + if not stripped or stripped.startswith("#"): + continue + out.append(_split_name(stripped)) + return out + + +def _read_pyproject_direct_deps() -> List[str]: + """Return the raw ``[project] dependencies`` strings.""" + with _PYPROJECT.open("rb") as fh: + data = tomllib.load(fh) + deps = data.get("project", {}).get("dependencies", []) or [] + return [str(d).strip() for d in deps if isinstance(d, str)] + + +def _resolve_tree_via_uv(direct_deps: List[str]) -> Set[str]: + """Invoke ``uv pip compile`` and return the resolved name set.""" + proc = subprocess.run( + [ + "uv", + "pip", + "compile", + "-q", + "--no-header", + "--no-annotate", + "--no-strip-extras", + 
"--universal", + "-", + ], + input="\n".join(direct_deps).encode("utf-8"), + capture_output=True, + check=False, + ) + if proc.returncode != 0: + stderr = proc.stderr.decode("utf-8", errors="replace") + raise RuntimeError(f"`uv pip compile` failed (exit {proc.returncode}):\n{stderr}") + output = proc.stdout.decode("utf-8") + + names: Set[str] = set() + for line in output.splitlines(): + line = line.strip() + if not line or line.startswith("#"): + continue + names.add(_split_name(line)) + return names + + +def _online_mode_requested() -> bool: + """Return whether the test should perform a live resolve.""" + if shutil.which("uv") is None: + return False + if os.environ.get("CI") == "true": + return True + return os.environ.get("LAYERLENS_RESOLVE_DEPS") == "1" + + +def test_baseline_file_is_sorted_and_well_formed() -> None: + """The baseline must be sorted, normalized, and free of duplicates.""" + names = _read_baseline_names() + assert names, "Baseline file must contain at least one resolved package name." + + sorted_names = sorted(names) + assert names == sorted_names, ( + "Baseline file must be sorted alphabetically by normalized package name.\n" + f" Expected: {sorted_names}\n" + f" Actual: {names}" + ) + + # No duplicates. + assert len(names) == len(set(names)), ( + f"Duplicate names in baseline: {sorted({n for n in names if names.count(n) > 1})}" + ) + + # Every line must already be in normalized form. + for n in names: + assert n == _normalize(n), f"Baseline contains non-normalized name {n!r}; expected {_normalize(n)!r}." + + +def test_baseline_includes_every_direct_dep() -> None: + """Every direct dep in pyproject.toml must appear in the resolved baseline. + + This is a tautology in any consistent baseline (a package is always + in its own resolved tree), but the check catches the case where a + direct dep was added to pyproject.toml without regenerating the + baseline. 
+ """ + direct_names = {_split_name(req) for req in _read_pyproject_direct_deps()} + baseline_names = set(_read_baseline_names()) + missing = direct_names - baseline_names + assert not missing, ( + f"Direct dep(s) in pyproject.toml not present in resolved baseline: " + f"{sorted(missing)}.\n" + f" Run `python scripts/regen_dep_baselines.py` to refresh." + ) + + +@pytest.mark.skipif( + not _online_mode_requested(), + reason=( + "Live dependency resolution requires `uv` on PATH and either " + "CI=true or LAYERLENS_RESOLVE_DEPS=1. Skipping in offline mode." + ), +) +def test_resolved_tree_matches_baseline() -> None: + """The live-resolved tree MUST NOT add packages beyond the baseline.""" + direct_deps = _read_pyproject_direct_deps() + resolved = _resolve_tree_via_uv(direct_deps) + baseline = set(_read_baseline_names()) + + added = resolved - baseline + removed = baseline - resolved + + assert not added, ( + f"Resolved dependency tree added packages NOT in the baseline: " + f"{sorted(added)}.\n" + f" This means a direct dep started pulling in new transitive deps.\n" + f" If the addition is acceptable, regenerate the baseline:\n" + f" python scripts/regen_dep_baselines.py\n" + f" Otherwise, tighten the version specifier on the offending direct dep." + ) + + if removed: + # Removals are good news (less bloat) but we still report them so + # devs can refresh the baseline. Don't fail the test; this is a + # one-way ratchet that only blocks ADDITIONS. + sys.stderr.write( + f"\nNOTE: resolved tree no longer pulls in: {sorted(removed)}.\n" + f" Consider running `python scripts/regen_dep_baselines.py` " + f"to tighten the baseline.\n" + )