diff --git a/docs/adapters/frameworks-agno.md b/docs/adapters/frameworks-agno.md new file mode 100644 index 0000000..fd7e36b --- /dev/null +++ b/docs/adapters/frameworks-agno.md @@ -0,0 +1,101 @@ +# Agno framework adapter + +`layerlens.instrument.adapters.frameworks.agno.AgnoAdapter` instruments +[Agno](https://github.com/agno-agi/agno) agents — single-agent and +multi-agent teams — by wrapping `Agent.run()` and `Agent.arun()`. + +## Install + +```bash +pip install 'layerlens[agno]' +``` + +Pulls `agno>=0.1,<1.0`. Requires Python 3.10+. + +## Quick start + +```python +from agno.agent import Agent +from agno.models.openai import OpenAIChat + +from layerlens.instrument.adapters.frameworks.agno import AgnoAdapter, instrument_agent +from layerlens.instrument.transport.sink_http import HttpEventSink + +sink = HttpEventSink(adapter_name="agno") +adapter = AgnoAdapter() +adapter.add_sink(sink) +adapter.connect() + +agent = Agent(model=OpenAIChat(id="gpt-4o-mini"), instructions="Be concise.") +adapter.instrument_agent(agent) + +response = agent.run("What is 2 + 2?") + +adapter.disconnect() +sink.close() +``` + +`instrument_agent(agent)` is the one-liner equivalent. + +## What's wrapped + +`adapter.instrument_agent(agent)` patches the following on each Agent: + +- `run` — sync entry point. Emits `agent.input` + `agent.output` and any + inner `model.invoke` / `tool.call` events. +- `arun` — async entry point. Same semantics. +- `_run_tool` — emits `tool.call` per tool invocation (when present in the + Agno version). +- Model adapter hooks — emit `model.invoke` per LLM call. + +`disconnect()` restores all originals. + +## Events emitted + +| Event | Layer | When | +|---|---|---| +| `environment.config` | L4a | First `run` per agent. | +| `agent.input` | L1 | Beginning of every `run` / `arun`. | +| `agent.output` | L1 | End of every `run` / `arun`. | +| `agent.action` | L4a | Per intermediate reasoning step. 
| +| `agent.handoff` | L4a | When a team agent delegates to a sub-agent. | +| `agent.state.change` | cross-cutting | Memory mutations. | +| `tool.call` | L5a | Per tool invocation. | +| `model.invoke` | L3 | Per LLM call. | + +## Agno specifics + +- **Teams**: Agno supports multi-agent teams via `Team(agents=[...])`. + Instrument each team member individually with + `adapter.instrument_agent(team_member)`, or call + `instrument_agent(team)`, which recurses into the team's members. +- **Reasoning agents**: when `reasoning=True` is set on an Agent, the + intermediate reasoning steps emit `agent.action` events with a + `step_index` field. +- **Storage backends**: Agno session storage (Postgres, SQLite, Redis, + etc.) emits `agent.state.change` on every save. + +## Capture config + +```python +from layerlens.instrument.adapters._base import CaptureConfig + +# Recommended. +adapter = AgnoAdapter(capture_config=CaptureConfig.standard()) + +# Heavy: include reasoning steps as agent.code (the chain-of-thought). +adapter = AgnoAdapter( + capture_config=CaptureConfig( + l1_agent_io=True, + l2_agent_code=True, + l3_model_metadata=True, + l5a_tool_calls=True, + ), +) +``` + +## BYOK + +Agno model adapters (`OpenAIChat`, `AnthropicClaude`, etc.) read their own +credentials. The Agno adapter does not own them. For platform-managed +BYOK see `docs/adapters/byok.md` (atlas-app M1.B). diff --git a/docs/adapters/frameworks-bedrock_agents.md b/docs/adapters/frameworks-bedrock_agents.md new file mode 100644 index 0000000..99dddf1 --- /dev/null +++ b/docs/adapters/frameworks-bedrock_agents.md @@ -0,0 +1,113 @@ +# AWS Bedrock Agents framework adapter + +`layerlens.instrument.adapters.frameworks.bedrock_agents.BedrockAgentsAdapter` +instruments AWS Bedrock Agent runtime calls by registering boto3 event hooks +and parsing the `InvokeAgent` response stream's `trace` blocks. + +## Install + +```bash +pip install 'layerlens[bedrock-agents]' +``` + +Pulls `boto3>=1.34`.
AWS credentials and region must be configured the +standard way (env vars, IAM role, profile). + +## Quick start + +```python +import boto3 + +from layerlens.instrument.adapters.frameworks.bedrock_agents import ( + BedrockAgentsAdapter, + instrument_client, +) +from layerlens.instrument.transport.sink_http import HttpEventSink + +sink = HttpEventSink(adapter_name="bedrock_agents") +adapter = BedrockAgentsAdapter() +adapter.add_sink(sink) +adapter.connect() + +client = boto3.client("bedrock-agent-runtime", region_name="us-east-1") +adapter.instrument_client(client) + +response = client.invoke_agent( + agentId="ABCDEFGHIJ", + agentAliasId="TSTALIASID", + sessionId="my-session", + inputText="What is 2+2?", +) +# Iterate the response stream — trace events are captured automatically. +for chunk in response["completion"]: + pass + +adapter.disconnect() +sink.close() +``` + +`instrument_client(client)` is the convenience helper. + +## What's wrapped + +`adapter.instrument_client(client)` registers two boto3 event hooks on the +provided `bedrock-agent-runtime` client: + +- `provide-client-params.bedrock-agent-runtime.InvokeAgent` — fires before + the request goes out. Captures `agentId`, `sessionId`, and `inputText`; + emits `agent.input`, plus `environment.config` the first time an `agentId` is seen. +- `after-call.bedrock-agent-runtime.InvokeAgent` — fires after the response + comes back. Walks the `trace` blocks in the streamed events and emits + `model.invoke` / `tool.call` / `agent.action` per trace step. + +`disconnect()` unregisters both hooks. + +## Events emitted + +| Event | Layer | When | +|---|---|---| +| `environment.config` | L4a | First `InvokeAgent` per `agentId`. | +| `agent.input` | L1 | Beginning of every `InvokeAgent`. | +| `agent.output` | L1 | End of every `InvokeAgent` (after stream consumption). | +| `agent.action` | L4a | Per `orchestrationTrace.modelInvocationInput` block. | +| `agent.handoff` | L4a | Per cross-agent collaboration step.
| +| `tool.call` | L5a | Per `actionGroupInvocationInput` / `knowledgeBaseLookupInput` block. | +| `model.invoke` | L3 | Per `modelInvocationOutput` block (with token usage). | + +## Bedrock Agents specifics + +- **Action groups**: each `actionGroup` invocation maps to a `tool.call` + with `tool_name = "{actionGroupName}::{apiPath}"` and the typed + parameters in the payload. +- **Knowledge bases**: every KB lookup emits a `tool.call` with + `tool_name = "knowledge_base::{knowledgeBaseId}"` and the rendered + query + retrieved citations. +- **Multi-agent collaboration**: when a supervisor agent delegates to a + collaborator, an `agent.handoff` event is emitted with both agent IDs. +- **Session attributes**: passed through into `agent.input` payloads as + `session_attributes`. + +## Capture config + +```python +from layerlens.instrument.adapters._base import CaptureConfig + +# Recommended. +adapter = BedrockAgentsAdapter(capture_config=CaptureConfig.standard()) + +# Compliance: drop user input/output content but keep tool/model metadata. +adapter = BedrockAgentsAdapter( + capture_config=CaptureConfig( + l1_agent_io=True, + l3_model_metadata=True, + l5a_tool_calls=True, + capture_content=False, + ), +) +``` + +## BYOK + +Bedrock Agents bills directly to your AWS account via your IAM identity. +There's no separate API key to manage. The model used by the agent is +configured server-side in the agent definition. diff --git a/docs/adapters/frameworks-benchmark_import.md b/docs/adapters/frameworks-benchmark_import.md new file mode 100644 index 0000000..d859f16 --- /dev/null +++ b/docs/adapters/frameworks-benchmark_import.md @@ -0,0 +1,108 @@ +# Benchmark import framework adapter + +`layerlens.instrument.adapters.frameworks.benchmark_import.BenchmarkImportAdapter` +imports external benchmark datasets into Stratix evaluation spaces. 
Unlike +the other framework adapters, this is a **data importer**, not a runtime +instrumentation adapter — it reads benchmarks from disk or from +HuggingFace and produces normalized rows. + +## Install + +```bash +pip install 'layerlens[benchmark-import]' +``` + +The `benchmark-import` extra has no required dependencies. To use the +HuggingFace import path, additionally install `datasets`: + +```bash +pip install datasets +``` + +## Quick start (CSV) + +```python +from layerlens.instrument.adapters.frameworks.benchmark_import import ( + BenchmarkImportAdapter, +) + +adapter = BenchmarkImportAdapter() + +result = adapter.import_csv( + path="my_benchmark.csv", + schema_mapping={"question": "prompt", "answer": "expected_output"}, + max_records=1000, + tags=["custom", "qa"], +) + +print(f"Imported {result.records_imported} records into {result.benchmark_id}") +``` + +## Quick start (HuggingFace) + +```python +result = adapter.import_huggingface( + dataset_name="squad", + split="validation", + max_records=200, + tags=["public", "qa"], +) +``` + +## Quick start (HELM) + +```python +result = adapter.import_helm( + path="/path/to/helm_results.json", + tags=["helm", "leaderboard"], +) +``` + +## Public API + +| Method | Description | +|---|---| +| `import_huggingface(dataset_name, split=, subset=, schema_mapping=, max_records=, tags=)` | Stream a HuggingFace dataset into Stratix. | +| `import_helm(path, tags=)` | Import HELM JSON results. | +| `import_csv(path, schema_mapping=, delimiter=, max_records=, tags=)` | Import a CSV benchmark. | +| `import_json(path, schema_mapping=, records_key=, max_records=, tags=)` | Import a JSON benchmark. | +| `import_parquet(path, schema_mapping=, max_records=, tags=)` | Import a Parquet benchmark (requires `pyarrow`). | + +All methods return `ImportResult` with `success`, `benchmark_id`, +`records_imported`, `records_skipped`, `duration_ms`, `errors`, and +`metadata` (a `BenchmarkMetadata` Pydantic model). 
+ +## Schema mapping + +Supplying a `schema_mapping` dict renames source columns to the canonical +Stratix evaluation schema: + +| Stratix field | Common source columns | +|---|---| +| `prompt` | `question`, `input`, `query` | +| `expected_output` | `answer`, `target`, `reference`, `ground_truth` | +| `difficulty` | `difficulty`, `level` | +| `category` | `category`, `subject`, `topic` | + +When no mapping is provided, the adapter applies a small set of automatic +heuristics (case-insensitive name match against the canonical fields). + +## Persistence + +If you pass a `store=` argument to `BenchmarkImportAdapter(...)` (something +that exposes `save_benchmark(metadata, records)`), the adapter writes +imported benchmarks through it. Otherwise records are returned to the +caller and held in `adapter._benchmarks` keyed by `benchmark_id`. + +## Events emitted + +This adapter does not emit telemetry events — it produces benchmark rows. +Once stored in atlas-app, the platform's evaluation runner can iterate the +benchmark and produce `model.invoke` / `evaluation.score` events through +the standard provider adapters. + +## BYOK + +Not applicable. The adapter reads files locally or downloads from +HuggingFace using the standard `datasets` library — no model API keys are +involved. diff --git a/docs/adapters/frameworks-embedding.md b/docs/adapters/frameworks-embedding.md new file mode 100644 index 0000000..0ba431b --- /dev/null +++ b/docs/adapters/frameworks-embedding.md @@ -0,0 +1,113 @@ +# Embedding & vector store framework adapter + +`layerlens.instrument.adapters.frameworks.embedding.EmbeddingAdapter` and +`VectorStoreAdapter` instrument embedding-creation calls and vector-store +operations across the common providers. They emit `embedding.create` and +`vector_store.query` events with dimension, batch size, and latency metadata. 
+ +## Install + +```bash +pip install 'layerlens[embedding]' +``` + +The `embedding` extra has no required dependencies — bring your own provider +client (`openai`, `cohere`, `sentence-transformers`, `pinecone-client`, +`weaviate-client`, `chromadb`). + +## Quick start (embeddings) + +```python +from openai import OpenAI + +from layerlens.instrument.adapters.frameworks.embedding import EmbeddingAdapter +from layerlens.instrument.transport.sink_http import HttpEventSink + +sink = HttpEventSink(adapter_name="embedding") +adapter = EmbeddingAdapter() +adapter.add_sink(sink) +adapter.connect() + +client = OpenAI() +adapter.wrap_openai(client) + +response = client.embeddings.create( + model="text-embedding-3-small", + input=["hello world"], +) +print(f"Dimensions: {len(response.data[0].embedding)}") + +adapter.disconnect() +sink.close() +``` + +## Quick start (vector stores) + +```python +from layerlens.instrument.adapters.frameworks.embedding import VectorStoreAdapter + +vs_adapter = VectorStoreAdapter() +vs_adapter.connect() + +# Pinecone: vs_adapter.wrap_pinecone(my_index) +# Weaviate: vs_adapter.wrap_weaviate(my_collection) +# Chroma: vs_adapter.wrap_chroma(my_collection) +``` + +## What's wrapped + +`EmbeddingAdapter`: + +- `wrap_openai(client)` — patches `client.embeddings.create`. +- `wrap_cohere(client)` — patches `client.embed`. +- `wrap_sentence_transformer(model)` — patches `model.encode`. + +`VectorStoreAdapter`: + +- `wrap_pinecone(index)` — patches `index.query`. +- `wrap_weaviate(collection)` — patches `collection.query.near_vector` and + `collection.query.bm25`. +- `wrap_chroma(collection)` — patches `collection.query`. + +`disconnect()` restores all wrapped methods to their originals. + +## Events emitted + +| Event | Layer | When | +|---|---|---| +| `embedding.create` | L3 | Per embedding call. Payload: `provider`, `model`, `batch_size`, `dimensions`, `total_tokens`, `latency_ms`. | +| `vector_store.query` | L3 | Per vector-store query. 
Payload: `provider`, `top_k`, `result_count`, `latency_ms`, `index_name`. | + +## Dimension tracking + +The adapter inspects the response shape to record the actual returned +dimension count: + +- OpenAI: `result.data[0].embedding` length. +- Cohere: `result.embeddings[0]` length. +- SentenceTransformer: `result.shape[1]` when the result is a numpy/torch tensor. + +If a model is configured with `dimensions=N` truncation (OpenAI v3 family), +the recorded value is the post-truncation dimensionality. + +## Capture config + +```python +from layerlens.instrument.adapters._base import CaptureConfig + +# Both events are L3, so the standard preset captures them. +adapter = EmbeddingAdapter(capture_config=CaptureConfig.standard()) + +# Production: drop content (the input text) but keep dimension/latency. +adapter = EmbeddingAdapter( + capture_config=CaptureConfig( + l3_model_metadata=True, + capture_content=False, + ), +) +``` + +## BYOK + +The embedding adapter does not own provider keys — they belong to the +underlying client. For platform-managed BYOK see `docs/adapters/byok.md`. diff --git a/docs/adapters/frameworks-google_adk.md b/docs/adapters/frameworks-google_adk.md new file mode 100644 index 0000000..eb42ae2 --- /dev/null +++ b/docs/adapters/frameworks-google_adk.md @@ -0,0 +1,108 @@ +# Google Agent Development Kit framework adapter + +`layerlens.instrument.adapters.frameworks.google_adk.GoogleADKAdapter` +instruments [Google ADK](https://github.com/google/adk-python) agents using +the framework's native 6-callback system. + +## Install + +```bash +pip install 'layerlens[google-adk]' +``` + +Pulls `google-adk>=0.1,<1.0`. Requires Python 3.10+. 
+ +## Quick start + +```python +from google.adk.agents import LlmAgent + +from layerlens.instrument.adapters.frameworks.google_adk import ( + GoogleADKAdapter, + instrument_agent, +) +from layerlens.instrument.transport.sink_http import HttpEventSink + +sink = HttpEventSink(adapter_name="google_adk") +adapter = GoogleADKAdapter() +adapter.add_sink(sink) +adapter.connect() + +agent = LlmAgent(name="answerer", model="gemini-2.0-flash", instruction="Be concise.") +adapter.instrument_agent(agent) + +# Run via the runner of your choice (Runner, AdkApp, etc.) + +adapter.disconnect() +sink.close() +``` + +`instrument_agent(agent)` is the convenience helper. + +## What's wrapped + +`adapter.instrument_agent(agent)` attaches all six native ADK callbacks: + +- `before_agent_callback` → `agent.input` + `environment.config` +- `after_agent_callback` → `agent.output` +- `before_model_callback` → start timer for the model call +- `after_model_callback` → `model.invoke` +- `before_tool_callback` → start timer for the tool call +- `after_tool_callback` → `tool.call` + +ADK callbacks are part of the public agent contract. Setting them is the +recommended integration pattern from Google — no monkey-patching is +required, and `disconnect()` simply clears the local timer state. If your +ADK code uses a different agent type (`SequentialAgent`, `ParallelAgent`), +ensure each member agent is instrumented. + +## Events emitted + +| Event | Layer | When | +|---|---|---| +| `environment.config` | L4a | First `before_agent_callback` per agent. | +| `agent.input` | L1 | Every `before_agent_callback`. | +| `agent.output` | L1 | Every `after_agent_callback`. | +| `model.invoke` | L3 | Every `after_model_callback`. | +| `tool.call` | L5a | Every `after_tool_callback`. | + +## ADK specifics + +- **Native callback contract**: ADK guarantees that `before_*` is followed + by exactly one `after_*` per call. Latency is computed using + thread-local start timestamps. 
+- **Multimodal Gemini**: when the model produces multimodal output, the + emitted `model.invoke` payload includes a `content_types` list (e.g. + `["text", "image"]`). +- **Tool function names**: extracted from the `tool.name` field on the + `BeforeToolCallback` context — these match the function name registered + on the agent. +- **Sequential / parallel agents**: a parent `SequentialAgent` calls + `before_agent_callback` once per child; the adapter records the parent + agent name in `parent_agent` on each child event. + +## Capture config + +```python +from layerlens.instrument.adapters._base import CaptureConfig + +# Recommended. +adapter = GoogleADKAdapter(capture_config=CaptureConfig.standard()) + +# Drop content for compliance. +adapter = GoogleADKAdapter( + capture_config=CaptureConfig( + l1_agent_io=True, + l3_model_metadata=True, + l5a_tool_calls=True, + capture_content=False, + ), +) +``` + +## BYOK + +ADK reads Google AI / Vertex AI credentials from the standard environment +(`GOOGLE_API_KEY` for Google AI Studio, ADC for Vertex). The adapter does +not own those credentials. For platform-managed BYOK see +`docs/adapters/byok.md` (atlas-app M1.B). diff --git a/docs/adapters/frameworks-llama_index.md b/docs/adapters/frameworks-llama_index.md new file mode 100644 index 0000000..76d04b2 --- /dev/null +++ b/docs/adapters/frameworks-llama_index.md @@ -0,0 +1,108 @@ +# LlamaIndex framework adapter + +`layerlens.instrument.adapters.frameworks.llama_index.LlamaIndexAdapter` +instruments [LlamaIndex](https://github.com/run-llama/llama_index) agents, +workflows, query engines, and retrievers using the framework's modern +**Instrumentation Module** (v0.10.20+) — non-invasive, no monkey-patching. + +## Install + +```bash +pip install 'layerlens[llama-index]' +``` + +Pulls `llama-index>=0.10,<0.13`. Requires Python 3.10+. 
+ +## Quick start + +```python +from llama_index.core.agent import ReActAgent +from llama_index.llms.openai import OpenAI + +from layerlens.instrument.adapters.frameworks.llama_index import ( + LlamaIndexAdapter, + instrument_workflow, +) +from layerlens.instrument.transport.sink_http import HttpEventSink + +sink = HttpEventSink(adapter_name="llama_index") +adapter = LlamaIndexAdapter() +adapter.add_sink(sink) +adapter.connect() +adapter.instrument_workflow(None) # registers the global event handler + +llm = OpenAI(model="gpt-4o-mini") +agent = ReActAgent.from_tools([], llm=llm) +response = agent.chat("What is 2+2?") + +adapter.disconnect() +sink.close() +``` + +`instrument_workflow(workflow=None)` (called once per process) registers a +global LlamaIndex `BaseEventHandler` that captures every event LlamaIndex +dispatches. + +## What's wrapped + +`adapter.instrument_workflow(...)` registers a `BaseEventHandler` with +`llama_index.core.instrumentation.get_dispatcher()`. The handler observes: + +- LLM events (`LLMChatStartEvent`, `LLMChatEndEvent`, + `LLMCompletionStartEvent`, `LLMCompletionEndEvent`) +- Tool events (`AgentToolCallEvent`) +- Agent events (`AgentRunStepStartEvent`, `AgentRunStepEndEvent`, + `AgentChatWithStepStartEvent`, `AgentChatWithStepEndEvent`) +- Retrieval events (`RetrievalStartEvent`, `RetrievalEndEvent`) +- Embedding events (`EmbeddingStartEvent`, `EmbeddingEndEvent`) + +`disconnect()` removes the handler from the dispatcher's +`event_handlers` list, restoring the original behaviour. + +## Events emitted + +| Event | Layer | When | +|---|---|---| +| `environment.config` | L4a | First agent / workflow event per process. | +| `agent.input` | L1 | `AgentChatWithStepStartEvent` / agent step start. | +| `agent.output` | L1 | `AgentChatWithStepEndEvent` / agent step end. | +| `agent.action` | L4a | Per `AgentRunStepEndEvent`. | +| `tool.call` | L5a | Per `AgentToolCallEvent`. | +| `model.invoke` | L3 | Per LLM start/end pair. 
| + +## LlamaIndex specifics + +- **Workflows**: the new `Workflow` class emits dispatcher events the same + way; the same handler captures both classic agents (`ReActAgent`, + `OpenAIAgent`) and workflow `@step` runs. +- **RAG retrievers**: retrieval events are surfaced as `tool.call` with + `tool_name="retriever"` and the resolved chunk count. +- **Streaming**: streamed LLM responses fire one `LLMChatEndEvent` after + the final chunk; the adapter emits one consolidated `model.invoke`. +- **Span propagation**: LlamaIndex span IDs propagate into the event + payload as `span_id` / `parent_span_id` for tree reconstruction. + +## Capture config + +```python +from layerlens.instrument.adapters._base import CaptureConfig + +# Recommended. +adapter = LlamaIndexAdapter(capture_config=CaptureConfig.standard()) + +# Production-light: drop retrieved chunks (large), keep query + result count. +adapter = LlamaIndexAdapter( + capture_config=CaptureConfig( + l1_agent_io=True, + l3_model_metadata=True, + l5a_tool_calls=True, + capture_content=False, + ), +) +``` + +## BYOK + +LlamaIndex LLM integrations (`OpenAI`, `Anthropic`, `Bedrock`, etc.) read +their own credentials. The adapter does not own them. For platform-managed +BYOK see `docs/adapters/byok.md` (atlas-app M1.B). diff --git a/docs/adapters/frameworks-ms_agent_framework.md b/docs/adapters/frameworks-ms_agent_framework.md new file mode 100644 index 0000000..295f2b9 --- /dev/null +++ b/docs/adapters/frameworks-ms_agent_framework.md @@ -0,0 +1,115 @@ +# Microsoft Agent Framework adapter + +`layerlens.instrument.adapters.frameworks.ms_agent_framework.MSAgentAdapter` +instruments [Microsoft Agent Framework](https://learn.microsoft.com/en-us/semantic-kernel/agents/) +(Semantic Kernel Agents) by wrapping `AgentChat.invoke()` and +`AgentGroupChat.invoke()`. + +## Install + +```bash +pip install 'layerlens[ms-agent-framework]' +``` + +Pulls `semantic-kernel>=1.0,<2.0` (Semantic Kernel hosts the agents API). 
+Requires Python 3.10+. + +## Quick start + +```python +import asyncio +from semantic_kernel.agents import ChatCompletionAgent +from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion + +from layerlens.instrument.adapters.frameworks.ms_agent_framework import ( + MSAgentAdapter, + instrument_agent, +) +from layerlens.instrument.transport.sink_http import HttpEventSink + +sink = HttpEventSink(adapter_name="ms_agent_framework") +adapter = MSAgentAdapter() +adapter.add_sink(sink) +adapter.connect() + +agent = ChatCompletionAgent( + service=OpenAIChatCompletion(ai_model_id="gpt-4o-mini"), + name="answerer", + instructions="Be concise.", +) +adapter.instrument_chat(agent) + +async def main() -> None: + async for response in agent.invoke("What is 2+2?"): + print(response.content) + +asyncio.run(main()) + +adapter.disconnect() +sink.close() +``` + +`instrument_agent(chat)` is the convenience helper. + +## What's wrapped + +`adapter.instrument_chat(chat_or_agent)` wraps the framework's invocation +surfaces: + +- `invoke` — async generator returning the agent's responses. +- `invoke_stream` — async generator returning streaming chunks (when + present in the installed version). + +Both wrappers emit lifecycle events around the call and capture inner +`tool.call` and `model.invoke` events from the underlying Semantic Kernel +filters. `disconnect()` restores the originals. + +## Events emitted + +| Event | Layer | When | +|---|---|---| +| `environment.config` | L4a | First wrap of each chat. | +| `agent.input` | L1 | Beginning of every `invoke` / `invoke_stream`. | +| `agent.output` | L1 | End of every invocation (per response). | +| `agent.action` | L4a | Per intermediate step. | +| `agent.handoff` | L4a | Per `AgentGroupChat` speaker turn. | +| `tool.call` | L5a | Per plugin function invocation. | +| `model.invoke` | L3 | Per LLM call. 
| + +## MS Agent Framework specifics + +- **`AgentChat` vs `AgentGroupChat`**: both support the same + `invoke()` signature; group chats additionally emit `agent.handoff` + on each speaker turn. +- **Plugins**: Semantic Kernel plugin functions surface as `tool.call` — + the plugin name + function name combine into `tool_name`. +- **Multi-agent terminations**: configurable termination strategies + emit `agent.action` with `terminate_reason` when a group chat ends. +- **Streaming**: `invoke_stream` emits one consolidated `model.invoke` + on stream completion; per-chunk text is accumulated. + +## Capture config + +```python +from layerlens.instrument.adapters._base import CaptureConfig + +# Recommended. +adapter = MSAgentAdapter(capture_config=CaptureConfig.standard()) + +# Drop content for compliance. +adapter = MSAgentAdapter( + capture_config=CaptureConfig( + l1_agent_io=True, + l3_model_metadata=True, + l5a_tool_calls=True, + capture_content=False, + ), +) +``` + +## BYOK + +Microsoft Agent Framework uses Semantic Kernel connectors +(`OpenAIChatCompletion`, `AzureChatCompletion`, etc.) for model access. +The adapter does not own those credentials. For platform-managed BYOK +see `docs/adapters/byok.md` (atlas-app M1.B). diff --git a/docs/adapters/frameworks-openai_agents.md b/docs/adapters/frameworks-openai_agents.md new file mode 100644 index 0000000..f9d983b --- /dev/null +++ b/docs/adapters/frameworks-openai_agents.md @@ -0,0 +1,109 @@ +# OpenAI Agents SDK framework adapter + +`layerlens.instrument.adapters.frameworks.openai_agents.OpenAIAgentsAdapter` +instruments the [OpenAI Agents SDK](https://github.com/openai/openai-agents-python) +by registering a custom `TracingProcessor` and wrapping `Runner.run` for +execution lifecycle events. + +## Install + +```bash +pip install 'layerlens[openai-agents]' openai-agents +``` + +The OpenAI Agents SDK ships as `openai-agents` (separate from the `openai` +client). 
The `openai-agents` extra here pulls the prerequisite `openai>=1.30` +client; the agents framework itself is installed separately to keep the +optional-deps surface clean. + +## Quick start + +```python +from agents import Agent, Runner + +from layerlens.instrument.adapters.frameworks.openai_agents import ( + OpenAIAgentsAdapter, + instrument_runner, +) +from layerlens.instrument.transport.sink_http import HttpEventSink + +sink = HttpEventSink(adapter_name="openai_agents") +adapter = OpenAIAgentsAdapter() +adapter.add_sink(sink) +adapter.connect() +adapter.instrument_runner(None) # registers the global trace processor + +agent = Agent(name="answerer", model="gpt-4o-mini", instructions="Be concise.") +result = Runner.run_sync(agent, "What is 2+2?") +print(result.final_output) + +adapter.disconnect() +sink.close() +``` + +## What's wrapped + +`adapter.instrument_runner(...)` registers a custom +`agents.tracing.TracingProcessor` via `agents.add_trace_processor()`. The +processor receives every span the SDK produces — agent runs, model calls, +function tools, handoffs, guardrails — and translates them into LayerLens +events. + +> **Note**: the OpenAI Agents SDK exposes `add_trace_processor` but no +> matching `remove_trace_processor`. `disconnect()` flips the adapter's +> internal `_connected` flag — the registered processor is still attached +> to the SDK but stops emitting events. To fully remove the processor, +> the Python process hosting the SDK must be restarted. + +## Events emitted + +| Event | Layer | When | +|---|---|---| +| `environment.config` | L4a | First agent span observed. | +| `agent.input` | L1 | Per agent span start. | +| `agent.output` | L1 | Per agent span end. | +| `agent.action` | L4a | Per `response_span` (model call decision). | +| `agent.handoff` | L4a | Per `handoff_span`. | +| `tool.call` | L5a | Per `function_span`. | +| `model.invoke` | L3 | Per `generation_span` (model call). | +| `policy.violation` | cross-cutting | Per `guardrail_span` that fails.
| + +## OpenAI Agents specifics + +- **Span hierarchy**: each event payload includes `span_id` + `parent_span_id` + + `trace_id` from the SDK so the platform can reconstruct the agent run + tree exactly. +- **Handoffs**: the SDK's first-class `handoff` primitive maps cleanly to + `agent.handoff` with `source_agent` + `target_agent` + `tool_args` + (when the handoff carries arguments). +- **Guardrails**: input/output guardrails emit `policy.violation` with + the guardrail name and the rendered reason. +- **Function tools**: tool name and JSON-encoded args/return are captured; + schemas come from `tool.params_json_schema`. +- **Streaming**: streamed runs (`Runner.run_streamed`) emit one + consolidated `model.invoke` per generation span on completion. + +## Capture config + +```python +from layerlens.instrument.adapters._base import CaptureConfig + +# Recommended. +adapter = OpenAIAgentsAdapter(capture_config=CaptureConfig.standard()) + +# Compliance: drop content but keep span structure. +adapter = OpenAIAgentsAdapter( + capture_config=CaptureConfig( + l1_agent_io=True, + l3_model_metadata=True, + l5a_tool_calls=True, + capture_content=False, + ), +) +``` + +## BYOK + +The OpenAI Agents SDK uses the standard OpenAI client for model calls and +reads `OPENAI_API_KEY` from the environment. The adapter does not own the +key. For platform-managed BYOK see `docs/adapters/byok.md` (atlas-app M1.B). diff --git a/docs/adapters/frameworks-pydantic_ai.md b/docs/adapters/frameworks-pydantic_ai.md new file mode 100644 index 0000000..d2b5865 --- /dev/null +++ b/docs/adapters/frameworks-pydantic_ai.md @@ -0,0 +1,108 @@ +# PydanticAI framework adapter + +`layerlens.instrument.adapters.frameworks.pydantic_ai.PydanticAIAdapter` +instruments [PydanticAI](https://github.com/pydantic/pydantic-ai) agents by +wrapping `Agent.run()` and `Agent.run_sync()`. + +## Install + +```bash +pip install 'layerlens[pydantic-ai]' +``` + +Pulls `pydantic-ai>=0.0.13,<1.0`. Requires Python 3.10+. 
+ +## Quick start + +```python +from pydantic_ai import Agent + +from layerlens.instrument.adapters.frameworks.pydantic_ai import ( + PydanticAIAdapter, + instrument_agent, +) +from layerlens.instrument.transport.sink_http import HttpEventSink + +sink = HttpEventSink(adapter_name="pydantic_ai") +adapter = PydanticAIAdapter() +adapter.add_sink(sink) +adapter.connect() + +agent = Agent("openai:gpt-4o-mini", system_prompt="Be concise.") +adapter.instrument_agent(agent) + +result = agent.run_sync("What is 2 + 2?") +print(result.data) + +adapter.disconnect() +sink.close() +``` + +`instrument_agent(agent)` is the convenience helper. + +## What's wrapped + +`adapter.instrument_agent(agent)` wraps the agent's two entry points: + +- `run` — async coroutine. Emits `agent.input` at start, `agent.output` at + end. Captures intermediate `model.invoke` and `tool.call` events from the + PydanticAI message history. +- `run_sync` — synchronous wrapper. Same semantics. + +`disconnect()` restores both methods to their originals. + +## Events emitted + +| Event | Layer | When | +|---|---|---| +| `environment.config` | L4a | First wrap of each agent. | +| `agent.input` | L1 | Beginning of every `run` / `run_sync`. | +| `agent.output` | L1 | End of every `run` / `run_sync`. | +| `agent.action` | L4a | Per intermediate model step (multi-step runs). | +| `tool.call` | L5a | Per registered tool invocation. | +| `model.invoke` | L3 | Per LLM call (one per model step). | + +The `model.invoke` payload includes the model name (parsed from the +PydanticAI model spec like `openai:gpt-4o-mini`), token usage from +`result.usage()`, and the structured result type if one was declared. + +## PydanticAI specifics + +- **Structured results**: when an agent declares `result_type=MyModel`, the + validated Pydantic model is included in `agent.output` (subject to + `CaptureConfig.capture_content`). Validation errors emit + `policy.violation`. 
+- **Model spec parsing**: PydanticAI accepts model spec strings like + `"openai:gpt-4o-mini"` or `"anthropic:claude-3-5-sonnet"`. The adapter + splits these into `provider` + `model` for downstream cost lookups. +- **Streaming**: streamed runs (`agent.run_stream`) wrap the async iterator + and emit a single consolidated `model.invoke` on stream completion. Set + `stream=False` on the LLM client if you want per-call events. +- **OpenTelemetry compatibility**: PydanticAI also speaks Logfire/OTel. + The LayerLens adapter and Logfire can run side-by-side; they don't + conflict because they observe different hooks. + +## Capture config + +```python +from layerlens.instrument.adapters._base import CaptureConfig + +# Recommended. +adapter = PydanticAIAdapter(capture_config=CaptureConfig.standard()) + +# Drop content for compliance. +adapter = PydanticAIAdapter( + capture_config=CaptureConfig( + l1_agent_io=True, + l3_model_metadata=True, + l5a_tool_calls=True, + capture_content=False, + ), +) +``` + +## BYOK + +PydanticAI reads provider credentials from the env (`OPENAI_API_KEY`, +`ANTHROPIC_API_KEY`, `GROQ_API_KEY`, etc.). The adapter does not own them. +For platform-managed BYOK see `docs/adapters/byok.md` (atlas-app M1.B). diff --git a/docs/adapters/frameworks-semantic_kernel.md b/docs/adapters/frameworks-semantic_kernel.md new file mode 100644 index 0000000..b29e16b --- /dev/null +++ b/docs/adapters/frameworks-semantic_kernel.md @@ -0,0 +1,107 @@ +# Semantic Kernel framework adapter + +`layerlens.instrument.adapters.frameworks.semantic_kernel.SemanticKernelAdapter` +instruments [Microsoft Semantic Kernel](https://github.com/microsoft/semantic-kernel) +using the kernel's native filter API — non-invasive, no monkey-patching. + +## Install + +```bash +pip install 'layerlens[semantic-kernel]' +``` + +Pulls `semantic-kernel>=1.0,<2.0`. Requires Python 3.10+. 
+ +## Quick start + +```python +import asyncio +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion + +from layerlens.instrument.adapters.frameworks.semantic_kernel import SemanticKernelAdapter +from layerlens.instrument.transport.sink_http import HttpEventSink + +sink = HttpEventSink(adapter_name="semantic_kernel") +adapter = SemanticKernelAdapter() +adapter.add_sink(sink) +adapter.connect() + +kernel = Kernel() +kernel.add_service(OpenAIChatCompletion(ai_model_id="gpt-4o-mini")) +adapter.instrument_kernel(kernel) + +async def main() -> None: + result = await kernel.invoke_prompt("What is 2 + 2?") + print(result) + +asyncio.run(main()) + +adapter.disconnect() +sink.close() +``` + +## What's wrapped + +`adapter.instrument_kernel(kernel)` registers three Semantic Kernel filters +on the supplied kernel: + +- `function_invocation_filter` — fires before/after every `KernelFunction` + call (plugin function, prompt function, etc.). +- `prompt_rendering_filter` — fires before/after the prompt template is + rendered for prompt functions. +- `auto_function_invocation_filter` — fires when the model auto-selects a + plugin function via tool-calling. + +No methods are monkey-patched; on `disconnect()` the filter list is cleared +and the kernel returns to its original behaviour. + +## Events emitted + +| Event | Layer | When | +|---|---|---| +| `environment.config` | L4a | First plugin invocation per kernel. | +| `agent.input` | L1 | Function invocation start. | +| `agent.output` | L1 | Function invocation end (success or error). | +| `agent.code` | L2 | Per plugin function when `l2_agent_code` is true. | +| `agent.action` | L4a | Per planner step. | +| `agent.state.change` | cross-cutting | Memory store reads/writes. | +| `tool.call` | L5a | Per `auto_function_invocation` (model-selected plugin). | +| `model.invoke` | L3 | Per LLM call inside the kernel. 
| + +## Semantic Kernel specifics + +- **Plugin attribution**: every event includes `plugin_name`, + `function_name`, and (for prompt functions) the rendered prompt token + count when available. +- **Filter API is preferred**: filters are first-class Semantic Kernel + citizens — they survive kernel cloning and don't break the type system. + This is why this adapter uses filters instead of method-wrapping. +- **Async-first**: Semantic Kernel is async-first; all filters are async + and propagate the `next` continuation correctly. + +## Capture config + +```python +from layerlens.instrument.adapters._base import CaptureConfig + +# Recommended. +adapter = SemanticKernelAdapter(capture_config=CaptureConfig.standard()) + +# Capture rendered prompt template body. +adapter = SemanticKernelAdapter( + capture_config=CaptureConfig( + l1_agent_io=True, + l3_model_metadata=True, + l5a_tool_calls=True, + capture_content=True, + ), +) +``` + +## BYOK + +Semantic Kernel uses `OpenAIChatCompletion`, `AzureChatCompletion`, +`HuggingFaceTextCompletion`, etc. for model access. The adapter +does not own those credentials. For platform-managed BYOK see +`docs/adapters/byok.md` (atlas-app M1.B). diff --git a/docs/adapters/frameworks-strands.md b/docs/adapters/frameworks-strands.md new file mode 100644 index 0000000..a8ff251 --- /dev/null +++ b/docs/adapters/frameworks-strands.md @@ -0,0 +1,104 @@ +# AWS Strands framework adapter + +`layerlens.instrument.adapters.frameworks.strands.StrandsAdapter` instruments +[AWS Strands](https://github.com/strands-agents/sdk-python) agents by +wrapping `Agent.__call__` and `Agent.invoke`. + +## Install + +```bash +pip install 'layerlens[strands]' +``` + +Pulls `strands-agents>=0.1,<1.0`. Requires Python 3.10+. AWS credentials +must be provisioned the standard way (env, IAM role, profile) since Strands +runs against Bedrock under the hood.
+ +## Quick start + +```python +from strands import Agent + +from layerlens.instrument.adapters.frameworks.strands import ( + StrandsAdapter, + instrument_agent, +) +from layerlens.instrument.transport.sink_http import HttpEventSink + +sink = HttpEventSink(adapter_name="strands") +adapter = StrandsAdapter() +adapter.add_sink(sink) +adapter.connect() + +agent = Agent(model="us.anthropic.claude-3-5-sonnet-20241022-v2:0") +adapter.instrument_agent(agent) + +response = agent("What is 2 + 2?") + +adapter.disconnect() +sink.close() +``` + +`instrument_agent(agent)` is the convenience helper. + +## What's wrapped + +`adapter.instrument_agent(agent)` wraps both invocation surfaces: + +- `__call__` — the primary entry point (`agent("question")`). +- `invoke` — alternative entry point present in some Strands versions. + +Both wrappers emit lifecycle events around the call and capture inner +`tool.call` and `model.invoke` events from Strands' internal callback +hooks. `disconnect()` restores the originals. + +## Events emitted + +| Event | Layer | When | +|---|---|---| +| `environment.config` | L4a | First wrap of each agent. | +| `agent.input` | L1 | Beginning of every `__call__` / `invoke`. | +| `agent.output` | L1 | End of every `__call__` / `invoke`. | +| `agent.action` | L4a | Per intermediate reasoning loop iteration. | +| `agent.handoff` | L4a | Multi-agent collaboration handoffs. | +| `tool.call` | L5a | Per Strands tool invocation. | +| `model.invoke` | L3 | Per LLM call (Strands routes these through Bedrock). | + +## Strands specifics + +- **Bedrock-native**: every `model.invoke` payload includes the Bedrock + `modelId` and the conversation `inferenceConfig`. Token usage is parsed + from the Bedrock response shape. +- **Tools**: Strands tools registered via the `@tool` decorator surface + their function name and JSON schema in `tool.call.tool_schema`. +- **Loops**: Strands runs a reasoning loop (think → act → observe). 
Each + loop iteration emits an `agent.action` with `loop_index` and a copy of + the conversation state. +- **Multi-agent**: Strands supports orchestrator/worker patterns; cross-agent + delegation emits `agent.handoff` with `source_agent` + `target_agent`. + +## Capture config + +```python +from layerlens.instrument.adapters._base import CaptureConfig + +# Recommended. +adapter = StrandsAdapter(capture_config=CaptureConfig.standard()) + +# Drop conversation content for compliance. +adapter = StrandsAdapter( + capture_config=CaptureConfig( + l1_agent_io=True, + l3_model_metadata=True, + l5a_tool_calls=True, + capture_content=False, + ), +) +``` + +## BYOK + +Strands authenticates against AWS using the standard boto3 credential +chain (env / profile / IAM role). There's no separate API key. The Bedrock +model used by the agent is configured at construction time via the +`model` parameter. diff --git a/pyproject.toml b/pyproject.toml index ae6d1dc..d66e51b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,22 @@ classifiers = [ [project.optional-dependencies] cli = ["click>=8.0.0"] +# --- Instrument layer: framework adapters (agent tier) --- +# Adding any extra below MUST keep the default `pip install layerlens` +# install set unchanged. Verified by `tests/instrument/test_default_install.py`. 
+semantic-kernel = ["semantic-kernel>=1.0,<2.0; python_version >= '3.10'"] +llama-index = ["llama-index>=0.10,<0.13; python_version >= '3.10'"] +openai-agents = ["openai>=1.30,<2"] +pydantic-ai = ["pydantic-ai>=0.0.13,<1.0; python_version >= '3.10'"] +agno = ["agno>=0.1,<1.0; python_version >= '3.10'"] +strands = ["strands-agents>=0.1,<1.0; python_version >= '3.10'"] +smolagents = ["smolagents>=1.0,<2.0; python_version >= '3.10'"] +ms-agent-framework = ["semantic-kernel>=1.0,<2.0; python_version >= '3.10'"] +google-adk = ["google-adk>=0.1,<1.0; python_version >= '3.10'"] +bedrock-agents = ["boto3>=1.34"] +embedding = [] # vector store hooks; deps come from the underlying store +benchmark-import = [] # replay-based; no extra deps + [project.urls] Homepage = "https://github.com/LayerLens/stratix-python" Repository = "https://github.com/LayerLens/stratix-python" @@ -139,14 +155,21 @@ known-first-party = ["openai", "tests"] "tests/**.py" = ["T201", "T203", "ARG", "B007"] "examples/**.py" = ["T201", "T203"] "src/layerlens/cli/**" = ["T201", "T203"] +# Framework callbacks have signatures dictated by upstream — unused +# arguments are part of the contract, not a code smell. +"src/layerlens/instrument/adapters/frameworks/**.py" = ["ARG002"] [tool.pyright] include = ["src", "tests"] exclude = ["**/__pycache__"] reportMissingTypeStubs = false -# Less strict settings for tests and cli +# Less strict settings for tests, cli, and the dynamic-monkey-patching +# framework adapter code. mypy --strict stays strict for these dirs; +# pyright is relaxed here because it can't follow runtime attribute +# mutation that the framework instrumentation relies on. 
executionEnvironments = [ { root = "src/layerlens/cli", reportMissingImports = false, reportFunctionMemberAccess = false, reportCallIssue = false, reportArgumentType = false, reportAttributeAccessIssue = false }, + { root = "src/layerlens/instrument/adapters/frameworks", reportPossiblyUnbound = false, reportPossiblyUnboundVariable = false, reportCallIssue = false, reportAttributeAccessIssue = false, reportArgumentType = false, reportMissingImports = false, reportFunctionMemberAccess = false }, { root = "tests", reportGeneralTypeIssues = false, reportOptionalSubscript = false, reportOptionalMemberAccess = false, reportUntypedFunctionDecorator = false, reportUnknownArgumentType = false, reportUnknownMemberType = false, reportUnknownVariableType = false, reportUnnecessaryIsInstance = false, reportUnnecessaryComparison = false, reportArgumentType = false, reportCallIssue = false }, ] diff --git a/samples/instrument/agno/__init__.py b/samples/instrument/agno/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/samples/instrument/agno/main.py b/samples/instrument/agno/main.py new file mode 100644 index 0000000..64c7151 --- /dev/null +++ b/samples/instrument/agno/main.py @@ -0,0 +1,76 @@ +"""Sample: instrument an Agno agent with the LayerLens adapter. + +Builds a one-shot Agno ``Agent`` with the OpenAI ``gpt-4o-mini`` model, +instruments it via ``AgnoAdapter.instrument_agent``, and runs a single +``agent.run()`` call. Each run emits ``agent.input`` + ``model.invoke`` + +``agent.output`` events that ship to atlas-app via ``HttpEventSink``. + +Required environment: + +* ``OPENAI_API_KEY`` — used by the ``OpenAIChat`` model. +* ``LAYERLENS_STRATIX_API_KEY`` — your LayerLens API key (optional). +* ``LAYERLENS_STRATIX_BASE_URL`` — atlas-app base URL (optional). 
+ +Run:: + + pip install 'layerlens[agno,providers-openai]' + python -m samples.instrument.agno.main +""" + +from __future__ import annotations + +import os +import sys + +from layerlens.instrument.adapters._base import CaptureConfig +from layerlens.instrument.transport.sink_http import HttpEventSink +from layerlens.instrument.adapters.frameworks.agno import AgnoAdapter + + +def main() -> int: + if not os.environ.get("OPENAI_API_KEY"): + print("OPENAI_API_KEY is not set; cannot run sample.", file=sys.stderr) + return 2 + + try: + from agno.agent import Agent + from agno.models.openai import OpenAIChat + except ImportError: + print( + "agno not installed. Install with:\n" + " pip install 'layerlens[agno,providers-openai]'", + file=sys.stderr, + ) + return 2 + + sink = HttpEventSink( + adapter_name="agno", + path="/telemetry/spans", + max_batch=10, + flush_interval_s=1.0, + ) + + adapter = AgnoAdapter(capture_config=CaptureConfig.standard()) + adapter.add_sink(sink) + adapter.connect() + + agent = Agent( + model=OpenAIChat(id="gpt-4o-mini", max_tokens=20), + instructions="Reply with the digit only.", + ) + + try: + adapter.instrument_agent(agent) + response = agent.run("What is 2 + 2?") + content = getattr(response, "content", str(response)) + print(f"Response: {content}") + finally: + sink.close() + adapter.disconnect() + + print("Telemetry shipped. Check the LayerLens dashboard adapter health page.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/samples/instrument/bedrock_agents/__init__.py b/samples/instrument/bedrock_agents/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/samples/instrument/bedrock_agents/main.py b/samples/instrument/bedrock_agents/main.py new file mode 100644 index 0000000..3cc03bf --- /dev/null +++ b/samples/instrument/bedrock_agents/main.py @@ -0,0 +1,96 @@ +"""Sample: instrument an AWS Bedrock Agent invocation with LayerLens. 
+ +Builds a ``bedrock-agent-runtime`` boto3 client, registers the LayerLens +event hooks via ``BedrockAgentsAdapter.instrument_client``, and runs a +single ``invoke_agent`` call. Emits ``agent.input`` + ``model.invoke`` + +``tool.call`` + ``agent.output`` events that ship to atlas-app via +``HttpEventSink``. + +This sample requires a live Bedrock Agent ID. If you don't have one, +the sample exits with a clear error. + +Required environment: + +* ``AWS_ACCESS_KEY_ID`` / ``AWS_SECRET_ACCESS_KEY`` (or another standard + boto3 credential source — IAM role, profile, etc.). +* ``AWS_REGION`` — the AWS region your agent lives in. +* ``BEDROCK_AGENT_ID`` — your Bedrock Agent ID (e.g. ``ABCDEFGHIJ``). +* ``BEDROCK_AGENT_ALIAS_ID`` — agent alias to invoke (default + ``TSTALIASID``). +* ``LAYERLENS_STRATIX_API_KEY`` — your LayerLens API key (optional). +* ``LAYERLENS_STRATIX_BASE_URL`` — atlas-app base URL (optional). + +Run:: + + pip install 'layerlens[bedrock-agents]' + python -m samples.instrument.bedrock_agents.main +""" + +from __future__ import annotations + +import os +import sys +import uuid + +from layerlens.instrument.adapters._base import CaptureConfig +from layerlens.instrument.transport.sink_http import HttpEventSink +from layerlens.instrument.adapters.frameworks.bedrock_agents import BedrockAgentsAdapter + + +def main() -> int: + agent_id = os.environ.get("BEDROCK_AGENT_ID") + if not agent_id: + print("BEDROCK_AGENT_ID is not set; cannot run sample.", file=sys.stderr) + return 2 + + region = os.environ.get("AWS_REGION", "us-east-1") + alias_id = os.environ.get("BEDROCK_AGENT_ALIAS_ID", "TSTALIASID") + + try: + import boto3 + except ImportError: + print( + "boto3 not installed. 
Install with:\n" + " pip install 'layerlens[bedrock-agents]'", + file=sys.stderr, + ) + return 2 + + sink = HttpEventSink( + adapter_name="bedrock_agents", + path="/telemetry/spans", + max_batch=10, + flush_interval_s=1.0, + ) + + adapter = BedrockAgentsAdapter(capture_config=CaptureConfig.standard()) + adapter.add_sink(sink) + adapter.connect() + + client = boto3.client("bedrock-agent-runtime", region_name=region) + adapter.instrument_client(client) + + try: + response = client.invoke_agent( + agentId=agent_id, + agentAliasId=alias_id, + sessionId=str(uuid.uuid4()), + inputText="What is 2 + 2?", + ) + # Drain the streamed response — trace events fire as we iterate. + chunks: list[bytes] = [] + for event in response["completion"]: + if "chunk" in event: + chunks.append(event["chunk"]["bytes"]) + text = b"".join(chunks).decode("utf-8", errors="replace") + print(f"Response: {text}") + finally: + sink.close() + adapter.disconnect() + + print("Telemetry shipped. Check the LayerLens dashboard adapter health page.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/samples/instrument/benchmark_import/__init__.py b/samples/instrument/benchmark_import/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/samples/instrument/benchmark_import/main.py b/samples/instrument/benchmark_import/main.py new file mode 100644 index 0000000..e776610 --- /dev/null +++ b/samples/instrument/benchmark_import/main.py @@ -0,0 +1,68 @@ +"""Sample: import a tiny CSV benchmark with the LayerLens adapter. + +Writes a small CSV to a tempfile, then runs ``BenchmarkImportAdapter.import_csv`` +and prints the resulting ``ImportResult``. This adapter is a data importer +(not a runtime trace adapter) so it does not require any LLM credentials. 
+ +Run:: + + pip install 'layerlens[benchmark-import]' + python -m samples.instrument.benchmark_import.main +""" + +from __future__ import annotations + +import csv +import sys +import tempfile +from pathlib import Path + +from layerlens.instrument.adapters.frameworks.benchmark_import import ( + BenchmarkImportAdapter, +) + + +def _write_sample_csv(path: Path) -> None: + rows = [ + {"question": "What is 2 + 2?", "answer": "4", "category": "math"}, + {"question": "Capital of France?", "answer": "Paris", "category": "geo"}, + {"question": "Largest planet?", "answer": "Jupiter", "category": "science"}, + ] + with path.open("w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=["question", "answer", "category"]) + writer.writeheader() + writer.writerows(rows) + + +def main() -> int: + adapter = BenchmarkImportAdapter() + + with tempfile.TemporaryDirectory() as tmp: + csv_path = Path(tmp) / "sample_benchmark.csv" + _write_sample_csv(csv_path) + + result = adapter.import_csv( + path=str(csv_path), + schema_mapping={ + "question": "prompt", + "answer": "expected_output", + "category": "category", + }, + tags=["sample", "qa"], + ) + + if not result.success: + print(f"Import failed: {result.errors}", file=sys.stderr) + return 1 + + print(f"Benchmark id: {result.benchmark_id}") + print(f"Records imported: {result.records_imported}") + print(f"Duration: {result.duration_ms:.2f} ms") + if result.metadata is not None: + print(f"Tags: {', '.join(result.metadata.tags)}") + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/samples/instrument/embedding/__init__.py b/samples/instrument/embedding/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/samples/instrument/embedding/main.py b/samples/instrument/embedding/main.py new file mode 100644 index 0000000..2fe1a30 --- /dev/null +++ b/samples/instrument/embedding/main.py @@ -0,0 +1,76 @@ +"""Sample: instrument an OpenAI embedding call with the LayerLens 
adapter. + +Wraps an OpenAI client with ``EmbeddingAdapter.wrap_openai`` and runs a +single ``embeddings.create`` call. Emits one ``embedding.create`` event +that ships to atlas-app via ``HttpEventSink``. + +Required environment: + +* ``OPENAI_API_KEY`` — your OpenAI API key. +* ``LAYERLENS_STRATIX_API_KEY`` — your LayerLens API key (optional). +* ``LAYERLENS_STRATIX_BASE_URL`` — atlas-app base URL (optional). + +Run:: + + pip install 'layerlens[embedding,providers-openai]' + python -m samples.instrument.embedding.main +""" + +from __future__ import annotations + +import os +import sys + +from layerlens.instrument.adapters._base import CaptureConfig +from layerlens.instrument.transport.sink_http import HttpEventSink +from layerlens.instrument.adapters.frameworks.embedding import EmbeddingAdapter + + +def main() -> int: + if not os.environ.get("OPENAI_API_KEY"): + print("OPENAI_API_KEY is not set; cannot run sample.", file=sys.stderr) + return 2 + + try: + from openai import OpenAI + except ImportError: + print( + "openai not installed. Install with:\n" + " pip install 'layerlens[embedding,providers-openai]'", + file=sys.stderr, + ) + return 2 + + sink = HttpEventSink( + adapter_name="embedding", + path="/telemetry/spans", + max_batch=10, + flush_interval_s=1.0, + ) + + adapter = EmbeddingAdapter(capture_config=CaptureConfig.standard()) + adapter.add_sink(sink) + adapter.connect() + + client = OpenAI() + adapter.wrap_openai(client) + + try: + response = client.embeddings.create( + model="text-embedding-3-small", + input=["hello world", "the quick brown fox"], + ) + first = response.data[0].embedding + print(f"Embeddings: {len(response.data)} vectors of dim {len(first)}") + if response.usage is not None: + print(f"Tokens: {response.usage.total_tokens}") + finally: + sink.close() + adapter.disconnect() + + print("Telemetry shipped. 
Check the LayerLens dashboard adapter health page.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/samples/instrument/google_adk/__init__.py b/samples/instrument/google_adk/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/samples/instrument/google_adk/main.py b/samples/instrument/google_adk/main.py new file mode 100644 index 0000000..210cc1e --- /dev/null +++ b/samples/instrument/google_adk/main.py @@ -0,0 +1,119 @@ +"""Sample: instrument a Google ADK agent with the LayerLens adapter. + +Builds a one-shot ``LlmAgent``, attaches the LayerLens callbacks via +``GoogleADKAdapter.instrument_agent``, and runs a single turn through the +ADK ``Runner``. Each callback fires a LayerLens event that ships to atlas-app +via ``HttpEventSink``. + +Required environment: + +* ``GOOGLE_API_KEY`` — used by the Gemini model when running against + Google AI Studio. (For Vertex AI, set ``GOOGLE_GENAI_USE_VERTEXAI=true`` + and provide ADC.) +* ``LAYERLENS_STRATIX_API_KEY`` — your LayerLens API key (optional). +* ``LAYERLENS_STRATIX_BASE_URL`` — atlas-app base URL (optional). + +Run:: + + pip install 'layerlens[google-adk]' + python -m samples.instrument.google_adk.main +""" + +from __future__ import annotations + +import os +import sys +import asyncio + +from layerlens.instrument.adapters._base import CaptureConfig +from layerlens.instrument.transport.sink_http import HttpEventSink +from layerlens.instrument.adapters.frameworks.google_adk import GoogleADKAdapter + + +async def _run_agent(runner: object, session_id: str, user_id: str) -> str: + from google.genai import types # type: ignore[import-untyped,unused-ignore] + + new_message = types.Content( + role="user", + parts=[types.Part(text="What is 2 + 2?")], + ) + + chunks: list[str] = [] + # ``run_async`` is the recommended async API on the ADK Runner. 
+ async for event in runner.run_async( # type: ignore[attr-defined] + user_id=user_id, + session_id=session_id, + new_message=new_message, + ): + content = getattr(event, "content", None) + if content is None: + continue + for part in getattr(content, "parts", []) or []: + text = getattr(part, "text", None) + if text: + chunks.append(text) + return "".join(chunks) + + +def main() -> int: + if not os.environ.get("GOOGLE_API_KEY") and os.environ.get( + "GOOGLE_GENAI_USE_VERTEXAI" + ) != "true": + print( + "Neither GOOGLE_API_KEY nor GOOGLE_GENAI_USE_VERTEXAI is set; " + "cannot run sample.", + file=sys.stderr, + ) + return 2 + + try: + from google.adk.agents import LlmAgent + from google.adk.runners import InMemoryRunner + except ImportError: + print( + "google-adk not installed. Install with:\n" + " pip install 'layerlens[google-adk]'", + file=sys.stderr, + ) + return 2 + + sink = HttpEventSink( + adapter_name="google_adk", + path="/telemetry/spans", + max_batch=10, + flush_interval_s=1.0, + ) + + adapter = GoogleADKAdapter(capture_config=CaptureConfig.standard()) + adapter.add_sink(sink) + adapter.connect() + + agent = LlmAgent( + name="answerer", + model="gemini-2.0-flash", + instruction="Reply with the digit only.", + ) + adapter.instrument_agent(agent) + + runner = InMemoryRunner(agent=agent, app_name="layerlens-sample") + user_id = "sample-user" + # Create a session up front so ``run_async`` has somewhere to write. + session = asyncio.run( + runner.session_service.create_session( + app_name="layerlens-sample", user_id=user_id + ) + ) + + try: + text = asyncio.run(_run_agent(runner, session.id, user_id)) + print(f"Response: {text}") + finally: + sink.close() + adapter.disconnect() + + print("Telemetry shipped. 
Check the LayerLens dashboard adapter health page.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/samples/instrument/llama_index/__init__.py b/samples/instrument/llama_index/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/samples/instrument/llama_index/main.py b/samples/instrument/llama_index/main.py new file mode 100644 index 0000000..4f59ba5 --- /dev/null +++ b/samples/instrument/llama_index/main.py @@ -0,0 +1,80 @@ +"""Sample: instrument a LlamaIndex chat call with the LayerLens adapter. + +Registers the LayerLens event handler with the global LlamaIndex +``Dispatcher`` via ``LlamaIndexAdapter.instrument_workflow``, then runs a +single LLM ``chat`` call. The handler emits ``model.invoke`` (and any +``tool.call`` / ``agent.*`` events) which ship to atlas-app via +``HttpEventSink``. + +Required environment: + +* ``OPENAI_API_KEY`` — used by ``llama_index.llms.openai.OpenAI``. +* ``LAYERLENS_STRATIX_API_KEY`` — your LayerLens API key (optional). +* ``LAYERLENS_STRATIX_BASE_URL`` — atlas-app base URL (optional). + +Run:: + + pip install 'layerlens[llama-index,providers-openai]' llama-index-llms-openai + python -m samples.instrument.llama_index.main +""" + +from __future__ import annotations + +import os +import sys + +from layerlens.instrument.adapters._base import CaptureConfig +from layerlens.instrument.transport.sink_http import HttpEventSink +from layerlens.instrument.adapters.frameworks.llama_index import LlamaIndexAdapter + + +def main() -> int: + if not os.environ.get("OPENAI_API_KEY"): + print("OPENAI_API_KEY is not set; cannot run sample.", file=sys.stderr) + return 2 + + try: + from llama_index.core.llms import ChatMessage, MessageRole + from llama_index.llms.openai import OpenAI as LlamaOpenAI + except ImportError: + print( + "llama-index not installed. 
Install with:\n" + " pip install 'layerlens[llama-index,providers-openai]'" + " llama-index-llms-openai", + file=sys.stderr, + ) + return 2 + + sink = HttpEventSink( + adapter_name="llama_index", + path="/telemetry/spans", + max_batch=10, + flush_interval_s=1.0, + ) + + adapter = LlamaIndexAdapter(capture_config=CaptureConfig.standard()) + adapter.add_sink(sink) + adapter.connect() + adapter.instrument_workflow(None) # global event handler registration + + llm = LlamaOpenAI(model="gpt-4o-mini", max_tokens=20) + + try: + response = llm.chat( + [ + ChatMessage(role=MessageRole.SYSTEM, content="Be concise."), + ChatMessage(role=MessageRole.USER, content="What is 2 + 2?"), + ] + ) + text = getattr(response.message, "content", str(response)) + print(f"Response: {text}") + finally: + sink.close() + adapter.disconnect() + + print("Telemetry shipped. Check the LayerLens dashboard adapter health page.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/samples/instrument/ms_agent_framework/__init__.py b/samples/instrument/ms_agent_framework/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/samples/instrument/ms_agent_framework/main.py b/samples/instrument/ms_agent_framework/main.py new file mode 100644 index 0000000..7e0bd27 --- /dev/null +++ b/samples/instrument/ms_agent_framework/main.py @@ -0,0 +1,87 @@ +"""Sample: instrument a Microsoft Agent Framework chat with LayerLens. + +Builds a one-shot ``ChatCompletionAgent`` backed by an OpenAI chat +completion service, wraps it via ``MSAgentAdapter.instrument_chat``, and +runs a single ``invoke`` call. Each invocation emits ``agent.input`` + +``model.invoke`` + ``agent.output`` events that ship to atlas-app via +``HttpEventSink``. + +Required environment: + +* ``OPENAI_API_KEY`` — used by ``OpenAIChatCompletion``. +* ``LAYERLENS_STRATIX_API_KEY`` — your LayerLens API key (optional). +* ``LAYERLENS_STRATIX_BASE_URL`` — atlas-app base URL (optional). 
+ +Run:: + + pip install 'layerlens[ms-agent-framework,providers-openai]' + python -m samples.instrument.ms_agent_framework.main +""" + +from __future__ import annotations + +import os +import sys +import asyncio + +from layerlens.instrument.adapters._base import CaptureConfig +from layerlens.instrument.transport.sink_http import HttpEventSink +from layerlens.instrument.adapters.frameworks.ms_agent_framework import MSAgentAdapter + + +async def _run(agent: object) -> str: + chunks: list[str] = [] + async for response in agent.invoke("What is 2 + 2?"): # type: ignore[attr-defined] + content = getattr(response, "content", None) + if content is not None: + chunks.append(str(content)) + return " ".join(chunks) + + +def main() -> int: + if not os.environ.get("OPENAI_API_KEY"): + print("OPENAI_API_KEY is not set; cannot run sample.", file=sys.stderr) + return 2 + + try: + from semantic_kernel.agents import ChatCompletionAgent + from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion + except ImportError: + print( + "semantic-kernel agents not installed. Install with:\n" + " pip install 'layerlens[ms-agent-framework,providers-openai]'", + file=sys.stderr, + ) + return 2 + + sink = HttpEventSink( + adapter_name="ms_agent_framework", + path="/telemetry/spans", + max_batch=10, + flush_interval_s=1.0, + ) + + adapter = MSAgentAdapter(capture_config=CaptureConfig.standard()) + adapter.add_sink(sink) + adapter.connect() + + agent = ChatCompletionAgent( + service=OpenAIChatCompletion(ai_model_id="gpt-4o-mini"), + name="answerer", + instructions="Reply with the digit only.", + ) + adapter.instrument_chat(agent) + + try: + text = asyncio.run(_run(agent)) + print(f"Response: {text}") + finally: + sink.close() + adapter.disconnect() + + print("Telemetry shipped. 
Check the LayerLens dashboard adapter health page.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/samples/instrument/openai_agents/__init__.py b/samples/instrument/openai_agents/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/samples/instrument/openai_agents/main.py b/samples/instrument/openai_agents/main.py new file mode 100644 index 0000000..5c9736b --- /dev/null +++ b/samples/instrument/openai_agents/main.py @@ -0,0 +1,76 @@ +"""Sample: instrument the OpenAI Agents SDK with the LayerLens adapter. + +Registers the LayerLens trace processor with the SDK via +``OpenAIAgentsAdapter.instrument_runner``, then runs a one-turn agent via +``Runner.run_sync``. Each span the SDK produces (agent, model, tool, +handoff) emits a LayerLens event that ships to atlas-app via +``HttpEventSink``. + +Required environment: + +* ``OPENAI_API_KEY`` — used by the underlying OpenAI client. +* ``LAYERLENS_STRATIX_API_KEY`` — your LayerLens API key (optional). +* ``LAYERLENS_STRATIX_BASE_URL`` — atlas-app base URL (optional). + +Run:: + + pip install 'layerlens[openai-agents]' openai-agents + python -m samples.instrument.openai_agents.main +""" + +from __future__ import annotations + +import os +import sys + +from layerlens.instrument.adapters._base import CaptureConfig +from layerlens.instrument.transport.sink_http import HttpEventSink +from layerlens.instrument.adapters.frameworks.openai_agents import OpenAIAgentsAdapter + + +def main() -> int: + if not os.environ.get("OPENAI_API_KEY"): + print("OPENAI_API_KEY is not set; cannot run sample.", file=sys.stderr) + return 2 + + try: + from agents import Agent, Runner + except ImportError: + print( + "openai-agents not installed. 
Install with:\n" + " pip install 'layerlens[openai-agents]' openai-agents", + file=sys.stderr, + ) + return 2 + + sink = HttpEventSink( + adapter_name="openai_agents", + path="/telemetry/spans", + max_batch=10, + flush_interval_s=1.0, + ) + + adapter = OpenAIAgentsAdapter(capture_config=CaptureConfig.standard()) + adapter.add_sink(sink) + adapter.connect() + adapter.instrument_runner(None) # global trace processor + + agent = Agent( + name="answerer", + instructions="Reply with the digit only.", + model="gpt-4o-mini", + ) + + try: + result = Runner.run_sync(agent, "What is 2 + 2?") + print(f"Response: {result.final_output}") + finally: + sink.close() + adapter.disconnect() + + print("Telemetry shipped. Check the LayerLens dashboard adapter health page.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/samples/instrument/pydantic_ai/__init__.py b/samples/instrument/pydantic_ai/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/samples/instrument/pydantic_ai/main.py b/samples/instrument/pydantic_ai/main.py new file mode 100644 index 0000000..e302e85 --- /dev/null +++ b/samples/instrument/pydantic_ai/main.py @@ -0,0 +1,80 @@ +"""Sample: instrument a PydanticAI agent with the LayerLens adapter. + +Builds a one-shot ``Agent``, wraps it with +``PydanticAIAdapter.instrument_agent``, and runs ``agent.run_sync``. Each +run emits ``agent.input`` + ``model.invoke`` + ``agent.output`` events that +ship to atlas-app via ``HttpEventSink``. + +Required environment: + +* ``OPENAI_API_KEY`` — used by the ``"openai:gpt-4o-mini"`` model spec. +* ``LAYERLENS_STRATIX_API_KEY`` — your LayerLens API key (optional). +* ``LAYERLENS_STRATIX_BASE_URL`` — atlas-app base URL (optional). 
+ +Run:: + + pip install 'layerlens[pydantic-ai,providers-openai]' + python -m samples.instrument.pydantic_ai.main +""" + +from __future__ import annotations + +import os +import sys + +from layerlens.instrument.adapters._base import CaptureConfig +from layerlens.instrument.transport.sink_http import HttpEventSink +from layerlens.instrument.adapters.frameworks.pydantic_ai import PydanticAIAdapter + + +def main() -> int: + if not os.environ.get("OPENAI_API_KEY"): + print("OPENAI_API_KEY is not set; cannot run sample.", file=sys.stderr) + return 2 + + try: + from pydantic_ai import Agent + except ImportError: + print( + "pydantic-ai not installed. Install with:\n" + " pip install 'layerlens[pydantic-ai,providers-openai]'", + file=sys.stderr, + ) + return 2 + + sink = HttpEventSink( + adapter_name="pydantic_ai", + path="/telemetry/spans", + max_batch=10, + flush_interval_s=1.0, + ) + + adapter = PydanticAIAdapter(capture_config=CaptureConfig.standard()) + adapter.add_sink(sink) + adapter.connect() + + agent = Agent( + "openai:gpt-4o-mini", + system_prompt="Reply with the digit only.", + ) + + try: + adapter.instrument_agent(agent) + result = agent.run_sync("What is 2 + 2?") + print(f"Response: {result.data}") + usage = result.usage() + if usage is not None: + print( + f"Tokens — request: {usage.request_tokens}, " + f"response: {usage.response_tokens}, total: {usage.total_tokens}" + ) + finally: + sink.close() + adapter.disconnect() + + print("Telemetry shipped. 
Check the LayerLens dashboard adapter health page.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/samples/instrument/semantic_kernel/__init__.py b/samples/instrument/semantic_kernel/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/samples/instrument/semantic_kernel/main.py b/samples/instrument/semantic_kernel/main.py new file mode 100644 index 0000000..310180b --- /dev/null +++ b/samples/instrument/semantic_kernel/main.py @@ -0,0 +1,86 @@ +"""Sample: instrument a Semantic Kernel prompt invocation with LayerLens. + +Builds a ``Kernel`` with an OpenAI chat completion service, registers the +LayerLens filters via ``SemanticKernelAdapter.instrument_kernel``, and runs a +single ``invoke_prompt`` call. Filter callbacks emit ``agent.input`` / +``agent.output`` / ``model.invoke`` events that ship to atlas-app via +``HttpEventSink``. + +Required environment: + +* ``OPENAI_API_KEY`` — used by ``OpenAIChatCompletion``. +* ``LAYERLENS_STRATIX_API_KEY`` — your LayerLens API key (optional). +* ``LAYERLENS_STRATIX_BASE_URL`` — atlas-app base URL (optional). + +Run:: + + pip install 'layerlens[semantic-kernel,providers-openai]' + python -m samples.instrument.semantic_kernel.main +""" + +from __future__ import annotations + +import os +import sys +import asyncio + +from layerlens.instrument.adapters._base import CaptureConfig +from layerlens.instrument.transport.sink_http import HttpEventSink +from layerlens.instrument.adapters.frameworks.semantic_kernel import SemanticKernelAdapter + + +async def _run(kernel: object) -> str: + # Imported here to keep the top-level module importable without semantic-kernel. + from semantic_kernel.functions import KernelArguments # type: ignore[import-not-found,unused-ignore] + + result = await kernel.invoke_prompt( # type: ignore[attr-defined] + prompt="Reply with just the digit. 
What is 2 + 2?", + arguments=KernelArguments(), + ) + return str(result) + + +def main() -> int: + if not os.environ.get("OPENAI_API_KEY"): + print("OPENAI_API_KEY is not set; cannot run sample.", file=sys.stderr) + return 2 + + try: + from semantic_kernel import Kernel + from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion + except ImportError: + print( + "semantic-kernel not installed. Install with:\n" + " pip install 'layerlens[semantic-kernel,providers-openai]'", + file=sys.stderr, + ) + return 2 + + sink = HttpEventSink( + adapter_name="semantic_kernel", + path="/telemetry/spans", + max_batch=10, + flush_interval_s=1.0, + ) + + adapter = SemanticKernelAdapter(capture_config=CaptureConfig.standard()) + adapter.add_sink(sink) + adapter.connect() + + kernel = Kernel() + kernel.add_service(OpenAIChatCompletion(ai_model_id="gpt-4o-mini")) + adapter.instrument_kernel(kernel) + + try: + response = asyncio.run(_run(kernel)) + print(f"Response: {response}") + finally: + sink.close() + adapter.disconnect() + + print("Telemetry shipped. Check the LayerLens dashboard adapter health page.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/samples/instrument/strands/__init__.py b/samples/instrument/strands/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/samples/instrument/strands/main.py b/samples/instrument/strands/main.py new file mode 100644 index 0000000..3a3bce3 --- /dev/null +++ b/samples/instrument/strands/main.py @@ -0,0 +1,86 @@ +"""Sample: instrument an AWS Strands agent with the LayerLens adapter. + +Builds a one-shot Strands ``Agent`` backed by a Bedrock model, wraps it via +``StrandsAdapter.instrument_agent``, and runs a single call. Each call emits +``agent.input`` + ``model.invoke`` + ``agent.output`` events that ship to +atlas-app via ``HttpEventSink``. 
+ +Required environment: + +* ``AWS_ACCESS_KEY_ID`` / ``AWS_SECRET_ACCESS_KEY``, or ``AWS_PROFILE``. Other + boto3 credential sources (e.g. an IAM role) fail the sample's pre-flight check. +* ``AWS_REGION`` — the AWS region (Strands defaults to us-west-2; set + this to wherever your Bedrock model access is enabled). +* ``BEDROCK_MODEL_ID`` — Bedrock model ID for Strands to use; defaults to + ``us.anthropic.claude-3-5-sonnet-20241022-v2:0`` if unset. +* ``LAYERLENS_STRATIX_API_KEY`` — your LayerLens API key (optional). +* ``LAYERLENS_STRATIX_BASE_URL`` — atlas-app base URL (optional). + +Run:: + + pip install 'layerlens[strands]' + python -m samples.instrument.strands.main +""" + +from __future__ import annotations + +import os +import sys + +from layerlens.instrument.adapters._base import CaptureConfig +from layerlens.instrument.transport.sink_http import HttpEventSink +from layerlens.instrument.adapters.frameworks.strands import StrandsAdapter + + +def main() -> int: + if not os.environ.get("AWS_ACCESS_KEY_ID") and not os.environ.get( + "AWS_PROFILE" + ): + print( + "AWS credentials are not set (need AWS_ACCESS_KEY_ID or AWS_PROFILE); " + "cannot run sample.", + file=sys.stderr, + ) + return 2 + + try: + from strands import Agent + except ImportError: + print( + "strands-agents not installed. Install with:\n" + " pip install 'layerlens[strands]'", + file=sys.stderr, + ) + return 2 + + sink = HttpEventSink( + adapter_name="strands", + path="/telemetry/spans", + max_batch=10, + flush_interval_s=1.0, + ) + + adapter = StrandsAdapter(capture_config=CaptureConfig.standard()) + adapter.add_sink(sink) + adapter.connect() + + model_id = os.environ.get( + "BEDROCK_MODEL_ID", + "us.anthropic.claude-3-5-sonnet-20241022-v2:0", + ) + agent = Agent(model=model_id, system_prompt="Reply with the digit only.") + + try: + adapter.instrument_agent(agent) + response = agent("What is 2 + 2?") + print(f"Response: {response}") + finally: + sink.close() + adapter.disconnect() + + print("Telemetry shipped. 
Check the LayerLens dashboard adapter health page.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/layerlens/instrument/adapters/frameworks/__init__.py b/src/layerlens/instrument/adapters/frameworks/__init__.py new file mode 100644 index 0000000..4cfd328 --- /dev/null +++ b/src/layerlens/instrument/adapters/frameworks/__init__.py @@ -0,0 +1,32 @@ +"""Framework adapters for the LayerLens Instrument layer. + +Each framework adapter wraps an agent / chain framework's lifecycle to +intercept agent runs, model invocations, tool calls, state changes, and +handoffs, emitting events through the LayerLens telemetry pipeline. + +Adapters available (loaded on demand via :class:`AdapterRegistry`): + +* ``langchain`` — LangChain (callbacks + agent + chain + memory) +* ``langgraph`` — LangGraph (graph hooks + handoff detection + state) +* ``crewai`` — CrewAI (delegation + team metadata) +* ``autogen`` — AutoGen (group chat + lifecycle) +* ``agentforce`` — Salesforce Agentforce (auth, client, event mapping) +* ``semantic_kernel`` — Microsoft Semantic Kernel (filters + lifecycle) +* ``langfuse_importer`` — Langfuse trace import / export +* ``embedding`` — Embedding + vector store instrumentation +* ``openai_agents`` — OpenAI Agents SDK lifecycle +* ``ms_agent_framework`` — MS Agent Framework lifecycle +* ``agno`` — Agno lifecycle +* ``bedrock_agents`` — AWS Bedrock Agents lifecycle +* ``llama_index`` — LlamaIndex lifecycle +* ``google_adk`` — Google ADK lifecycle +* ``strands`` — Strands lifecycle +* ``benchmark_import`` — Benchmark replay-based ingestion +* ``pydantic_ai`` — Pydantic-AI lifecycle +* ``smolagents`` — SmolAgents (HuggingFace) lifecycle +* ``browser_use`` — Browser-Use lifecycle (placeholder; ported in M7) + +Importing this package does NOT import any framework SDK. 
+""" + +from __future__ import annotations diff --git a/src/layerlens/instrument/adapters/frameworks/agno/__init__.py b/src/layerlens/instrument/adapters/frameworks/agno/__init__.py new file mode 100644 index 0000000..a1f27f5 --- /dev/null +++ b/src/layerlens/instrument/adapters/frameworks/agno/__init__.py @@ -0,0 +1,25 @@ +""" +LayerLens adapter for Agno. + +Instruments Agno agents by wrapping Agent.run() and Agent.arun() +methods to capture lifecycle events across single and multi-agent teams. +""" + +from __future__ import annotations + +from typing import Any + +from layerlens.instrument.adapters.frameworks.agno.lifecycle import AgnoAdapter + +ADAPTER_CLASS = AgnoAdapter + + +def instrument_agent(agent: Any, stratix: Any = None, capture_config: dict[str, Any] | None = None) -> Any: + """Create an AgnoAdapter, connect it, instrument ``agent``, and return the adapter (not the agent).""" + adapter = AgnoAdapter(stratix=stratix, capture_config=capture_config) + adapter.connect() + adapter.instrument_agent(agent) + return adapter + + +__all__ = ["AgnoAdapter", "ADAPTER_CLASS", "instrument_agent"] diff --git a/src/layerlens/instrument/adapters/frameworks/agno/lifecycle.py b/src/layerlens/instrument/adapters/frameworks/agno/lifecycle.py new file mode 100644 index 0000000..047f262 --- /dev/null +++ b/src/layerlens/instrument/adapters/frameworks/agno/lifecycle.py @@ -0,0 +1,479 @@ +""" +Agno adapter lifecycle. 
+ +Instrumentation strategy: Agent wrapper (run/arun wrapping) + Agent.run() start -> agent.input (L1) + Agent.run() end -> agent.output (L1) + Tool execution -> tool.call (L5a) + Model invocation -> model.invoke (L3) + Team delegation -> agent.handoff (L2) + Agent config -> environment.config (L4a) +""" + +from __future__ import annotations + +import time +import uuid +import hashlib +import logging +import threading +from typing import Any + +from layerlens.instrument.adapters._base.adapter import ( + AdapterInfo, + BaseAdapter, + AdapterHealth, + AdapterStatus, + ReplayableTrace, + AdapterCapability, +) +from layerlens.instrument.adapters._base.pydantic_compat import PydanticCompat + +logger = logging.getLogger(__name__) + + +class AgnoAdapter(BaseAdapter): + """LayerLens adapter for Agno.""" + + FRAMEWORK = "agno" + VERSION = "0.1.0" + # The adapter source has no direct ``pydantic`` imports (verified by + # grep across ``frameworks/agno/``). Agno itself uses Pydantic v2 + # internally but the adapter only wraps ``Agent.run`` / ``Agent.arun`` + # and emits dict events, never touching framework Pydantic models. 
+ requires_pydantic = PydanticCompat.V1_OR_V2 + + def __init__( + self, + stratix: Any | None = None, + capture_config: Any | None = None, + stratix_instance: Any | None = None, + ) -> None: + resolved = stratix or stratix_instance + super().__init__(stratix=resolved, capture_config=capture_config) + self._originals: dict[int, dict[str, Any]] = {} # id(agent) -> {method: original} + self._wrapped_agents: list[Any] = [] # strong refs for disconnect unwrap + self._adapter_lock = threading.Lock() + self._seen_agents: set[str] = set() + self._framework_version: str | None = None + self._run_starts: dict[int, int] = {} # thread_id -> start_ns + + def connect(self) -> None: + """Verify Agno availability and prepare the adapter.""" + try: + import agno # type: ignore[import-not-found,unused-ignore] + + self._framework_version = getattr(agno, "__version__", "unknown") + except ImportError: + logger.debug("agno not installed") + self._connected = True + self._status = AdapterStatus.HEALTHY + + def disconnect(self) -> None: + """Unwrap all instrumented agents and release resources.""" + for agent in self._wrapped_agents: + self._unwrap_agent(agent) + self._wrapped_agents.clear() + self._originals.clear() + self._seen_agents.clear() + self._run_starts.clear() + self._connected = False + self._status = AdapterStatus.DISCONNECTED + + def _unwrap_agent(self, agent: Any) -> None: + """Restore original methods on a wrapped agent.""" + agent_id = id(agent) + originals = self._originals.get(agent_id) + if not originals: + return + for method_name, original in originals.items(): + try: + setattr(agent, method_name, original) + except Exception: + logger.debug("Could not unwrap %s.%s", agent_id, method_name, exc_info=True) + + def health_check(self) -> AdapterHealth: + """Return a health snapshot.""" + return AdapterHealth( + status=self._status, + framework_name=self.FRAMEWORK, + framework_version=self._framework_version, + adapter_version=self.VERSION, + 
error_count=self._error_count, + circuit_open=self._circuit_open, + ) + + def get_adapter_info(self) -> AdapterInfo: + """Return metadata about this adapter.""" + return AdapterInfo( + name="AgnoAdapter", + version=self.VERSION, + framework=self.FRAMEWORK, + framework_version=self._framework_version, + capabilities=[ + AdapterCapability.TRACE_TOOLS, + AdapterCapability.TRACE_MODELS, + AdapterCapability.TRACE_STATE, + AdapterCapability.TRACE_HANDOFFS, + ], + description="LayerLens adapter for Agno", + ) + + def serialize_for_replay(self) -> ReplayableTrace: + """Serialize the current trace data for replay.""" + return ReplayableTrace( + adapter_name="AgnoAdapter", + framework=self.FRAMEWORK, + trace_id=str(uuid.uuid4()), + events=list(self._trace_events), + state_snapshots=[], + config={"capture_config": self._capture_config.model_dump()}, + ) + + # --- Framework Integration --- + + def instrument_agent(self, agent: Any) -> Any: + """Wrap Agno agent.run() and agent.arun() methods to capture lifecycle events.""" + agent_id = id(agent) + if agent_id in self._originals: + return agent + originals: dict[str, Any] = {} + # Wrap run() (sync) + if hasattr(agent, "run"): + originals["run"] = agent.run + agent.run = self._create_traced_run_sync(agent, agent.run) + # Wrap arun() (async) + if hasattr(agent, "arun"): + originals["arun"] = agent.arun + agent.arun = self._create_traced_run(agent, agent.arun) + self._originals[agent_id] = originals + self._wrapped_agents.append(agent) + agent_name = getattr(agent, "name", None) or str(type(agent).__name__) + self._emit_agent_config(agent_name, agent) + return agent + + def _create_traced_run(self, agent: Any, original_run: Any) -> Any: + """Create an async traced wrapper for agent.arun().""" + adapter = self + + async def traced_run(*args: Any, **kwargs: Any) -> Any: + agent_name = getattr(agent, "name", None) or "agno_agent" + input_data = kwargs.get("message") or (args[0] if args else None) + 
adapter.on_run_start(agent_name=agent_name, input_data=input_data) + error: Exception | None = None + result = None + try: + result = await original_run(*args, **kwargs) + except Exception as exc: + error = exc + raise + finally: + output = None + if result is not None: + output = getattr(result, "content", result) + adapter.on_run_end(agent_name=agent_name, output=output, error=error) + adapter._extract_run_details(agent, result) + return result + + traced_run._layerlens_original = original_run # type: ignore[attr-defined] + return traced_run + + def _create_traced_run_sync(self, agent: Any, original_run: Any) -> Any: + """Create a sync traced wrapper for agent.run().""" + adapter = self + + def traced_run_sync(*args: Any, **kwargs: Any) -> Any: + agent_name = getattr(agent, "name", None) or "agno_agent" + input_data = kwargs.get("message") or (args[0] if args else None) + adapter.on_run_start(agent_name=agent_name, input_data=input_data) + error: Exception | None = None + result = None + try: + result = original_run(*args, **kwargs) + except Exception as exc: + error = exc + raise + finally: + output = None + if result is not None: + output = getattr(result, "content", result) + adapter.on_run_end(agent_name=agent_name, output=output, error=error) + adapter._extract_run_details(agent, result) + return result + + traced_run_sync._layerlens_original = original_run # type: ignore[attr-defined] + return traced_run_sync + + def _extract_run_details(self, agent: Any, result: Any) -> None: + """Extract tool calls, model invocations, and team handoffs from run result.""" + if result is None: + return + try: + # Extract model invocation details + model = getattr(agent, "model", None) + if model: + model_name = getattr(model, "id", None) or str(model) + self.emit_dict_event( + "model.invoke", + { + "framework": "agno", + "model": model_name, + "provider": self._detect_provider(model_name), + }, + ) + + # Extract usage/token info from result + usage = getattr(result, 
"metrics", None) or getattr(result, "usage", None) + if usage: + self.emit_dict_event( + "cost.record", + { + "framework": "agno", + "tokens_prompt": getattr(usage, "input_tokens", None) + or getattr(usage, "prompt_tokens", None), + "tokens_completion": getattr(usage, "output_tokens", None) + or getattr(usage, "completion_tokens", None), + "tokens_total": getattr(usage, "total_tokens", None), + }, + ) + + # Extract tool calls from messages + messages = getattr(result, "messages", None) or [] + for msg in messages: + tool_calls = getattr(msg, "tool_calls", None) + if tool_calls: + for tc in tool_calls: + self.emit_dict_event( + "tool.call", + { + "framework": "agno", + "tool_name": getattr(tc, "function", {}).get("name", "unknown") + if isinstance(getattr(tc, "function", None), dict) + else getattr(getattr(tc, "function", None), "name", "unknown"), + "tool_input": self._safe_serialize( + getattr(tc, "function", {}).get("arguments") + if isinstance(getattr(tc, "function", None), dict) + else None + ), + }, + ) + + # Detect team delegation (multi-agent handoffs) + team = getattr(agent, "team", None) + if team: + members = getattr(team, "members", None) or getattr(team, "agents", None) or [] + for member in members: + member_name = getattr(member, "name", None) or str(member) + self.emit_dict_event( + "agent.handoff", + { + "from_agent": getattr(agent, "name", "leader"), + "to_agent": member_name, + "reason": "team_delegation", + }, + ) + except Exception: + logger.debug("Could not extract run details", exc_info=True) + + # --- Lifecycle Hooks --- + + def on_run_start(self, agent_name: str | None = None, input_data: Any = None) -> None: + """Emit agent.input event when an agent run starts.""" + if not self._connected: + return + try: + tid = threading.get_ident() + start_ns = time.time_ns() + with self._adapter_lock: + self._run_starts[tid] = start_ns + self.emit_dict_event( + "agent.input", + { + "framework": "agno", + "agent_name": agent_name, + "input": 
self._safe_serialize(input_data), + "timestamp_ns": start_ns, + }, + ) + except Exception: + logger.warning("Error in on_run_start", exc_info=True) + + def on_run_end( + self, + agent_name: str | None = None, + output: Any = None, + error: Exception | None = None, + ) -> None: + """Emit agent.output event when an agent run ends.""" + if not self._connected: + return + try: + tid = threading.get_ident() + end_ns = time.time_ns() + with self._adapter_lock: + start_ns = self._run_starts.pop(tid, 0) + duration_ns = end_ns - start_ns if start_ns else 0 + payload: dict[str, Any] = { + "framework": "agno", + "agent_name": agent_name, + "output": self._safe_serialize(output), + "duration_ns": duration_ns, + } + if error: + payload["error"] = str(error) + self.emit_dict_event("agent.output", payload) + self.emit_dict_event( + "agent.state.change", + { + "framework": "agno", + "agent_name": agent_name, + "event_subtype": "run_complete" if not error else "run_failed", + }, + ) + except Exception: + logger.warning("Error in on_run_end", exc_info=True) + + def on_tool_use( + self, + tool_name: str, + tool_input: Any = None, + tool_output: Any = None, + error: Exception | None = None, + latency_ms: float | None = None, + ) -> None: + """Emit tool.call event for a tool invocation.""" + if not self._connected: + return + try: + payload: dict[str, Any] = { + "framework": "agno", + "tool_name": tool_name, + "tool_input": self._safe_serialize(tool_input), + "tool_output": self._safe_serialize(tool_output), + } + if error: + payload["error"] = str(error) + if latency_ms is not None: + payload["latency_ms"] = latency_ms + self.emit_dict_event("tool.call", payload) + except Exception: + logger.warning("Error in on_tool_use", exc_info=True) + + def on_llm_call( + self, + provider: str | None = None, + model: str | None = None, + tokens_prompt: int | None = None, + tokens_completion: int | None = None, + latency_ms: float | None = None, + messages: list[dict[str, str]] | None = None, + ) 
-> None: + """Emit model.invoke event for an LLM call.""" + if not self._connected: + return + try: + payload: dict[str, Any] = {"framework": "agno"} + if provider: + payload["provider"] = provider + if model: + payload["model"] = model + if tokens_prompt is not None: + payload["tokens_prompt"] = tokens_prompt + if tokens_completion is not None: + payload["tokens_completion"] = tokens_completion + if latency_ms is not None: + payload["latency_ms"] = latency_ms + if self._capture_config.capture_content and messages: + payload["messages"] = messages + self.emit_dict_event("model.invoke", payload) + except Exception: + logger.warning("Error in on_llm_call", exc_info=True) + + def on_handoff(self, from_agent: str, to_agent: str, context: Any = None) -> None: + """Emit agent.handoff event for team delegation.""" + if not self._connected: + return + try: + context_str = str(context) if context else "" + self.emit_dict_event( + "agent.handoff", + { + "from_agent": from_agent, + "to_agent": to_agent, + "reason": "agno_team_delegation", + "context_hash": hashlib.sha256(context_str.encode()).hexdigest() + if context_str + else None, + }, + ) + except Exception: + logger.warning("Error in on_handoff", exc_info=True) + + # --- Helpers --- + + def _detect_provider(self, model: str | None) -> str | None: + """Detect the LLM provider from a model identifier.""" + if not model: + return None + model_lower = model.lower() + if "gpt" in model_lower or "o1" in model_lower or "o3" in model_lower: + return "openai" + if "claude" in model_lower: + return "anthropic" + if "gemini" in model_lower: + return "google" + if "mistral" in model_lower or "mixtral" in model_lower: + return "mistral" + if "llama" in model_lower: + return "meta" + if "command" in model_lower: + return "cohere" + return None + + def _emit_agent_config(self, agent_name: str, agent: Any) -> None: + """Emit environment.config event for agent configuration on first encounter.""" + with self._adapter_lock: + if 
agent_name in self._seen_agents: + return + self._seen_agents.add(agent_name) + metadata: dict[str, Any] = { + "framework": "agno", + "agent_name": agent_name, + } + model = getattr(agent, "model", None) + if model: + metadata["model"] = str(model) + description = getattr(agent, "description", None) + if description: + metadata["description"] = str(description)[:500] + instructions = getattr(agent, "instructions", None) + if instructions and self._capture_config.capture_content: + metadata["instructions"] = str(instructions)[:500] + tools = getattr(agent, "tools", None) + if tools: + metadata["tools"] = [getattr(t, "name", str(t)) for t in tools] + knowledge = getattr(agent, "knowledge", None) + if knowledge: + metadata["knowledge"] = str(type(knowledge).__name__) + team = getattr(agent, "team", None) + if team: + members = getattr(team, "members", None) or getattr(team, "agents", None) or [] + metadata["team_members"] = [getattr(m, "name", str(m)) for m in members] + self.emit_dict_event("environment.config", metadata) + + def _safe_serialize(self, value: Any) -> Any: + """Safely serialize a value for event payloads.""" + try: + if value is None: + return None + if hasattr(value, "model_dump"): + return value.model_dump() + if hasattr(value, "dict"): + return value.dict() + if isinstance(value, dict): + return dict(value) + if isinstance(value, (str, int, float, bool)): + return value + return str(value) + except Exception: + return str(value) diff --git a/src/layerlens/instrument/adapters/frameworks/bedrock_agents/__init__.py b/src/layerlens/instrument/adapters/frameworks/bedrock_agents/__init__.py new file mode 100644 index 0000000..af50a36 --- /dev/null +++ b/src/layerlens/instrument/adapters/frameworks/bedrock_agents/__init__.py @@ -0,0 +1,27 @@ +""" +LayerLens adapter for AWS Bedrock Agents. + +Instruments AWS Bedrock Agents via boto3 event hooks and trace +extraction from invoke_agent response streams. 
+""" + +from __future__ import annotations + +from typing import Any + +from layerlens.instrument.adapters.frameworks.bedrock_agents.lifecycle import BedrockAgentsAdapter + +ADAPTER_CLASS = BedrockAgentsAdapter + + +def instrument_client( + client: Any, stratix: Any = None, capture_config: dict[str, Any] | None = None +) -> Any: + """Convenience function to instrument a Bedrock Agent Runtime client.""" + adapter = BedrockAgentsAdapter(stratix=stratix, capture_config=capture_config) + adapter.connect() + adapter.instrument_client(client) + return adapter + + +__all__ = ["BedrockAgentsAdapter", "ADAPTER_CLASS", "instrument_client"] diff --git a/src/layerlens/instrument/adapters/frameworks/bedrock_agents/lifecycle.py b/src/layerlens/instrument/adapters/frameworks/bedrock_agents/lifecycle.py new file mode 100644 index 0000000..b7dd92c --- /dev/null +++ b/src/layerlens/instrument/adapters/frameworks/bedrock_agents/lifecycle.py @@ -0,0 +1,456 @@ +""" +AWS Bedrock Agents adapter lifecycle. + +Instrumentation strategy: boto3 event hooks + OTel (ADOT integration) + invoke_agent request → agent.input (L1) + invoke_agent response → agent.output (L1) + Action Group → tool.call (L5a) + Knowledge Base query → tool.call (L5a, retrieval) + Model invocation → model.invoke (L3) + Supervisor→Collaborator → agent.handoff (Cross) +""" + +from __future__ import annotations + +import time +import uuid +import hashlib +import logging +import threading +from typing import Any + +from layerlens.instrument.adapters._base.adapter import ( + AdapterInfo, + BaseAdapter, + AdapterHealth, + AdapterStatus, + ReplayableTrace, + AdapterCapability, +) +from layerlens.instrument.adapters._base.pydantic_compat import PydanticCompat + +logger = logging.getLogger(__name__) + + +class BedrockAgentsAdapter(BaseAdapter): + """LayerLens adapter for AWS Bedrock Agents.""" + + FRAMEWORK = "bedrock_agents" + VERSION = "0.1.0" + # The adapter source has no direct ``pydantic`` imports (verified by + # grep across 
``frameworks/bedrock_agents/``). Bedrock Agents is a + # remote AWS service consumed via boto3 hooks — boto3 does not use + # Pydantic. Adapter emits plain dict events. + requires_pydantic = PydanticCompat.V1_OR_V2 + + def __init__( + self, + stratix: Any | None = None, + capture_config: Any | None = None, + stratix_instance: Any | None = None, + ) -> None: + resolved = stratix or stratix_instance + super().__init__(stratix=resolved, capture_config=capture_config) + self._originals: dict[str, Any] = {} + self._adapter_lock = threading.Lock() + self._seen_agents: set[str] = set() + self._framework_version: str | None = None + self._invoke_starts: dict[int, int] = {} + + def connect(self) -> None: + try: + import boto3 # type: ignore[import-untyped,unused-ignore] + + self._framework_version = boto3.__version__ + except ImportError: + logger.debug("boto3 not installed") + self._connected = True + self._status = AdapterStatus.HEALTHY + + def disconnect(self) -> None: + # Unregister boto3 event hooks + client = self._originals.get("client") + if client is not None: + try: + event_system = client.meta.events + event_system.unregister( + "provide-client-params.bedrock-agent-runtime.InvokeAgent", + self._before_invoke_agent, + ) + event_system.unregister( + "after-call.bedrock-agent-runtime.InvokeAgent", + self._after_invoke_agent, + ) + except Exception: + logger.debug("Could not unregister boto3 event hooks", exc_info=True) + self._originals.clear() + self._seen_agents.clear() + self._connected = False + self._status = AdapterStatus.DISCONNECTED + + def health_check(self) -> AdapterHealth: + return AdapterHealth( + status=self._status, + framework_name=self.FRAMEWORK, + framework_version=self._framework_version, + adapter_version=self.VERSION, + error_count=self._error_count, + circuit_open=self._circuit_open, + ) + + def get_adapter_info(self) -> AdapterInfo: + return AdapterInfo( + name="BedrockAgentsAdapter", + version=self.VERSION, + framework=self.FRAMEWORK, + 
framework_version=self._framework_version, + capabilities=[ + AdapterCapability.TRACE_TOOLS, + AdapterCapability.TRACE_MODELS, + AdapterCapability.TRACE_STATE, + AdapterCapability.TRACE_HANDOFFS, + ], + description="LayerLens adapter for AWS Bedrock Agents", + ) + + def serialize_for_replay(self) -> ReplayableTrace: + return ReplayableTrace( + adapter_name="BedrockAgentsAdapter", + framework=self.FRAMEWORK, + trace_id=str(uuid.uuid4()), + events=list(self._trace_events), + state_snapshots=[], + config={"capture_config": self._capture_config.model_dump()}, + ) + + # --- Framework Integration --- + + def instrument_client(self, client: Any) -> Any: + """Register boto3 event hooks on a bedrock-agent-runtime client.""" + try: + event_system = client.meta.events + event_system.register( + "provide-client-params.bedrock-agent-runtime.InvokeAgent", + self._before_invoke_agent, + ) + event_system.register( + "after-call.bedrock-agent-runtime.InvokeAgent", + self._after_invoke_agent, + ) + self._originals["client"] = client + except Exception: + logger.warning("Failed to register boto3 event hooks", exc_info=True) + return client + + # --- boto3 Event Hooks --- + + def _before_invoke_agent(self, **kwargs: Any) -> None: + if not self._connected: + return + try: + params = kwargs.get("params", {}) + tid = threading.get_ident() + start_ns = time.time_ns() + with self._adapter_lock: + self._invoke_starts[tid] = start_ns + agent_id = params.get("agentId", "unknown") + self._emit_agent_config(agent_id, params) + self.emit_dict_event( + "agent.input", + { + "framework": "bedrock_agents", + "agent_id": agent_id, + "session_id": params.get("sessionId"), + "input": params.get("inputText"), + "enable_trace": params.get("enableTrace", False), + "timestamp_ns": start_ns, + }, + ) + except Exception: + logger.warning("Error in _before_invoke_agent", exc_info=True) + + def _after_invoke_agent(self, **kwargs: Any) -> None: + if not self._connected: + return + try: + parsed = 
def _process_trace(self, parsed: dict[str, Any]) -> None:
    """Emit one event per recognised step found in a Bedrock trace payload.

    Steps are read from
    ``parsed["trace"]["trace"]["orchestrationTrace"]["steps"]`` and, when
    that path yields nothing, from ``parsed["trace"]["steps"]``. Each step is
    dispatched on its ``type`` field; unrecognised types are ignored.

    Args:
        parsed: The parsed response handed to the ``after-call`` hook.

    Missing keys, ``None`` values and non-dict nodes anywhere along the path
    are treated as "no trace available" instead of raising, and steps that
    are not dicts are skipped.
    """
    trace = parsed.get("trace") if isinstance(parsed, dict) else None
    if not isinstance(trace, dict):
        return

    # Walk the nested path defensively: any level may be absent or None.
    node: Any = trace
    for key in ("trace", "orchestrationTrace", "steps"):
        node = node.get(key) if isinstance(node, dict) else None
    # Fall back to the flat layout when the nested one is empty.
    steps = node or trace.get("steps")
    if not isinstance(steps, (list, tuple)):
        return

    handlers = {
        "ACTION_GROUP": self._emit_action_group,
        "KNOWLEDGE_BASE": self._emit_knowledge_base,
        "MODEL_INVOCATION": self._emit_model_invocation,
        "AGENT_COLLABORATOR": self._emit_collaborator_handoff,
    }
    for step in steps:
        if not isinstance(step, dict):
            continue
        step_type = step.get("type", "")
        handler = handlers.get(step_type) if isinstance(step_type, str) else None
        if handler is not None:
            handler(step)
def _emit_model_invocation(self, step: dict[str, Any]) -> None:
    """Report a model invocation trace step.

    Always emits ``model.invoke``. When the step carries a non-empty
    ``modelInvocationOutput.usage`` mapping, the token counts are added to
    that event and a ``cost.record`` event with the summed total follows.

    Args:
        step: One trace step; ``foundationModel`` and
            ``modelInvocationOutput`` are read from it.
    """
    output_section = step.get("modelInvocationOutput", {})
    foundation_model = step.get("foundationModel")
    token_usage = output_section.get("usage", {})

    invoke_event: dict[str, Any] = {
        "framework": "bedrock_agents",
        "provider": "aws_bedrock",
    }
    if foundation_model:
        invoke_event["model"] = foundation_model

    if not token_usage:
        # No usage data: only the bare invocation is reported.
        self.emit_dict_event("model.invoke", invoke_event)
        return

    prompt_tokens = token_usage.get("inputTokens")
    completion_tokens = token_usage.get("outputTokens")
    invoke_event["tokens_prompt"] = prompt_tokens
    invoke_event["tokens_completion"] = completion_tokens
    self.emit_dict_event("model.invoke", invoke_event)

    cost_event = {
        "framework": "bedrock_agents",
        "model": foundation_model,
        "tokens_prompt": prompt_tokens,
        "tokens_completion": completion_tokens,
        # Missing counts are treated as zero for the total only.
        "tokens_total": (prompt_tokens or 0) + (completion_tokens or 0),
    }
    self.emit_dict_event("cost.record", cost_event)
def on_tool_use(
    self,
    tool_name: str,
    tool_input: Any = None,
    tool_output: Any = None,
    error: Exception | None = None,
    latency_ms: float | None = None,
) -> None:
    """Emit a ``tool.call`` event for a manually reported tool invocation.

    Does nothing while the adapter is not connected. Input and output are
    passed through ``self._safe_serialize``; ``error`` (when truthy) and
    ``latency_ms`` (when not ``None``) are added to the payload. Any
    exception raised while building or emitting the event is logged as a
    warning and swallowed.

    Args:
        tool_name: Name reported for the tool.
        tool_input: Value the tool was called with.
        tool_output: Value the tool produced.
        error: Exception raised by the tool, if any.
        latency_ms: Tool latency in milliseconds, if known.
    """
    if self._connected:
        try:
            event: dict[str, Any] = {
                "framework": "bedrock_agents",
                "tool_name": tool_name,
                "tool_input": self._safe_serialize(tool_input),
                "tool_output": self._safe_serialize(tool_output),
            }
            if error:
                event["error"] = str(error)
            if latency_ms is not None:
                event["latency_ms"] = latency_ms
            self.emit_dict_event("tool.call", event)
        except Exception:
            logger.warning("Error in on_tool_use", exc_info=True)
def _safe_serialize(self, value: Any) -> Any:
    """Convert ``value`` into plain data suitable for an event payload.

    ``None`` and ``str``/``int``/``float``/``bool`` values are returned
    unchanged. Objects exposing ``model_dump()`` or ``dict()`` are converted
    through that method. Dicts are copied with their values converted
    recursively, and lists/tuples become lists of converted items, so nested
    structures keep their shape instead of collapsing into a ``repr`` string.
    Anything else is stringified.

    Args:
        value: Arbitrary value taken from a request, response or trace step.

    Returns:
        The converted value; ``str(value)`` if conversion raises.
    """

    def convert(item: Any) -> Any:
        if item is None or isinstance(item, (str, int, float, bool)):
            return item
        if hasattr(item, "model_dump"):
            return item.model_dump()
        if hasattr(item, "dict"):
            return item.dict()
        if isinstance(item, dict):
            return {key: convert(entry) for key, entry in item.items()}
        if isinstance(item, (list, tuple)):
            return [convert(entry) for entry in item]
        return str(item)

    try:
        return convert(value)
    except Exception:
        return str(value)
def _generate_benchmark_id() -> str:
    """Return a ``bench-`` identifier built from 12 random hex digits."""
    return f"bench-{uuid.uuid4().hex[:12]}"


def _current_utc_timestamp() -> str:
    """Return the current UTC time as an ISO-8601 string."""
    return datetime.now(UTC).isoformat()


class BenchmarkMetadata(BaseModel):
    """Descriptive record kept for every imported benchmark.

    ``benchmark_id`` and ``imported_at`` are generated when the model is
    created unless the caller supplies them; ``name`` and ``source`` are
    required; every other field has a default.
    """

    # Generated identifier of the form ``bench-<12 hex digits>``.
    benchmark_id: str = Field(default_factory=_generate_benchmark_id)
    name: str = Field(description="Benchmark name")
    source: str = Field(description="Import source (huggingface, helm, csv, json, parquet)")
    source_identifier: str = Field(
        default="",
        description="Source-specific ID (e.g., HF dataset name)",
    )
    version: str = Field(default="1.0.0", description="Benchmark version")
    record_count: int = Field(default=0, description="Number of records imported")
    schema_mapping: dict[str, str] = Field(
        default_factory=dict,
        description="Field mapping applied",
    )
    # ISO-8601 UTC timestamp taken when the model instance is created.
    imported_at: str = Field(default_factory=_current_utc_timestamp)
    imported_by: str = Field(default="", description="User who triggered the import")
    tags: list[str] = Field(default_factory=list)
class BenchmarkImportAdapter:
    """
    Imports external benchmark datasets into Stratix evaluation spaces.

    Every ``import_*`` method reads rows from one source, applies the
    optional field-name mapping, registers a ``BenchmarkMetadata`` entry in
    memory and forwards the rows to the configured store. Read failures are
    reported as ``ImportResult(success=False, errors=[...])`` instead of
    being raised.

    Usage::

        adapter = BenchmarkImportAdapter()

        # Import from HuggingFace
        result = adapter.import_huggingface("squad", split="validation")

        # Import from HELM results
        result = adapter.import_helm("/path/to/helm_results.json")

        # Import from CSV
        result = adapter.import_csv("/path/to/benchmark.csv", schema_mapping={
            "question": "prompt",
            "answer": "expected_output",
        })
    """

    def __init__(self, store: Any | None = None) -> None:
        """
        Args:
            store: Optional storage backend for persisting imported benchmarks.
                If None, benchmarks are returned in-memory only.
        """
        self._store = store
        self._benchmarks: dict[str, BenchmarkMetadata] = {}

    # -- HuggingFace Datasets ----------------------------------------------

    def import_huggingface(
        self,
        dataset_name: str,
        split: str = "test",
        subset: str | None = None,
        schema_mapping: dict[str, str] | None = None,
        max_records: int | None = None,
        tags: list[str] | None = None,
    ) -> ImportResult:
        """Import a benchmark from HuggingFace Datasets.

        Args:
            dataset_name: HuggingFace dataset identifier (e.g., "squad", "mmlu").
            split: Dataset split to import (default: "test").
            subset: Optional dataset subset/config.
            schema_mapping: Optional field mapping override.
            max_records: Maximum number of records to import; ``None`` means
                no limit and ``0`` imports nothing.
            tags: Optional tags for categorization.

        Returns:
            ImportResult with import statistics and metadata.
        """
        start = time.monotonic()
        records: list[dict[str, Any]] = []

        try:
            import datasets as hf_datasets  # type: ignore[import-not-found,unused-ignore]

            load_kwargs: dict[str, Any] = {"path": dataset_name, "split": split, "streaming": True}
            if subset:
                load_kwargs["name"] = subset

            ds = hf_datasets.load_dataset(**load_kwargs)

            for count, record in enumerate(ds):
                # ``is not None`` so that an explicit limit of 0 is honoured.
                if max_records is not None and count >= max_records:
                    break
                records.append(self._apply_schema_mapping(dict(record), schema_mapping))

        except ImportError:
            return ImportResult(
                success=False,
                errors=["'datasets' library not installed. Run: pip install datasets"],
            )
        except Exception as exc:
            return ImportResult(success=False, errors=[f"HuggingFace import failed: {exc}"])

        return self._finalize_import(
            name=dataset_name,
            source="huggingface",
            source_identifier=f"{dataset_name}/{subset or 'default'}/{split}",
            records=records,
            schema_mapping=schema_mapping,
            tags=tags,
            started=start,
        )

    # -- HELM Results ------------------------------------------------------

    def import_helm(
        self,
        path: str,
        schema_mapping: dict[str, str] | None = None,
        tags: list[str] | None = None,
    ) -> ImportResult:
        """Import HELM benchmark results from a JSON file.

        Args:
            path: Path to HELM results JSON file.
            schema_mapping: Optional field mapping override.
            tags: Optional tags.

        Returns:
            ImportResult with import statistics. Scenarios or instances that
            cannot be read as a mapping are counted in ``records_skipped``
            instead of aborting the import.
        """
        start = time.monotonic()
        records: list[dict[str, Any]] = []
        skipped = 0

        try:
            with open(path, encoding="utf-8") as f:
                data = json.load(f)

            # HELM format: list of scenario results with instances
            scenarios = (
                data if isinstance(data, list) else data.get("results", data.get("scenarios", []))
            )
            if isinstance(scenarios, dict):
                scenarios = [scenarios]

            for scenario in scenarios:
                if not isinstance(scenario, dict):
                    skipped += 1
                    continue
                instances = scenario.get("instances", scenario.get("results", []))
                if not isinstance(instances, list):
                    # No instance list: the scenario itself is the record.
                    records.append(self._apply_schema_mapping(dict(scenario), schema_mapping))
                    continue
                for inst in instances:
                    try:
                        row = dict(inst)
                    except (TypeError, ValueError):
                        skipped += 1
                        continue
                    mapped = self._apply_schema_mapping(row, schema_mapping)
                    mapped.setdefault("scenario", scenario.get("scenario", ""))
                    mapped.setdefault("model", scenario.get("model", ""))
                    records.append(mapped)

        except FileNotFoundError:
            return ImportResult(success=False, errors=[f"File not found: {path}"])
        except json.JSONDecodeError as exc:
            return ImportResult(success=False, errors=[f"Invalid JSON: {exc}"])
        except Exception as exc:
            return ImportResult(success=False, errors=[f"HELM import failed: {exc}"])

        return self._finalize_import(
            name=Path(path).stem,
            source="helm",
            source_identifier=path,
            records=records,
            schema_mapping=schema_mapping,
            tags=tags,
            started=start,
            skipped=skipped,
        )

    # -- CSV / JSON / Parquet ----------------------------------------------

    def import_csv(
        self,
        path: str,
        schema_mapping: dict[str, str] | None = None,
        delimiter: str = ",",
        max_records: int | None = None,
        tags: list[str] | None = None,
    ) -> ImportResult:
        """Import a benchmark from a CSV file.

        Args:
            path: Path to the CSV file; the first row supplies the field names.
            schema_mapping: Optional field mapping override.
            delimiter: Column separator.
            max_records: Maximum number of rows to import; ``None`` means no
                limit and ``0`` imports nothing.
            tags: Optional tags.

        Returns:
            ImportResult with import statistics and metadata.
        """
        start = time.monotonic()
        records: list[dict[str, Any]] = []

        try:
            with open(path, newline="", encoding="utf-8") as f:
                reader = csv.DictReader(f, delimiter=delimiter)
                for i, row in enumerate(reader):
                    if max_records is not None and i >= max_records:
                        break
                    records.append(self._apply_schema_mapping(dict(row), schema_mapping))
        except Exception as exc:
            return ImportResult(success=False, errors=[f"CSV import failed: {exc}"])

        return self._finalize_import(
            name=Path(path).stem,
            source="csv",
            source_identifier=path,
            records=records,
            schema_mapping=schema_mapping,
            tags=tags,
            started=start,
        )

    def import_json(
        self,
        path: str,
        schema_mapping: dict[str, str] | None = None,
        records_key: str | None = None,
        max_records: int | None = None,
        tags: list[str] | None = None,
    ) -> ImportResult:
        """Import a benchmark from a JSON file (array or object with records key).

        Args:
            path: Path to the JSON file.
            schema_mapping: Optional field mapping override.
            records_key: Key holding the record list when the document is an
                object; defaults to ``"records"``, then ``"data"``.
            max_records: Maximum number of records to import; ``None`` means
                no limit and ``0`` imports nothing.
            tags: Optional tags.

        Returns:
            ImportResult with import statistics. Items that cannot be read as
            a mapping are counted in ``records_skipped`` instead of aborting
            the import.
        """
        start = time.monotonic()
        records: list[dict[str, Any]] = []
        skipped = 0

        try:
            with open(path, encoding="utf-8") as f:
                data = json.load(f)

            items = data
            if isinstance(data, dict):
                items = data.get(records_key or "records", data.get("data", []))
            if not isinstance(items, list):
                items = [items]

            for item in items:
                if max_records is not None and len(records) >= max_records:
                    break
                try:
                    row = dict(item)
                except (TypeError, ValueError):
                    skipped += 1
                    continue
                records.append(self._apply_schema_mapping(row, schema_mapping))
        except Exception as exc:
            return ImportResult(success=False, errors=[f"JSON import failed: {exc}"])

        return self._finalize_import(
            name=Path(path).stem,
            source="json",
            source_identifier=path,
            records=records,
            schema_mapping=schema_mapping,
            tags=tags,
            started=start,
            skipped=skipped,
        )

    def import_parquet(
        self,
        path: str,
        schema_mapping: dict[str, str] | None = None,
        max_records: int | None = None,
        tags: list[str] | None = None,
    ) -> ImportResult:
        """Import a benchmark from a Parquet file.

        Args:
            path: Path to the Parquet file.
            schema_mapping: Optional field mapping override.
            max_records: Maximum number of rows to import; ``None`` means no
                limit and ``0`` imports nothing.
            tags: Optional tags.

        Returns:
            ImportResult with import statistics and metadata.
        """
        start = time.monotonic()
        records: list[dict[str, Any]] = []

        try:
            import pyarrow.parquet as pq  # type: ignore[import-untyped,unused-ignore]

            table = pq.read_table(path)  # type: ignore[no-untyped-call,unused-ignore]
            df_dicts = table.to_pydict()

            # Convert columnar to row-based
            keys = list(df_dicts.keys())
            num_rows = len(df_dicts[keys[0]]) if keys else 0
            limit = num_rows if max_records is None else max(0, min(num_rows, max_records))

            for i in range(limit):
                row = {k: df_dicts[k][i] for k in keys}
                records.append(self._apply_schema_mapping(row, schema_mapping))

        except ImportError:
            return ImportResult(
                success=False,
                errors=["'pyarrow' library not installed. Run: pip install pyarrow"],
            )
        except Exception as exc:
            return ImportResult(success=False, errors=[f"Parquet import failed: {exc}"])

        return self._finalize_import(
            name=Path(path).stem,
            source="parquet",
            source_identifier=path,
            records=records,
            schema_mapping=schema_mapping,
            tags=tags,
            started=start,
        )

    # -- Query -------------------------------------------------------------

    def list_benchmarks(self) -> list[BenchmarkMetadata]:
        """Return metadata for all imported benchmarks."""
        return list(self._benchmarks.values())

    def get_benchmark(self, benchmark_id: str) -> BenchmarkMetadata | None:
        """Return metadata for a specific benchmark, or None if unknown."""
        return self._benchmarks.get(benchmark_id)

    # -- Internal ----------------------------------------------------------

    def _finalize_import(
        self,
        *,
        name: str,
        source: str,
        source_identifier: str,
        records: list[dict[str, Any]],
        schema_mapping: dict[str, str] | None,
        tags: list[str] | None,
        started: float,
        skipped: int = 0,
    ) -> ImportResult:
        """Register, persist and report a successfully read batch of records.

        The elapsed time is taken before metadata creation and persistence,
        so ``duration_ms`` covers only the reading phase. ``tags`` defaults to
        ``[source]`` when none are given.
        """
        elapsed_ms = (time.monotonic() - started) * 1000

        metadata = BenchmarkMetadata(
            name=name,
            source=source,
            source_identifier=source_identifier,
            record_count=len(records),
            schema_mapping=schema_mapping or {},
            tags=tags or [source],
        )

        self._benchmarks[metadata.benchmark_id] = metadata
        self._persist(metadata, records)

        return ImportResult(
            success=True,
            benchmark_id=metadata.benchmark_id,
            records_imported=len(records),
            records_skipped=skipped,
            duration_ms=round(elapsed_ms, 2),
            metadata=metadata,
        )

    @staticmethod
    def _apply_schema_mapping(
        record: dict[str, Any],
        mapping: dict[str, str] | None,
    ) -> dict[str, Any]:
        """Apply field name mapping to a record.

        Keys present in ``mapping`` are renamed; all other keys are kept.
        Without a mapping the record is returned as-is (same object).
        """
        if not mapping:
            return record
        return {mapping.get(src_key, src_key): value for src_key, value in record.items()}

    def _persist(self, metadata: BenchmarkMetadata, records: list[dict[str, Any]]) -> None:
        """Persist benchmark metadata and records to the store.

        Does nothing without a store. Each row is written as a copy tagged
        with ``benchmark_id`` so the caller's dicts are left untouched.
        Persistence is best effort: a failure is logged as a warning (with
        traceback) and does not propagate.
        """
        if self._store is None:
            return
        try:
            self._store.insert_row("benchmarks", metadata.model_dump())
            for record in records:
                self._store.insert_row(
                    "benchmark_records",
                    {**record, "benchmark_id": metadata.benchmark_id},
                )
        except Exception:
            logger.warning("Failed to persist benchmark %s", metadata.benchmark_id, exc_info=True)
class EmbeddingAdapter(BaseAdapter):
    """
    LayerLens adapter for embedding providers.

    Wraps embedding client ``embed()`` / ``create()`` calls to emit
    ``embedding.create`` events with dimension tracking, batch handling,
    and per-item latency.

    Supported providers:
    - OpenAI (``openai.embeddings.create``)
    - Cohere (``cohere.Client.embed``)
    - HuggingFace (``sentence_transformers.SentenceTransformer.encode``)

    Several clients of the same provider may be wrapped; each one is restored
    on ``disconnect()``. Wrapping a client that is already wrapped is a no-op.
    Telemetry is best effort: an error while describing a result or emitting
    the event is logged and never affects the wrapped call's return value.

    Usage::

        from layerlens.instrument.adapters.frameworks.embedding import EmbeddingAdapter

        adapter = EmbeddingAdapter()
        adapter.connect()

        # Wrap an OpenAI client
        client = adapter.wrap_openai(openai_client)
        result = client.embeddings.create(model="text-embedding-3-small", input=["hello"])
    """

    FRAMEWORK = "embedding"
    VERSION = "0.1.0"
    # The adapter source has no direct ``pydantic`` imports (verified by
    # grep across ``frameworks/embedding/``). The pyproject extra is
    # empty (deps come from the underlying embedding store). Adapter
    # wraps client methods structurally and emits dict events.
    requires_pydantic = PydanticCompat.V1_OR_V2

    def __init__(
        self,
        stratix: Any | None = None,
        capture_config: CaptureConfig | None = None,
    ) -> None:
        super().__init__(stratix=stratix, capture_config=capture_config)
        # key -> (owner object, original callable). The key is the wrapper
        # kind, with a ``#<n>`` suffix for additional owners of that kind.
        self._originals: dict[str, Any] = {}
        self._clients: list[Any] = []

    # -- Lifecycle ---------------------------------------------------------

    def connect(self) -> None:
        """Mark the adapter as connected and healthy."""
        self._connected = True
        self._status = AdapterStatus.HEALTHY

    def disconnect(self) -> None:
        """Restore every wrapped method, mark the adapter disconnected and close sinks."""
        self._restore_originals()
        self._connected = False
        self._status = AdapterStatus.DISCONNECTED
        self._close_sinks()

    def health_check(self) -> AdapterHealth:
        """Return the adapter's current status and error counters."""
        return AdapterHealth(
            status=self._status,
            framework_name=self.FRAMEWORK,
            adapter_version=self.VERSION,
            error_count=self._error_count,
            circuit_open=self._circuit_open,
        )

    def get_adapter_info(self) -> AdapterInfo:
        """Return static descriptive information about this adapter."""
        return AdapterInfo(
            name="EmbeddingAdapter",
            version=self.VERSION,
            framework=self.FRAMEWORK,
            capabilities=[
                AdapterCapability.TRACE_MODELS,
            ],
            author="STRATIX Team",
            description="Traces embedding operations across OpenAI, Cohere, and HuggingFace providers",  # noqa: E501
        )

    def serialize_for_replay(self) -> ReplayableTrace:
        """Return the events recorded so far as a replayable trace."""
        return ReplayableTrace(
            adapter_name="EmbeddingAdapter",
            framework=self.FRAMEWORK,
            trace_id="",
            events=list(self._trace_events),
        )

    # -- Provider wrappers -------------------------------------------------

    def wrap_openai(self, client: Any) -> Any:
        """Wrap an OpenAI client's embeddings.create method and return the client."""
        if hasattr(client, "embeddings"):
            original = client.embeddings.create
            if self._remember_original("openai.embeddings.create", client, original):
                client.embeddings.create = self._make_openai_wrapper(original)
                self._clients.append(client)
        return client

    def wrap_cohere(self, client: Any) -> Any:
        """Wrap a Cohere client's embed method and return the client."""
        if hasattr(client, "embed"):
            original = client.embed
            if self._remember_original("cohere.embed", client, original):
                client.embed = self._make_cohere_wrapper(original)
                self._clients.append(client)
        return client

    def wrap_sentence_transformer(self, model: Any) -> Any:
        """Wrap a SentenceTransformer's encode method and return the model."""
        if hasattr(model, "encode"):
            original = model.encode
            if self._remember_original("st.encode", model, original):
                model.encode = self._make_st_wrapper(original)
                self._clients.append(model)
        return model

    # -- Internal wrappers -------------------------------------------------

    def _remember_original(self, kind: str, owner: Any, original: Any) -> bool:
        """Record ``original`` so it can be restored; return False if already wrapped.

        The first owner of a kind is stored under ``kind`` itself; further
        owners get a ``#<n>`` suffix so earlier entries are never overwritten.
        """
        for key, (tracked, _) in self._originals.items():
            if tracked is owner and key.partition("#")[0] == kind:
                # Re-wrapping would save our own wrapper as the "original".
                return False
        key = kind
        suffix = 1
        while key in self._originals:
            suffix += 1
            key = f"{kind}#{suffix}"
        self._originals[key] = (owner, original)
        return True

    def _make_timed_wrapper(self, provider: str, original: Any, describe: Any) -> Any:
        """Build a wrapper that times ``original`` and emits ``embedding.create``.

        ``describe(args, kwargs, result)`` returns the event payload without
        ``latency_ms``; it runs only after ``original`` has returned.
        """
        adapter = self

        def wrapper(*args: Any, **kwargs: Any) -> Any:
            start = time.monotonic()
            result = original(*args, **kwargs)
            elapsed_ms = (time.monotonic() - start) * 1000
            try:
                payload = describe(args, kwargs, result)
                payload["latency_ms"] = round(elapsed_ms, 2)
                adapter.emit_dict_event("embedding.create", payload)
            except Exception:
                # Telemetry must never turn a successful call into a failure.
                logger.warning("Error emitting embedding.create for %s", provider, exc_info=True)
            return result

        return wrapper

    def _make_openai_wrapper(self, original: Any) -> Any:
        """Return an instrumented replacement for ``embeddings.create``."""

        def describe(args: Any, kwargs: Any, result: Any) -> dict[str, Any]:
            model = kwargs.get("model", "unknown")
            input_data = kwargs.get("input", args[0] if args else [])
            batch_size = len(input_data) if isinstance(input_data, list) else 1

            dimensions = None
            if hasattr(result, "data") and result.data:
                first = result.data[0]
                if hasattr(first, "embedding"):
                    dimensions = len(first.embedding)

            tokens = 0
            if hasattr(result, "usage") and hasattr(result.usage, "total_tokens"):
                tokens = result.usage.total_tokens

            return {
                "provider": "openai",
                "model": model,
                "batch_size": batch_size,
                "dimensions": dimensions,
                "total_tokens": tokens,
            }

        return self._make_timed_wrapper("openai", original, describe)

    def _make_cohere_wrapper(self, original: Any) -> Any:
        """Return an instrumented replacement for ``embed``."""

        def describe(args: Any, kwargs: Any, result: Any) -> dict[str, Any]:
            model = kwargs.get("model", "embed-english-v3.0")
            texts = kwargs.get("texts", args[0] if args else [])
            batch_size = len(texts) if isinstance(texts, list) else 1

            dimensions = None
            if hasattr(result, "embeddings") and result.embeddings:
                dimensions = len(result.embeddings[0])

            return {
                "provider": "cohere",
                "model": model,
                "batch_size": batch_size,
                "dimensions": dimensions,
            }

        return self._make_timed_wrapper("cohere", original, describe)

    def _make_st_wrapper(self, original: Any) -> Any:
        """Return an instrumented replacement for ``encode``."""

        def describe(args: Any, kwargs: Any, result: Any) -> dict[str, Any]:
            sentences = args[0] if args else kwargs.get("sentences", [])
            batch_size = len(sentences) if isinstance(sentences, list) else 1

            dimensions = None
            shape = getattr(result, "shape", None)
            if shape is not None:
                if len(shape) > 1:
                    dimensions = shape[1]
                elif len(shape) == 1:
                    # A one-dimensional result is a single vector.
                    dimensions = shape[0]

            return {
                "provider": "sentence_transformers",
                "model": "local",
                "batch_size": batch_size,
                "dimensions": dimensions,
            }

        return self._make_timed_wrapper("sentence_transformers", original, describe)

    # -- Cleanup -----------------------------------------------------------

    def _restore_originals(self) -> None:
        """Put every saved original back on its owner and forget the entries.

        Restoration is best effort per entry; a failure is logged at debug
        level and the remaining entries are still processed.
        """
        for key, (obj, original) in reversed(list(self._originals.items())):
            kind = key.partition("#")[0]
            try:
                if kind == "openai.embeddings.create":
                    obj.embeddings.create = original
                elif kind == "cohere.embed":
                    obj.embed = original
                elif kind == "st.encode":
                    obj.encode = original
            except Exception:
                logger.debug("Could not restore %s", key)
        self._originals.clear()


ADAPTER_CLASS = EmbeddingAdapter
class VectorStoreAdapter(BaseAdapter):
    """
    LayerLens adapter for vector store databases.

    Wraps query/search methods on Pinecone, Weaviate, and Chroma clients
    to emit ``retrieval.query`` events capturing filter params, top-k
    results, score distributions, and latency.

    Any number of indexes/collections may be wrapped by one adapter; every
    patch is recorded individually and undone by :meth:`disconnect`.
    Telemetry is best-effort: a failure while building or emitting an event
    is logged at debug level and never replaces the query result.

    Usage::

        from layerlens.instrument.adapters.frameworks.embedding import VectorStoreAdapter

        adapter = VectorStoreAdapter()
        adapter.connect()

        # Wrap a Pinecone index
        index = adapter.wrap_pinecone(pinecone_index)
        results = index.query(vector=[0.1, 0.2, ...], top_k=10)
    """

    FRAMEWORK = "vector_store"
    VERSION = "0.1.0"
    # The adapter source has no direct ``pydantic`` imports (verified by
    # grep across ``frameworks/embedding/``). Pinecone/Weaviate/Chroma
    # client wrappers operate on dict / list responses; no Pydantic
    # interaction.
    requires_pydantic = PydanticCompat.V1_OR_V2

    def __init__(
        self,
        stratix: Any | None = None,
        capture_config: CaptureConfig | None = None,
    ) -> None:
        super().__init__(stratix=stratix, capture_config=capture_config)
        # Most recent ``(obj, original)`` pair per provider key. Kept with its
        # original shape; restoration itself is driven by ``_patches``.
        self._originals: dict[str, Any] = {}
        # Every patch applied, in order: ``(obj, attribute name, original)``.
        # A list (not a keyed dict) so that wrapping several objects of the
        # same provider does not overwrite earlier entries.
        self._patches: list[tuple[Any, str, Any]] = []
        self._clients: list[Any] = []

    # -- Lifecycle ---------------------------------------------------------

    def connect(self) -> None:
        """Mark the adapter as connected and healthy."""
        self._connected = True
        self._status = AdapterStatus.HEALTHY

    def disconnect(self) -> None:
        """Undo every patch, mark the adapter disconnected and close its sinks."""
        self._restore_originals()
        self._connected = False
        self._status = AdapterStatus.DISCONNECTED
        self._close_sinks()

    def health_check(self) -> AdapterHealth:
        """Return the adapter's current status and error counters."""
        return AdapterHealth(
            status=self._status,
            framework_name=self.FRAMEWORK,
            adapter_version=self.VERSION,
            error_count=self._error_count,
            circuit_open=self._circuit_open,
        )

    def get_adapter_info(self) -> AdapterInfo:
        """Return static metadata describing this adapter."""
        return AdapterInfo(
            name="VectorStoreAdapter",
            version=self.VERSION,
            framework=self.FRAMEWORK,
            capabilities=[
                AdapterCapability.TRACE_TOOLS,
            ],
            author="STRATIX Team",
            description="Traces vector retrieval operations across Pinecone, Weaviate, and Chroma",
        )

    def serialize_for_replay(self) -> ReplayableTrace:
        """Return a copy of the recorded trace events wrapped in a ``ReplayableTrace``."""
        return ReplayableTrace(
            adapter_name="VectorStoreAdapter",
            framework=self.FRAMEWORK,
            trace_id="",
            events=list(self._trace_events),
        )

    # -- Provider wrappers -------------------------------------------------

    def wrap_pinecone(self, index: Any) -> Any:
        """Wrap a Pinecone Index's query method.

        Returns *index* itself (patched in place). Objects without a
        ``query`` attribute are returned untouched.
        """
        if hasattr(index, "query"):
            if self._patch("pinecone.query", index, "query", self._make_pinecone_wrapper):
                self._clients.append(index)
        return index

    def wrap_weaviate(self, collection: Any) -> Any:
        """Wrap a Weaviate collection's query methods.

        Patches ``collection.query.near_vector`` and
        ``collection.query.near_text`` when present and returns *collection*.
        """
        if hasattr(collection, "query"):
            query_obj = collection.query
            patched = False
            for method_name in ("near_vector", "near_text"):
                if not hasattr(query_obj, method_name):
                    continue
                # Default argument binds the current method name to this factory.
                def factory(original: Any, _name: str = method_name) -> Any:
                    return self._make_weaviate_wrapper(original, _name)

                if self._patch(f"weaviate.{method_name}", query_obj, method_name, factory):
                    patched = True
            # Preserve the original bookkeeping: the collection is tracked once
            # it has a ``query`` attribute, unless this call was a pure re-wrap.
            if patched or collection not in self._clients:
                self._clients.append(collection)
        return collection

    def wrap_chroma(self, collection: Any) -> Any:
        """Wrap a Chroma Collection's query method and return *collection*."""
        if hasattr(collection, "query"):
            if self._patch("chroma.query", collection, "query", self._make_chroma_wrapper):
                self._clients.append(collection)
        return collection

    # -- Internal wrappers -------------------------------------------------

    def _patch(self, key: str, obj: Any, attr: str, make_wrapper: Any) -> bool:
        """Replace ``obj.<attr>`` with a traced wrapper and record how to undo it.

        Args:
            key: Provider key stored in ``self._originals`` (e.g. ``"pinecone.query"``).
            obj: Object whose attribute is patched.
            attr: Name of the attribute to replace.
            make_wrapper: Callable taking the original and returning the wrapper.

        Returns:
            ``True`` if a patch was applied, ``False`` if ``obj.<attr>`` is
            already a wrapper created by this adapter (re-wrapping would
            record the wrapper as the "original" and make restore impossible).
        """
        original = getattr(obj, attr)
        if getattr(original, "_layerlens_adapter", None) is self:
            return False
        wrapper = make_wrapper(original)
        # Tag the wrapper so a second wrap_* call on the same object is detected.
        setattr(wrapper, "_layerlens_adapter", self)
        self._originals[key] = (obj, original)
        self._patches.append((obj, attr, original))
        setattr(obj, attr, wrapper)
        return True

    @staticmethod
    def _drop_none(values: Any) -> list[Any]:
        """Return *values* as a list without ``None`` entries (empty when falsy)."""
        return [v for v in (values or []) if v is not None]

    def _make_pinecone_wrapper(self, original: Any) -> Any:
        """Return a wrapper around a Pinecone ``query`` callable that emits ``retrieval.query``."""
        adapter = self

        def wrapper(*args: Any, **kwargs: Any) -> Any:
            start = time.monotonic()
            result = original(*args, **kwargs)
            elapsed_ms = (time.monotonic() - start) * 1000

            # Everything below is telemetry only; it must not affect the result.
            try:
                matches = getattr(result, "matches", None) or []
                scores = adapter._drop_none(getattr(m, "score", None) for m in matches)
                adapter.emit_dict_event(
                    "retrieval.query",
                    {
                        "provider": "pinecone",
                        "top_k": kwargs.get("top_k", 10),
                        "has_filter": kwargs.get("filter") is not None,
                        "namespace": kwargs.get("namespace", ""),
                        "match_count": len(matches),
                        "score_min": round(min(scores), 4) if scores else None,
                        "score_max": round(max(scores), 4) if scores else None,
                        "score_mean": round(sum(scores) / len(scores), 4) if scores else None,
                        "latency_ms": round(elapsed_ms, 2),
                    },
                )
            except Exception:
                logger.debug("Could not emit retrieval.query for pinecone", exc_info=True)
            return result

        return wrapper

    def _make_weaviate_wrapper(self, original: Any, method_name: str) -> Any:
        """Return a wrapper around a Weaviate query method that emits ``retrieval.query``.

        *method_name* is reported as the event's ``query_type``.
        """
        adapter = self

        def wrapper(*args: Any, **kwargs: Any) -> Any:
            start = time.monotonic()
            result = original(*args, **kwargs)
            elapsed_ms = (time.monotonic() - start) * 1000

            try:
                objects = getattr(result, "objects", None) or []
                adapter.emit_dict_event(
                    "retrieval.query",
                    {
                        "provider": "weaviate",
                        "query_type": method_name,
                        "limit": kwargs.get("limit", 10),
                        "result_count": len(objects),
                        "latency_ms": round(elapsed_ms, 2),
                    },
                )
            except Exception:
                logger.debug("Could not emit retrieval.query for weaviate", exc_info=True)
            return result

        return wrapper

    def _make_chroma_wrapper(self, original: Any) -> Any:
        """Return a wrapper around a Chroma ``query`` callable that emits ``retrieval.query``.

        Only the first entry of the result's ``ids`` / ``distances`` lists is
        summarised.
        """
        adapter = self

        def wrapper(*args: Any, **kwargs: Any) -> Any:
            start = time.monotonic()
            result = original(*args, **kwargs)
            elapsed_ms = (time.monotonic() - start) * 1000

            try:
                result_count = 0
                distances: list[Any] = []
                if isinstance(result, dict):
                    ids = result.get("ids", [[]])
                    result_count = len(ids[0]) if ids and ids[0] else 0
                    dist_list = result.get("distances", [[]])
                    if dist_list and dist_list[0]:
                        distances = adapter._drop_none(dist_list[0])
                adapter.emit_dict_event(
                    "retrieval.query",
                    {
                        "provider": "chroma",
                        "n_results": kwargs.get("n_results", 10),
                        "has_filter": kwargs.get("where") is not None,
                        "result_count": result_count,
                        "distance_min": round(min(distances), 4) if distances else None,
                        "distance_max": round(max(distances), 4) if distances else None,
                        "latency_ms": round(elapsed_ms, 2),
                    },
                )
            except Exception:
                logger.debug("Could not emit retrieval.query for chroma", exc_info=True)
            return result

        return wrapper

    # -- Cleanup -----------------------------------------------------------

    def _restore_originals(self) -> None:
        """Undo every recorded patch (newest first) and clear the registries.

        A failure on one object is logged at debug level; the remaining
        patches are still undone.
        """
        for obj, attr, original in reversed(self._patches):
            try:
                setattr(obj, attr, original)
            except Exception:
                logger.debug("Could not restore %s", attr)
        self._patches.clear()
        self._originals.clear()
def instrument_agent(
    agent: Any,
    stratix: Any = None,
    capture_config: dict[str, Any] | None = None,
) -> Any:
    """Convenience function to instrument a Google ADK agent.

    Creates a :class:`GoogleADKAdapter`, connects it, and attaches its
    callbacks to *agent*.

    Args:
        agent: The Google ADK agent to instrument.
        stratix: Optional client object forwarded to the adapter constructor.
        capture_config: Optional capture configuration forwarded to the
            adapter constructor; ``None`` lets the adapter use its default.

    Returns:
        The connected adapter (not the agent), so the caller can later call
        ``disconnect()`` on it.
    """
    adapter = GoogleADKAdapter(stratix=stratix, capture_config=capture_config)
    adapter.connect()
    adapter.instrument_agent(agent)
    return adapter
class GoogleADKAdapter(BaseAdapter):
    """LayerLens adapter for Google Agent Development Kit.

    ``instrument_agent`` assigns this adapter's six callbacks onto an agent.
    Whatever callbacks the agent carried before are remembered and put back
    by ``disconnect``. Every callback returns ``None`` and swallows (logs) its
    own errors.
    """

    FRAMEWORK = "google_adk"
    VERSION = "0.1.0"
    # The adapter source has no direct ``pydantic`` imports (verified by
    # grep across ``frameworks/google_adk/``). The adapter only registers
    # ADK's native 6-callback hooks and emits dict events; it never
    # touches ADK's own Pydantic models.
    requires_pydantic = PydanticCompat.V1_OR_V2

    # Agent attributes that receive a callback. The handler for attribute
    # ``x`` is the adapter method named ``_x``.
    _CALLBACK_ATTRS = (
        "before_agent_callback",
        "after_agent_callback",
        "before_model_callback",
        "after_model_callback",
        "before_tool_callback",
        "after_tool_callback",
    )

    def __init__(
        self,
        stratix: Any | None = None,
        capture_config: Any | None = None,
        stratix_instance: Any | None = None,
    ) -> None:
        # ``stratix_instance`` is accepted as an alternative spelling of ``stratix``.
        resolved = stratix or stratix_instance
        super().__init__(stratix=resolved, capture_config=capture_config)
        # "<id(agent)>.<attr>" -> (agent, attr, callback that was there before).
        self._originals: dict[str, Any] = {}
        self._adapter_lock = threading.Lock()
        self._seen_agents: set[str] = set()
        self._framework_version: str | None = None
        self._model_call_starts: dict[int, int] = {}  # thread_id -> start_ns
        self._tool_call_starts: dict[str, int] = {}
        self._agent_starts: dict[int, int] = {}  # thread_id -> start_ns

    def connect(self) -> None:
        """Record the installed ADK version (if importable) and mark the adapter healthy."""
        try:
            import google.adk  # type: ignore[import-untyped,unused-ignore]

            self._framework_version = getattr(google.adk, "__version__", "unknown")
        except ImportError:
            try:
                import google.genai  # type: ignore[import-untyped,unused-ignore]

                self._framework_version = getattr(google.genai, "__version__", "unknown")
            except ImportError:
                logger.debug("google-adk not installed")
        self._connected = True
        self._status = AdapterStatus.HEALTHY

    def disconnect(self) -> None:
        """Restore the agents' previous callbacks, drop all tracking state, mark disconnected."""
        self._restore_callbacks()
        self._originals.clear()
        self._seen_agents.clear()
        self._model_call_starts.clear()
        self._tool_call_starts.clear()
        self._agent_starts.clear()
        self._connected = False
        self._status = AdapterStatus.DISCONNECTED

    def health_check(self) -> AdapterHealth:
        """Return the adapter's current status, versions and error counters."""
        return AdapterHealth(
            status=self._status,
            framework_name=self.FRAMEWORK,
            framework_version=self._framework_version,
            adapter_version=self.VERSION,
            error_count=self._error_count,
            circuit_open=self._circuit_open,
        )

    def get_adapter_info(self) -> AdapterInfo:
        """Return static metadata describing this adapter."""
        return AdapterInfo(
            name="GoogleADKAdapter",
            version=self.VERSION,
            framework=self.FRAMEWORK,
            framework_version=self._framework_version,
            capabilities=[
                AdapterCapability.TRACE_TOOLS,
                AdapterCapability.TRACE_MODELS,
                AdapterCapability.TRACE_STATE,
                AdapterCapability.TRACE_HANDOFFS,
            ],
            description="LayerLens adapter for Google Agent Development Kit",
        )

    def serialize_for_replay(self) -> ReplayableTrace:
        """Return the recorded events plus the capture config under a fresh random trace id."""
        return ReplayableTrace(
            adapter_name="GoogleADKAdapter",
            framework=self.FRAMEWORK,
            trace_id=str(uuid.uuid4()),
            events=list(self._trace_events),
            state_snapshots=[],
            config={"capture_config": self._capture_config.model_dump()},
        )

    # --- Framework Integration ---

    def instrument_agent(self, agent: Any) -> Any:
        """Attach Stratix callbacks to a Google ADK agent.

        The callback previously stored on each attribute is saved so that
        ``disconnect`` can put it back. Attributes that already hold this
        adapter's callback are left alone, which makes the call idempotent.
        Failures are logged and *agent* is returned either way.
        """
        try:
            for attr in self._CALLBACK_ATTRS:
                handler = getattr(self, f"_{attr}")
                previous = getattr(agent, attr, None)
                if previous == handler:
                    continue  # already instrumented by this adapter
                setattr(agent, attr, handler)
                with self._adapter_lock:
                    self._originals[f"{id(agent)}.{attr}"] = (agent, attr, previous)
        except Exception:
            logger.warning("Failed to attach callbacks to agent", exc_info=True)
        return agent

    def _restore_callbacks(self) -> None:
        """Put back the callbacks that ``instrument_agent`` replaced.

        An attribute is only reset when it still holds this adapter's
        callback, so a callback installed by someone else afterwards is not
        overwritten.
        """
        with self._adapter_lock:
            saved = list(self._originals.values())
            self._originals.clear()
        for agent, attr, previous in saved:
            try:
                if getattr(agent, attr, None) == getattr(self, f"_{attr}"):
                    setattr(agent, attr, previous)
            except Exception:
                logger.debug("Could not restore %s", attr, exc_info=True)

    # --- Callback Implementations ---

    def _before_agent_callback(self, callback_context: Any) -> Any:
        """Emit ``environment.config`` (first time per agent) and ``agent.input``."""
        if not self._connected:
            return None
        try:
            agent_name = self._get_agent_name(callback_context)
            self._emit_agent_config(agent_name, callback_context)
            tid = threading.get_ident()
            start_ns = time.time_ns()
            with self._adapter_lock:
                self._agent_starts[tid] = start_ns
            self.emit_dict_event(
                "agent.input",
                {
                    "framework": "google_adk",
                    "agent_name": agent_name,
                    "input": self._safe_serialize(getattr(callback_context, "user_content", None)),
                    "timestamp_ns": start_ns,
                },
            )
        except Exception:
            logger.warning("Error in before_agent_callback", exc_info=True)
        return None

    def _after_agent_callback(self, callback_context: Any) -> Any:
        """Emit ``agent.output`` with the duration since the matching start on this thread."""
        if not self._connected:
            return None
        try:
            agent_name = self._get_agent_name(callback_context)
            tid = threading.get_ident()
            end_ns = time.time_ns()
            with self._adapter_lock:
                start_ns = self._agent_starts.pop(tid, 0)
            # No recorded start -> report a duration of 0 rather than a bogus value.
            duration_ns = end_ns - start_ns if start_ns else 0
            self.emit_dict_event(
                "agent.output",
                {
                    "framework": "google_adk",
                    "agent_name": agent_name,
                    "output": self._safe_serialize(getattr(callback_context, "agent_output", None)),
                    "duration_ns": duration_ns,
                },
            )
        except Exception:
            logger.warning("Error in after_agent_callback", exc_info=True)
        return None

    def _before_model_callback(self, callback_context: Any, llm_request: Any) -> Any:
        """Record the model-call start time for the current thread."""
        if not self._connected:
            return None
        try:
            tid = threading.get_ident()
            with self._adapter_lock:
                self._model_call_starts[tid] = time.time_ns()
        except Exception:
            logger.warning("Error in before_model_callback", exc_info=True)
        return None

    def _after_model_callback(self, callback_context: Any, llm_response: Any) -> Any:
        """Emit ``model.invoke`` and, when usage metadata is present, ``cost.record``."""
        if not self._connected:
            return None
        try:
            tid = threading.get_ident()
            with self._adapter_lock:
                start_ns = self._model_call_starts.pop(tid, None)
            latency_ms = None
            if start_ns:
                latency_ms = (time.time_ns() - start_ns) / 1_000_000
            payload: dict[str, Any] = {"framework": "google_adk"}
            model = getattr(callback_context, "model", None) or getattr(llm_response, "model", None)
            if model:
                payload["model"] = str(model)
                payload["provider"] = "google"
            usage = getattr(llm_response, "usage_metadata", None)
            if usage:
                payload["tokens_prompt"] = getattr(usage, "prompt_token_count", None)
                payload["tokens_completion"] = getattr(usage, "candidates_token_count", None)
            if latency_ms is not None:
                payload["latency_ms"] = latency_ms
            self.emit_dict_event("model.invoke", payload)
            if usage:
                self.emit_dict_event(
                    "cost.record",
                    {
                        "framework": "google_adk",
                        "model": payload.get("model"),
                        "tokens_prompt": payload.get("tokens_prompt"),
                        "tokens_completion": payload.get("tokens_completion"),
                        # Missing counts are treated as 0 in the total.
                        "tokens_total": (
                            (payload.get("tokens_prompt") or 0)
                            + (payload.get("tokens_completion") or 0)
                        ),
                    },
                )
        except Exception:
            logger.warning("Error in after_model_callback", exc_info=True)
        return None

    @staticmethod
    def _resolve_tool_call(tool_name: Any, tool_input: Any, tool: Any, args: Any) -> tuple[Any, Any]:
        """Pick the tool name/input from either calling convention.

        Explicit ``tool_name`` / ``tool_input`` win; otherwise the name is
        read from ``tool.name`` (falling back to ``str(tool)``) and the input
        is ``args``.
        """
        name = tool_name
        if name is None and tool is not None:
            name = getattr(tool, "name", None) or str(tool)
        if name is None:
            name = "unknown"
        return name, (tool_input if tool_input is not None else args)

    def _before_tool_callback(
        self,
        callback_context: Any = None,
        tool_name: str | None = None,
        tool_input: Any = None,
        *,
        tool: Any = None,
        args: Any = None,
        tool_context: Any = None,
    ) -> Any:
        """Record the start time of a tool call.

        Accepts the original ``(callback_context, tool_name, tool_input)``
        form as well as the keyword form ``tool=…, args=…, tool_context=…``.
        NOTE(review): the keyword form is believed to be how ADK itself
        invokes tool callbacks — confirm against the installed ADK version.
        """
        if not self._connected:
            return None
        try:
            name, call_input = self._resolve_tool_call(tool_name, tool_input, tool, args)
            # Start and end are paired by tool name plus identity of the input object.
            call_id = f"{name}_{id(call_input)}"
            with self._adapter_lock:
                self._tool_call_starts[call_id] = time.time_ns()
        except Exception:
            logger.warning("Error in before_tool_callback", exc_info=True)
        return None

    def _after_tool_callback(
        self,
        callback_context: Any = None,
        tool_name: str | None = None,
        tool_input: Any = None,
        tool_output: Any = None,
        *,
        tool: Any = None,
        args: Any = None,
        tool_context: Any = None,
        tool_response: Any = None,
    ) -> Any:
        """Emit ``tool.call`` with serialized input/output and the measured latency.

        Accepts the same two calling conventions as ``_before_tool_callback``;
        ``tool_response`` is used as the output when ``tool_output`` is not given.
        ``latency_ms`` is ``None`` when no matching start was recorded.
        """
        if not self._connected:
            return None
        try:
            name, call_input = self._resolve_tool_call(tool_name, tool_input, tool, args)
            call_output = tool_output if tool_output is not None else tool_response
            call_id = f"{name}_{id(call_input)}"
            with self._adapter_lock:
                start_ns = self._tool_call_starts.pop(call_id, None)
            latency_ms = None
            if start_ns:
                latency_ms = (time.time_ns() - start_ns) / 1_000_000
            self.emit_dict_event(
                "tool.call",
                {
                    "framework": "google_adk",
                    "tool_name": name,
                    "tool_input": self._safe_serialize(call_input),
                    "tool_output": self._safe_serialize(call_output),
                    "latency_ms": latency_ms,
                },
            )
        except Exception:
            logger.warning("Error in after_tool_callback", exc_info=True)
        return None

    # --- Lifecycle Hooks ---

    def on_agent_start(self, agent_name: str | None = None, input_data: Any = None) -> None:
        """Manually report an agent start: records the start time and emits ``agent.input``."""
        if not self._connected:
            return
        try:
            tid = threading.get_ident()
            start_ns = time.time_ns()
            with self._adapter_lock:
                self._agent_starts[tid] = start_ns
            self.emit_dict_event(
                "agent.input",
                {
                    "framework": "google_adk",
                    "agent_name": agent_name,
                    "input": self._safe_serialize(input_data),
                    "timestamp_ns": start_ns,
                },
            )
        except Exception:
            logger.warning("Error in on_agent_start", exc_info=True)

    def on_agent_end(
        self,
        agent_name: str | None = None,
        output: Any = None,
        error: Exception | None = None,
    ) -> None:
        """Manually report an agent end: emits ``agent.output`` (with ``error`` text if given)."""
        if not self._connected:
            return
        try:
            tid = threading.get_ident()
            end_ns = time.time_ns()
            with self._adapter_lock:
                start_ns = self._agent_starts.pop(tid, 0)
            duration_ns = end_ns - start_ns if start_ns else 0
            payload: dict[str, Any] = {
                "framework": "google_adk",
                "agent_name": agent_name,
                "output": self._safe_serialize(output),
                "duration_ns": duration_ns,
            }
            if error:
                payload["error"] = str(error)
            self.emit_dict_event("agent.output", payload)
        except Exception:
            logger.warning("Error in on_agent_end", exc_info=True)

    def on_handoff(self, from_agent: str, to_agent: str, context: Any = None) -> None:
        """Emit ``agent.handoff`` with a SHA-256 hash and 500-character preview of *context*."""
        if not self._connected:
            return
        try:
            context_str = str(context) if context else ""
            self.emit_dict_event(
                "agent.handoff",
                {
                    "from_agent": from_agent,
                    "to_agent": to_agent,
                    "reason": "transfer_to_agent",
                    "context_hash": hashlib.sha256(context_str.encode()).hexdigest()
                    if context_str
                    else None,
                    "context_preview": context_str[:500] if context_str else None,
                },
            )
        except Exception:
            logger.warning("Error in on_handoff", exc_info=True)

    def on_tool_use(
        self,
        tool_name: str,
        tool_input: Any = None,
        tool_output: Any = None,
        error: Exception | None = None,
        latency_ms: float | None = None,
    ) -> None:
        """Manually report a tool call; ``error`` and ``latency_ms`` are included only when given."""
        if not self._connected:
            return
        try:
            payload: dict[str, Any] = {
                "framework": "google_adk",
                "tool_name": tool_name,
                "tool_input": self._safe_serialize(tool_input),
                "tool_output": self._safe_serialize(tool_output),
            }
            if error:
                payload["error"] = str(error)
            if latency_ms is not None:
                payload["latency_ms"] = latency_ms
            self.emit_dict_event("tool.call", payload)
        except Exception:
            logger.warning("Error in on_tool_use", exc_info=True)

    def on_llm_call(
        self,
        provider: str | None = None,
        model: str | None = None,
        tokens_prompt: int | None = None,
        tokens_completion: int | None = None,
        latency_ms: float | None = None,
        messages: list[dict[str, str]] | None = None,
    ) -> None:
        """Manually report an LLM call as ``model.invoke``.

        Only the fields that were supplied are included. ``messages`` is added
        only when the capture config has ``capture_content`` enabled.
        """
        if not self._connected:
            return
        try:
            payload: dict[str, Any] = {"framework": "google_adk"}
            if provider:
                payload["provider"] = provider
            if model:
                payload["model"] = model
            if tokens_prompt is not None:
                payload["tokens_prompt"] = tokens_prompt
            if tokens_completion is not None:
                payload["tokens_completion"] = tokens_completion
            if latency_ms is not None:
                payload["latency_ms"] = latency_ms
            if self._capture_config.capture_content and messages:
                payload["messages"] = messages
            self.emit_dict_event("model.invoke", payload)
        except Exception:
            logger.warning("Error in on_llm_call", exc_info=True)

    # --- Helpers ---

    def _get_agent_name(self, callback_context: Any) -> str:
        """Return the agent's name for *callback_context*, or ``"unknown"``.

        Looks at ``callback_context.agent`` first (its ``name`` or its string
        form), then at ``callback_context.agent_name``.
        """
        agent = getattr(callback_context, "agent", None)
        if agent:
            return getattr(agent, "name", None) or str(agent)
        name = getattr(callback_context, "agent_name", None)
        if name:
            return str(name)
        return "unknown"

    def _emit_agent_config(self, agent_name: str, callback_context: Any) -> None:
        """Emit ``environment.config`` once per agent name.

        The payload carries whichever of description / instruction / model /
        tools / sub_agents the agent exposes, plus the session id if the
        context has a session.
        """
        with self._adapter_lock:
            if agent_name in self._seen_agents:
                return
            self._seen_agents.add(agent_name)
        agent = getattr(callback_context, "agent", None)
        metadata: dict[str, Any] = {
            "framework": "google_adk",
            "agent_name": agent_name,
        }
        if agent:
            for attr in ("description", "instruction", "model"):
                val = getattr(agent, attr, None)
                if val is not None:
                    metadata[attr] = str(val)
            tools = getattr(agent, "tools", None)
            if tools:
                metadata["tools"] = [getattr(t, "name", str(t)) for t in tools]
            sub_agents = getattr(agent, "sub_agents", None)
            if sub_agents:
                metadata["sub_agents"] = [getattr(a, "name", str(a)) for a in sub_agents]
        session = getattr(callback_context, "session", None)
        if session:
            metadata["session_id"] = getattr(session, "id", None)
        self.emit_dict_event("environment.config", metadata)

    def _safe_serialize(self, value: Any) -> Any:
        """Convert *value* into something event-friendly without ever raising.

        Objects with ``model_dump()`` / ``dict()`` are dumped, dicts are
        shallow-copied, primitives pass through, lists are serialized
        element-wise (first 100 items), and anything else becomes ``str(value)``.
        """
        try:
            if value is None:
                return None
            if hasattr(value, "model_dump"):
                return value.model_dump()
            if hasattr(value, "dict"):
                return value.dict()
            if isinstance(value, dict):
                return dict(value)
            if isinstance(value, (str, int, float, bool)):
                return value
            if isinstance(value, list):
                return [self._safe_serialize(v) for v in value[:100]]
            return str(value)
        except Exception:
            return str(value)
+ +Instrumentation strategy: Instrumentation Module (modern event-driven, v0.10.20+) + Agent start → agent.input (L1) + Agent end → agent.output (L1) + LLM call → model.invoke (L3) + Tool call → tool.call (L5a) + Query/retrieval → tool.call (L5a, retrieval) + Agent handoff → agent.handoff (Cross) + Workflow event → agent.state.change (Cross) +""" + +from __future__ import annotations + +import time +import uuid +import hashlib +import logging +import threading +from typing import Any + +from layerlens.instrument.adapters._base.adapter import ( + AdapterInfo, + BaseAdapter, + AdapterHealth, + AdapterStatus, + ReplayableTrace, + AdapterCapability, +) +from layerlens.instrument.adapters._base.pydantic_compat import PydanticCompat + +logger = logging.getLogger(__name__) + + +class LlamaIndexAdapter(BaseAdapter): + """LayerLens adapter for LlamaIndex.""" + + FRAMEWORK = "llama_index" + VERSION = "0.1.0" + # The adapter source has no direct ``pydantic`` imports (verified by + # grep across ``frameworks/llama_index/``). LlamaIndex's + # Instrumentation Module emits dict-shaped events that the adapter + # forwards without touching framework Pydantic models. 
+ requires_pydantic = PydanticCompat.V1_OR_V2 + + def __init__( + self, + stratix: Any | None = None, + capture_config: Any | None = None, + stratix_instance: Any | None = None, + ) -> None: + resolved = stratix or stratix_instance + super().__init__(stratix=resolved, capture_config=capture_config) + self._originals: dict[str, Any] = {} + self._adapter_lock = threading.Lock() + self._seen_agents: set[str] = set() + self._framework_version: str | None = None + self._event_handler: Any | None = None + self._agent_starts: dict[int, int] = {} # thread_id -> start_ns + + def connect(self) -> None: + try: + import llama_index.core # type: ignore[import-not-found,unused-ignore] + + self._framework_version = getattr(llama_index.core, "__version__", "unknown") + except ImportError: + try: + import llama_index # type: ignore[import-not-found,unused-ignore] + + self._framework_version = getattr(llama_index, "__version__", "unknown") + except ImportError: + logger.debug("llama-index not installed") + self._connected = True + self._status = AdapterStatus.HEALTHY + + def disconnect(self) -> None: + if self._event_handler is not None: + try: + from llama_index.core.instrumentation import ( # type: ignore[import-not-found,unused-ignore] + get_dispatcher, + ) + + dispatcher = get_dispatcher() + # LlamaIndex dispatcher stores handlers in span_handlers / event_handlers lists + handlers = getattr(dispatcher, "event_handlers", []) + if self._event_handler in handlers: + handlers.remove(self._event_handler) + except Exception: + logger.debug("Could not unregister event handler", exc_info=True) + self._event_handler = None + self._originals.clear() + self._seen_agents.clear() + self._connected = False + self._status = AdapterStatus.DISCONNECTED + + def health_check(self) -> AdapterHealth: + return AdapterHealth( + status=self._status, + framework_name=self.FRAMEWORK, + framework_version=self._framework_version, + adapter_version=self.VERSION, + error_count=self._error_count, + 
circuit_open=self._circuit_open, + ) + + def get_adapter_info(self) -> AdapterInfo: + return AdapterInfo( + name="LlamaIndexAdapter", + version=self.VERSION, + framework=self.FRAMEWORK, + framework_version=self._framework_version, + capabilities=[ + AdapterCapability.TRACE_TOOLS, + AdapterCapability.TRACE_MODELS, + AdapterCapability.TRACE_STATE, + AdapterCapability.TRACE_HANDOFFS, + ], + description="LayerLens adapter for LlamaIndex", + ) + + def serialize_for_replay(self) -> ReplayableTrace: + return ReplayableTrace( + adapter_name="LlamaIndexAdapter", + framework=self.FRAMEWORK, + trace_id=str(uuid.uuid4()), + events=list(self._trace_events), + state_snapshots=[], + config={"capture_config": self._capture_config.model_dump()}, + ) + + # --- Framework Integration --- + + def instrument_workflow(self, workflow: Any) -> Any: + """Register Stratix event handler with LlamaIndex instrumentation.""" + try: + from llama_index.core.instrumentation import get_dispatcher + + dispatcher = get_dispatcher() + handler = self._create_event_handler() + if handler is None: + logger.warning("Could not create event handler (BaseEventHandler not importable)") + return workflow + dispatcher.add_event_handler(handler) + self._event_handler = handler + except ImportError: + logger.debug("LlamaIndex instrumentation module not available") + except Exception: + logger.warning("Failed to register event handler", exc_info=True) + return workflow + + def _create_event_handler(self) -> Any: + """Create a LlamaIndex event handler that routes to Stratix.""" + adapter = self + + try: + from llama_index.core.instrumentation.events import ( # type: ignore[import-not-found,unused-ignore] + BaseEvent, + ) + from llama_index.core.instrumentation.event_handlers import ( # type: ignore[import-not-found,unused-ignore] + BaseEventHandler, + ) + except ImportError: + return None + + class StratixEventHandler(BaseEventHandler): # type: ignore[misc] + @classmethod + def class_name(cls) -> str: + return 
"StratixEventHandler" + + def handle(self, event: BaseEvent, **kwargs: Any) -> None: + try: + adapter._handle_event(event) + except Exception: + logger.warning("Error handling LlamaIndex event", exc_info=True) + + return StratixEventHandler() + + def _handle_event(self, event: Any) -> None: + """Route LlamaIndex events to appropriate Stratix event emission.""" + if not self._connected: + return + event_type = type(event).__name__ + + if event_type in ("LLMChatStartEvent", "LLMStartEvent"): + self._on_llm_start(event) + elif event_type in ("LLMChatEndEvent", "LLMCompletionEndEvent"): + self._on_llm_end(event) + elif event_type == "ToolCallEvent": + self._on_tool_call(event) + elif event_type in ("RetrievalStartEvent", "QueryStartEvent"): + self._on_retrieval_start(event) + elif event_type in ("RetrievalEndEvent", "QueryEndEvent"): + self._on_retrieval_end(event) + elif event_type in ("AgentRunStepStartEvent",): + self._on_agent_step_start(event) + elif event_type in ("AgentRunStepEndEvent",): + self._on_agent_step_end(event) + + def _on_llm_start(self, event: Any) -> None: + pass # Timing tracked on end + + def _on_llm_end(self, event: Any) -> None: + payload: dict[str, Any] = {"framework": "llama_index"} + model = getattr(event, "model", None) or getattr(event, "model_name", None) + if model: + payload["model"] = str(model) + response = getattr(event, "response", None) + if response: + raw = getattr(response, "raw", None) + if raw: + usage = getattr(raw, "usage", None) + if usage: + payload["tokens_prompt"] = getattr(usage, "prompt_tokens", None) + payload["tokens_completion"] = getattr(usage, "completion_tokens", None) + self.emit_dict_event("model.invoke", payload) + if "tokens_prompt" in payload or "tokens_completion" in payload: + self.emit_dict_event( + "cost.record", + { + "framework": "llama_index", + "model": payload.get("model"), + "tokens_prompt": payload.get("tokens_prompt"), + "tokens_completion": payload.get("tokens_completion"), + "tokens_total": 
(payload.get("tokens_prompt") or 0) + + (payload.get("tokens_completion") or 0), + }, + ) + + def _on_tool_call(self, event: Any) -> None: + self.emit_dict_event( + "tool.call", + { + "framework": "llama_index", + "tool_name": getattr(event, "tool_name", None) or getattr(event, "name", "unknown"), + "tool_input": self._safe_serialize(getattr(event, "tool_input", None)), + "tool_output": self._safe_serialize(getattr(event, "tool_output", None)), + }, + ) + + def _on_retrieval_start(self, event: Any) -> None: + pass # Tracked on end + + def _on_retrieval_end(self, event: Any) -> None: + nodes = getattr(event, "nodes", None) or [] + self.emit_dict_event( + "tool.call", + { + "framework": "llama_index", + "tool_name": "retrieval", + "tool_type": "retrieval", + "tool_output": self._safe_serialize( + [{"score": getattr(n, "score", None)} for n in nodes[:10]] + ), + "result_count": len(nodes), + }, + ) + + def _on_agent_step_start(self, event: Any) -> None: + agent_name = getattr(event, "agent_id", None) or "llama_agent" + self._emit_agent_config(agent_name, event) + tid = threading.get_ident() + start_ns = time.time_ns() + with self._adapter_lock: + self._agent_starts[tid] = start_ns + self.emit_dict_event( + "agent.input", + { + "framework": "llama_index", + "agent_name": agent_name, + "step": getattr(event, "step", None), + "timestamp_ns": start_ns, + }, + ) + + def _on_agent_step_end(self, event: Any) -> None: + agent_name = getattr(event, "agent_id", None) or "llama_agent" + tid = threading.get_ident() + end_ns = time.time_ns() + with self._adapter_lock: + start_ns = self._agent_starts.pop(tid, 0) + duration_ns = end_ns - start_ns if start_ns else 0 + self.emit_dict_event( + "agent.output", + { + "framework": "llama_index", + "agent_name": agent_name, + "output": self._safe_serialize(getattr(event, "response", None)), + "duration_ns": duration_ns, + }, + ) + + # --- Lifecycle Hooks --- + + def on_agent_start(self, agent_name: str | None = None, input_data: Any = 
None) -> None: + if not self._connected: + return + try: + tid = threading.get_ident() + start_ns = time.time_ns() + with self._adapter_lock: + self._agent_starts[tid] = start_ns + self.emit_dict_event( + "agent.input", + { + "framework": "llama_index", + "agent_name": agent_name, + "input": self._safe_serialize(input_data), + "timestamp_ns": start_ns, + }, + ) + except Exception: + logger.warning("Error in on_agent_start", exc_info=True) + + def on_agent_end( + self, + agent_name: str | None = None, + output: Any = None, + error: Exception | None = None, + ) -> None: + if not self._connected: + return + try: + tid = threading.get_ident() + end_ns = time.time_ns() + with self._adapter_lock: + start_ns = self._agent_starts.pop(tid, 0) + duration_ns = end_ns - start_ns if start_ns else 0 + payload: dict[str, Any] = { + "framework": "llama_index", + "agent_name": agent_name, + "output": self._safe_serialize(output), + "duration_ns": duration_ns, + } + if error: + payload["error"] = str(error) + self.emit_dict_event("agent.output", payload) + except Exception: + logger.warning("Error in on_agent_end", exc_info=True) + + def on_tool_use( + self, + tool_name: str, + tool_input: Any = None, + tool_output: Any = None, + error: Exception | None = None, + latency_ms: float | None = None, + ) -> None: + if not self._connected: + return + try: + payload: dict[str, Any] = { + "framework": "llama_index", + "tool_name": tool_name, + "tool_input": self._safe_serialize(tool_input), + "tool_output": self._safe_serialize(tool_output), + } + if error: + payload["error"] = str(error) + if latency_ms is not None: + payload["latency_ms"] = latency_ms + self.emit_dict_event("tool.call", payload) + except Exception: + logger.warning("Error in on_tool_use", exc_info=True) + + def on_llm_call( + self, + provider: str | None = None, + model: str | None = None, + tokens_prompt: int | None = None, + tokens_completion: int | None = None, + latency_ms: float | None = None, + messages: 
list[dict[str, str]] | None = None, + ) -> None: + if not self._connected: + return + try: + payload: dict[str, Any] = {"framework": "llama_index"} + if provider: + payload["provider"] = provider + if model: + payload["model"] = model + if tokens_prompt is not None: + payload["tokens_prompt"] = tokens_prompt + if tokens_completion is not None: + payload["tokens_completion"] = tokens_completion + if latency_ms is not None: + payload["latency_ms"] = latency_ms + if self._capture_config.capture_content and messages: + payload["messages"] = messages + self.emit_dict_event("model.invoke", payload) + except Exception: + logger.warning("Error in on_llm_call", exc_info=True) + + def on_handoff(self, from_agent: str, to_agent: str, context: Any = None) -> None: + if not self._connected: + return + try: + context_str = str(context) if context else "" + self.emit_dict_event( + "agent.handoff", + { + "from_agent": from_agent, + "to_agent": to_agent, + "reason": "agent_workflow_handoff", + "context_hash": hashlib.sha256(context_str.encode()).hexdigest() + if context_str + else None, + }, + ) + except Exception: + logger.warning("Error in on_handoff", exc_info=True) + + # --- Helpers --- + + def _emit_agent_config(self, agent_name: str, event_or_agent: Any) -> None: + with self._adapter_lock: + if agent_name in self._seen_agents: + return + self._seen_agents.add(agent_name) + metadata: dict[str, Any] = { + "framework": "llama_index", + "agent_name": agent_name, + } + tools = getattr(event_or_agent, "tools", None) + if tools: + metadata["tools"] = [getattr(t, "name", str(t)) for t in tools] + self.emit_dict_event("environment.config", metadata) + + def _safe_serialize(self, value: Any) -> Any: + try: + if value is None: + return None + if hasattr(value, "model_dump"): + return value.model_dump() + if hasattr(value, "dict"): + return value.dict() + if isinstance(value, dict): + return dict(value) + if isinstance(value, (str, int, float, bool)): + return value + if 
def instrument_agent(
    agent: Any, stratix: Any = None, capture_config: dict[str, Any] | None = None
) -> Any:
    """Instrument a Microsoft Agent Framework chat in one call.

    Builds an ``MSAgentAdapter``, connects it, and wraps ``agent`` via
    ``instrument_chat``.

    Args:
        agent: The chat (or agent) object whose invoke methods get wrapped.
        stratix: Optional client instance forwarded to the adapter.
        capture_config: Optional capture configuration forwarded to the
            adapter; ``None`` lets the adapter pick its default.

    Returns:
        The connected adapter, so the caller can later ``disconnect()`` it.
    """
    adapter = MSAgentAdapter(stratix=stratix, capture_config=capture_config)
    adapter.connect()
    adapter.instrument_chat(agent)
    return adapter
class MSAgentAdapter(BaseAdapter):
    """LayerLens adapter for Microsoft Agent Framework.

    Wraps the ``invoke`` / ``invoke_stream`` attributes of chat objects and
    translates the messages they yield into LayerLens dict events
    (``agent.input``, ``agent.output``, ``agent.handoff``, ``tool.call``,
    ``model.invoke``, ``cost.record``, ``agent.state.change``,
    ``environment.config``).
    """

    FRAMEWORK = "ms_agent_framework"
    VERSION = "0.1.0"
    # The adapter source has no direct ``pydantic`` imports (verified by
    # grep across ``frameworks/ms_agent_framework/``). The adapter wraps
    # AgentChat.invoke() and emits dict events. The pyproject extra pulls
    # ``semantic-kernel>=1.0,<2.0`` (SK 1.x is internally Pydantic v2)
    # but the adapter itself stays version-agnostic.
    requires_pydantic = PydanticCompat.V1_OR_V2

    def __init__(
        self,
        stratix: Any | None = None,
        capture_config: Any | None = None,
        stratix_instance: Any | None = None,
    ) -> None:
        """Create a disconnected adapter.

        Args:
            stratix: Client instance passed to ``BaseAdapter``.
            capture_config: Capture configuration passed to ``BaseAdapter``.
            stratix_instance: Alternative spelling of ``stratix``; used only
                when ``stratix`` is falsy.
        """
        resolved = stratix or stratix_instance
        super().__init__(stratix=resolved, capture_config=capture_config)
        self._originals: dict[int, dict[str, Any]] = {}  # id(chat) -> {method: original}
        self._wrapped_chats: list[Any] = []  # strong refs for disconnect unwrap
        self._adapter_lock = threading.Lock()
        self._seen_agents: set[str] = set()
        self._framework_version: str | None = None
        # run key -> start_ns. The key is the thread id for direct hook calls
        # and a per-invocation random id for the wrapped async generators.
        self._run_starts: dict[int, int] = {}

    def connect(self) -> None:
        """Verify Microsoft Agent Framework availability and prepare the adapter.

        A missing ``semantic-kernel`` install is logged at debug level and
        does not prevent the adapter from being marked connected.
        """
        try:
            import semantic_kernel.agents  # type: ignore[import-not-found,unused-ignore]

            self._framework_version = getattr(semantic_kernel.agents, "__version__", None)
            if not self._framework_version:
                import semantic_kernel  # type: ignore[import-not-found,unused-ignore]

                self._framework_version = getattr(semantic_kernel, "__version__", "unknown")
        except ImportError:
            logger.debug("semantic-kernel agents not installed")
        self._connected = True
        self._status = AdapterStatus.HEALTHY

    def disconnect(self) -> None:
        """Unwrap all instrumented chats and release resources."""
        for chat in self._wrapped_chats:
            self._unwrap_chat(chat)
        self._wrapped_chats.clear()
        self._originals.clear()
        self._seen_agents.clear()
        self._run_starts.clear()
        self._connected = False
        self._status = AdapterStatus.DISCONNECTED

    def _unwrap_chat(self, chat: Any) -> None:
        """Restore original methods on a wrapped chat."""
        chat_id = id(chat)
        originals = self._originals.get(chat_id)
        if not originals:
            return
        for method_name, original in originals.items():
            try:
                setattr(chat, method_name, original)
            except Exception:
                # Best effort: one failed restore must not block the others.
                logger.debug("Could not unwrap %s.%s", chat_id, method_name, exc_info=True)

    def health_check(self) -> AdapterHealth:
        """Return a health snapshot."""
        return AdapterHealth(
            status=self._status,
            framework_name=self.FRAMEWORK,
            framework_version=self._framework_version,
            adapter_version=self.VERSION,
            error_count=self._error_count,
            circuit_open=self._circuit_open,
        )

    def get_adapter_info(self) -> AdapterInfo:
        """Return metadata about this adapter."""
        return AdapterInfo(
            name="MSAgentAdapter",
            version=self.VERSION,
            framework=self.FRAMEWORK,
            framework_version=self._framework_version,
            capabilities=[
                AdapterCapability.TRACE_TOOLS,
                AdapterCapability.TRACE_MODELS,
                AdapterCapability.TRACE_STATE,
                AdapterCapability.TRACE_HANDOFFS,
            ],
            description="LayerLens adapter for Microsoft Agent Framework",
        )

    def serialize_for_replay(self) -> ReplayableTrace:
        """Serialize the current trace data for replay."""
        return ReplayableTrace(
            adapter_name="MSAgentAdapter",
            framework=self.FRAMEWORK,
            trace_id=str(uuid.uuid4()),
            events=list(self._trace_events),
            state_snapshots=[],
            config={"capture_config": self._capture_config.model_dump()},
        )

    # --- Framework Integration ---

    def instrument_chat(self, chat: Any) -> Any:
        """Wrap AgentChat or AgentGroupChat invoke methods to capture lifecycle events.

        Calling this twice with the same object is a no-op the second time.

        Returns:
            The same ``chat`` object, now instrumented.
        """
        chat_id = id(chat)
        if chat_id in self._originals:
            return chat
        originals: dict[str, Any] = {}
        # Wrap invoke() (async generator)
        if hasattr(chat, "invoke"):
            originals["invoke"] = chat.invoke
            chat.invoke = self._create_traced_invoke(chat, chat.invoke)
        # Wrap invoke_stream() if present
        if hasattr(chat, "invoke_stream"):
            originals["invoke_stream"] = chat.invoke_stream
            chat.invoke_stream = self._create_traced_invoke_stream(chat, chat.invoke_stream)
        self._originals[chat_id] = originals
        self._wrapped_chats.append(chat)
        chat_name = getattr(chat, "name", None) or str(type(chat).__name__)
        self._emit_chat_config(chat_name, chat)
        return chat

    def instrument_agent(self, agent: Any) -> Any:
        """Convenience alias: wraps instrument_chat for AgentChat instances."""
        return self.instrument_chat(agent)

    @staticmethod
    def _resolve_agent_name(chat: Any, args: tuple[Any, ...], kwargs: dict[str, Any]) -> str:
        """Pick the name reported for an invocation.

        Uses the ``agent`` keyword (or the first positional argument) when it
        is truthy and carries a ``name``; otherwise falls back to the chat's
        own ``name`` or the literal ``"ms_agent_chat"``.
        """
        chat_name = getattr(chat, "name", None) or "ms_agent_chat"
        agent = kwargs.get("agent") or (args[0] if args else None)
        if not agent:
            return chat_name
        return getattr(agent, "name", None) or chat_name

    def _create_traced_invoke(self, chat: Any, original_invoke: Any) -> Any:
        """Create a traced wrapper for chat.invoke()."""
        adapter = self

        async def traced_invoke(*args: Any, **kwargs: Any) -> Any:
            agent_name = adapter._resolve_agent_name(chat, args, kwargs)
            input_data = kwargs.get("input") or kwargs.get("message")
            # Thread ids cannot tell apart coroutines that overlap on one
            # event-loop thread, so every invocation gets its own timing key.
            run_key = uuid.uuid4().int
            adapter.on_run_start(agent_name=agent_name, input_data=input_data, run_key=run_key)
            error: Exception | None = None
            last_message: Any = None
            # Tracks who spoke last so each handoff names the real previous
            # speaker and a repeated speaker does not re-emit a handoff.
            speaker = agent_name
            try:
                # invoke() returns an async iterable of ChatMessageContent
                async for message in original_invoke(*args, **kwargs):
                    last_message = message
                    speaker = adapter._process_message(chat, message, speaker)
                    yield message
            except Exception as exc:
                error = exc
                raise
            finally:
                output = (
                    adapter._safe_serialize(last_message) if last_message is not None else None
                )
                adapter.on_run_end(
                    agent_name=agent_name, output=output, error=error, run_key=run_key
                )

        traced_invoke._layerlens_original = original_invoke  # type: ignore[attr-defined]
        return traced_invoke

    def _create_traced_invoke_stream(self, chat: Any, original_invoke_stream: Any) -> Any:
        """Create a traced wrapper for chat.invoke_stream().

        Streamed messages are passed through untouched; only the run start,
        the last message, and the run end are recorded.
        """
        adapter = self

        async def traced_invoke_stream(*args: Any, **kwargs: Any) -> Any:
            agent_name = adapter._resolve_agent_name(chat, args, kwargs)
            run_key = uuid.uuid4().int  # see traced_invoke for why not the thread id
            adapter.on_run_start(agent_name=agent_name, input_data=None, run_key=run_key)
            error: Exception | None = None
            last_message: Any = None
            try:
                async for message in original_invoke_stream(*args, **kwargs):
                    last_message = message
                    yield message
            except Exception as exc:
                error = exc
                raise
            finally:
                # ``is not None`` rather than truthiness: a chunk object that
                # evaluates falsy is still a real output.
                output = (
                    adapter._safe_serialize(last_message) if last_message is not None else None
                )
                adapter.on_run_end(
                    agent_name=agent_name, output=output, error=error, run_key=run_key
                )

        traced_invoke_stream._layerlens_original = original_invoke_stream  # type: ignore[attr-defined]
        return traced_invoke_stream

    @staticmethod
    def _usage_field(usage: Any, key: str) -> Any:
        """Read ``key`` from a usage record that is either a dict or an object.

        Returns the stored value as-is, so a genuine count of ``0`` is kept
        instead of being collapsed to ``None``.
        """
        if isinstance(usage, dict):
            return usage.get(key)
        return getattr(usage, key, None)

    def _process_message(self, chat: Any, message: Any, current_agent: str) -> str:
        """Process a chat message to extract tool calls, model info, and handoffs.

        Args:
            chat: The chat the message came from (currently unused).
            message: The message object yielded by the wrapped ``invoke``.
            current_agent: Name of the agent considered active before this
                message.

        Returns:
            The name of the agent considered active after this message: the
            message's own agent name when it differs from ``current_agent``,
            otherwise ``current_agent``. Errors are logged and never raised.
        """
        speaker = current_agent
        try:
            # Detect agent turn transitions (handoffs in group chat)
            msg_agent_name = getattr(message, "agent_name", None) or getattr(message, "name", None)
            if msg_agent_name and msg_agent_name != current_agent:
                self.emit_dict_event(
                    "agent.handoff",
                    {
                        "from_agent": current_agent,
                        "to_agent": msg_agent_name,
                        "reason": "group_chat_turn",
                    },
                )
                speaker = msg_agent_name

            # Extract tool calls from message
            items = getattr(message, "items", None) or []
            for item in items:
                item_type = type(item).__name__
                if "FunctionCall" in item_type or "ToolCall" in item_type:
                    self.emit_dict_event(
                        "tool.call",
                        {
                            "framework": "ms_agent_framework",
                            "tool_name": getattr(item, "name", None)
                            or getattr(item, "function_name", "unknown"),
                            "tool_input": self._safe_serialize(getattr(item, "arguments", None)),
                        },
                    )
                elif "FunctionResult" in item_type or "ToolResult" in item_type:
                    self.emit_dict_event(
                        "tool.call",
                        {
                            "framework": "ms_agent_framework",
                            "tool_name": getattr(item, "name", None)
                            or getattr(item, "function_name", "unknown"),
                            "tool_output": self._safe_serialize(getattr(item, "result", None)),
                        },
                    )

            # Extract model info from metadata
            metadata = getattr(message, "metadata", None) or {}
            if isinstance(metadata, dict):
                model = metadata.get("model") or metadata.get("model_id")
                if model:
                    self.emit_dict_event(
                        "model.invoke",
                        {
                            "framework": "ms_agent_framework",
                            "model": str(model),
                            "provider": self._detect_provider(str(model)),
                        },
                    )
                usage = metadata.get("usage")
                if usage:
                    self.emit_dict_event(
                        "cost.record",
                        {
                            "framework": "ms_agent_framework",
                            "model": str(model) if model else None,
                            "tokens_prompt": self._usage_field(usage, "prompt_tokens"),
                            "tokens_completion": self._usage_field(usage, "completion_tokens"),
                        },
                    )
        except Exception:
            # Telemetry must never break the caller's message stream.
            logger.debug("Could not process message", exc_info=True)
        return speaker

    # --- Lifecycle Hooks ---

    def on_run_start(
        self,
        agent_name: str | None = None,
        input_data: Any = None,
        *,
        run_key: int | None = None,
    ) -> None:
        """Emit agent.input event when a chat invocation starts.

        Args:
            agent_name: Name recorded on the event.
            input_data: Input recorded on the event after ``_safe_serialize``.
            run_key: Key under which the start time is stored for the
                matching ``on_run_end``. Defaults to the current thread id.
        """
        if not self._connected:
            return
        try:
            key = threading.get_ident() if run_key is None else run_key
            start_ns = time.time_ns()
            with self._adapter_lock:
                self._run_starts[key] = start_ns
            self.emit_dict_event(
                "agent.input",
                {
                    "framework": "ms_agent_framework",
                    "agent_name": agent_name,
                    "input": self._safe_serialize(input_data),
                    "timestamp_ns": start_ns,
                },
            )
        except Exception:
            logger.warning("Error in on_run_start", exc_info=True)

    def on_run_end(
        self,
        agent_name: str | None = None,
        output: Any = None,
        error: Exception | None = None,
        *,
        run_key: int | None = None,
    ) -> None:
        """Emit agent.output event when a chat invocation ends.

        Also emits an ``agent.state.change`` event whose subtype is
        ``run_complete`` or ``run_failed`` depending on ``error``.

        Args:
            agent_name: Name recorded on both events.
            output: Output recorded on the event after ``_safe_serialize``.
            error: Exception that ended the run, if any.
            run_key: Key passed to the matching ``on_run_start``. Defaults to
                the current thread id. ``duration_ns`` is 0 when no start
                time is stored under the key.
        """
        if not self._connected:
            return
        try:
            key = threading.get_ident() if run_key is None else run_key
            end_ns = time.time_ns()
            with self._adapter_lock:
                start_ns = self._run_starts.pop(key, 0)
            duration_ns = end_ns - start_ns if start_ns else 0
            payload: dict[str, Any] = {
                "framework": "ms_agent_framework",
                "agent_name": agent_name,
                "output": self._safe_serialize(output),
                "duration_ns": duration_ns,
            }
            if error:
                payload["error"] = str(error)
            self.emit_dict_event("agent.output", payload)
            self.emit_dict_event(
                "agent.state.change",
                {
                    "framework": "ms_agent_framework",
                    "agent_name": agent_name,
                    "event_subtype": "run_complete" if not error else "run_failed",
                },
            )
        except Exception:
            logger.warning("Error in on_run_end", exc_info=True)

    def on_tool_use(
        self,
        tool_name: str,
        tool_input: Any = None,
        tool_output: Any = None,
        error: Exception | None = None,
        latency_ms: float | None = None,
    ) -> None:
        """Emit tool.call event for a tool invocation."""
        if not self._connected:
            return
        try:
            payload: dict[str, Any] = {
                "framework": "ms_agent_framework",
                "tool_name": tool_name,
                "tool_input": self._safe_serialize(tool_input),
                "tool_output": self._safe_serialize(tool_output),
            }
            if error:
                payload["error"] = str(error)
            if latency_ms is not None:
                payload["latency_ms"] = latency_ms
            self.emit_dict_event("tool.call", payload)
        except Exception:
            logger.warning("Error in on_tool_use", exc_info=True)

    def on_llm_call(
        self,
        provider: str | None = None,
        model: str | None = None,
        tokens_prompt: int | None = None,
        tokens_completion: int | None = None,
        latency_ms: float | None = None,
        messages: list[dict[str, str]] | None = None,
    ) -> None:
        """Emit model.invoke event for an LLM call.

        ``messages`` is attached only when the capture config has
        ``capture_content`` enabled.
        """
        if not self._connected:
            return
        try:
            payload: dict[str, Any] = {"framework": "ms_agent_framework"}
            if provider:
                payload["provider"] = provider
            if model:
                payload["model"] = model
            if tokens_prompt is not None:
                payload["tokens_prompt"] = tokens_prompt
            if tokens_completion is not None:
                payload["tokens_completion"] = tokens_completion
            if latency_ms is not None:
                payload["latency_ms"] = latency_ms
            if self._capture_config.capture_content and messages:
                payload["messages"] = messages
            self.emit_dict_event("model.invoke", payload)
        except Exception:
            logger.warning("Error in on_llm_call", exc_info=True)

    def on_handoff(self, from_agent: str, to_agent: str, context: Any = None) -> None:
        """Emit agent.handoff event for agent turn transitions.

        Only a SHA-256 hash of ``str(context)`` is recorded, never the
        context itself; the hash is ``None`` when the context is empty.
        """
        if not self._connected:
            return
        try:
            context_str = str(context) if context else ""
            self.emit_dict_event(
                "agent.handoff",
                {
                    "from_agent": from_agent,
                    "to_agent": to_agent,
                    "reason": "group_chat_turn",
                    "context_hash": hashlib.sha256(context_str.encode()).hexdigest()
                    if context_str
                    else None,
                },
            )
        except Exception:
            logger.warning("Error in on_handoff", exc_info=True)

    # --- Helpers ---

    def _detect_provider(self, model: str | None) -> str | None:
        """Detect the LLM provider from a model identifier.

        Matching is by case-insensitive substring, checked in a fixed order;
        unrecognised names fall back to ``"azure_openai"``.
        """
        if not model:
            return None
        model_lower = model.lower()
        if "gpt" in model_lower or "o1" in model_lower or "o3" in model_lower:
            return "openai"
        if "claude" in model_lower:
            return "anthropic"
        if "gemini" in model_lower:
            return "google"
        if "mistral" in model_lower or "mixtral" in model_lower:
            return "mistral"
        if "phi" in model_lower:
            return "microsoft"
        if "llama" in model_lower:
            return "meta"
        return "azure_openai"  # Default for MS Agent Framework

    def _emit_chat_config(self, chat_name: str, chat: Any) -> None:
        """Emit environment.config event for chat configuration on first encounter."""
        with self._adapter_lock:
            if chat_name in self._seen_agents:
                return
            self._seen_agents.add(chat_name)
        metadata: dict[str, Any] = {
            "framework": "ms_agent_framework",
            "chat_name": chat_name,
            "chat_type": type(chat).__name__,
        }
        # Extract agents from group chat
        agents = getattr(chat, "agents", None)
        if agents:
            metadata["agents"] = [getattr(a, "name", str(a)) for a in agents]
        # Extract agent info from single chat
        agent = getattr(chat, "agent", None)
        if agent:
            metadata["agent_name"] = getattr(agent, "name", str(agent))
            instructions = getattr(agent, "instructions", None)
            if instructions and self._capture_config.capture_content:
                # Truncated to keep the config event small.
                metadata["instructions"] = str(instructions)[:500]
            kernel = getattr(agent, "kernel", None)
            if kernel:
                plugins = getattr(kernel, "plugins", None)
                if plugins:
                    metadata["plugins"] = (
                        list(plugins.keys())
                        if isinstance(plugins, dict)
                        else [str(p) for p in plugins]
                    )
        # Selection strategy for group chats
        selection_strategy = getattr(chat, "selection_strategy", None)
        if selection_strategy:
            metadata["selection_strategy"] = type(selection_strategy).__name__
        termination_strategy = getattr(chat, "termination_strategy", None)
        if termination_strategy:
            metadata["termination_strategy"] = type(termination_strategy).__name__
        self.emit_dict_event("environment.config", metadata)

    def _safe_serialize(self, value: Any) -> Any:
        """Safely serialize a value for event payloads.

        Tries ``model_dump()``, then ``dict()``, then a shallow dict copy;
        primitives pass through; anything else, or any failure, becomes
        ``str(value)``.
        """
        try:
            if value is None:
                return None
            if hasattr(value, "model_dump"):
                return value.model_dump()
            if hasattr(value, "dict"):
                return value.dict()
            if isinstance(value, dict):
                return dict(value)
            if isinstance(value, (str, int, float, bool)):
                return value
            return str(value)
        except Exception:
            return str(value)
def instrument_runner(
    runner: Any = None, stratix: Any = None, capture_config: dict[str, Any] | None = None
) -> Any:
    """Build and connect an ``OpenAIAgentsAdapter`` in one call.

    When ``runner`` is given it is also passed to the adapter's
    ``instrument_runner``; with ``runner=None`` the adapter is only
    connected. The connected adapter is returned either way.
    """
    adapter = OpenAIAgentsAdapter(stratix=stratix, capture_config=capture_config)
    adapter.connect()
    if runner is None:
        return adapter
    adapter.instrument_runner(runner)
    return adapter
Runner wrapping (secondary) — execution lifecycle hooks + +SDK spans map to Stratix events: + AgentSpanData → agent.input / agent.output (L1) + GenerationSpanData → model.invoke (L3) + FunctionSpanData → tool.call (L5a) + HandoffSpanData → agent.handoff (Cross) + GuardrailSpanData → policy.violation (Cross) + Runner start/end → agent.state.change (Cross) +""" + +from __future__ import annotations + +import time +import uuid +import hashlib +import logging +import threading +from typing import Any + +from layerlens.instrument.adapters._base.adapter import ( + AdapterInfo, + BaseAdapter, + AdapterHealth, + AdapterStatus, + ReplayableTrace, + AdapterCapability, +) +from layerlens.instrument.adapters._base.pydantic_compat import PydanticCompat + +logger = logging.getLogger(__name__) + + +class OpenAIAgentsAdapter(BaseAdapter): + """LayerLens adapter for OpenAI Agents SDK.""" + + FRAMEWORK = "openai_agents" + VERSION = "0.1.0" + # The adapter source has no direct ``pydantic`` imports (verified by + # grep across ``frameworks/openai_agents/``). The adapter registers + # a TraceProcessor and wraps Runner; both hand the adapter + # SpanData-typed dicts that are read structurally rather than via + # Pydantic methods. 
+ requires_pydantic = PydanticCompat.V1_OR_V2 + + def __init__( + self, + stratix: Any | None = None, + capture_config: Any | None = None, + stratix_instance: Any | None = None, + ) -> None: + resolved = stratix or stratix_instance + super().__init__(stratix=resolved, capture_config=capture_config) + self._adapter_lock = threading.Lock() + self._seen_agents: set[str] = set() + self._framework_version: str | None = None + self._trace_processor: Any | None = None + self._run_starts: dict[int, int] = {} # thread_id -> start_ns + + def connect(self) -> None: + """Import openai-agents SDK and register trace processor.""" + try: + import agents # type: ignore[import-not-found,unused-ignore] + + self._framework_version = getattr(agents, "__version__", "unknown") + except ImportError: + logger.debug("openai-agents not installed") + self._connected = True + self._status = AdapterStatus.HEALTHY + + def disconnect(self) -> None: + """Remove trace processor and flush sinks.""" + # Note: OpenAI Agents SDK add_trace_processor() is additive and global. + # There is no SDK API to remove a processor, so we disable it via the + # _connected guard in emit_dict_event instead. 
+ self._trace_processor = None + self._seen_agents.clear() + self._connected = False + self._status = AdapterStatus.DISCONNECTED + + def health_check(self) -> AdapterHealth: + return AdapterHealth( + status=self._status, + framework_name=self.FRAMEWORK, + framework_version=self._framework_version, + adapter_version=self.VERSION, + error_count=self._error_count, + circuit_open=self._circuit_open, + ) + + def get_adapter_info(self) -> AdapterInfo: + return AdapterInfo( + name="OpenAIAgentsAdapter", + version=self.VERSION, + framework=self.FRAMEWORK, + framework_version=self._framework_version, + capabilities=[ + AdapterCapability.TRACE_TOOLS, + AdapterCapability.TRACE_MODELS, + AdapterCapability.TRACE_STATE, + AdapterCapability.TRACE_HANDOFFS, + ], + description="LayerLens adapter for OpenAI Agents SDK", + ) + + def serialize_for_replay(self) -> ReplayableTrace: + return ReplayableTrace( + adapter_name="OpenAIAgentsAdapter", + framework=self.FRAMEWORK, + trace_id=str(uuid.uuid4()), + events=list(self._trace_events), + state_snapshots=[], + config={"capture_config": self._capture_config.model_dump()}, + ) + + # --- Framework Integration --- + + def instrument_runner(self, runner: Any) -> Any: + """Register Stratix trace processor with the SDK.""" + try: + from agents import add_trace_processor # type: ignore[import-not-found,unused-ignore] + + processor = self._create_trace_processor() + if processor is None: + logger.warning("Could not create trace processor (TraceProcessor not importable)") + return runner + add_trace_processor(processor) + self._trace_processor = processor + except ImportError: + logger.debug("Cannot import agents.add_trace_processor") + except Exception: + logger.warning("Failed to register trace processor", exc_info=True) + return runner + + def _create_trace_processor(self) -> Any: + """Create a TraceProcessor that routes SDK spans to Stratix events.""" + adapter = self + + try: + from agents.tracing import TracingProcessor # type: 
ignore[import-not-found,unused-ignore] + except ImportError: + return None + + # Renamed from StratixTraceProcessor → LayerLensTraceProcessor; + # backward-compat alias is exposed at module scope below. + class LayerLensTraceProcessor(TracingProcessor): # type: ignore[misc,unused-ignore] + def on_trace_start(self, trace: Any) -> None: + try: + adapter._on_trace_start(trace) + except Exception: + logger.warning("Error in on_trace_start", exc_info=True) + + def on_trace_end(self, trace: Any) -> None: + try: + adapter._on_trace_end(trace) + except Exception: + logger.warning("Error in on_trace_end", exc_info=True) + + def on_span_start(self, span: Any) -> None: + try: + adapter._on_span_start(span) + except Exception: + logger.warning("Error in on_span_start", exc_info=True) + + def on_span_end(self, span: Any) -> None: + try: + adapter._on_span_end(span) + except Exception: + logger.warning("Error in on_span_end", exc_info=True) + + def force_flush(self) -> None: + pass + + def shutdown(self) -> None: + pass + + return LayerLensTraceProcessor() + + # --- Trace Lifecycle --- + + def _on_trace_start(self, trace: Any) -> None: + if not self._connected: + return + tid = threading.get_ident() + start_ns = time.time_ns() + with self._adapter_lock: + self._run_starts[tid] = start_ns + self.emit_dict_event( + "agent.state.change", + { + "framework": "openai_agents", + "event_subtype": "trace_start", + "trace_id": getattr(trace, "trace_id", None), + "timestamp_ns": start_ns, + }, + ) + + def _on_trace_end(self, trace: Any) -> None: + if not self._connected: + return + tid = threading.get_ident() + end_ns = time.time_ns() + with self._adapter_lock: + start_ns = self._run_starts.pop(tid, 0) + duration_ns = end_ns - start_ns if start_ns else 0 + self.emit_dict_event( + "agent.state.change", + { + "framework": "openai_agents", + "event_subtype": "trace_end", + "trace_id": getattr(trace, "trace_id", None), + "duration_ns": duration_ns, + }, + ) + + def _on_span_start(self, span: 
Any) -> None: + span_data = getattr(span, "span_data", None) + if span_data is None: + return + span_type = type(span_data).__name__ + if span_type == "AgentSpanData": + self._on_agent_span_start(span, span_data) + elif span_type == "GenerationSpanData": + pass # handled on end + elif span_type == "HandoffSpanData": + self._on_handoff_span_start(span, span_data) + elif span_type == "GuardrailSpanData": + pass # handled on end + + def _on_span_end(self, span: Any) -> None: + span_data = getattr(span, "span_data", None) + if span_data is None: + return + span_type = type(span_data).__name__ + if span_type == "AgentSpanData": + self._on_agent_span_end(span, span_data) + elif span_type == "GenerationSpanData": + self._on_generation_span_end(span, span_data) + elif span_type == "FunctionSpanData": + self._on_function_span_end(span, span_data) + elif span_type == "HandoffSpanData": + self._on_handoff_span_end(span, span_data) + elif span_type == "GuardrailSpanData": + self._on_guardrail_span_end(span, span_data) + + # --- Span Type Handlers --- + + def _on_agent_span_start(self, span: Any, data: Any) -> None: + agent_name = getattr(data, "name", None) or "unknown" + self._emit_agent_config(agent_name, data) + self.emit_dict_event( + "agent.input", + { + "framework": "openai_agents", + "agent_name": agent_name, + "span_id": getattr(span, "span_id", None), + "timestamp_ns": time.time_ns(), + }, + ) + + def _on_agent_span_end(self, span: Any, data: Any) -> None: + agent_name = getattr(data, "name", None) or "unknown" + output = getattr(data, "output", None) + self.emit_dict_event( + "agent.output", + { + "framework": "openai_agents", + "agent_name": agent_name, + "output": self._safe_serialize(output), + "span_id": getattr(span, "span_id", None), + }, + ) + + def _on_generation_span_end(self, span: Any, data: Any) -> None: + payload: dict[str, Any] = {"framework": "openai_agents"} + model = getattr(data, "model", None) + if model: + payload["model"] = model + input_tokens = 
getattr(data, "input_tokens", None) + output_tokens = getattr(data, "output_tokens", None) + if input_tokens is not None: + payload["tokens_prompt"] = input_tokens + if output_tokens is not None: + payload["tokens_completion"] = output_tokens + duration = getattr(span, "duration_ms", None) + if duration is not None: + payload["latency_ms"] = duration + self.emit_dict_event("model.invoke", payload) + if input_tokens is not None or output_tokens is not None: + self.emit_dict_event( + "cost.record", + { + "framework": "openai_agents", + "model": model, + "tokens_prompt": input_tokens, + "tokens_completion": output_tokens, + "tokens_total": (input_tokens or 0) + (output_tokens or 0), + }, + ) + + def _on_function_span_end(self, span: Any, data: Any) -> None: + tool_name = getattr(data, "name", None) or "unknown" + self.emit_dict_event( + "tool.call", + { + "framework": "openai_agents", + "tool_name": tool_name, + "tool_input": self._safe_serialize(getattr(data, "input", None)), + "tool_output": self._safe_serialize(getattr(data, "output", None)), + "latency_ms": getattr(span, "duration_ms", None), + }, + ) + + def _on_handoff_span_start(self, span: Any, data: Any) -> None: + pass # Start event captured on end for complete data + + def _on_handoff_span_end(self, span: Any, data: Any) -> None: + from_agent = getattr(data, "from_agent", None) or "unknown" + to_agent = getattr(data, "to_agent", None) or "unknown" + self.emit_dict_event( + "agent.handoff", + { + "from_agent": from_agent, + "to_agent": to_agent, + "reason": "handoff", + "framework": "openai_agents", + }, + ) + + def _on_guardrail_span_end(self, span: Any, data: Any) -> None: + guardrail_name = getattr(data, "name", None) or "unknown" + triggered = getattr(data, "triggered", False) + self.emit_dict_event( + "policy.violation", + { + "framework": "openai_agents", + "guardrail_name": guardrail_name, + "triggered": triggered, + "output": self._safe_serialize(getattr(data, "output", None)), + }, + ) + + # --- 
Lifecycle Hooks (Runner wrapping) --- + + def on_run_start(self, agent_name: str | None = None, input_data: Any = None) -> None: + if not self._connected: + return + try: + tid = threading.get_ident() + start_ns = time.time_ns() + with self._adapter_lock: + self._run_starts[tid] = start_ns + self.emit_dict_event( + "agent.input", + { + "framework": "openai_agents", + "agent_name": agent_name, + "input": self._safe_serialize(input_data), + "timestamp_ns": start_ns, + }, + ) + except Exception: + logger.warning("Error in on_run_start", exc_info=True) + + def on_run_end( + self, + agent_name: str | None = None, + output: Any = None, + error: Exception | None = None, + ) -> None: + if not self._connected: + return + try: + tid = threading.get_ident() + end_ns = time.time_ns() + with self._adapter_lock: + start_ns = self._run_starts.pop(tid, 0) + duration_ns = end_ns - start_ns if start_ns else 0 + payload: dict[str, Any] = { + "framework": "openai_agents", + "agent_name": agent_name, + "output": self._safe_serialize(output), + "duration_ns": duration_ns, + } + if error: + payload["error"] = str(error) + self.emit_dict_event("agent.output", payload) + except Exception: + logger.warning("Error in on_run_end", exc_info=True) + + def on_tool_use( + self, + tool_name: str, + tool_input: Any = None, + tool_output: Any = None, + error: Exception | None = None, + latency_ms: float | None = None, + ) -> None: + if not self._connected: + return + try: + payload: dict[str, Any] = { + "framework": "openai_agents", + "tool_name": tool_name, + "tool_input": self._safe_serialize(tool_input), + "tool_output": self._safe_serialize(tool_output), + } + if error: + payload["error"] = str(error) + if latency_ms is not None: + payload["latency_ms"] = latency_ms + self.emit_dict_event("tool.call", payload) + except Exception: + logger.warning("Error in on_tool_use", exc_info=True) + + def on_llm_call( + self, + provider: str | None = None, + model: str | None = None, + tokens_prompt: int | 
def on_handoff(
    self,
    from_agent: str,
    to_agent: str,
    context: Any = None,
) -> None:
    """Emit an ``agent.handoff`` event for a delegation between two agents.

    Does nothing while the adapter is not connected. Failures are logged and
    swallowed.

    Args:
        from_agent: Name of the delegating agent.
        to_agent: Name of the agent receiving control.
        context: Optional handoff context. A truthy context is stringified;
            its SHA-256 hex digest and first 500 characters are recorded.
            A falsy context yields ``None`` for both fields.
    """
    if not self._connected:
        return
    try:
        context_str = str(context) if context else ""
        context_hash = (
            hashlib.sha256(context_str.encode("utf-8")).hexdigest() if context_str else None
        )
        self.emit_dict_event(
            "agent.handoff",
            {
                # Tag the event like every other event this adapter emits
                # (the span-based handoff path already does).
                "framework": "openai_agents",
                "from_agent": from_agent,
                "to_agent": to_agent,
                "reason": "handoff",
                "context_hash": context_hash,
                "context_preview": context_str[:500] if context_str else None,
            },
        )
    except Exception:
        logger.warning("Error in on_handoff", exc_info=True)
+ metadata["handoffs"] = [getattr(h, "agent_name", str(h)) for h in handoffs] + self.emit_dict_event("environment.config", metadata) + + def _safe_serialize(self, value: Any) -> Any: + try: + if value is None: + return None + if hasattr(value, "model_dump"): + return value.model_dump() + if hasattr(value, "dict"): + return value.dict() + if isinstance(value, dict): + return dict(value) + if isinstance(value, (str, int, float, bool)): + return value + return str(value) + except Exception: + return str(value) diff --git a/src/layerlens/instrument/adapters/frameworks/pydantic_ai/__init__.py b/src/layerlens/instrument/adapters/frameworks/pydantic_ai/__init__.py new file mode 100644 index 0000000..67254fc --- /dev/null +++ b/src/layerlens/instrument/adapters/frameworks/pydantic_ai/__init__.py @@ -0,0 +1,31 @@ +""" +LayerLens adapter for PydanticAI. + +Instruments PydanticAI agents via OpenTelemetry wrapper (Logfire-compatible) +and Agent wrapper for lifecycle hooks. +""" + +from __future__ import annotations + +from typing import Any + +from layerlens.instrument.adapters._base.pydantic_compat import PydanticCompat, requires_pydantic + +# Round-2 deliberation item 20: pydantic-ai is built on Pydantic v2 only; +# fail fast under v1. 
def instrument_agent(
    agent: Any,
    stratix: Any | None = None,
    capture_config: Any | None = None,
) -> Any:
    """Instrument a PydanticAI agent in a single call.

    Creates a ``PydanticAIAdapter``, connects it, and wraps the agent's run
    methods via ``adapter.instrument_agent(agent)``.

    Args:
        agent: The PydanticAI agent to instrument; it is patched in place.
        stratix: Optional client instance forwarded to the adapter.
        capture_config: Optional capture configuration forwarded unchanged to
            the adapter constructor.

    Returns:
        The connected adapter (not the agent). Keep a reference to it and
        call ``disconnect()`` on it to restore the agent's original methods.
    """
    adapter = PydanticAIAdapter(stratix=stratix, capture_config=capture_config)
    adapter.connect()
    adapter.instrument_agent(agent)
    return adapter
+ requires_pydantic = PydanticCompat.V2_ONLY + + def __init__( + self, + stratix: Any | None = None, + capture_config: Any | None = None, + stratix_instance: Any | None = None, + ) -> None: + resolved = stratix or stratix_instance + super().__init__(stratix=resolved, capture_config=capture_config) + self._originals: dict[int, dict[str, Any]] = {} # id(agent) -> {method: original} + self._wrapped_agents: list[Any] = [] # strong refs for disconnect unwrap + self._adapter_lock = threading.Lock() + self._seen_agents: set[str] = set() + self._framework_version: str | None = None + self._run_starts: dict[int, int] = {} # thread_id -> start_ns + + def connect(self) -> None: + try: + import pydantic_ai # type: ignore[import-not-found,unused-ignore] + + self._framework_version = getattr(pydantic_ai, "__version__", "unknown") + except ImportError: + logger.debug("pydantic-ai not installed") + self._connected = True + self._status = AdapterStatus.HEALTHY + + def disconnect(self) -> None: + for agent in self._wrapped_agents: + self._unwrap_agent(agent) + self._wrapped_agents.clear() + self._originals.clear() + self._seen_agents.clear() + self._connected = False + self._status = AdapterStatus.DISCONNECTED + + def _unwrap_agent(self, agent: Any) -> None: + """Restore original methods on a wrapped agent.""" + agent_id = id(agent) + originals = self._originals.get(agent_id) + if not originals: + return + for method_name, original in originals.items(): + try: + setattr(agent, method_name, original) + except Exception: + logger.debug("Could not unwrap %s.%s", agent_id, method_name, exc_info=True) + + def health_check(self) -> AdapterHealth: + return AdapterHealth( + status=self._status, + framework_name=self.FRAMEWORK, + framework_version=self._framework_version, + adapter_version=self.VERSION, + error_count=self._error_count, + circuit_open=self._circuit_open, + ) + + def get_adapter_info(self) -> AdapterInfo: + return AdapterInfo( + name="PydanticAIAdapter", + 
def instrument_agent(self, agent: Any) -> Any:
    """Patch ``run`` / ``run_sync`` on ``agent`` with tracing wrappers.

    The original bound methods are remembered (keyed by ``id(agent)``) so
    they can be restored later, the agent is kept in ``_wrapped_agents``,
    and the agent's configuration is reported through
    ``_emit_agent_config``. An agent that was already instrumented by this
    adapter is returned untouched.

    Returns:
        The same ``agent`` object, patched in place.
    """
    key = id(agent)
    if key in self._originals:
        # Already instrumented by this adapter; do not stack wrappers.
        return agent

    saved: dict[str, Any] = {}
    wrappers = (
        ("run", self._create_traced_run),
        ("run_sync", self._create_traced_run_sync),
    )
    for method_name, make_wrapper in wrappers:
        if not hasattr(agent, method_name):
            continue
        bound = getattr(agent, method_name)
        saved[method_name] = bound
        setattr(agent, method_name, make_wrapper(agent, bound))

    self._originals[key] = saved
    self._wrapped_agents.append(agent)

    display_name = getattr(agent, "name", None) or str(type(agent).__name__)
    self._emit_agent_config(display_name, agent)
    return agent
error=error) + adapter._extract_run_usage(result) + return result + + traced_run._layerlens_original = original_run # type: ignore[attr-defined] + return traced_run + + def _create_traced_run_sync(self, agent: Any, original_run_sync: Any) -> Any: + adapter = self + + def traced_run_sync(*args: Any, **kwargs: Any) -> Any: + agent_name = getattr(agent, "name", None) or "pydantic_ai_agent" + user_prompt = args[0] if args else kwargs.get("user_prompt") + adapter.on_run_start(agent_name=agent_name, input_data=user_prompt) + error: Exception | None = None + result = None + try: + result = original_run_sync(*args, **kwargs) + except Exception as exc: + error = exc + raise + finally: + output = None + if result is not None: + output = getattr(result, "data", result) + adapter.on_run_end(agent_name=agent_name, output=output, error=error) + adapter._extract_run_usage(result) + return result + + traced_run_sync._layerlens_original = original_run_sync # type: ignore[attr-defined] + return traced_run_sync + + def _extract_run_usage(self, result: Any) -> None: + """Extract usage info from PydanticAI RunResult.""" + if result is None: + return + try: + usage = getattr(result, "usage", None) or getattr(result, "_usage", None) + if usage: + self.emit_dict_event( + "cost.record", + { + "framework": "pydantic_ai", + "tokens_prompt": getattr(usage, "request_tokens", None), + "tokens_completion": getattr(usage, "response_tokens", None), + "tokens_total": getattr(usage, "total_tokens", None), + }, + ) + # Extract model invocation details + all_messages = getattr(result, "all_messages", None) or [] + for msg in all_messages: + msg_kind = getattr(msg, "kind", None) + if msg_kind == "response": + model = getattr(result, "model_name", None) + self.emit_dict_event( + "model.invoke", + { + "framework": "pydantic_ai", + "model": model, + "provider": self._detect_provider(model), + }, + ) + elif msg_kind == "tool-return": + self.emit_dict_event( + "tool.call", + { + "framework": "pydantic_ai", 
+ "tool_name": getattr(msg, "tool_name", "unknown"), + "tool_output": self._safe_serialize(getattr(msg, "content", None)), + }, + ) + except Exception: + logger.debug("Could not extract run usage", exc_info=True) + + # --- Lifecycle Hooks --- + + def on_run_start(self, agent_name: str | None = None, input_data: Any = None) -> None: + if not self._connected: + return + try: + tid = threading.get_ident() + start_ns = time.time_ns() + with self._adapter_lock: + self._run_starts[tid] = start_ns + self.emit_dict_event( + "agent.input", + { + "framework": "pydantic_ai", + "agent_name": agent_name, + "input": self._safe_serialize(input_data), + "timestamp_ns": start_ns, + }, + ) + except Exception: + logger.warning("Error in on_run_start", exc_info=True) + + def on_run_end( + self, + agent_name: str | None = None, + output: Any = None, + error: Exception | None = None, + ) -> None: + if not self._connected: + return + try: + tid = threading.get_ident() + end_ns = time.time_ns() + with self._adapter_lock: + start_ns = self._run_starts.pop(tid, 0) + duration_ns = end_ns - start_ns if start_ns else 0 + payload: dict[str, Any] = { + "framework": "pydantic_ai", + "agent_name": agent_name, + "output": self._safe_serialize(output), + "duration_ns": duration_ns, + } + if error: + payload["error"] = str(error) + self.emit_dict_event("agent.output", payload) + self.emit_dict_event( + "agent.state.change", + { + "framework": "pydantic_ai", + "agent_name": agent_name, + "event_subtype": "run_complete" if not error else "run_failed", + }, + ) + except Exception: + logger.warning("Error in on_run_end", exc_info=True) + + def on_tool_use( + self, + tool_name: str, + tool_input: Any = None, + tool_output: Any = None, + error: Exception | None = None, + latency_ms: float | None = None, + ) -> None: + if not self._connected: + return + try: + payload: dict[str, Any] = { + "framework": "pydantic_ai", + "tool_name": tool_name, + "tool_input": self._safe_serialize(tool_input), + 
def on_handoff(self, from_agent: str, to_agent: str, context: Any = None) -> None:
    """Emit an ``agent.handoff`` event for a delegation between two agents.

    Does nothing while the adapter is not connected. Failures are logged and
    swallowed.

    Args:
        from_agent: Name of the delegating agent.
        to_agent: Name of the agent receiving control.
        context: Optional handoff context. A truthy context is stringified
            and its SHA-256 hex digest is recorded as ``context_hash``;
            otherwise ``context_hash`` is ``None``.
    """
    if not self._connected:
        return
    try:
        context_str = str(context) if context else ""
        context_hash = hashlib.sha256(context_str.encode()).hexdigest() if context_str else None
        self.emit_dict_event(
            "agent.handoff",
            {
                # Tag the event like every other event this adapter emits.
                "framework": "pydantic_ai",
                "from_agent": from_agent,
                "to_agent": to_agent,
                "reason": "pydantic_ai_handoff",
                "context_hash": context_hash,
            },
        )
    except Exception:
        logger.warning("Error in on_handoff", exc_info=True)
"google" + if "mistral" in model_lower or "mixtral" in model_lower: + return "mistral" + return None + + def _emit_agent_config(self, agent_name: str, agent: Any) -> None: + with self._adapter_lock: + if agent_name in self._seen_agents: + return + self._seen_agents.add(agent_name) + metadata: dict[str, Any] = { + "framework": "pydantic_ai", + "agent_name": agent_name, + } + model = getattr(agent, "model", None) + if model: + metadata["model"] = str(model) + system_prompt = getattr(agent, "system_prompt", None) + if system_prompt and self._capture_config.capture_content: + metadata["system_prompt"] = str(system_prompt)[:500] + tools = getattr(agent, "_function_tools", None) or getattr(agent, "tools", None) + if tools: + if isinstance(tools, dict): + metadata["tools"] = list(tools.keys()) + else: + metadata["tools"] = [getattr(t, "name", str(t)) for t in tools] + result_type = getattr(agent, "result_type", None) + if result_type: + metadata["result_type"] = str(result_type) + self.emit_dict_event("environment.config", metadata) + + def _safe_serialize(self, value: Any) -> Any: + try: + if value is None: + return None + if hasattr(value, "model_dump"): + return value.model_dump() + if hasattr(value, "dict"): + return value.dict() + if isinstance(value, dict): + return dict(value) + if isinstance(value, (str, int, float, bool)): + return value + return str(value) + except Exception: + return str(value) diff --git a/src/layerlens/instrument/adapters/frameworks/semantic_kernel/__init__.py b/src/layerlens/instrument/adapters/frameworks/semantic_kernel/__init__.py new file mode 100644 index 0000000..bb11927 --- /dev/null +++ b/src/layerlens/instrument/adapters/frameworks/semantic_kernel/__init__.py @@ -0,0 +1,16 @@ +""" +STRATIX Semantic Kernel Adapter + +Provides plugin invocation tracing, planner execution tracking, +and memory operation capture for Microsoft Semantic Kernel. 
+""" + +from __future__ import annotations + +from layerlens.instrument.adapters.frameworks.semantic_kernel.lifecycle import ( + SemanticKernelAdapter, +) + +ADAPTER_CLASS = SemanticKernelAdapter + +__all__ = ["SemanticKernelAdapter", "ADAPTER_CLASS"] diff --git a/src/layerlens/instrument/adapters/frameworks/semantic_kernel/filters.py b/src/layerlens/instrument/adapters/frameworks/semantic_kernel/filters.py new file mode 100644 index 0000000..2e30ba8 --- /dev/null +++ b/src/layerlens/instrument/adapters/frameworks/semantic_kernel/filters.py @@ -0,0 +1,259 @@ +""" +Semantic Kernel Filter Implementations + +Provides STRATIX-instrumented filter classes for the SK filter API: +- LayerLensFunctionFilter: Function invocation pre/post hooks +- LayerLensPromptRenderFilter: Prompt template rendering hooks +- LayerLensAutoFunctionFilter: Auto-invoked function hooks +""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from layerlens.instrument.adapters.frameworks.semantic_kernel.lifecycle import SemanticKernelAdapter + +logger = logging.getLogger(__name__) + + +class LayerLensFunctionFilter: + """ + Intercepts SK function invocations via the FunctionInvocationFilter API. + + Captures plugin name, function name, arguments, result, and latency. + """ + + def __init__(self, adapter: SemanticKernelAdapter) -> None: + self._adapter = adapter + self._contexts: dict[int, dict[str, Any]] = {} + + async def __call__(self, context: Any, next: Any = None) -> None: + """SK filter callable interface: (context, next=...) 
async def on_function_invocation(
    self,
    context: Any,
    next_handler: Any = None,
) -> None:
    """Trace one function invocation around the rest of the filter chain.

    Calls the adapter's ``on_function_start`` before and ``on_function_end``
    after awaiting ``next_handler(context)``. Errors raised by the adapter
    hooks are logged and swallowed; errors raised by ``next_handler`` are
    passed to the end hook and then re-raised unchanged.

    Args:
        context: The invocation context supplied by the framework.
        next_handler: The next filter / the function itself; skipped when
            falsy.
    """
    plugin_name = self._extract_plugin_name(context)
    function_name = self._extract_function_name(context)
    arguments = self._extract_arguments(context)

    try:
        trace_ctx = self._adapter.on_function_start(
            plugin_name=plugin_name,
            function_name=function_name,
            arguments=arguments,
        )
    except Exception:
        logger.warning("Error in function start hook", exc_info=True)
        # Keep the identifying names so the end hook can still attribute the
        # call to the right plugin/function instead of an empty name.
        trace_ctx = {"plugin_name": plugin_name, "function_name": function_name}

    error: Exception | None = None
    try:
        if next_handler:
            await next_handler(context)
    except Exception as exc:
        error = exc
        raise
    finally:
        # Runs on success and on failure so every invocation gets an end event.
        try:
            result = self._extract_result(context)
            self._adapter.on_function_end(
                context=trace_ctx,
                result=result,
                error=error,
            )
        except Exception:
            logger.warning("Error in function end hook", exc_info=True)
getattr(fn, "name", "") or "" + return getattr(context, "function_name", "") or "" + + @staticmethod + def _extract_arguments(context: Any) -> dict[str, Any] | None: + args = getattr(context, "arguments", None) + if args is None: + return None + if isinstance(args, dict): + return args + if hasattr(args, "items"): + return dict(args.items()) + return None + + @staticmethod + def _extract_result(context: Any) -> Any: + return getattr(context, "result", None) + + +class LayerLensPromptRenderFilter: + """ + Intercepts SK prompt rendering via the PromptRenderFilter API. + + Captures template text and rendered prompt string. + """ + + def __init__(self, adapter: SemanticKernelAdapter) -> None: + self._adapter = adapter + + async def __call__(self, context: Any, next: Any = None) -> None: + """SK filter callable interface.""" + return await self.on_prompt_render(context, next) + + async def on_prompt_render( + self, + context: Any, + next_handler: Any = None, + ) -> None: + """Pre/post hook for prompt rendering.""" + function_name = getattr(context, "function_name", None) or "" + template = getattr(context, "prompt_template", None) + + if next_handler: + await next_handler(context) + + try: + rendered = getattr(context, "rendered_prompt", None) + self._adapter.on_prompt_render( + template=str(template) if template else None, + rendered_prompt=str(rendered) if rendered else None, + function_name=function_name, + ) + except Exception: + logger.warning("Error in prompt render hook", exc_info=True) + + def on_prompt_render_sync( + self, + template: str | None = None, + rendered_prompt: str | None = None, + function_name: str | None = None, + ) -> None: + """Synchronous hook for testing.""" + try: + self._adapter.on_prompt_render( + template=template, + rendered_prompt=rendered_prompt, + function_name=function_name, + ) + except Exception: + logger.warning("Error in sync prompt render hook", exc_info=True) + + +class LayerLensAutoFunctionFilter: + """ + Intercepts 
async def on_auto_function_invocation(
    self,
    context: Any,
    next_handler: Any = None,
) -> None:
    """Trace one auto-invoked (LLM-initiated) function call.

    Same flow as the regular function filter, but both adapter hooks are
    called with ``auto_invoked=True``. Errors raised by the adapter hooks
    are logged and swallowed; errors raised by ``next_handler`` are passed
    to the end hook and then re-raised unchanged.

    Args:
        context: The invocation context supplied by the framework.
        next_handler: The next filter / the function itself; skipped when
            falsy.
    """
    plugin_name = LayerLensFunctionFilter._extract_plugin_name(context)
    function_name = LayerLensFunctionFilter._extract_function_name(context)
    arguments = LayerLensFunctionFilter._extract_arguments(context)

    try:
        trace_ctx = self._adapter.on_function_start(
            plugin_name=plugin_name,
            function_name=function_name,
            arguments=arguments,
            auto_invoked=True,
        )
    except Exception:
        logger.warning("Error in auto function start hook", exc_info=True)
        # Keep the identifying names so the end hook can still attribute the
        # call to the right plugin/function instead of an empty name.
        trace_ctx = {"plugin_name": plugin_name, "function_name": function_name}

    error: Exception | None = None
    try:
        if next_handler:
            await next_handler(context)
    except Exception as exc:
        error = exc
        raise
    finally:
        # Runs on success and on failure so every invocation gets an end event.
        try:
            result = LayerLensFunctionFilter._extract_result(context)
            self._adapter.on_function_end(
                context=trace_ctx,
                result=result,
                error=error,
                auto_invoked=True,
            )
        except Exception:
            logger.warning("Error in auto function end hook", exc_info=True)
in sync auto function hook", exc_info=True) diff --git a/src/layerlens/instrument/adapters/frameworks/semantic_kernel/lifecycle.py b/src/layerlens/instrument/adapters/frameworks/semantic_kernel/lifecycle.py new file mode 100644 index 0000000..38eab07 --- /dev/null +++ b/src/layerlens/instrument/adapters/frameworks/semantic_kernel/lifecycle.py @@ -0,0 +1,602 @@ +""" +STRATIX Semantic Kernel Lifecycle Hooks + +Provides the main SemanticKernelAdapter class. Instruments SK Kernel +instances via the official filter API (FunctionInvocationFilter, +PromptRenderFilter, AutoFunctionInvocationFilter). +""" + +from __future__ import annotations + +import time +import uuid +import logging +import threading +from typing import Any + +from layerlens.instrument.adapters._base.adapter import ( + AdapterInfo, + BaseAdapter, + AdapterHealth, + AdapterStatus, + ReplayableTrace, + AdapterCapability, +) +from layerlens.instrument.adapters._base.capture import CaptureConfig +from layerlens.instrument.adapters._base.pydantic_compat import PydanticCompat + +logger = logging.getLogger(__name__) + + +class SemanticKernelAdapter(BaseAdapter): + """ + Main adapter for integrating STRATIX with Microsoft Semantic Kernel. + + Instruments Kernel instances via the official SK filter API to capture + plugin invocations, planner executions, memory operations, and LLM calls. + + Usage: + adapter = SemanticKernelAdapter(stratix=stratix_instance) + adapter.connect() + kernel = adapter.instrument_kernel(kernel) + result = await kernel.invoke(my_function, arg1=val1) + """ + + FRAMEWORK = "semantic_kernel" + VERSION = "0.1.0" + # The adapter source files import nothing from ``pydantic`` directly + # (verified by grep across ``frameworks/semantic_kernel/``). The + # adapter only registers SK filter callbacks and emits dict events; + # it never touches Semantic Kernel's own Pydantic models. 
def connect(self) -> None:
    """Mark the adapter as connected, probing for Semantic Kernel first.

    The import probe is informational only: whether or not the package is
    importable, the framework version is obtained from
    ``_detect_framework_version()`` and the adapter ends up connected with
    a healthy status.
    """
    try:
        import semantic_kernel as sk_module  # type: ignore[import-not-found,unused-ignore] # noqa: F401

        detected = getattr(sk_module, "__version__", "unknown")
        logger.debug("Semantic Kernel %s detected", detected)
    except ImportError:
        logger.debug("Semantic Kernel not installed; adapter usable in mock/test mode")

    self._framework_version = self._detect_framework_version()
    self._connected = True
    self._status = AdapterStatus.HEALTHY
def instrument_kernel(self, kernel: Any) -> Any:
    """
    Instrument a Semantic Kernel instance with STRATIX tracing.

    Registers filter instances on the kernel for function invocations,
    prompt rendering, and auto-function invocations. When the kernel has no
    ``add_filter`` method the filters are stored on
    ``kernel._stratix_filters`` instead. A registration failure is logged
    and does not prevent the kernel from being returned.

    The call is idempotent per adapter: a kernel already tagged with this
    adapter is returned unchanged, so the filters are not registered twice.

    Args:
        kernel: A semantic_kernel.Kernel instance

    Returns:
        The modified kernel (same object, with filters attached)
    """
    if getattr(kernel, "_stratix_adapter", None) is self:
        # Already instrumented by this adapter; registering the filters a
        # second time would emit every event twice.
        return kernel

    from layerlens.instrument.adapters.frameworks.semantic_kernel.filters import (
        LayerLensFunctionFilter,
        LayerLensAutoFunctionFilter,
        LayerLensPromptRenderFilter,
    )

    func_filter = LayerLensFunctionFilter(adapter=self)
    prompt_filter = LayerLensPromptRenderFilter(adapter=self)
    auto_filter = LayerLensAutoFunctionFilter(adapter=self)

    # Register filters via SK's filter API
    try:
        if hasattr(kernel, "add_filter"):
            kernel.add_filter("function_invocation", func_filter)
            kernel.add_filter("prompt_rendering", prompt_filter)
            kernel.add_filter("auto_function_invocation", auto_filter)
        else:
            # Fallback: store on kernel for callback-based approach
            kernel._stratix_filters = [func_filter, prompt_filter, auto_filter]
        # Separate list object from the one stored on the kernel, so clearing
        # the adapter's list never empties the kernel's.
        self._filters_registered = [func_filter, prompt_filter, auto_filter]
    except Exception:
        logger.warning("Could not register filters on kernel", exc_info=True)

    kernel._stratix_adapter = self

    # Discover registered plugins
    self._discover_plugins(kernel)

    return kernel
on_function_start( + self, + plugin_name: str, + function_name: str, + arguments: dict[str, Any] | None = None, + auto_invoked: bool = False, + ) -> dict[str, Any]: + """ + Handle function invocation start. + + Returns context dict for correlation with on_function_end. + """ + with self._adapter_lock: + self._invocation_count += 1 + invocation_seq = self._invocation_count + + context = { + "start_ns": time.time_ns(), + "invocation_seq": invocation_seq, + "plugin_name": plugin_name, + "function_name": function_name, + } + + # Emit agent config on first plugin encounter + with self._adapter_lock: + if plugin_name not in self._seen_plugins: + self._seen_plugins.add(plugin_name) + self.emit_dict_event( + "environment.config", + { + "framework": "semantic_kernel", + "plugin_name": plugin_name, + "function_name": function_name, + }, + ) + + return context + + def on_function_end( + self, + context: dict[str, Any], + result: Any = None, + error: Exception | None = None, + auto_invoked: bool = False, + ) -> None: + """ + Handle function invocation end. + + Emits tool.call (L5a) for plugin functions. 
+ """ + start_ns = context.get("start_ns", 0) + elapsed_ms = (time.time_ns() - start_ns) / 1_000_000 if start_ns else 0 + + payload: dict[str, Any] = { + "framework": "semantic_kernel", + "tool_name": f"{context.get('plugin_name', '')}.{context.get('function_name', '')}", + "plugin_name": context.get("plugin_name"), + "function_name": context.get("function_name"), + "latency_ms": elapsed_ms, + "invocation_seq": context.get("invocation_seq"), + } + + if auto_invoked: + payload["auto_invoked"] = True + + if result is not None: + payload["result_preview"] = self._truncate(self._safe_serialize(result)) + + if error: + payload["error"] = str(error) + + self.emit_dict_event("tool.call", payload) + + def on_prompt_render( + self, + template: str | None = None, + rendered_prompt: str | None = None, + function_name: str | None = None, + ) -> None: + """ + Handle prompt template rendering. + + Emits agent.code (L2) for template rendering events. + """ + payload: dict[str, Any] = { + "framework": "semantic_kernel", + "event_subtype": "prompt_render", + } + if function_name: + payload["function_name"] = function_name + if template: + payload["template_preview"] = self._truncate(template, 500) + if rendered_prompt: + payload["rendered_preview"] = self._truncate(rendered_prompt, 500) + + self.emit_dict_event("agent.code", payload) + + def on_model_invoke( + self, + provider: str | None = None, + model: str | None = None, + prompt_tokens: int | None = None, + completion_tokens: int | None = None, + latency_ms: float | None = None, + error: str | None = None, + messages: list[dict[str, str]] | None = None, + ) -> None: + """ + Handle LLM call from SK service. + + Emits model.invoke (L3) and cost.record (cross-cutting). 
+ """ + payload: dict[str, Any] = { + "framework": "semantic_kernel", + } + if provider: + payload["provider"] = provider + if model: + payload["model"] = model + if prompt_tokens is not None: + payload["prompt_tokens"] = prompt_tokens + if completion_tokens is not None: + payload["completion_tokens"] = completion_tokens + if latency_ms is not None: + payload["latency_ms"] = latency_ms + if error: + payload["error"] = error + if self._capture_config.capture_content and messages: + payload["messages"] = messages + + self.emit_dict_event("model.invoke", payload) + + # Emit cost record + if prompt_tokens or completion_tokens: + self.emit_dict_event( + "cost.record", + { + "framework": "semantic_kernel", + "provider": provider, + "model": model, + "prompt_tokens": prompt_tokens or 0, + "completion_tokens": completion_tokens or 0, + "total_tokens": (prompt_tokens or 0) + (completion_tokens or 0), + }, + ) + + def on_planner_step( + self, + planner_type: str, + step_index: int | None = None, + plan: Any = None, + thought: str | None = None, + action: str | None = None, + observation: str | None = None, + status: str | None = None, + ) -> None: + """ + Handle planner execution step. + + Emits agent.code (L2) for plan generation and step execution. 
def on_kernel_invoke_start(self, input_text: Any = None) -> None:
    """Record the start of a kernel invocation and emit ``agent.input`` (L1).

    The start time is stored on ``self._kernel_start_ns`` under the adapter
    lock so ``on_kernel_invoke_end`` can compute a duration from it.

    The ``timestamp_ns`` placed in the event is the value captured by *this*
    call. It is deliberately not re-read from ``self._kernel_start_ns`` after
    the lock is released: another thread starting an invocation in between
    could overwrite the attribute, and this event would then carry that
    thread's timestamp.

    Args:
        input_text: The invocation input; passed through
            ``self._safe_serialize`` before being emitted.
    """
    start_ns = time.time_ns()
    with self._adapter_lock:
        self._kernel_start_ns = start_ns

    self.emit_dict_event(
        "agent.input",
        {
            "framework": "semantic_kernel",
            "input": self._safe_serialize(input_text),
            "timestamp_ns": start_ns,
        },
    )
def _truncate(self, text: Any, max_len: int = 500) -> str:
    """Return ``text`` as a string capped at ``max_len`` characters.

    Non-string values are converted with ``str()`` first. When the string is
    longer than ``max_len`` it is cut to ``max_len`` characters and ``"..."``
    is appended, so a truncated result is ``max_len + 3`` characters long.
    """
    rendered = text if isinstance(text, str) else str(text)
    overflows = len(rendered) > max_len
    return rendered[:max_len] + "..." if overflows else rendered
async def save_information(
    self,
    collection: str,
    text: str,
    id: str,  # noqa: A002 — matches SK interface
    description: str | None = None,
    additional_metadata: str | None = None,
) -> None:
    """Persist one piece of text through the wrapped memory service.

    Builds a ``MemoryEntry`` with ``memory_type="semantic"``, the collection
    as its namespace, ``id`` as both entry id and key, and a fixed importance
    of ``0.5``, then passes it to ``self._memory_service.store()``.

    Args:
        collection: SK memory collection name; stored as the entry namespace.
        text: Text content to store.
        id: Identifier for this memory; used as entry id and key.
        description: Optional description; added to the entry metadata when
            non-empty.
        additional_metadata: Optional extra metadata string; added to the
            entry metadata under ``"additional"`` when non-empty.
    """
    # Function-scope import, kept as in the original implementation.
    from layerlens.instrument._vendored.memory_models import MemoryEntry

    optional_fields = (
        ("description", description),
        ("additional", additional_metadata),
    )
    entry_metadata: dict[str, Any] = {"source": "semantic_kernel_memory_store"}
    entry_metadata.update(
        {label: value for label, value in optional_fields if value}
    )

    self._memory_service.store(
        MemoryEntry(
            id=id,
            org_id=self._org_id,
            agent_id=self._agent_id,
            memory_type="semantic",
            namespace=collection,
            key=id,
            content=text,
            importance=0.5,
            metadata=entry_metadata,
        )
    )
class SKMetadataExtractor:
    """Extract metadata from Semantic Kernel components.

    Both extractors are best-effort: any exception raised while probing the
    object is logged at debug level and whatever was collected up to that
    point is returned.
    """

    def extract_plugin_metadata(self, plugin: Any) -> dict[str, Any]:
        """Extract metadata from a registered plugin.

        Args:
            plugin: The plugin object to inspect.

        Returns:
            A dict with ``plugin_name`` (the ``name`` attribute, or
            ``str(plugin)`` when absent), ``description`` (``None`` when
            absent) and, when ``plugin.functions`` is a non-empty mapping-like
            object, ``function_names``.
        """
        metadata: dict[str, Any] = {}
        try:
            metadata["plugin_name"] = getattr(plugin, "name", str(plugin))
            metadata["description"] = getattr(plugin, "description", None)

            # Function names are only available from mapping-like containers.
            functions = getattr(plugin, "functions", None)
            if functions and (isinstance(functions, dict) or hasattr(functions, "keys")):
                metadata["function_names"] = list(functions.keys())
        except Exception:
            logger.debug("Error extracting plugin metadata", exc_info=True)
        return metadata

    def extract_kernel_metadata(self, kernel: Any) -> dict[str, Any]:
        """Extract metadata from a Kernel instance.

        Args:
            kernel: The kernel object to inspect.

        Returns:
            A dict that may contain ``plugin_count`` / ``plugin_names``,
            ``service_count`` / ``service_types`` and ``memory_backend``,
            depending on which attributes the kernel exposes.
        """
        metadata: dict[str, Any] = {}
        try:
            plugins = getattr(kernel, "plugins", None)
            if plugins:
                # Accept any mapping-like container (anything with ``keys()``),
                # matching how plugin functions are detected above; previously
                # only a real ``dict`` yielded ``plugin_names``.
                if isinstance(plugins, dict) or hasattr(plugins, "keys"):
                    plugin_names = list(plugins.keys())
                    metadata["plugin_count"] = len(plugin_names)
                    metadata["plugin_names"] = plugin_names
                elif hasattr(plugins, "__len__"):
                    # Plain sized containers give a count but no names.
                    metadata["plugin_count"] = len(plugins)

            services = getattr(kernel, "services", None)
            if services and isinstance(services, dict):
                metadata["service_count"] = len(services)
                metadata["service_types"] = [type(s).__name__ for s in services.values()]

            memory = getattr(kernel, "memory", None)
            if memory:
                metadata["memory_backend"] = type(memory).__name__

        except Exception:
            logger.debug("Error extracting kernel metadata", exc_info=True)
        return metadata
def instrument_agent(
    agent: Any,
    stratix: Any = None,
    capture_config: Optional[CaptureConfig] = None,
) -> SmolAgentsAdapter:
    """Create a connected ``SmolAgentsAdapter`` and wrap ``agent`` with it.

    Args:
        agent: The SmolAgents agent to instrument.
        stratix: Forwarded to the adapter constructor.
        capture_config: Forwarded to the adapter constructor.

    Returns:
        The adapter, already connected, with ``agent`` instrumented.
    """
    instrumented = SmolAgentsAdapter(capture_config=capture_config, stratix=stratix)
    instrumented.connect()
    instrumented.instrument_agent(agent)
    return instrumented
class SmolAgentsAdapter(BaseAdapter):
    """LayerLens adapter for SmolAgents (HuggingFace).

    The adapter replaces each instrumented agent's ``run`` attribute with a
    tracing wrapper (see ``instrument_agent``) and restores the original on
    ``disconnect``. Managed agents found on ``agent.managed_agents`` are
    instrumented recursively.
    """

    FRAMEWORK = "smolagents"
    VERSION = "0.1.0"
    # The only Pydantic touch in the adapter is the
    # ``from layerlens._compat.pydantic import model_dump`` import inside
    # ``serialize_for_replay`` — the v1/v2 shim itself. SmolAgents 1.x uses
    # Pydantic internally but the adapter only wraps ``Agent.run()`` and never
    # touches framework Pydantic models directly.
    requires_pydantic = PydanticCompat.V1_OR_V2

    def __init__(
        self,
        stratix: Any = None,
        capture_config: Any = None,
        stratix_instance: Any = None,
    ) -> None:
        """Create the adapter.

        Args:
            stratix: Client handed to ``BaseAdapter``; takes precedence over
                ``stratix_instance`` when both are truthy.
            capture_config: Capture configuration handed to ``BaseAdapter``.
            stratix_instance: Alternative spelling of ``stratix``.
        """
        resolved = stratix or stratix_instance
        super().__init__(stratix=resolved, capture_config=capture_config)
        # id(agent) -> {attribute name: original attribute value}
        self._originals: Dict[int, Dict[str, Any]] = {}
        self._adapter_lock = threading.Lock()
        self._seen_agents: Set[str] = set()
        self._framework_version: Optional[str] = None
        # thread id -> stack of run start timestamps (ns). A stack, not a
        # single value, because a manager agent runs its managed agents
        # nested on the same thread; with one slot per thread the inner run
        # would overwrite and then pop the manager's start time.
        self._run_starts: Dict[int, List[int]] = {}
        # Strong references so ``disconnect`` can unwrap every agent.
        self._wrapped_agents: List[Any] = []

    def connect(self) -> None:
        """Record the installed smolagents version, if any, and mark healthy.

        A missing ``smolagents`` package is logged at debug level and does not
        prevent the adapter from connecting.
        """
        try:
            import smolagents  # type: ignore[import-not-found,unused-ignore]

            version = getattr(smolagents, "__version__", "unknown")
            self._framework_version = (
                str(version) if version is not None else "unknown"
            )
        except ImportError:
            logger.debug("smolagents not installed")
        self._connected = True
        self._status = AdapterStatus.HEALTHY

    def disconnect(self) -> None:
        """Restore every wrapped agent and drop all per-agent/per-run state."""
        for agent in self._wrapped_agents:
            self._unwrap_agent(agent)
        self._wrapped_agents.clear()
        self._originals.clear()
        self._seen_agents.clear()
        # Drop start times of runs that never reached ``on_run_end`` so they
        # cannot be picked up by a later run after reconnecting.
        with self._adapter_lock:
            self._run_starts.clear()
        self._connected = False
        self._status = AdapterStatus.DISCONNECTED

    def health_check(self) -> AdapterHealth:
        """Return a snapshot of the adapter's status and error counters."""
        return AdapterHealth(
            status=self._status,
            framework_name=self.FRAMEWORK,
            framework_version=self._framework_version,
            adapter_version=self.VERSION,
            error_count=self._error_count,
            circuit_open=self._circuit_open,
        )

    def get_adapter_info(self) -> AdapterInfo:
        """Return static metadata describing this adapter."""
        return AdapterInfo(
            name="SmolAgentsAdapter",
            version=self.VERSION,
            framework=self.FRAMEWORK,
            framework_version=self._framework_version,
            capabilities=[
                AdapterCapability.TRACE_TOOLS,
                AdapterCapability.TRACE_MODELS,
                AdapterCapability.TRACE_STATE,
                AdapterCapability.TRACE_HANDOFFS,
            ],
            description="LayerLens adapter for SmolAgents (HuggingFace)",
        )

    def serialize_for_replay(self) -> ReplayableTrace:
        """Package the recorded events and capture config for replay.

        A fresh random ``trace_id`` is generated on every call.
        """
        from layerlens._compat.pydantic import model_dump

        return ReplayableTrace(
            adapter_name="SmolAgentsAdapter",
            framework=self.FRAMEWORK,
            trace_id=str(uuid.uuid4()),
            events=list(self._trace_events),
            state_snapshots=[],
            config={"capture_config": model_dump(self._capture_config)},
        )

    # --- Framework integration ---

    def instrument_agent(self, agent: Any) -> Any:
        """Wrap a SmolAgents agent's ``run()`` method.

        Instrumenting the same agent object twice is a no-op. Agents listed
        in ``agent.managed_agents`` (a dict or a list) are instrumented
        recursively.

        Args:
            agent: The agent to instrument.

        Returns:
            The same ``agent`` object.
        """
        agent_id = id(agent)
        if agent_id in self._originals:
            return agent
        originals: Dict[str, Any] = {}
        if hasattr(agent, "run"):
            originals["run"] = agent.run
            agent.run = self._create_traced_run(agent, agent.run)
        self._originals[agent_id] = originals
        self._wrapped_agents.append(agent)
        agent_name = self._get_agent_name(agent)
        agent_type = type(agent).__name__
        self._emit_agent_config(agent_name, agent, agent_type)
        managed = getattr(agent, "managed_agents", None)
        if managed:
            if isinstance(managed, dict):
                for managed_agent in managed.values():
                    self.instrument_agent(managed_agent)
            elif isinstance(managed, list):
                for managed_agent in managed:
                    self.instrument_agent(managed_agent)
        return agent

    def _create_traced_run(self, agent: Any, original_run: Any) -> Any:
        """Build the wrapper that replaces ``agent.run``.

        The wrapper reports run start and end around ``original_run``;
        exceptions are reported and then re-raised. For agents whose class is
        named ``CodeAgent`` a code-execution event is emitted when the run
        produced a result.
        """
        adapter = self

        def traced_run(*args: Any, **kwargs: Any) -> Any:
            agent_name = adapter._get_agent_name(agent)
            # The task is the first positional argument or the ``task`` kwarg.
            task = args[0] if args else kwargs.get("task")
            adapter.on_run_start(agent_name=agent_name, input_data=task)
            error: Optional[Exception] = None
            result: Any = None
            try:
                result = original_run(*args, **kwargs)
            except Exception as exc:
                error = exc
                raise
            finally:
                adapter.on_run_end(agent_name=agent_name, output=result, error=error)
                agent_type = type(agent).__name__
                if agent_type == "CodeAgent" and result is not None:
                    adapter._emit_code_execution(agent_name, result)
            return result

        traced_run._layerlens_original = original_run  # type: ignore[attr-defined]
        return traced_run

    def _unwrap_agent(self, agent: Any) -> None:
        """Put back the attributes saved for ``agent`` by ``instrument_agent``."""
        agent_id = id(agent)
        originals = self._originals.get(agent_id)
        if not originals:
            return
        for method_name, original in originals.items():
            try:
                setattr(agent, method_name, original)
            except Exception:
                logger.debug("Could not unwrap %s", method_name, exc_info=True)

    # --- Lifecycle hooks ---

    def on_run_start(
        self,
        agent_name: Optional[str] = None,
        input_data: Any = None,
    ) -> None:
        """Emit ``agent.input`` and remember when the run started.

        The start time is pushed onto the calling thread's stack so that
        nested runs on the same thread each keep their own start time.
        Does nothing while the adapter is disconnected; errors are logged,
        never raised.
        """
        if not self._connected:
            return
        try:
            tid = threading.get_ident()
            start_ns = time.time_ns()
            with self._adapter_lock:
                self._run_starts.setdefault(tid, []).append(start_ns)
            self.emit_dict_event(
                "agent.input",
                {
                    "framework": "smolagents",
                    "agent_name": agent_name,
                    "input": self._safe_serialize(input_data),
                    "timestamp_ns": start_ns,
                },
            )
        except Exception:
            logger.warning("Error in on_run_start", exc_info=True)

    def on_run_end(
        self,
        agent_name: Optional[str] = None,
        output: Any = None,
        error: Optional[Exception] = None,
    ) -> None:
        """Emit ``agent.output`` with the duration of the innermost open run.

        Pops the most recent start time recorded for the calling thread.
        ``duration_ns`` is ``0`` when no start was recorded. Does nothing
        while the adapter is disconnected; errors are logged, never raised.
        """
        if not self._connected:
            return
        try:
            tid = threading.get_ident()
            end_ns = time.time_ns()
            with self._adapter_lock:
                pending = self._run_starts.get(tid)
                start_ns = pending.pop() if pending else 0
                # Remove the thread's entry once its last run has ended so the
                # mapping does not grow with every thread ever seen.
                if pending is not None and not pending:
                    del self._run_starts[tid]
            duration_ns = end_ns - start_ns if start_ns else 0
            payload: Dict[str, Any] = {
                "framework": "smolagents",
                "agent_name": agent_name,
                "output": self._safe_serialize(output),
                "duration_ns": duration_ns,
            }
            if error:
                payload["error"] = str(error)
            self.emit_dict_event("agent.output", payload)
        except Exception:
            logger.warning("Error in on_run_end", exc_info=True)

    def on_tool_use(
        self,
        tool_name: str,
        tool_input: Any = None,
        tool_output: Any = None,
        error: Optional[Exception] = None,
        latency_ms: Optional[float] = None,
    ) -> None:
        """Emit ``tool.call`` for one tool invocation.

        ``error`` and ``latency_ms`` are added to the event only when given.
        Does nothing while disconnected; errors are logged, never raised.
        """
        if not self._connected:
            return
        try:
            payload: Dict[str, Any] = {
                "framework": "smolagents",
                "tool_name": tool_name,
                "tool_input": self._safe_serialize(tool_input),
                "tool_output": self._safe_serialize(tool_output),
            }
            if error:
                payload["error"] = str(error)
            if latency_ms is not None:
                payload["latency_ms"] = latency_ms
            self.emit_dict_event("tool.call", payload)
        except Exception:
            logger.warning("Error in on_tool_use", exc_info=True)

    def on_llm_call(
        self,
        provider: Optional[str] = None,
        model: Optional[str] = None,
        tokens_prompt: Optional[int] = None,
        tokens_completion: Optional[int] = None,
        latency_ms: Optional[float] = None,
        messages: Optional[List[Dict[str, str]]] = None,
    ) -> None:
        """Emit ``model.invoke`` for one LLM call.

        Only the fields that were supplied are included. ``messages`` is
        attached only when the capture config has ``capture_content`` enabled.
        Does nothing while disconnected; errors are logged, never raised.
        """
        if not self._connected:
            return
        try:
            payload: Dict[str, Any] = {"framework": "smolagents"}
            if provider:
                payload["provider"] = provider
            if model:
                payload["model"] = model
            if tokens_prompt is not None:
                payload["tokens_prompt"] = tokens_prompt
            if tokens_completion is not None:
                payload["tokens_completion"] = tokens_completion
            if latency_ms is not None:
                payload["latency_ms"] = latency_ms
            if self._capture_config.capture_content and messages:
                payload["messages"] = messages
            self.emit_dict_event("model.invoke", payload)
        except Exception:
            logger.warning("Error in on_llm_call", exc_info=True)

    def on_handoff(
        self,
        from_agent: str,
        to_agent: str,
        context: Any = None,
    ) -> None:
        """Emit ``agent.handoff`` for a delegation between two agents.

        The context is always reduced to a SHA-256 hash; a 500-character
        preview is added only when ``capture_content`` is enabled. Does
        nothing while disconnected; errors are logged, never raised.
        """
        if not self._connected:
            return
        try:
            context_str = str(context) if context else ""
            self.emit_dict_event(
                "agent.handoff",
                {
                    "from_agent": from_agent,
                    "to_agent": to_agent,
                    "reason": "managed_agent_delegation",
                    "context_hash": (
                        hashlib.sha256(context_str.encode()).hexdigest()
                        if context_str
                        else None
                    ),
                    "context_preview": (
                        context_str[:500]
                        if context_str and self._capture_config.capture_content
                        else None
                    ),
                },
            )
        except Exception:
            logger.warning("Error in on_handoff", exc_info=True)

    # --- Helpers ---

    def _get_agent_name(self, agent: Any) -> str:
        """Return ``agent.name`` when truthy, else the agent's class name."""
        return getattr(agent, "name", None) or type(agent).__name__

    def _emit_agent_config(
        self,
        agent_name: str,
        agent: Any,
        agent_type: str,
    ) -> None:
        """Emit ``environment.config`` once per distinct agent name.

        The event lists the agent's tools, model and managed agents when
        present; the system prompt (first 500 characters) is included only
        when ``capture_content`` is enabled.
        """
        with self._adapter_lock:
            if agent_name in self._seen_agents:
                return
            self._seen_agents.add(agent_name)
        metadata: Dict[str, Any] = {
            "framework": "smolagents",
            "agent_name": agent_name,
            "agent_type": agent_type,
        }
        tools = getattr(agent, "tools", None)
        if tools:
            if isinstance(tools, dict):
                metadata["tools"] = list(tools.keys())
            else:
                metadata["tools"] = [getattr(t, "name", str(t)) for t in tools]
        model = getattr(agent, "model", None)
        if model:
            metadata["model"] = str(model)
        managed = getattr(agent, "managed_agents", None)
        if managed:
            if isinstance(managed, dict):
                metadata["managed_agents"] = list(managed.keys())
            elif isinstance(managed, list):
                metadata["managed_agents"] = [
                    getattr(a, "name", str(a)) for a in managed
                ]
        system_prompt = getattr(agent, "system_prompt", None)
        if system_prompt and self._capture_config.capture_content:
            metadata["system_prompt"] = str(system_prompt)[:500]
        self.emit_dict_event("environment.config", metadata)

    def _emit_code_execution(self, agent_name: str, result: Any) -> None:
        """Emit an L2 code execution event for ``CodeAgent``.

        Logs are read from ``result.logs``, falling back to
        ``result.inner_messages``. Failures are logged at debug level only.
        """
        try:
            logs = getattr(result, "logs", None) or getattr(result, "inner_messages", None)
            self.emit_dict_event(
                "agent.code",
                {
                    "framework": "smolagents",
                    "agent_name": agent_name,
                    "event_subtype": "code_execution",
                    "output": self._safe_serialize(result),
                    "logs": self._safe_serialize(logs),
                },
            )
        except Exception:
            logger.debug("Could not emit code execution event", exc_info=True)

    def _safe_serialize(self, value: Any) -> Any:
        """Convert ``value`` into something safe to place in an event.

        Objects exposing ``model_dump()`` or ``dict()`` are dumped through
        those methods, dicts are shallow-copied, ``str``/``int``/``float``/
        ``bool`` pass through unchanged, and anything else (or anything whose
        dump raises) becomes ``str(value)``.
        """
        try:
            if value is None:
                return None
            if hasattr(value, "model_dump"):
                return value.model_dump()
            if hasattr(value, "dict"):
                return value.dict()
            if isinstance(value, dict):
                return dict(value)
            if isinstance(value, (str, int, float, bool)):
                return value
            return str(value)
        except Exception:
            return str(value)
def instrument_agent(
    agent: Any,
    stratix: Any = None,
    capture_config: Any | None = None,
) -> StrandsAdapter:
    """Convenience function to instrument an AWS Strands agent.

    Creates a ``StrandsAdapter``, connects it, and wraps ``agent`` with it.

    Args:
        agent: The Strands agent to instrument.
        stratix: Forwarded to the adapter constructor.
        capture_config: Forwarded unchanged to the adapter constructor, which
            accepts ``Any | None``. The previous ``dict[str, Any]`` annotation
            did not match that and needed a ``type: ignore`` to allow the
            ``None`` default.

    Returns:
        The connected adapter with ``agent`` instrumented.
    """
    adapter = StrandsAdapter(stratix=stratix, capture_config=capture_config)
    adapter.connect()
    adapter.instrument_agent(agent)
    return adapter
def _unwrap_agent(self, agent: Any) -> None:
    """Put back every attribute that was saved for ``agent``.

    Looks up the attributes recorded under ``id(agent)`` in
    ``self._originals`` and reassigns each one on the agent. An agent with
    nothing recorded is left untouched. A failing assignment is logged at
    debug level and does not stop the remaining attributes from being
    restored.
    """
    key = id(agent)
    saved = self._originals.get(key) or {}
    for attr_name in saved:
        try:
            setattr(agent, attr_name, saved[attr_name])
        except Exception:
            logger.debug("Could not unwrap %s.%s", key, attr_name, exc_info=True)
def instrument_agent(self, agent: Any) -> Any:
    """Wrap an AWS Strands agent so calls and ``invoke()`` are traced.

    ``agent(...)`` is dispatched by Python through ``type(agent).__call__``;
    an attribute named ``__call__`` set on the instance is ignored for that
    syntax. To intercept calls on this one agent only, the agent's class is
    swapped for a private subclass whose ``__call__`` routes through the
    tracing wrapper. The original class is saved under the ``"__class__"``
    key of the agent's entry in ``self._originals``, so restoring saved
    attributes with ``setattr`` (as ``_unwrap_agent`` does) also restores
    the class.

    If the class cannot be swapped (for example an incompatible object
    layout), the method falls back to the previous behaviour of setting
    ``__call__`` on the instance, which traces explicit
    ``agent.__call__(...)`` calls only.

    Instrumenting the same agent object twice is a no-op.

    Args:
        agent: The agent to instrument.

    Returns:
        The same ``agent`` object.
    """
    agent_id = id(agent)
    if agent_id in self._originals:
        return agent
    # Resolved up front; the subclass below keeps the same __name__ anyway.
    agent_name = getattr(agent, "name", None) or str(type(agent).__name__)
    originals: dict[str, Any] = {}
    # Strands Agent uses __call__ as the primary invocation method
    if callable(agent):
        original_cls = type(agent)
        bound_call = agent.__call__
        traced_call = self._create_traced_call(agent, bound_call)

        def _dispatch(instance: Any, *args: Any, **kwargs: Any) -> Any:
            # Only the instrumented agent is traced; any other instance that
            # ends up with this class (e.g. a copy) behaves as before.
            if instance is agent:
                return traced_call(*args, **kwargs)
            return original_cls.__call__(instance, *args, **kwargs)

        try:
            agent.__class__ = type(
                original_cls.__name__,
                (original_cls,),
                {
                    "__call__": _dispatch,
                    # No new slots: keeps the instance layout identical so the
                    # __class__ assignment is permitted.
                    "__slots__": (),
                    "__module__": original_cls.__module__,
                    "__qualname__": original_cls.__qualname__,
                },
            )
            originals["__class__"] = original_cls
        except Exception:
            logger.debug(
                "Could not install traced __call__ on %s; falling back to "
                "instance attribute",
                original_cls.__name__,
                exc_info=True,
            )
            originals["__call__"] = bound_call
            agent.__call__ = traced_call
    # Also wrap invoke() if present
    if hasattr(agent, "invoke"):
        originals["invoke"] = agent.invoke
        agent.invoke = self._create_traced_call(agent, agent.invoke)
    self._originals[agent_id] = originals
    self._wrapped_agents.append(agent)
    self._emit_agent_config(agent_name, agent)
    return agent
def _extract_run_details(self, agent: Any, result: Any) -> None:
    """Emit follow-up events derived from a finished run.

    Depending on what ``agent`` and ``result`` expose, this emits:

    * ``model.invoke`` when the agent has a ``model`` or ``model_id``;
    * ``cost.record`` when the result has ``usage`` or ``metrics``;
    * one ``tool.call`` per entry in ``result.tool_results``; entries may be
      objects (``name`` / ``input`` / ``output`` attributes) or dicts with
      the same keys;
    * ``agent.state.change`` when the agent has a ``conversation`` or
      ``conversation_manager``.

    Does nothing when ``result`` is ``None``. Any exception is logged at
    debug level and swallowed, so a malformed result never breaks the run.
    """
    if result is None:
        return
    try:
        # Extract model invocation details
        model = getattr(agent, "model", None) or getattr(agent, "model_id", None)
        if model:
            model_name = str(model)
            self.emit_dict_event(
                "model.invoke",
                {
                    "framework": "strands",
                    "model": model_name,
                    "provider": self._detect_provider(model_name),
                },
            )

        # Extract usage/token info from result
        usage = getattr(result, "usage", None) or getattr(result, "metrics", None)
        if usage:
            tokens_prompt = getattr(usage, "inputTokens", None) or getattr(
                usage, "prompt_tokens", None
            )
            tokens_completion = getattr(usage, "outputTokens", None) or getattr(
                usage, "completion_tokens", None
            )
            tokens_total = getattr(usage, "totalTokens", None) or getattr(
                usage, "total_tokens", None
            )
            self.emit_dict_event(
                "cost.record",
                {
                    "framework": "strands",
                    "model": str(model) if model else None,
                    "tokens_prompt": tokens_prompt,
                    "tokens_completion": tokens_completion,
                    "tokens_total": tokens_total,
                },
            )

        # Extract tool calls from result
        tool_results = getattr(result, "tool_results", None) or []
        for tr in tool_results:
            is_mapping = isinstance(tr, dict)
            # Attribute first, then dict key, then "unknown". The fallback is
            # parenthesised on purpose: without the parentheses the
            # conditional applies to the whole ``or`` expression and every
            # object-shaped tool result is reported as "unknown".
            tool_name = getattr(tr, "name", None) or (
                tr.get("name", "unknown") if is_mapping else "unknown"
            )
            self.emit_dict_event(
                "tool.call",
                {
                    "framework": "strands",
                    "tool_name": tool_name,
                    "tool_input": self._safe_serialize(
                        getattr(tr, "input", None)
                        or (tr.get("input") if is_mapping else None)
                    ),
                    "tool_output": self._safe_serialize(
                        getattr(tr, "output", None)
                        or (tr.get("output") if is_mapping else None)
                    ),
                },
            )

        # Emit conversation state change
        conversation = getattr(agent, "conversation", None) or getattr(
            agent, "conversation_manager", None
        )
        if conversation:
            # Prefer an explicit turn counter; otherwise count the messages.
            turn_count = getattr(conversation, "turn_count", None) or len(
                getattr(conversation, "messages", [])
            )
            self.emit_dict_event(
                "agent.state.change",
                {
                    "framework": "strands",
                    "agent_name": getattr(agent, "name", "strands_agent"),
                    "event_subtype": "conversation_update",
                    "turn_count": turn_count,
                },
            )
    except Exception:
        logger.debug("Could not extract run details", exc_info=True)
self._run_starts.pop(tid, 0) + duration_ns = end_ns - start_ns if start_ns else 0 + payload: dict[str, Any] = { + "framework": "strands", + "agent_name": agent_name, + "output": self._safe_serialize(output), + "duration_ns": duration_ns, + } + if error: + payload["error"] = str(error) + self.emit_dict_event("agent.output", payload) + self.emit_dict_event( + "agent.state.change", + { + "framework": "strands", + "agent_name": agent_name, + "event_subtype": "run_complete" if not error else "run_failed", + }, + ) + except Exception: + logger.warning("Error in on_run_end", exc_info=True) + + def on_tool_use( + self, + tool_name: str, + tool_input: Any = None, + tool_output: Any = None, + error: Exception | None = None, + latency_ms: float | None = None, + ) -> None: + """Emit tool.call event for a tool invocation.""" + if not self._connected: + return + try: + payload: dict[str, Any] = { + "framework": "strands", + "tool_name": tool_name, + "tool_input": self._safe_serialize(tool_input), + "tool_output": self._safe_serialize(tool_output), + } + if error: + payload["error"] = str(error) + if latency_ms is not None: + payload["latency_ms"] = latency_ms + self.emit_dict_event("tool.call", payload) + except Exception: + logger.warning("Error in on_tool_use", exc_info=True) + + def on_llm_call( + self, + provider: str | None = None, + model: str | None = None, + tokens_prompt: int | None = None, + tokens_completion: int | None = None, + latency_ms: float | None = None, + messages: list[dict[str, str]] | None = None, + ) -> None: + """Emit model.invoke event for an LLM call.""" + if not self._connected: + return + try: + payload: dict[str, Any] = {"framework": "strands"} + if provider: + payload["provider"] = provider + if model: + payload["model"] = model + if tokens_prompt is not None: + payload["tokens_prompt"] = tokens_prompt + if tokens_completion is not None: + payload["tokens_completion"] = tokens_completion + if latency_ms is not None: + payload["latency_ms"] = 
latency_ms + if self._capture_config.capture_content and messages: + payload["messages"] = messages + self.emit_dict_event("model.invoke", payload) + except Exception: + logger.warning("Error in on_llm_call", exc_info=True) + + # --- Helpers --- + + def _detect_provider(self, model: str | None) -> str | None: + """Detect the LLM provider from a model identifier.""" + if not model: + return None + model_lower = model.lower() + # Strands defaults to Bedrock + if "anthropic" in model_lower or "claude" in model_lower: + return "bedrock" + if "amazon" in model_lower or "titan" in model_lower: + return "bedrock" + if "meta" in model_lower or "llama" in model_lower: + return "bedrock" + if "mistral" in model_lower or "mixtral" in model_lower: + return "bedrock" + if "cohere" in model_lower or "command" in model_lower: + return "bedrock" + if "ai21" in model_lower or "jamba" in model_lower: + return "bedrock" + if "gpt" in model_lower or "o1" in model_lower or "o3" in model_lower: + return "openai" + if "gemini" in model_lower: + return "google" + return "bedrock" # Default to Bedrock for Strands + + def _emit_agent_config(self, agent_name: str, agent: Any) -> None: + """Emit environment.config event for agent configuration on first encounter.""" + with self._adapter_lock: + if agent_name in self._seen_agents: + return + self._seen_agents.add(agent_name) + metadata: dict[str, Any] = { + "framework": "strands", + "agent_name": agent_name, + } + model = getattr(agent, "model", None) or getattr(agent, "model_id", None) + if model: + metadata["model"] = str(model) + system_prompt = getattr(agent, "system_prompt", None) + if system_prompt and self._capture_config.capture_content: + metadata["system_prompt"] = str(system_prompt)[:500] + tools = getattr(agent, "tools", None) + if tools: + if isinstance(tools, dict): + metadata["tools"] = list(tools.keys()) + else: + metadata["tools"] = [ + getattr(t, "name", None) or getattr(t, "tool_name", str(t)) for t in tools + ] + 
conversation = getattr(agent, "conversation", None) or getattr( + agent, "conversation_manager", None + ) + if conversation: + metadata["conversation_type"] = str(type(conversation).__name__) + self.emit_dict_event("environment.config", metadata) + + def _safe_serialize(self, value: Any) -> Any: + """Safely serialize a value for event payloads.""" + try: + if value is None: + return None + if hasattr(value, "model_dump"): + return value.model_dump() + if hasattr(value, "dict"): + return value.dict() + if isinstance(value, dict): + return dict(value) + if isinstance(value, (str, int, float, bool)): + return value + return str(value) + except Exception: + return str(value) diff --git a/tests/instrument/adapters/__init__.py b/tests/instrument/adapters/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/instrument/adapters/frameworks/__init__.py b/tests/instrument/adapters/frameworks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/instrument/adapters/frameworks/test_agno_adapter.py b/tests/instrument/adapters/frameworks/test_agno_adapter.py new file mode 100644 index 0000000..6ea4bc6 --- /dev/null +++ b/tests/instrument/adapters/frameworks/test_agno_adapter.py @@ -0,0 +1,214 @@ +"""Unit tests for the Agno framework adapter. + +Mocked at the SDK shape level — no real ``agno`` runtime needed. 
+""" + +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any, Dict, List + +import pytest + +from layerlens.instrument.adapters._base import AdapterStatus, CaptureConfig +from layerlens.instrument.adapters.frameworks.agno import ( + ADAPTER_CLASS, + AgnoAdapter, + instrument_agent, +) + + +class _RecordingStratix: + def __init__(self) -> None: + self.events: List[Dict[str, Any]] = [] + + def emit(self, *args: Any, **kwargs: Any) -> None: + if len(args) == 2 and isinstance(args[0], str): + self.events.append({"event_type": args[0], "payload": args[1]}) + + +class _FakeAgent: + """Minimal duck-typed Agno agent for tests.""" + + def __init__( + self, + name: str = "test-agent", + tools: Any = None, + model: Any = None, + description: Any = None, + instructions: Any = None, + team: Any = None, + knowledge: Any = None, + result: Any = None, + raises: bool = False, + ) -> None: + self.name = name + self.tools = tools + self.model = model + self.description = description + self.instructions = instructions + self.team = team + self.knowledge = knowledge + self._result = result + self._raises = raises + + def run(self, message: str, **kwargs: Any) -> Any: + if self._raises: + raise RuntimeError("simulated failure") + return self._result if self._result is not None else SimpleNamespace(content=f"out:{message}") + + +def test_adapter_class_export() -> None: + assert ADAPTER_CLASS is AgnoAdapter + + +def test_lifecycle() -> None: + a = AgnoAdapter() + a.connect() + assert a.status == AdapterStatus.HEALTHY + assert a.is_connected is True + a.disconnect() + assert a.status == AdapterStatus.DISCONNECTED + assert a.is_connected is False + + +def test_adapter_info_and_health() -> None: + a = AgnoAdapter() + a.connect() + info = a.get_adapter_info() + assert info.framework == "agno" + assert info.name == "AgnoAdapter" + assert info.version == AgnoAdapter.VERSION + assert info.capabilities # non-empty list + health = a.health_check() + 
assert health.framework_name == "agno" + assert health.status == AdapterStatus.HEALTHY + + +def test_instrument_agent_wraps_run() -> None: + adapter = AgnoAdapter(stratix=_RecordingStratix(), capture_config=CaptureConfig.full()) + adapter.connect() + + agent = _FakeAgent(name="planner") + adapter.instrument_agent(agent) + # Wrapped: function name is now traced. + assert agent.run.__name__ == "traced_run_sync" + + adapter.disconnect() + # Restored: name is back to the original. + assert agent.run.__name__ == "run" + + +def test_run_emits_input_and_output_events() -> None: + stratix = _RecordingStratix() + adapter = AgnoAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + agent = _FakeAgent(name="planner", model="gpt-5") + adapter.instrument_agent(agent) + result = agent.run("hello") + + assert getattr(result, "content", None) == "out:hello" + + types = [e["event_type"] for e in stratix.events] + assert "environment.config" in types + assert "agent.input" in types + assert "agent.output" in types + + out = next(e for e in stratix.events if e["event_type"] == "agent.output") + assert out["payload"]["agent_name"] == "planner" + assert out["payload"]["duration_ns"] >= 0 + assert out["payload"]["framework"] == "agno" + + +def test_run_failure_emits_output_with_error() -> None: + stratix = _RecordingStratix() + adapter = AgnoAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + agent = _FakeAgent(name="failing", raises=True) + adapter.instrument_agent(agent) + + with pytest.raises(RuntimeError): + agent.run("bad") + + out = next(e for e in stratix.events if e["event_type"] == "agent.output") + assert "error" in out["payload"] + assert "simulated failure" in out["payload"]["error"] + + +def test_environment_config_emits_once_per_agent() -> None: + stratix = _RecordingStratix() + adapter = AgnoAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + agent = _FakeAgent(name="a1", 
tools=[SimpleNamespace(name="search")], model="gpt-5") + adapter.instrument_agent(agent) + adapter.instrument_agent(agent) # idempotent + + configs = [e for e in stratix.events if e["event_type"] == "environment.config"] + assert len(configs) == 1 + cfg = configs[0]["payload"] + assert cfg["agent_name"] == "a1" + assert cfg["tools"] == ["search"] + + +def test_on_tool_use_emits_event() -> None: + stratix = _RecordingStratix() + adapter = AgnoAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + adapter.on_tool_use("calc", tool_input={"x": 1}, tool_output=2, latency_ms=12.3) + + evt = next(e for e in stratix.events if e["event_type"] == "tool.call") + assert evt["payload"]["tool_name"] == "calc" + assert evt["payload"]["latency_ms"] == 12.3 + + +def test_on_handoff_emits_event_with_context_hash() -> None: + stratix = _RecordingStratix() + adapter = AgnoAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + adapter.on_handoff(from_agent="a", to_agent="b", context="some context") + + evt = next(e for e in stratix.events if e["event_type"] == "agent.handoff") + assert evt["payload"]["from_agent"] == "a" + assert evt["payload"]["to_agent"] == "b" + assert evt["payload"]["context_hash"] is not None + + +def test_capture_config_gates_l5a_tool_calls() -> None: + """When l5a_tool_calls is disabled, tool.call events do NOT fire.""" + stratix = _RecordingStratix() + cfg = CaptureConfig(l5a_tool_calls=False) + adapter = AgnoAdapter(stratix=stratix, capture_config=cfg) + adapter.connect() + + adapter.on_tool_use("calc", tool_input={"x": 1}, tool_output=2) + # And handoffs (cross-cutting) should still fire. 
+ adapter.on_handoff(from_agent="a", to_agent="b", context="x") + + types = [e["event_type"] for e in stratix.events] + assert "tool.call" not in types + assert "agent.handoff" in types + + +def test_instrument_agent_helper() -> None: + """Top-level convenience function returns a connected adapter.""" + agent = _FakeAgent(name="helper") + adapter = instrument_agent(agent) + assert adapter.is_connected is True + assert adapter.status == AdapterStatus.HEALTHY + + +def test_serialize_for_replay() -> None: + adapter = AgnoAdapter( + stratix=_RecordingStratix(), + capture_config=CaptureConfig.full(), + ) + adapter.connect() + + rt = adapter.serialize_for_replay() + assert rt.framework == "agno" + assert rt.adapter_name == "AgnoAdapter" + assert "capture_config" in rt.config diff --git a/tests/instrument/adapters/frameworks/test_bedrock_agents_adapter.py b/tests/instrument/adapters/frameworks/test_bedrock_agents_adapter.py new file mode 100644 index 0000000..a6e9fde --- /dev/null +++ b/tests/instrument/adapters/frameworks/test_bedrock_agents_adapter.py @@ -0,0 +1,235 @@ +"""Unit tests for the AWS Bedrock Agents framework adapter. + +Mocked at the SDK shape level — no real ``boto3`` runtime needed. +The adapter integrates via boto3 event hooks: ``client.meta.events.register(...)``. 
+""" + +from __future__ import annotations + +from typing import Any, Dict, List, Tuple, Callable + +from layerlens.instrument.adapters._base import AdapterStatus, CaptureConfig +from layerlens.instrument.adapters.frameworks.bedrock_agents import ( + ADAPTER_CLASS, + BedrockAgentsAdapter, + instrument_client, +) + + +class _RecordingStratix: + def __init__(self) -> None: + self.events: List[Dict[str, Any]] = [] + + def emit(self, *args: Any, **kwargs: Any) -> None: + if len(args) == 2 and isinstance(args[0], str): + self.events.append({"event_type": args[0], "payload": args[1]}) + + +class _FakeEventSystem: + """Mimics boto3 client.meta.events register/unregister.""" + + def __init__(self) -> None: + self.handlers: Dict[str, List[Callable[..., Any]]] = {} + self.unregistered: List[Tuple[str, Callable[..., Any]]] = [] + + def register(self, event: str, handler: Callable[..., Any]) -> None: + self.handlers.setdefault(event, []).append(handler) + + def unregister(self, event: str, handler: Callable[..., Any]) -> None: + self.unregistered.append((event, handler)) + if event in self.handlers and handler in self.handlers[event]: + self.handlers[event].remove(handler) + + +class _FakeClient: + """Mimics a boto3 bedrock-agent-runtime client.""" + + def __init__(self) -> None: + self.meta = _FakeMeta() + + +class _FakeMeta: + def __init__(self) -> None: + self.events = _FakeEventSystem() + + +def test_adapter_class_export() -> None: + assert ADAPTER_CLASS is BedrockAgentsAdapter + + +def test_lifecycle() -> None: + a = BedrockAgentsAdapter() + a.connect() + assert a.status == AdapterStatus.HEALTHY + a.disconnect() + assert a.status == AdapterStatus.DISCONNECTED + + +def test_adapter_info_and_health() -> None: + a = BedrockAgentsAdapter() + a.connect() + info = a.get_adapter_info() + assert info.framework == "bedrock_agents" + assert info.name == "BedrockAgentsAdapter" + health = a.health_check() + assert health.framework_name == "bedrock_agents" + + +def 
test_instrument_client_registers_event_hooks() -> None: + adapter = BedrockAgentsAdapter(stratix=_RecordingStratix(), capture_config=CaptureConfig.full()) + adapter.connect() + + client = _FakeClient() + adapter.instrument_client(client) + + handlers = client.meta.events.handlers + assert "provide-client-params.bedrock-agent-runtime.InvokeAgent" in handlers + assert "after-call.bedrock-agent-runtime.InvokeAgent" in handlers + + +def test_disconnect_unregisters_event_hooks() -> None: + adapter = BedrockAgentsAdapter(stratix=_RecordingStratix(), capture_config=CaptureConfig.full()) + adapter.connect() + client = _FakeClient() + adapter.instrument_client(client) + + adapter.disconnect() + assert len(client.meta.events.unregistered) == 2 + + +def test_before_invoke_emits_input_event() -> None: + stratix = _RecordingStratix() + adapter = BedrockAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + client = _FakeClient() + adapter.instrument_client(client) + + # Simulate the boto3 'provide-client-params' event firing. + adapter._before_invoke_agent( + params={ + "agentId": "agent-123", + "agentAliasId": "alias-1", + "sessionId": "sess-1", + "inputText": "hello", + "enableTrace": True, + } + ) + + types = [e["event_type"] for e in stratix.events] + assert "environment.config" in types + assert "agent.input" in types + + inp = next(e for e in stratix.events if e["event_type"] == "agent.input") + assert inp["payload"]["agent_id"] == "agent-123" + assert inp["payload"]["input"] == "hello" + + +def test_after_invoke_emits_output_and_processes_trace() -> None: + stratix = _RecordingStratix() + adapter = BedrockAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + # Simulate the after-call event with a parsed response. 
+ adapter._after_invoke_agent( + parsed={ + "outputText": "the answer is 42", + "sessionId": "sess-1", + "trace": { + "steps": [ + { + "type": "ACTION_GROUP", + "actionGroupName": "calc", + "actionGroupInput": {"x": 1}, + "actionGroupInvocationOutput": {"output": "ok"}, + }, + { + "type": "MODEL_INVOCATION", + "foundationModel": "anthropic.claude-v2", + "modelInvocationOutput": { + "usage": {"inputTokens": 100, "outputTokens": 50} + }, + }, + { + "type": "AGENT_COLLABORATOR", + "supervisorAgentId": "sup-1", + "collaboratorAgentId": "col-1", + }, + ] + }, + } + ) + + types = [e["event_type"] for e in stratix.events] + assert "agent.output" in types + assert "tool.call" in types + assert "model.invoke" in types + assert "cost.record" in types + assert "agent.handoff" in types + + out = next(e for e in stratix.events if e["event_type"] == "agent.output") + assert out["payload"]["output"] == "the answer is 42" + + model = next(e for e in stratix.events if e["event_type"] == "model.invoke") + assert model["payload"]["model"] == "anthropic.claude-v2" + assert model["payload"]["tokens_prompt"] == 100 + + +def test_on_tool_use_emits_event() -> None: + stratix = _RecordingStratix() + adapter = BedrockAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + adapter.on_tool_use("calc", tool_input={"x": 1}, tool_output=2, latency_ms=12.3) + + evt = next(e for e in stratix.events if e["event_type"] == "tool.call") + assert evt["payload"]["tool_name"] == "calc" + assert evt["payload"]["latency_ms"] == 12.3 + + +def test_on_handoff_emits_event_with_context_hash() -> None: + stratix = _RecordingStratix() + adapter = BedrockAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + adapter.on_handoff(from_agent="a", to_agent="b", context="some context") + + evt = next(e for e in stratix.events if e["event_type"] == "agent.handoff") + assert evt["payload"]["from_agent"] == "a" + assert evt["payload"]["to_agent"] == "b" 
+ assert evt["payload"]["context_hash"] is not None + + +def test_capture_config_gates_l5a_tool_calls() -> None: + """When l5a_tool_calls is disabled, tool.call events do NOT fire (handoff still does).""" + stratix = _RecordingStratix() + cfg = CaptureConfig(l5a_tool_calls=False) + adapter = BedrockAgentsAdapter(stratix=stratix, capture_config=cfg) + adapter.connect() + + adapter.on_tool_use("calc", tool_input={"x": 1}, tool_output=2) + adapter.on_handoff(from_agent="a", to_agent="b", context="x") + + types = [e["event_type"] for e in stratix.events] + assert "tool.call" not in types + assert "agent.handoff" in types + + +def test_instrument_client_helper() -> None: + """Top-level convenience function returns a connected adapter.""" + client = _FakeClient() + adapter = instrument_client(client) + assert adapter.is_connected is True + assert adapter.status == AdapterStatus.HEALTHY + # Hooks were registered. + assert "provide-client-params.bedrock-agent-runtime.InvokeAgent" in client.meta.events.handlers + + +def test_serialize_for_replay() -> None: + adapter = BedrockAgentsAdapter( + stratix=_RecordingStratix(), + capture_config=CaptureConfig.full(), + ) + adapter.connect() + + rt = adapter.serialize_for_replay() + assert rt.framework == "bedrock_agents" + assert rt.adapter_name == "BedrockAgentsAdapter" + assert "capture_config" in rt.config diff --git a/tests/instrument/adapters/frameworks/test_bulk_ported_smoke.py b/tests/instrument/adapters/frameworks/test_bulk_ported_smoke.py new file mode 100644 index 0000000..47fd522 --- /dev/null +++ b/tests/instrument/adapters/frameworks/test_bulk_ported_smoke.py @@ -0,0 +1,189 @@ +"""Smoke tests for the 9 bulk-ported framework adapters. + +These tests verify the **mechanical port** worked: each adapter imports +cleanly, instantiates, completes the connect → health_check → +get_adapter_info → serialize_for_replay → disconnect cycle without +raising, and exposes ``ADAPTER_CLASS`` for registry lazy-loading. 
+ +Deeper per-adapter tests (event emission, capture-config gating, etc.) +follow the SmolAgents test pattern — see +``test_smolagents_adapter.py``. Each adapter gets that level of coverage +in a follow-up PR; this smoke suite is the entry-criteria for the bulk +port itself. +""" + +from __future__ import annotations + +from typing import Any, Type + +import pytest + +from layerlens.instrument.adapters._base import ( + BaseAdapter, + AdapterStatus, + CaptureConfig, +) + + +def _adapter_classes() -> list[tuple[str, Type[BaseAdapter]]]: + """Import each ported adapter and return ``(name, class)`` tuples.""" + cases: list[tuple[str, Type[BaseAdapter]]] = [] + + from layerlens.instrument.adapters.frameworks.agno import AgnoAdapter + + cases.append(("agno", AgnoAdapter)) + + from layerlens.instrument.adapters.frameworks.bedrock_agents import BedrockAgentsAdapter + + cases.append(("bedrock_agents", BedrockAgentsAdapter)) + + from layerlens.instrument.adapters.frameworks.google_adk import GoogleADKAdapter + + cases.append(("google_adk", GoogleADKAdapter)) + + from layerlens.instrument.adapters.frameworks.llama_index import LlamaIndexAdapter + + cases.append(("llama_index", LlamaIndexAdapter)) + + from layerlens.instrument.adapters.frameworks.pydantic_ai import PydanticAIAdapter + + cases.append(("pydantic_ai", PydanticAIAdapter)) + + from layerlens.instrument.adapters.frameworks.strands import StrandsAdapter + + cases.append(("strands", StrandsAdapter)) + + from layerlens.instrument.adapters.frameworks.openai_agents import OpenAIAgentsAdapter + + cases.append(("openai_agents", OpenAIAgentsAdapter)) + + from layerlens.instrument.adapters.frameworks.ms_agent_framework import MSAgentAdapter + + cases.append(("ms_agent_framework", MSAgentAdapter)) + + # Multi-file framework adapters. 
+ from layerlens.instrument.adapters.frameworks.embedding import EmbeddingAdapter + + cases.append(("embedding", EmbeddingAdapter)) + + from layerlens.instrument.adapters.frameworks.semantic_kernel import ( + SemanticKernelAdapter, + ) + + cases.append(("semantic_kernel", SemanticKernelAdapter)) + + from layerlens.instrument.adapters.frameworks.crewai import CrewAIAdapter + + cases.append(("crewai", CrewAIAdapter)) + + from layerlens.instrument.adapters.frameworks.autogen import AutoGenAdapter + + cases.append(("autogen", AutoGenAdapter)) + + from layerlens.instrument.adapters.frameworks.langchain import ( + LayerLensCallbackHandler, + ) + + cases.append(("langchain", LayerLensCallbackHandler)) + + from layerlens.instrument.adapters.frameworks.langgraph import ( + LayerLensLangGraphAdapter, + ) + + cases.append(("langgraph", LayerLensLangGraphAdapter)) + + from layerlens.instrument.adapters.frameworks.langfuse import LangfuseAdapter + + cases.append(("langfuse", LangfuseAdapter)) + + from layerlens.instrument.adapters.frameworks.agentforce import AgentForceAdapter + + # Note: package directory is ``agentforce`` but the adapter declares + # ``FRAMEWORK = "salesforce_agentforce"``. Test ID uses the package + # name; the metadata test handles the mismatch. + cases.append(("agentforce", AgentForceAdapter)) + + return cases + + +# Map package name → expected FRAMEWORK string (most are identical; +# Agentforce is the only mismatch). 
+_PKG_TO_FRAMEWORK = { + "agentforce": "salesforce_agentforce", +} + + +@pytest.mark.parametrize("name,cls", _adapter_classes(), ids=lambda v: v if isinstance(v, str) else "") +def test_adapter_metadata(name: str, cls: Type[BaseAdapter]) -> None: + """Every adapter has a ``FRAMEWORK`` and ``VERSION``.""" + expected = _PKG_TO_FRAMEWORK.get(name, name) + assert cls.FRAMEWORK == expected + assert cls.VERSION + + +@pytest.mark.parametrize("name,cls", _adapter_classes(), ids=lambda v: v if isinstance(v, str) else "") +def test_lifecycle(name: str, cls: Type[BaseAdapter]) -> None: + """connect → healthy → disconnect → disconnected.""" + if name == "agentforce": + # AgentForceAdapter.connect() requires Salesforce credentials — + # not a property of the base lifecycle. Lifecycle exercise for + # this adapter happens in its own integration test (gated by + # SALESFORCE_* env vars), not in the bulk smoke suite. + pytest.skip("agentforce.connect() requires Salesforce credentials") + adapter = cls() + adapter.connect() + assert adapter.is_connected is True + assert adapter.status == AdapterStatus.HEALTHY + + health = adapter.health_check() + assert health.framework_name == cls.FRAMEWORK + + info = adapter.get_adapter_info() + assert info.framework == cls.FRAMEWORK + + rt = adapter.serialize_for_replay() + assert rt.framework == cls.FRAMEWORK + + adapter.disconnect() + assert adapter.is_connected is False + assert adapter.status == AdapterStatus.DISCONNECTED + + +@pytest.mark.parametrize("name,cls", _adapter_classes(), ids=lambda v: v if isinstance(v, str) else "") +def test_adapter_class_registered(name: str, cls: Type[BaseAdapter]) -> None: + """The package exports ``ADAPTER_CLASS`` for registry lazy-loading.""" + import importlib + + module = importlib.import_module( + f"layerlens.instrument.adapters.frameworks.{name}" + ) + assert getattr(module, "ADAPTER_CLASS", None) is cls + + +@pytest.mark.parametrize("name,cls", _adapter_classes(), ids=lambda v: v if isinstance(v, str) 
else "") +def test_constructor_accepts_capture_config(name: str, cls: Type[BaseAdapter]) -> None: + """Adapters accept the standard ``capture_config`` constructor arg.""" + adapter = cls(capture_config=CaptureConfig.standard()) + assert adapter.capture_config.l1_agent_io is True + + +def test_benchmark_import_adapter_independent() -> None: + """benchmark_import does NOT extend BaseAdapter (it's a data importer). + + Verify it's importable and its public dataclasses construct correctly. + """ + from layerlens.instrument.adapters.frameworks.benchmark_import import ( + ImportResult, + BenchmarkMetadata, + BenchmarkImportAdapter, + ) + + meta = BenchmarkMetadata(name="test", source="csv") + assert meta.benchmark_id.startswith("bench-") + + result = ImportResult(success=True, benchmark_id=meta.benchmark_id) + assert result.success is True + + adapter: Any = BenchmarkImportAdapter() + # No connect/disconnect — different shape than BaseAdapter subclasses. + assert adapter is not None diff --git a/tests/instrument/adapters/frameworks/test_google_adk_adapter.py b/tests/instrument/adapters/frameworks/test_google_adk_adapter.py new file mode 100644 index 0000000..60506fc --- /dev/null +++ b/tests/instrument/adapters/frameworks/test_google_adk_adapter.py @@ -0,0 +1,220 @@ +"""Unit tests for the Google Agent Development Kit (ADK) framework adapter. + +Mocked at the SDK shape level — no real ``google.adk`` runtime needed. +The adapter integrates via 6 native callbacks (before/after agent/model/tool). 
+""" + +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any, Dict, List + +from layerlens.instrument.adapters._base import AdapterStatus, CaptureConfig +from layerlens.instrument.adapters.frameworks.google_adk import ( + ADAPTER_CLASS, + GoogleADKAdapter, + instrument_agent, +) + + +class _RecordingStratix: + def __init__(self) -> None: + self.events: List[Dict[str, Any]] = [] + + def emit(self, *args: Any, **kwargs: Any) -> None: + if len(args) == 2 and isinstance(args[0], str): + self.events.append({"event_type": args[0], "payload": args[1]}) + + +class _FakeAgent: + """Minimal duck-typed Google ADK agent for tests.""" + + def __init__( + self, + name: str = "adk-agent", + tools: Any = None, + model: Any = None, + description: Any = None, + instruction: Any = None, + sub_agents: Any = None, + ) -> None: + self.name = name + self.tools = tools + self.model = model + self.description = description + self.instruction = instruction + self.sub_agents = sub_agents + self.before_agent_callback: Any = None + self.after_agent_callback: Any = None + self.before_model_callback: Any = None + self.after_model_callback: Any = None + self.before_tool_callback: Any = None + self.after_tool_callback: Any = None + + +def test_adapter_class_export() -> None: + assert ADAPTER_CLASS is GoogleADKAdapter + + +def test_lifecycle() -> None: + a = GoogleADKAdapter() + a.connect() + assert a.status == AdapterStatus.HEALTHY + a.disconnect() + assert a.status == AdapterStatus.DISCONNECTED + + +def test_adapter_info_and_health() -> None: + a = GoogleADKAdapter() + a.connect() + info = a.get_adapter_info() + assert info.framework == "google_adk" + assert info.name == "GoogleADKAdapter" + health = a.health_check() + assert health.framework_name == "google_adk" + + +def test_instrument_agent_attaches_callbacks() -> None: + adapter = GoogleADKAdapter(stratix=_RecordingStratix(), capture_config=CaptureConfig.full()) + adapter.connect() + + agent = 
_FakeAgent(name="planner") + adapter.instrument_agent(agent) + # All six callbacks attached. Bound methods compare equal but not identical. + assert agent.before_agent_callback == adapter._before_agent_callback + assert agent.after_agent_callback == adapter._after_agent_callback + assert agent.before_model_callback == adapter._before_model_callback + assert agent.after_model_callback == adapter._after_model_callback + assert agent.before_tool_callback == adapter._before_tool_callback + assert agent.after_tool_callback == adapter._after_tool_callback + + +def test_before_after_agent_emits_input_output() -> None: + stratix = _RecordingStratix() + adapter = GoogleADKAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + agent = _FakeAgent(name="planner", model="gemini-2", tools=[SimpleNamespace(name="search")]) + callback_context = SimpleNamespace(agent=agent, user_content="hello world", agent_output="response", session=None) + + adapter._before_agent_callback(callback_context) + adapter._after_agent_callback(callback_context) + + types = [e["event_type"] for e in stratix.events] + assert "environment.config" in types + assert "agent.input" in types + assert "agent.output" in types + + inp = next(e for e in stratix.events if e["event_type"] == "agent.input") + assert inp["payload"]["agent_name"] == "planner" + assert inp["payload"]["input"] == "hello world" + + out = next(e for e in stratix.events if e["event_type"] == "agent.output") + assert out["payload"]["output"] == "response" + assert out["payload"]["duration_ns"] >= 0 + + +def test_after_model_emits_invoke_and_cost() -> None: + stratix = _RecordingStratix() + adapter = GoogleADKAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + callback_context = SimpleNamespace(model="gemini-2", agent=None) + llm_request = SimpleNamespace() + adapter._before_model_callback(callback_context, llm_request) + + llm_response = SimpleNamespace( + 
usage_metadata=SimpleNamespace(prompt_token_count=10, candidates_token_count=20), + ) + adapter._after_model_callback(callback_context, llm_response) + + invoke = next(e for e in stratix.events if e["event_type"] == "model.invoke") + assert invoke["payload"]["model"] == "gemini-2" + assert invoke["payload"]["provider"] == "google" + assert invoke["payload"]["tokens_prompt"] == 10 + + cost = next(e for e in stratix.events if e["event_type"] == "cost.record") + assert cost["payload"]["tokens_total"] == 30 + + +def test_after_tool_emits_tool_call() -> None: + stratix = _RecordingStratix() + adapter = GoogleADKAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + inp = {"x": 1} + adapter._before_tool_callback(SimpleNamespace(), "calc", inp) + adapter._after_tool_callback(SimpleNamespace(), "calc", inp, 42) + + evt = next(e for e in stratix.events if e["event_type"] == "tool.call") + assert evt["payload"]["tool_name"] == "calc" + assert evt["payload"]["tool_output"] == 42 + assert evt["payload"]["latency_ms"] is not None + + +def test_on_handoff_emits_event_with_context_hash() -> None: + stratix = _RecordingStratix() + adapter = GoogleADKAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + adapter.on_handoff(from_agent="a", to_agent="b", context="some context") + + evt = next(e for e in stratix.events if e["event_type"] == "agent.handoff") + assert evt["payload"]["from_agent"] == "a" + assert evt["payload"]["to_agent"] == "b" + assert evt["payload"]["context_hash"] is not None + + +def test_capture_config_gates_l3_model_metadata() -> None: + """When l3_model_metadata is disabled, model.invoke does NOT fire (handoff still does).""" + stratix = _RecordingStratix() + cfg = CaptureConfig(l3_model_metadata=False) + adapter = GoogleADKAdapter(stratix=stratix, capture_config=cfg) + adapter.connect() + + callback_context = SimpleNamespace(model="gemini-2", agent=None) + 
adapter._before_model_callback(callback_context, SimpleNamespace()) + adapter._after_model_callback( + callback_context, + SimpleNamespace(usage_metadata=SimpleNamespace(prompt_token_count=10, candidates_token_count=5)), + ) + adapter.on_handoff(from_agent="a", to_agent="b", context="x") + + types = [e["event_type"] for e in stratix.events] + assert "model.invoke" not in types + assert "agent.handoff" in types + + +def test_environment_config_emits_once_per_agent() -> None: + stratix = _RecordingStratix() + adapter = GoogleADKAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + agent = _FakeAgent(name="a1", tools=[SimpleNamespace(name="search")]) + cb = SimpleNamespace(agent=agent, user_content="hi", agent_output=None, session=None) + adapter._before_agent_callback(cb) + # second call should not re-emit environment.config + adapter._before_agent_callback(cb) + + configs = [e for e in stratix.events if e["event_type"] == "environment.config"] + assert len(configs) == 1 + assert configs[0]["payload"]["agent_name"] == "a1" + + +def test_instrument_agent_helper() -> None: + """Top-level convenience function returns a connected adapter.""" + agent = _FakeAgent(name="helper") + adapter = instrument_agent(agent) + assert adapter.is_connected is True + assert adapter.status == AdapterStatus.HEALTHY + + +def test_serialize_for_replay() -> None: + adapter = GoogleADKAdapter( + stratix=_RecordingStratix(), + capture_config=CaptureConfig.full(), + ) + adapter.connect() + rt = adapter.serialize_for_replay() + assert rt.framework == "google_adk" + assert rt.adapter_name == "GoogleADKAdapter" + assert "capture_config" in rt.config diff --git a/tests/instrument/adapters/frameworks/test_llama_index_adapter.py b/tests/instrument/adapters/frameworks/test_llama_index_adapter.py new file mode 100644 index 0000000..6cf5053 --- /dev/null +++ b/tests/instrument/adapters/frameworks/test_llama_index_adapter.py @@ -0,0 +1,199 @@ +"""Unit tests for the 
LlamaIndex framework adapter. + +Mocked at the SDK shape level — no real ``llama_index`` runtime needed. +Internal dispatch is by ``type(event).__name__``, so each test event uses +a minimally-shaped class with the right name. +""" + +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any, Dict, List + +from layerlens.instrument.adapters._base import AdapterStatus, CaptureConfig +from layerlens.instrument.adapters.frameworks.llama_index import ( + ADAPTER_CLASS, + LlamaIndexAdapter, + instrument_workflow, +) + + +class _RecordingStratix: + def __init__(self) -> None: + self.events: List[Dict[str, Any]] = [] + + def emit(self, *args: Any, **kwargs: Any) -> None: + if len(args) == 2 and isinstance(args[0], str): + self.events.append({"event_type": args[0], "payload": args[1]}) + + +# Minimal classes shaped like LlamaIndex events. The adapter dispatches by +# ``type(event).__name__``, so the class name is what matters. +class LLMChatEndEvent: + def __init__(self, model: str, response: Any = None) -> None: + self.model = model + self.response = response + + +class ToolCallEvent: + def __init__(self, tool_name: str, tool_input: Any = None, tool_output: Any = None) -> None: + self.tool_name = tool_name + self.tool_input = tool_input + self.tool_output = tool_output + + +class RetrievalEndEvent: + def __init__(self, nodes: List[Any]) -> None: + self.nodes = nodes + + +class AgentRunStepStartEvent: + def __init__(self, agent_id: str, step: int = 0, tools: Any = None) -> None: + self.agent_id = agent_id + self.step = step + self.tools = tools + + +class AgentRunStepEndEvent: + def __init__(self, agent_id: str, response: Any = None) -> None: + self.agent_id = agent_id + self.response = response + + +def test_adapter_class_export() -> None: + assert ADAPTER_CLASS is LlamaIndexAdapter + + +def test_lifecycle() -> None: + a = LlamaIndexAdapter() + a.connect() + assert a.status == AdapterStatus.HEALTHY + a.disconnect() + assert a.status 
== AdapterStatus.DISCONNECTED + + +def test_adapter_info_and_health() -> None: + a = LlamaIndexAdapter() + a.connect() + info = a.get_adapter_info() + assert info.framework == "llama_index" + assert info.name == "LlamaIndexAdapter" + health = a.health_check() + assert health.framework_name == "llama_index" + + +def test_handle_llm_end_emits_model_invoke_and_cost() -> None: + stratix = _RecordingStratix() + adapter = LlamaIndexAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + raw = SimpleNamespace(usage=SimpleNamespace(prompt_tokens=10, completion_tokens=5)) + response = SimpleNamespace(raw=raw) + adapter._handle_event(LLMChatEndEvent(model="gpt-5", response=response)) + + types = [e["event_type"] for e in stratix.events] + assert "model.invoke" in types + assert "cost.record" in types + + invoke = next(e for e in stratix.events if e["event_type"] == "model.invoke") + assert invoke["payload"]["model"] == "gpt-5" + assert invoke["payload"]["tokens_prompt"] == 10 + + cost = next(e for e in stratix.events if e["event_type"] == "cost.record") + assert cost["payload"]["tokens_total"] == 15 + + +def test_handle_tool_call_event_emits_tool_call() -> None: + stratix = _RecordingStratix() + adapter = LlamaIndexAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + adapter._handle_event(ToolCallEvent(tool_name="calc", tool_input={"x": 1}, tool_output=2)) + + evt = next(e for e in stratix.events if e["event_type"] == "tool.call") + assert evt["payload"]["tool_name"] == "calc" + assert evt["payload"]["tool_output"] == 2 + + +def test_handle_retrieval_end_emits_retrieval_tool_call() -> None: + stratix = _RecordingStratix() + adapter = LlamaIndexAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + nodes = [SimpleNamespace(score=0.9), SimpleNamespace(score=0.8)] + adapter._handle_event(RetrievalEndEvent(nodes=nodes)) + + evt = next(e for e in stratix.events if e["event_type"] == 
"tool.call") + assert evt["payload"]["tool_type"] == "retrieval" + assert evt["payload"]["result_count"] == 2 + + +def test_agent_step_start_end_emits_input_output_and_config() -> None: + stratix = _RecordingStratix() + adapter = LlamaIndexAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + adapter._handle_event(AgentRunStepStartEvent(agent_id="myagent", step=1)) + adapter._handle_event(AgentRunStepEndEvent(agent_id="myagent", response="result")) + + types = [e["event_type"] for e in stratix.events] + assert "environment.config" in types + assert "agent.input" in types + assert "agent.output" in types + + out = next(e for e in stratix.events if e["event_type"] == "agent.output") + assert out["payload"]["agent_name"] == "myagent" + assert out["payload"]["duration_ns"] >= 0 + + +def test_on_handoff_emits_event_with_context_hash() -> None: + stratix = _RecordingStratix() + adapter = LlamaIndexAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + adapter.on_handoff(from_agent="a", to_agent="b", context="some context") + + evt = next(e for e in stratix.events if e["event_type"] == "agent.handoff") + assert evt["payload"]["from_agent"] == "a" + assert evt["payload"]["to_agent"] == "b" + assert evt["payload"]["context_hash"] is not None + + +def test_capture_config_gates_l5a_tool_calls() -> None: + stratix = _RecordingStratix() + cfg = CaptureConfig(l5a_tool_calls=False) + adapter = LlamaIndexAdapter(stratix=stratix, capture_config=cfg) + adapter.connect() + + adapter._handle_event(ToolCallEvent(tool_name="calc", tool_input={"x": 1}, tool_output=2)) + adapter.on_handoff(from_agent="a", to_agent="b", context="x") + + types = [e["event_type"] for e in stratix.events] + assert "tool.call" not in types + assert "agent.handoff" in types + + +def test_unknown_event_type_does_nothing() -> None: + """Events the adapter does not recognize should be silently ignored.""" + stratix = _RecordingStratix() + adapter = 
LlamaIndexAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + adapter._handle_event(SimpleNamespace()) # type name 'SimpleNamespace' — unhandled + + assert stratix.events == [] + + +def test_instrument_workflow_helper_returns_connected_adapter() -> None: + """Convenience function returns a connected adapter even without llama_index installed.""" + adapter = instrument_workflow() + assert adapter.is_connected is True + assert adapter.status == AdapterStatus.HEALTHY + + +def test_serialize_for_replay() -> None: + adapter = LlamaIndexAdapter( + stratix=_RecordingStratix(), + capture_config=CaptureConfig.full(), + ) + adapter.connect() + rt = adapter.serialize_for_replay() + assert rt.framework == "llama_index" + assert rt.adapter_name == "LlamaIndexAdapter" + assert "capture_config" in rt.config diff --git a/tests/instrument/adapters/frameworks/test_ms_agent_framework_adapter.py b/tests/instrument/adapters/frameworks/test_ms_agent_framework_adapter.py new file mode 100644 index 0000000..24bd6c1 --- /dev/null +++ b/tests/instrument/adapters/frameworks/test_ms_agent_framework_adapter.py @@ -0,0 +1,210 @@ +"""Unit tests for the Microsoft Agent Framework adapter. + +Mocked at the SDK shape level — no real ``semantic_kernel.agents`` runtime +needed. The adapter wraps ``invoke()`` async generators on chat instances; +tests exercise ``_process_message`` and the lifecycle hooks directly. 
+""" + +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any, Dict, List + +from layerlens.instrument.adapters._base import AdapterStatus, CaptureConfig +from layerlens.instrument.adapters.frameworks.ms_agent_framework import ( + ADAPTER_CLASS, + MSAgentAdapter, + instrument_agent, +) + + +class _RecordingStratix: + def __init__(self) -> None: + self.events: List[Dict[str, Any]] = [] + + def emit(self, *args: Any, **kwargs: Any) -> None: + if len(args) == 2 and isinstance(args[0], str): + self.events.append({"event_type": args[0], "payload": args[1]}) + + +# Item types — name-driven dispatch in adapter +class FunctionCallContent: + def __init__(self, name: str, arguments: Any) -> None: + self.name = name + self.arguments = arguments + + +class FunctionResultContent: + def __init__(self, name: str, result: Any) -> None: + self.name = name + self.result = result + + +class _FakeChat: + def __init__(self, name: str = "ms-chat", agents: Any = None, agent: Any = None) -> None: + self.name = name + self.agents = agents + self.agent = agent + + async def invoke(self, *args: Any, **kwargs: Any) -> Any: + # async generator stub + if False: + yield None # type: ignore[unreachable] + + async def invoke_stream(self, *args: Any, **kwargs: Any) -> Any: + if False: + yield None # type: ignore[unreachable] + + +def test_adapter_class_export() -> None: + assert ADAPTER_CLASS is MSAgentAdapter + + +def test_lifecycle() -> None: + a = MSAgentAdapter() + a.connect() + assert a.status == AdapterStatus.HEALTHY + a.disconnect() + assert a.status == AdapterStatus.DISCONNECTED + + +def test_adapter_info_and_health() -> None: + a = MSAgentAdapter() + a.connect() + info = a.get_adapter_info() + assert info.framework == "ms_agent_framework" + assert info.name == "MSAgentAdapter" + health = a.health_check() + assert health.framework_name == "ms_agent_framework" + + +def test_instrument_chat_wraps_invoke_and_emits_config() -> None: + stratix = 
_RecordingStratix() + adapter = MSAgentAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + chat = _FakeChat(name="planner-chat") + adapter.instrument_chat(chat) + + # Wrapped: name is now traced. + assert chat.invoke.__name__ == "traced_invoke" + assert chat.invoke_stream.__name__ == "traced_invoke_stream" + + cfg = next(e for e in stratix.events if e["event_type"] == "environment.config") + assert cfg["payload"]["chat_name"] == "planner-chat" + + adapter.disconnect() + # Restored. + assert chat.invoke.__name__ == "invoke" + + +def test_process_message_emits_handoff_on_agent_change() -> None: + stratix = _RecordingStratix() + adapter = MSAgentAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + msg = SimpleNamespace(agent_name="bob", items=[], metadata={}) + adapter._process_message(_FakeChat(), msg, current_agent="alice") + + evt = next(e for e in stratix.events if e["event_type"] == "agent.handoff") + assert evt["payload"]["from_agent"] == "alice" + assert evt["payload"]["to_agent"] == "bob" + + +def test_process_message_emits_tool_calls_from_function_items() -> None: + stratix = _RecordingStratix() + adapter = MSAgentAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + msg = SimpleNamespace( + items=[ + FunctionCallContent(name="calc", arguments={"x": 1}), + FunctionResultContent(name="calc", result=42), + ], + metadata={}, + ) + adapter._process_message(_FakeChat(), msg, current_agent="alice") + + tool_calls = [e for e in stratix.events if e["event_type"] == "tool.call"] + assert len(tool_calls) == 2 + assert tool_calls[0]["payload"]["tool_name"] == "calc" + assert tool_calls[1]["payload"]["tool_output"] == 42 + + +def test_process_message_emits_model_and_cost_from_metadata() -> None: + stratix = _RecordingStratix() + adapter = MSAgentAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + msg = SimpleNamespace( + items=[], + 
metadata={"model": "gpt-5", "usage": {"prompt_tokens": 10, "completion_tokens": 5}}, + ) + adapter._process_message(_FakeChat(), msg, current_agent="alice") + + invoke = next(e for e in stratix.events if e["event_type"] == "model.invoke") + assert invoke["payload"]["model"] == "gpt-5" + cost = next(e for e in stratix.events if e["event_type"] == "cost.record") + assert cost["payload"]["tokens_prompt"] == 10 + + +def test_on_run_start_end_emits_input_output_and_state() -> None: + stratix = _RecordingStratix() + adapter = MSAgentAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + adapter.on_run_start(agent_name="planner", input_data="hi") + adapter.on_run_end(agent_name="planner", output="bye") + + types = [e["event_type"] for e in stratix.events] + assert "agent.input" in types + assert "agent.output" in types + assert "agent.state.change" in types + + +def test_capture_config_gates_l5a_tool_calls() -> None: + stratix = _RecordingStratix() + cfg = CaptureConfig(l5a_tool_calls=False) + adapter = MSAgentAdapter(stratix=stratix, capture_config=cfg) + adapter.connect() + + msg = SimpleNamespace( + items=[FunctionCallContent(name="calc", arguments={"x": 1})], + metadata={}, + ) + adapter._process_message(_FakeChat(), msg, current_agent="alice") + adapter.on_handoff(from_agent="a", to_agent="b", context="x") + + types = [e["event_type"] for e in stratix.events] + assert "tool.call" not in types + # handoff is cross-cutting / always enabled. 
+ assert "agent.handoff" in types + + +def test_on_handoff_emits_event_with_context_hash() -> None: + stratix = _RecordingStratix() + adapter = MSAgentAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + adapter.on_handoff(from_agent="a", to_agent="b", context="some context") + + evt = next(e for e in stratix.events if e["event_type"] == "agent.handoff") + assert evt["payload"]["from_agent"] == "a" + assert evt["payload"]["context_hash"] is not None + + +def test_instrument_agent_helper() -> None: + chat = _FakeChat(name="helper") + adapter = instrument_agent(chat) + assert adapter.is_connected is True + assert adapter.status == AdapterStatus.HEALTHY + + +def test_serialize_for_replay() -> None: + adapter = MSAgentAdapter( + stratix=_RecordingStratix(), + capture_config=CaptureConfig.full(), + ) + adapter.connect() + rt = adapter.serialize_for_replay() + assert rt.framework == "ms_agent_framework" + assert rt.adapter_name == "MSAgentAdapter" + assert "capture_config" in rt.config diff --git a/tests/instrument/adapters/frameworks/test_openai_agents_adapter.py b/tests/instrument/adapters/frameworks/test_openai_agents_adapter.py new file mode 100644 index 0000000..15efd7d --- /dev/null +++ b/tests/instrument/adapters/frameworks/test_openai_agents_adapter.py @@ -0,0 +1,214 @@ +"""Unit tests for the OpenAI Agents SDK framework adapter. + +Mocked at the SDK shape level — no real ``agents`` runtime needed. The +adapter dispatches by ``type(span_data).__name__``, so each test span +uses a class with the right name (AgentSpanData, GenerationSpanData, etc.). 
+""" + +from __future__ import annotations + +from typing import Any, Dict, List + +from layerlens.instrument.adapters._base import AdapterStatus, CaptureConfig +from layerlens.instrument.adapters.frameworks.openai_agents import ( + ADAPTER_CLASS, + OpenAIAgentsAdapter, + instrument_runner, +) + + +class _RecordingStratix: + def __init__(self) -> None: + self.events: List[Dict[str, Any]] = [] + + def emit(self, *args: Any, **kwargs: Any) -> None: + if len(args) == 2 and isinstance(args[0], str): + self.events.append({"event_type": args[0], "payload": args[1]}) + + +# Span data classes — names must match what the adapter dispatches on. +class AgentSpanData: + def __init__(self, name: str, output: Any = None, tools: Any = None, model: Any = None) -> None: + self.name = name + self.output = output + self.tools = tools + self.model = model + + +class GenerationSpanData: + def __init__(self, model: str, input_tokens: int, output_tokens: int) -> None: + self.model = model + self.input_tokens = input_tokens + self.output_tokens = output_tokens + + +class FunctionSpanData: + def __init__(self, name: str, input: Any = None, output: Any = None) -> None: + self.name = name + self.input = input + self.output = output + + +class HandoffSpanData: + def __init__(self, from_agent: str, to_agent: str) -> None: + self.from_agent = from_agent + self.to_agent = to_agent + + +class GuardrailSpanData: + def __init__(self, name: str, triggered: bool, output: Any = None) -> None: + self.name = name + self.triggered = triggered + self.output = output + + +class _Span: + def __init__(self, span_data: Any, span_id: str = "span-1", duration_ms: float = 100.0) -> None: + self.span_data = span_data + self.span_id = span_id + self.duration_ms = duration_ms + + +def test_adapter_class_export() -> None: + assert ADAPTER_CLASS is OpenAIAgentsAdapter + + +def test_lifecycle() -> None: + a = OpenAIAgentsAdapter() + a.connect() + assert a.status == AdapterStatus.HEALTHY + a.disconnect() + assert 
a.status == AdapterStatus.DISCONNECTED + + +def test_adapter_info_and_health() -> None: + a = OpenAIAgentsAdapter() + a.connect() + info = a.get_adapter_info() + assert info.framework == "openai_agents" + assert info.name == "OpenAIAgentsAdapter" + health = a.health_check() + assert health.framework_name == "openai_agents" + + +def test_agent_span_emits_input_output_and_config() -> None: + stratix = _RecordingStratix() + adapter = OpenAIAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + data = AgentSpanData(name="planner", output="response", model="gpt-5") + span = _Span(data, span_id="span-1") + + adapter._on_span_start(span) + adapter._on_span_end(span) + + types = [e["event_type"] for e in stratix.events] + assert "environment.config" in types + assert "agent.input" in types + assert "agent.output" in types + + out = next(e for e in stratix.events if e["event_type"] == "agent.output") + assert out["payload"]["agent_name"] == "planner" + assert out["payload"]["output"] == "response" + + +def test_generation_span_emits_model_invoke_and_cost() -> None: + stratix = _RecordingStratix() + adapter = OpenAIAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + data = GenerationSpanData(model="gpt-5", input_tokens=10, output_tokens=20) + adapter._on_span_end(_Span(data, duration_ms=42.0)) + + invoke = next(e for e in stratix.events if e["event_type"] == "model.invoke") + assert invoke["payload"]["model"] == "gpt-5" + assert invoke["payload"]["tokens_prompt"] == 10 + assert invoke["payload"]["latency_ms"] == 42.0 + + cost = next(e for e in stratix.events if e["event_type"] == "cost.record") + assert cost["payload"]["tokens_total"] == 30 + + +def test_function_span_emits_tool_call() -> None: + stratix = _RecordingStratix() + adapter = OpenAIAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + adapter._on_span_end(_Span(FunctionSpanData(name="calc", input={"x": 
1}, output=42))) + + evt = next(e for e in stratix.events if e["event_type"] == "tool.call") + assert evt["payload"]["tool_name"] == "calc" + assert evt["payload"]["tool_output"] == 42 + + +def test_handoff_span_emits_agent_handoff() -> None: + stratix = _RecordingStratix() + adapter = OpenAIAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + adapter._on_span_end(_Span(HandoffSpanData(from_agent="a", to_agent="b"))) + + evt = next(e for e in stratix.events if e["event_type"] == "agent.handoff") + assert evt["payload"]["from_agent"] == "a" + assert evt["payload"]["to_agent"] == "b" + + +def test_guardrail_span_emits_policy_violation() -> None: + stratix = _RecordingStratix() + adapter = OpenAIAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + adapter._on_span_end(_Span(GuardrailSpanData(name="profanity", triggered=True, output="blocked"))) + + evt = next(e for e in stratix.events if e["event_type"] == "policy.violation") + assert evt["payload"]["guardrail_name"] == "profanity" + assert evt["payload"]["triggered"] is True + + +def test_capture_config_gates_l3_model_metadata() -> None: + """When l3_model_metadata is disabled, model.invoke does NOT fire (handoff still does).""" + stratix = _RecordingStratix() + cfg = CaptureConfig(l3_model_metadata=False) + adapter = OpenAIAgentsAdapter(stratix=stratix, capture_config=cfg) + adapter.connect() + + adapter._on_span_end(_Span(GenerationSpanData(model="gpt-5", input_tokens=10, output_tokens=5))) + adapter._on_span_end(_Span(HandoffSpanData(from_agent="a", to_agent="b"))) + + types = [e["event_type"] for e in stratix.events] + assert "model.invoke" not in types + # handoff is cross-cutting / always enabled. 
+ assert "agent.handoff" in types + + +def test_trace_start_end_emits_state_change() -> None: + stratix = _RecordingStratix() + adapter = OpenAIAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + class _Trace: + trace_id = "trace-1" + + adapter._on_trace_start(_Trace()) + adapter._on_trace_end(_Trace()) + + states = [e for e in stratix.events if e["event_type"] == "agent.state.change"] + subtypes = {s["payload"]["event_subtype"] for s in states} + assert "trace_start" in subtypes + assert "trace_end" in subtypes + + +def test_instrument_runner_helper() -> None: + """Convenience function returns a connected adapter even without agents installed.""" + adapter = instrument_runner() + assert adapter.is_connected is True + assert adapter.status == AdapterStatus.HEALTHY + + +def test_serialize_for_replay() -> None: + adapter = OpenAIAgentsAdapter( + stratix=_RecordingStratix(), + capture_config=CaptureConfig.full(), + ) + adapter.connect() + rt = adapter.serialize_for_replay() + assert rt.framework == "openai_agents" + assert rt.adapter_name == "OpenAIAgentsAdapter" + assert "capture_config" in rt.config diff --git a/tests/instrument/adapters/frameworks/test_pydantic_ai_adapter.py b/tests/instrument/adapters/frameworks/test_pydantic_ai_adapter.py new file mode 100644 index 0000000..b5c31fa --- /dev/null +++ b/tests/instrument/adapters/frameworks/test_pydantic_ai_adapter.py @@ -0,0 +1,216 @@ +"""Unit tests for the PydanticAI framework adapter. + +Mocked at the SDK shape level — no real ``pydantic_ai`` runtime needed. 
+""" + +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any, Dict, List + +import pytest + +from layerlens.instrument.adapters._base import AdapterStatus, CaptureConfig +from layerlens.instrument.adapters.frameworks.pydantic_ai import ( + ADAPTER_CLASS, + PydanticAIAdapter, + instrument_agent, +) + + +class _RecordingStratix: + def __init__(self) -> None: + self.events: List[Dict[str, Any]] = [] + + def emit(self, *args: Any, **kwargs: Any) -> None: + if len(args) == 2 and isinstance(args[0], str): + self.events.append({"event_type": args[0], "payload": args[1]}) + + +class _FakeAgent: + """Minimal duck-typed PydanticAI agent for tests.""" + + def __init__( + self, + name: str = "pa-agent", + tools: Any = None, + model: Any = None, + system_prompt: Any = None, + result_type: Any = None, + result: Any = None, + raises: bool = False, + ) -> None: + self.name = name + self.tools = tools + self.model = model + self.system_prompt = system_prompt + self.result_type = result_type + self._result = result + self._raises = raises + + def run_sync(self, user_prompt: str, **kwargs: Any) -> Any: + if self._raises: + raise RuntimeError("simulated failure") + return self._result if self._result is not None else SimpleNamespace(data=f"out:{user_prompt}") + + +def test_adapter_class_export() -> None: + assert ADAPTER_CLASS is PydanticAIAdapter + + +def test_lifecycle() -> None: + a = PydanticAIAdapter() + a.connect() + assert a.status == AdapterStatus.HEALTHY + a.disconnect() + assert a.status == AdapterStatus.DISCONNECTED + + +def test_adapter_info_and_health() -> None: + a = PydanticAIAdapter() + a.connect() + info = a.get_adapter_info() + assert info.framework == "pydantic_ai" + assert info.name == "PydanticAIAdapter" + health = a.health_check() + assert health.framework_name == "pydantic_ai" + + +def test_instrument_agent_wraps_run_sync() -> None: + adapter = PydanticAIAdapter(stratix=_RecordingStratix(), 
capture_config=CaptureConfig.full()) + adapter.connect() + + agent = _FakeAgent(name="planner") + adapter.instrument_agent(agent) + assert agent.run_sync.__name__ == "traced_run_sync" + + adapter.disconnect() + # Restored to original. + assert agent.run_sync.__name__ == "run_sync" + + +def test_run_emits_input_and_output_events() -> None: + stratix = _RecordingStratix() + adapter = PydanticAIAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + agent = _FakeAgent(name="planner", model="gpt-5") + adapter.instrument_agent(agent) + result = agent.run_sync("hello") + assert getattr(result, "data", None) == "out:hello" + + types = [e["event_type"] for e in stratix.events] + assert "environment.config" in types + assert "agent.input" in types + assert "agent.output" in types + + out = next(e for e in stratix.events if e["event_type"] == "agent.output") + assert out["payload"]["agent_name"] == "planner" + assert out["payload"]["duration_ns"] >= 0 + + +def test_run_failure_emits_output_with_error() -> None: + stratix = _RecordingStratix() + adapter = PydanticAIAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + agent = _FakeAgent(name="failing", raises=True) + adapter.instrument_agent(agent) + + with pytest.raises(RuntimeError): + agent.run_sync("bad") + + out = next(e for e in stratix.events if e["event_type"] == "agent.output") + assert "error" in out["payload"] + assert "simulated failure" in out["payload"]["error"] + + +def test_run_extracts_usage_and_messages() -> None: + """When the result has usage and a tool-return message, cost.record + tool.call fire.""" + stratix = _RecordingStratix() + adapter = PydanticAIAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + usage = SimpleNamespace(request_tokens=10, response_tokens=5, total_tokens=15) + response_msg = SimpleNamespace(kind="response") + tool_msg = SimpleNamespace(kind="tool-return", tool_name="calc", content=42) + 
result = SimpleNamespace( + data="ok", + usage=usage, + all_messages=[response_msg, tool_msg], + model_name="gpt-5", + ) + agent = _FakeAgent(name="planner", result=result) + adapter.instrument_agent(agent) + agent.run_sync("hi") + + types = [e["event_type"] for e in stratix.events] + assert "cost.record" in types + assert "model.invoke" in types + assert "tool.call" in types + + cost = next(e for e in stratix.events if e["event_type"] == "cost.record") + assert cost["payload"]["tokens_total"] == 15 + tool = next(e for e in stratix.events if e["event_type"] == "tool.call") + assert tool["payload"]["tool_name"] == "calc" + + +def test_on_handoff_emits_event_with_context_hash() -> None: + stratix = _RecordingStratix() + adapter = PydanticAIAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + adapter.on_handoff(from_agent="a", to_agent="b", context="some context") + + evt = next(e for e in stratix.events if e["event_type"] == "agent.handoff") + assert evt["payload"]["from_agent"] == "a" + assert evt["payload"]["context_hash"] is not None + + +def test_capture_config_gates_l1_agent_io() -> None: + """When l1_agent_io is disabled, agent.input/output do NOT fire (state.change still does).""" + stratix = _RecordingStratix() + cfg = CaptureConfig(l1_agent_io=False) + adapter = PydanticAIAdapter(stratix=stratix, capture_config=cfg) + adapter.connect() + + adapter.on_run_start(agent_name="a", input_data="x") + adapter.on_run_end(agent_name="a", output="y") + + types = [e["event_type"] for e in stratix.events] + assert "agent.input" not in types + assert "agent.output" not in types + # state.change is cross-cutting / always enabled. 
+ assert "agent.state.change" in types + + +def test_environment_config_emits_once_per_agent() -> None: + stratix = _RecordingStratix() + adapter = PydanticAIAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + agent = _FakeAgent(name="a1", tools=[SimpleNamespace(name="search")], model="gpt-5") + adapter.instrument_agent(agent) + adapter.instrument_agent(agent) # idempotent + + configs = [e for e in stratix.events if e["event_type"] == "environment.config"] + assert len(configs) == 1 + assert configs[0]["payload"]["agent_name"] == "a1" + assert configs[0]["payload"]["tools"] == ["search"] + + +def test_instrument_agent_helper() -> None: + agent = _FakeAgent(name="helper") + adapter = instrument_agent(agent) + assert adapter.is_connected is True + assert adapter.status == AdapterStatus.HEALTHY + + +def test_serialize_for_replay() -> None: + adapter = PydanticAIAdapter( + stratix=_RecordingStratix(), + capture_config=CaptureConfig.full(), + ) + adapter.connect() + rt = adapter.serialize_for_replay() + assert rt.framework == "pydantic_ai" + assert rt.adapter_name == "PydanticAIAdapter" + assert "capture_config" in rt.config diff --git a/tests/instrument/adapters/frameworks/test_semantic_kernel_adapter.py b/tests/instrument/adapters/frameworks/test_semantic_kernel_adapter.py new file mode 100644 index 0000000..2539048 --- /dev/null +++ b/tests/instrument/adapters/frameworks/test_semantic_kernel_adapter.py @@ -0,0 +1,212 @@ +"""Unit tests for the Microsoft Semantic Kernel adapter. + +Mocked at the SDK shape level — no real ``semantic_kernel`` runtime needed. +The adapter wires filters via ``kernel.add_filter(...)`` and exposes a +suite of lifecycle hooks (``on_function_start``, ``on_model_invoke``, +``on_planner_step``, etc.) that are called by those filters. Tests +exercise the lifecycle hooks directly + verify filter wiring. 
+""" + +from __future__ import annotations + +from typing import Any, Dict, List + +from layerlens.instrument.adapters._base import AdapterStatus, CaptureConfig +from layerlens.instrument.adapters.frameworks.semantic_kernel import ( + ADAPTER_CLASS, + SemanticKernelAdapter, +) + + +class _RecordingStratix: + def __init__(self) -> None: + self.events: List[Dict[str, Any]] = [] + + def emit(self, *args: Any, **kwargs: Any) -> None: + if len(args) == 2 and isinstance(args[0], str): + self.events.append({"event_type": args[0], "payload": args[1]}) + + +class _FakeKernel: + def __init__(self, plugins: Any = None) -> None: + self.plugins = plugins or {} + self._added_filters: List[Dict[str, Any]] = [] + + def add_filter(self, filter_type: str, filter_obj: Any) -> None: + self._added_filters.append({"type": filter_type, "filter": filter_obj}) + + +def test_adapter_class_export() -> None: + assert ADAPTER_CLASS is SemanticKernelAdapter + + +def test_lifecycle() -> None: + a = SemanticKernelAdapter() + a.connect() + assert a.status == AdapterStatus.HEALTHY + a.disconnect() + assert a.status == AdapterStatus.DISCONNECTED + + +def test_adapter_info_and_health() -> None: + a = SemanticKernelAdapter() + a.connect() + info = a.get_adapter_info() + assert info.framework == "semantic_kernel" + assert info.name == "SemanticKernelAdapter" + health = a.health_check() + assert health.framework_name == "semantic_kernel" + + +def test_instrument_kernel_registers_filters_and_discovers_plugins() -> None: + stratix = _RecordingStratix() + adapter = SemanticKernelAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + kernel = _FakeKernel(plugins={"math": object(), "search": object()}) + adapter.instrument_kernel(kernel) + + filter_types = {f["type"] for f in kernel._added_filters} + assert filter_types == {"function_invocation", "prompt_rendering", "auto_function_invocation"} + + # Plugin discovery emits environment.config events. 
+ configs = [e for e in stratix.events if e["event_type"] == "environment.config"] + plugin_names = {c["payload"].get("plugin_name") for c in configs} + assert "math" in plugin_names + assert "search" in plugin_names + + +def test_on_function_start_end_emits_tool_call() -> None: + stratix = _RecordingStratix() + adapter = SemanticKernelAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + ctx = adapter.on_function_start(plugin_name="math", function_name="add", arguments={"a": 1, "b": 2}) + adapter.on_function_end(context=ctx, result=3) + + evt = next(e for e in stratix.events if e["event_type"] == "tool.call") + assert evt["payload"]["tool_name"] == "math.add" + assert evt["payload"]["plugin_name"] == "math" + assert evt["payload"]["function_name"] == "add" + assert evt["payload"]["latency_ms"] >= 0 + + +def test_on_model_invoke_emits_invoke_and_cost() -> None: + stratix = _RecordingStratix() + adapter = SemanticKernelAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + adapter.on_model_invoke( + provider="azure_openai", + model="gpt-5", + prompt_tokens=10, + completion_tokens=5, + latency_ms=20.0, + ) + + invoke = next(e for e in stratix.events if e["event_type"] == "model.invoke") + assert invoke["payload"]["model"] == "gpt-5" + assert invoke["payload"]["latency_ms"] == 20.0 + + cost = next(e for e in stratix.events if e["event_type"] == "cost.record") + assert cost["payload"]["total_tokens"] == 15 + + +def test_on_prompt_render_emits_agent_code() -> None: + stratix = _RecordingStratix() + adapter = SemanticKernelAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + adapter.on_prompt_render( + template="Hello {{name}}", + rendered_prompt="Hello world", + function_name="greet", + ) + + evt = next(e for e in stratix.events if e["event_type"] == "agent.code") + assert evt["payload"]["event_subtype"] == "prompt_render" + assert evt["payload"]["function_name"] == "greet" + 
+ +def test_on_planner_step_emits_agent_code() -> None: + stratix = _RecordingStratix() + adapter = SemanticKernelAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + adapter.on_planner_step( + planner_type="HandlebarsPlanner", + step_index=1, + thought="I need to search", + action="search", + observation="found results", + status="completed", + ) + + evt = next(e for e in stratix.events if e["event_type"] == "agent.code") + assert evt["payload"]["event_subtype"] == "planner_step" + assert evt["payload"]["planner_type"] == "HandlebarsPlanner" + assert evt["payload"]["step_index"] == 1 + + +def test_on_memory_operation_emits_tool_call() -> None: + stratix = _RecordingStratix() + adapter = SemanticKernelAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + adapter.on_memory_operation( + operation="search", + collection="facts", + query="capital of France", + result_count=3, + relevance_scores=[0.9, 0.8, 0.7], + backend_type="qdrant", + ) + + evt = next(e for e in stratix.events if e["event_type"] == "tool.call") + assert evt["payload"]["tool_name"] == "memory.search" + assert evt["payload"]["result_count"] == 3 + assert evt["payload"]["backend_type"] == "qdrant" + + +def test_on_kernel_invoke_start_end_emits_input_output() -> None: + stratix = _RecordingStratix() + adapter = SemanticKernelAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + adapter.on_kernel_invoke_start(input_text="hello") + adapter.on_kernel_invoke_end(output="world") + + types = [e["event_type"] for e in stratix.events] + assert "agent.input" in types + assert "agent.output" in types + + out = next(e for e in stratix.events if e["event_type"] == "agent.output") + assert out["payload"]["output"] == "world" + assert out["payload"]["duration_ns"] >= 0 + + +def test_capture_config_gates_l5a_tool_calls() -> None: + """When l5a_tool_calls is disabled, tool.call does NOT fire (model.invoke still does).""" + 
stratix = _RecordingStratix() + cfg = CaptureConfig(l5a_tool_calls=False) + adapter = SemanticKernelAdapter(stratix=stratix, capture_config=cfg) + adapter.connect() + + ctx = adapter.on_function_start(plugin_name="math", function_name="add") + adapter.on_function_end(context=ctx, result=3) + adapter.on_model_invoke(model="gpt-5", prompt_tokens=10, completion_tokens=5) + + types = [e["event_type"] for e in stratix.events] + assert "tool.call" not in types + assert "model.invoke" in types + + +def test_serialize_for_replay() -> None: + adapter = SemanticKernelAdapter( + stratix=_RecordingStratix(), + capture_config=CaptureConfig.full(), + ) + adapter.connect() + rt = adapter.serialize_for_replay() + assert rt.framework == "semantic_kernel" + assert rt.adapter_name == "SemanticKernelAdapter" + assert "capture_config" in rt.config diff --git a/tests/instrument/adapters/frameworks/test_smolagents_adapter.py b/tests/instrument/adapters/frameworks/test_smolagents_adapter.py new file mode 100644 index 0000000..ccf1e29 --- /dev/null +++ b/tests/instrument/adapters/frameworks/test_smolagents_adapter.py @@ -0,0 +1,212 @@ +"""Unit tests for the SmolAgents framework adapter. + +Mocked at the SDK shape level — no real ``smolagents`` runtime needed. 
+""" + +from __future__ import annotations + +from typing import Any, Dict, List + +from layerlens.instrument.adapters._base import AdapterStatus, CaptureConfig +from layerlens.instrument.adapters.frameworks.smolagents import ( + ADAPTER_CLASS, + SmolAgentsAdapter, + instrument_agent, +) + + +class _RecordingStratix: + def __init__(self) -> None: + self.events: List[Dict[str, Any]] = [] + + def emit(self, *args: Any, **kwargs: Any) -> None: + if len(args) == 2 and isinstance(args[0], str): + self.events.append({"event_type": args[0], "payload": args[1]}) + + +class _FakeAgent: + """Minimal duck-typed SmolAgents agent for tests.""" + + def __init__( + self, + name: str = "test-agent", + tools: Any = None, + managed_agents: Any = None, + model: Any = None, + system_prompt: Any = None, + ) -> None: + self.name = name + self.tools = tools + self.managed_agents = managed_agents + self.model = model + self.system_prompt = system_prompt + self._raised = False + + def run(self, task: str, **kwargs: Any) -> Any: + if self._raised: + raise RuntimeError("simulated failure") + return f"result for {task}" + + +def test_adapter_class_export() -> None: + assert ADAPTER_CLASS is SmolAgentsAdapter + + +def test_lifecycle() -> None: + a = SmolAgentsAdapter() + a.connect() + assert a.status == AdapterStatus.HEALTHY + a.disconnect() + assert a.status == AdapterStatus.DISCONNECTED + + +def test_instrument_agent_wraps_run() -> None: + adapter = SmolAgentsAdapter(stratix=_RecordingStratix(), capture_config=CaptureConfig.full()) + adapter.connect() + + agent = _FakeAgent(name="planner") + adapter.instrument_agent(agent) + # Wrapped: the bound method's underlying function is now ``traced_run``. + assert agent.run.__name__ == "traced_run" + + adapter.disconnect() + # Restored: name is back to the original. 
+ assert agent.run.__name__ == "run" + + +def test_run_emits_input_and_output_events() -> None: + stratix = _RecordingStratix() + adapter = SmolAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + agent = _FakeAgent(name="planner") + adapter.instrument_agent(agent) + result = agent.run("compute 2+2") + + assert result == "result for compute 2+2" + + types = [e["event_type"] for e in stratix.events] + # First event is environment.config from initial agent registration. + assert "environment.config" in types + assert "agent.input" in types + assert "agent.output" in types + + out = next(e for e in stratix.events if e["event_type"] == "agent.output") + assert out["payload"]["agent_name"] == "planner" + assert out["payload"]["duration_ns"] >= 0 + + +def test_run_failure_emits_output_with_error() -> None: + stratix = _RecordingStratix() + adapter = SmolAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + agent = _FakeAgent(name="failing") + agent._raised = True + adapter.instrument_agent(agent) + + import pytest + + with pytest.raises(RuntimeError): + agent.run("bad task") + + out = next(e for e in stratix.events if e["event_type"] == "agent.output") + assert "error" in out["payload"] + assert "simulated failure" in out["payload"]["error"] + + +def test_managed_agents_recursively_instrumented() -> None: + adapter = SmolAgentsAdapter() + adapter.connect() + + sub = _FakeAgent(name="sub") + parent = _FakeAgent(name="parent", managed_agents={"sub": sub}) + + adapter.instrument_agent(parent) + # Both wrapped. 
+ assert parent.run.__name__ == "traced_run" + assert sub.run.__name__ == "traced_run" + + +def test_environment_config_emits_once_per_agent() -> None: + stratix = _RecordingStratix() + adapter = SmolAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + agent = _FakeAgent( + name="a1", + tools=["search", "calc"], + model="some-model", + system_prompt="you are helpful", + ) + adapter.instrument_agent(agent) + # Re-instrument should not re-emit config. + adapter.instrument_agent(agent) + + configs = [e for e in stratix.events if e["event_type"] == "environment.config"] + assert len(configs) == 1 + cfg = configs[0]["payload"] + assert cfg["agent_name"] == "a1" + assert cfg["tools"] == ["search", "calc"] + + +def test_on_tool_use_emits_event() -> None: + stratix = _RecordingStratix() + adapter = SmolAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + adapter.on_tool_use("calc", tool_input={"x": 1}, tool_output=2, latency_ms=12.3) + + evt = next(e for e in stratix.events if e["event_type"] == "tool.call") + assert evt["payload"]["tool_name"] == "calc" + assert evt["payload"]["latency_ms"] == 12.3 + + +def test_on_handoff_emits_event_with_context_hash() -> None: + stratix = _RecordingStratix() + adapter = SmolAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + adapter.on_handoff(from_agent="a", to_agent="b", context="some context") + + evt = next(e for e in stratix.events if e["event_type"] == "agent.handoff") + assert evt["payload"]["from_agent"] == "a" + assert evt["payload"]["to_agent"] == "b" + assert evt["payload"]["context_hash"] is not None + # Capture content on => preview included. 
+ assert evt["payload"]["context_preview"] == "some context" + + +def test_handoff_redacts_context_when_capture_content_disabled() -> None: + stratix = _RecordingStratix() + adapter = SmolAgentsAdapter( + stratix=stratix, + capture_config=CaptureConfig(capture_content=False), + ) + adapter.connect() + adapter.on_handoff(from_agent="a", to_agent="b", context="secret") + + evt = next(e for e in stratix.events if e["event_type"] == "agent.handoff") + assert evt["payload"]["context_preview"] is None + # Hash still present (it's not content). + assert evt["payload"]["context_hash"] is not None + + +def test_instrument_agent_helper() -> None: + """Top-level convenience function returns a connected adapter.""" + agent = _FakeAgent(name="helper") + adapter = instrument_agent(agent) + assert adapter.is_connected is True + assert adapter.status == AdapterStatus.HEALTHY + + +def test_serialize_for_replay() -> None: + adapter = SmolAgentsAdapter( + stratix=_RecordingStratix(), + capture_config=CaptureConfig.full(), + ) + adapter.connect() + + rt = adapter.serialize_for_replay() + assert rt.framework == "smolagents" + assert "capture_config" in rt.config diff --git a/tests/instrument/adapters/frameworks/test_strands_adapter.py b/tests/instrument/adapters/frameworks/test_strands_adapter.py new file mode 100644 index 0000000..c5eb365 --- /dev/null +++ b/tests/instrument/adapters/frameworks/test_strands_adapter.py @@ -0,0 +1,210 @@ +"""Unit tests for the AWS Strands framework adapter. + +Mocked at the SDK shape level — no real ``strands`` runtime needed. +The adapter wraps ``invoke()`` (and ``__call__``); tests exercise ``invoke``. 
+""" + +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any, Dict, List + +import pytest + +from layerlens.instrument.adapters._base import AdapterStatus, CaptureConfig +from layerlens.instrument.adapters.frameworks.strands import ( + ADAPTER_CLASS, + StrandsAdapter, + instrument_agent, +) + + +class _RecordingStratix: + def __init__(self) -> None: + self.events: List[Dict[str, Any]] = [] + + def emit(self, *args: Any, **kwargs: Any) -> None: + if len(args) == 2 and isinstance(args[0], str): + self.events.append({"event_type": args[0], "payload": args[1]}) + + +class _FakeAgent: + """Minimal duck-typed Strands agent for tests.""" + + def __init__( + self, + name: str = "strands-agent", + tools: Any = None, + model: Any = None, + system_prompt: Any = None, + conversation: Any = None, + result: Any = None, + raises: bool = False, + ) -> None: + self.name = name + self.tools = tools + self.model = model + self.system_prompt = system_prompt + self.conversation = conversation + self._result = result + self._raises = raises + + def invoke(self, prompt: str, **kwargs: Any) -> Any: + if self._raises: + raise RuntimeError("simulated failure") + return ( + self._result + if self._result is not None + else SimpleNamespace(content=f"out:{prompt}", text=None) + ) + + def __call__(self, prompt: str, **kwargs: Any) -> Any: + return self.invoke(prompt, **kwargs) + + +def test_adapter_class_export() -> None: + assert ADAPTER_CLASS is StrandsAdapter + + +def test_lifecycle() -> None: + a = StrandsAdapter() + a.connect() + assert a.status == AdapterStatus.HEALTHY + a.disconnect() + assert a.status == AdapterStatus.DISCONNECTED + + +def test_adapter_info_and_health() -> None: + a = StrandsAdapter() + a.connect() + info = a.get_adapter_info() + assert info.framework == "strands" + assert info.name == "StrandsAdapter" + health = a.health_check() + assert health.framework_name == "strands" + + +def test_instrument_agent_wraps_invoke() -> None: 
+ adapter = StrandsAdapter(stratix=_RecordingStratix(), capture_config=CaptureConfig.full()) + adapter.connect() + agent = _FakeAgent(name="planner") + adapter.instrument_agent(agent) + assert agent.invoke.__name__ == "traced_call" + + adapter.disconnect() + assert agent.invoke.__name__ == "invoke" + + +def test_invoke_emits_input_and_output_events() -> None: + stratix = _RecordingStratix() + adapter = StrandsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + agent = _FakeAgent(name="planner", model="anthropic.claude-v2") + adapter.instrument_agent(agent) + result = agent.invoke("hello") + assert getattr(result, "content", None) == "out:hello" + + types = [e["event_type"] for e in stratix.events] + assert "environment.config" in types + assert "agent.input" in types + assert "agent.output" in types + + out = next(e for e in stratix.events if e["event_type"] == "agent.output") + assert out["payload"]["agent_name"] == "planner" + assert out["payload"]["duration_ns"] >= 0 + + +def test_invoke_extracts_usage_and_emits_cost() -> None: + stratix = _RecordingStratix() + adapter = StrandsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + usage = SimpleNamespace(inputTokens=10, outputTokens=5, totalTokens=15) + result = SimpleNamespace(content="ok", text=None, usage=usage, tool_results=[]) + agent = _FakeAgent(name="planner", model="anthropic.claude-v2", result=result) + adapter.instrument_agent(agent) + agent.invoke("hi") + + types = [e["event_type"] for e in stratix.events] + assert "model.invoke" in types + assert "cost.record" in types + + cost = next(e for e in stratix.events if e["event_type"] == "cost.record") + assert cost["payload"]["tokens_total"] == 15 + + +def test_invoke_failure_emits_output_with_error() -> None: + stratix = _RecordingStratix() + adapter = StrandsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + agent = _FakeAgent(name="failing", raises=True) 
+ adapter.instrument_agent(agent) + + with pytest.raises(RuntimeError): + agent.invoke("bad") + + out = next(e for e in stratix.events if e["event_type"] == "agent.output") + assert "error" in out["payload"] + assert "simulated failure" in out["payload"]["error"] + + +def test_on_tool_use_emits_event() -> None: + stratix = _RecordingStratix() + adapter = StrandsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + adapter.on_tool_use("calc", tool_input={"x": 1}, tool_output=2, latency_ms=12.3) + + evt = next(e for e in stratix.events if e["event_type"] == "tool.call") + assert evt["payload"]["tool_name"] == "calc" + assert evt["payload"]["latency_ms"] == 12.3 + + +def test_capture_config_gates_l3_model_metadata() -> None: + """When l3_model_metadata is disabled, model.invoke does NOT fire (state.change still does).""" + stratix = _RecordingStratix() + cfg = CaptureConfig(l3_model_metadata=False) + adapter = StrandsAdapter(stratix=stratix, capture_config=cfg) + adapter.connect() + + adapter.on_llm_call(model="claude", provider="bedrock") + adapter.on_run_start(agent_name="a", input_data="x") + adapter.on_run_end(agent_name="a", output="y") + + types = [e["event_type"] for e in stratix.events] + assert "model.invoke" not in types + # state.change is cross-cutting — always fires. 
+ assert "agent.state.change" in types + + +def test_environment_config_emits_once_per_agent() -> None: + stratix = _RecordingStratix() + adapter = StrandsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + agent = _FakeAgent(name="a1", tools=[SimpleNamespace(name="search")], model="claude") + adapter.instrument_agent(agent) + adapter.instrument_agent(agent) + + configs = [e for e in stratix.events if e["event_type"] == "environment.config"] + assert len(configs) == 1 + assert configs[0]["payload"]["tools"] == ["search"] + + +def test_instrument_agent_helper() -> None: + agent = _FakeAgent(name="helper") + adapter = instrument_agent(agent) + assert adapter.is_connected is True + assert adapter.status == AdapterStatus.HEALTHY + + +def test_serialize_for_replay() -> None: + adapter = StrandsAdapter( + stratix=_RecordingStratix(), + capture_config=CaptureConfig.full(), + ) + adapter.connect() + rt = adapter.serialize_for_replay() + assert rt.framework == "strands" + assert rt.adapter_name == "StrandsAdapter" + assert "capture_config" in rt.config