diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 99166dd..7d2c039 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -19,15 +19,8 @@ * [Traces](api-reference/traces.md) * [Trace Evaluations](api-reference/trace-evaluations.md) * [Judge Optimizations](api-reference/judge-optimizations.md) - * [Instrumentation](api-reference/instrumentation.md) * [Error Handling](api-reference/errors.md) -## Instrumentation -* [Overview](instrumentation/README.md) - * [Quick Start](instrumentation/quickstart.md) - * [LLM Providers](instrumentation/providers.md) - * [Agent Frameworks](instrumentation/frameworks.md) - ## CLI * [Getting Started](cli/getting-started.md) * [Command Reference](cli/commands.md) diff --git a/docs/api-reference/README.md b/docs/api-reference/README.md index f5461d7..d91fcaa 100644 --- a/docs/api-reference/README.md +++ b/docs/api-reference/README.md @@ -1,18 +1,2 @@ -# API Reference - -Detailed documentation for every resource and method in the LayerLens Stratix Python SDK. - -## Resources - -- [Client Configuration](client.md) — `Stratix` and `AsyncStratix` setup -- [Public Client](public-client.md) — Public models, benchmarks, evaluations -- [Evaluations](evaluations.md) — Create and manage evaluations -- [Results](results.md) — Retrieve evaluation results -- [Models & Benchmarks](models-benchmarks.md) — Model and benchmark management -- [Judges](judges.md) — Evaluation judge CRUD -- [Traces](traces.md) — Upload and manage trace data -- [Trace Evaluations](trace-evaluations.md) — Run judges against traces -- [Judge Optimizations](judge-optimizations.md) — Optimize judge configurations -- [Instrumentation](instrumentation.md) — Tracing primitives and adapters -- [Error Handling](errors.md) — Exception hierarchy and handling patterns +# api-reference diff --git a/docs/api-reference/instrumentation.md b/docs/api-reference/instrumentation.md deleted file mode 100644 index a5f4de3..0000000 --- a/docs/api-reference/instrumentation.md +++ /dev/null @@ -1,249 +0,0 @@ -# Instrumentation - -The `layerlens.instrument` module provides tracing primitives and provider/framework adapters for automatic LLM observability. - -## Overview - -### Using Synchronous Client - -```python -from layerlens import Stratix -from layerlens.instrument import trace, span - -client = Stratix() - -@trace(client) -def my_agent(query: str): - with span("process", kind="internal") as s: - result = do_work(query) - s.output = result - return result - -my_agent("Hello") -``` - -### Using Async Client - -```python -import asyncio -from layerlens import AsyncStratix -from layerlens.instrument import trace, span - -client = AsyncStratix() - -@trace(client) -async def my_agent(query: str): - with span("process") as s: - result = await do_work(query) - s.output = result - return result - -asyncio.run(my_agent("Hello")) -``` - -## Core API - -### `trace(client, name=None, metadata=None)` - -Decorator that creates a root span and uploads the trace on function completion. - -#### Parameters - -| Parameter | Type | Required | Description | -| --------- | ---- | -------- | ----------- | -| `client` | `Stratix \| AsyncStratix` | Yes | SDK client used to upload the trace | -| `name` | `str \| None` | No | Override span name (defaults to function name) | -| `metadata` | `dict \| None` | No | Arbitrary metadata attached to the root span | - -#### Behavior - -- Creates a `TraceRecorder` and root `SpanData` -- Sets `_current_recorder` and `_current_span` context variables -- Captures function arguments as `input` -- Captures return value as `output` -- On error: sets `status="error"` and records the error message -- On completion: serializes span tree to a temp JSON file, calls `client.traces.upload()`, deletes the temp file -- Resets context variables in a `finally` block -- Works with both sync and async functions - -#### Example - -```python -@trace(client) -def my_agent(query: str): - return process(query) - -@trace(client, name="custom-name") -async def my_async_agent(query: str): - return await process(query) -``` - -### `span(name, kind="internal", input=None, metadata=None)` - -Context manager that creates a child span under the current active span. - -#### Parameters - -| Parameter | Type | Required | Description | -| --------- | ---- | -------- | ----------- | -| `name` | `str` | Yes | Display name for the span | -| `kind` | `str` | No | Span type: `"internal"`, `"llm"`, `"retriever"`, `"tool"`, `"chain"` | -| `input` | `Any` | No | Input data for the span | -| `metadata` | `dict \| None` | No | Arbitrary metadata attached to the span | - -#### Returns - -Returns a `SpanData` object (or a no-op dummy if no trace is active). - -#### Behavior - -- If called outside a `@trace` context, returns a no-op context manager -- Creates a `SpanData` with the given name and kind -- Appends the span to the current parent's `children` list -- Sets `_current_span` to the new span for the duration of the `with` block -- Restores the previous span on exit -- On error inside the block: sets `status="error"`, records error, re-raises - -#### Example - -```python -@trace(client) -def my_agent(query: str): - with span("step-1", kind="tool") as s: - s.input = query - result = tool_call(query) - s.output = result - s.metadata["tool_version"] = "1.0" - return result -``` - -### `SpanData` - -Dataclass representing a single span in the trace tree. - -#### Properties - -| Property | Type | Default | Description | -| -------- | ---- | ------- | ----------- | -| `name` | `str` | (required) | Span display name | -| `span_id` | `str` | auto-generated | Unique identifier (UUID hex, 16 chars) | -| `parent_id` | `str \| None` | `None` | Parent span ID | -| `start_time` | `float` | `time.time()` | Unix timestamp | -| `end_time` | `float \| None` | `None` | Unix timestamp when finished | -| `status` | `str` | `"ok"` | `"ok"` or `"error"` | -| `kind` | `str` | `"internal"` | Span type | -| `input` | `Any` | `None` | Input data | -| `output` | `Any` | `None` | Output data | -| `error` | `str \| None` | `None` | Error message | -| `metadata` | `dict` | `{}` | Arbitrary key-value metadata | -| `children` | `list[SpanData]` | `[]` | Child spans | - -#### Methods - -##### `finish(error=None)` - -Sets `end_time` to the current time. If `error` is provided, sets `status="error"` and records the error message. - -##### `to_dict()` - -Serializes the span tree to a JSON-compatible dictionary, recursively including all children. - -### `TraceRecorder` - -Collects the span tree and handles flushing to the LayerLens API. - -#### Methods - -##### `flush()` - -Serializes the root span tree to a temporary JSON file, calls `client.traces.upload(path)`, and deletes the temp file. Used by the `@trace` decorator for sync functions. - -##### `async_flush()` - -Async version of `flush()`. Used by the `@trace` decorator for async functions. - -## Provider Adapters - -### `instrument_openai(client)` - -Monkey-patches `client.chat.completions.create` on an OpenAI client instance. - -```python -from layerlens.instrument.adapters.providers.openai import instrument_openai - -instrument_openai(openai_client) -``` - -#### Classes - -| Class | Description | -| ----- | ----------- | -| `OpenAIProvider` | Provider adapter with `connect_client()` / `disconnect()` | - -### `instrument_anthropic(client)` - -Monkey-patches `client.messages.create` on an Anthropic client instance. - -```python -from layerlens.instrument.adapters.providers.anthropic import instrument_anthropic - -instrument_anthropic(anthropic_client) -``` - -#### Classes - -| Class | Description | -| ----- | ----------- | -| `AnthropicProvider` | Provider adapter with `connect_client()` / `disconnect()` | - -### `instrument_litellm()` - -Monkey-patches `litellm.completion` and `litellm.acompletion` at the module level. - -```python -from layerlens.instrument.adapters.providers.litellm import instrument_litellm, uninstrument_litellm - -instrument_litellm() # Patch -uninstrument_litellm() # Restore -``` - -## Framework Adapters - -### `LangChainCallbackHandler(client)` - -LangChain `BaseCallbackHandler` implementation that builds a span tree from chain/LLM/tool/retriever events. - -```python -from layerlens.instrument.adapters.frameworks.langchain import LangChainCallbackHandler - -handler = LangChainCallbackHandler(client) -chain.invoke(input, config={"callbacks": [handler]}) -``` - -#### Supported Callbacks - -| Callback | Span Kind | -| -------- | --------- | -| `on_chain_start` / `on_chain_end` / `on_chain_error` | `chain` | -| `on_llm_start` / `on_llm_end` / `on_llm_error` | `llm` | -| `on_chat_model_start` | `llm` | -| `on_tool_start` / `on_tool_end` / `on_tool_error` | `tool` | -| `on_retriever_start` / `on_retriever_end` / `on_retriever_error` | `retriever` | - -### `LangGraphCallbackHandler(client)` - -Extends `LangChainCallbackHandler` with LangGraph node name extraction. - -```python -from layerlens.instrument.adapters.frameworks.langgraph import LangGraphCallbackHandler - -handler = LangGraphCallbackHandler(client) -graph.invoke(input, config={"callbacks": [handler]}) -``` - -Extracts node names from `metadata.langgraph_node` or plain tags (skipping internal `graph:step:*` tags). - -## Next Steps - -- [Instrumentation Guide](../instrumentation/README.md) for usage patterns and examples -- [Traces API Reference](traces.md) for the underlying upload mechanism diff --git a/docs/instrumentation/README.md b/docs/instrumentation/README.md deleted file mode 100644 index 4d37402..0000000 --- a/docs/instrumentation/README.md +++ /dev/null @@ -1,75 +0,0 @@ -# Instrumentation - -The `layerlens.instrument` module provides automatic tracing for LLM applications. It captures execution spans — function calls, LLM requests, tool invocations — as a tree structure and uploads them as traces to LayerLens for evaluation. - -## How It Works - -1. **`@trace(client)`** wraps a function as the root of a trace. When the function completes, the span tree is serialized to JSON and uploaded via `client.traces.upload()`. -2. **`span()`** creates child spans inside a traced function. Spans nest automatically using Python's `contextvars`. -3. **Provider adapters** (OpenAI, Anthropic, LiteLLM) monkey-patch SDK methods to create LLM spans automatically — no code changes needed inside your functions. -4. **Framework adapters** (LangChain, LangGraph) plug in as callback handlers to capture chain/tool/retriever spans from agent frameworks. - -## Quick Example - -```python -from layerlens import Stratix -from layerlens.instrument import trace, span -from layerlens.instrument.adapters.providers.openai import instrument_openai - -client = Stratix() - -# Auto-instrument OpenAI — all chat.completions.create calls -# inside a @trace will generate LLM spans automatically -import openai -openai_client = openai.OpenAI() -instrument_openai(openai_client) - -@trace(client) -def my_agent(question: str): - with span("retrieve", kind="retriever") as s: - docs = search(question) - s.output = docs - - response = openai_client.chat.completions.create( - model="gpt-4o", - messages=[ - {"role": "system", "content": f"Context: {docs}"}, - {"role": "user", "content": question}, - ], - ) - return response.choices[0].message.content - -my_agent("What is retrieval-augmented generation?") -``` - -This produces a trace with three spans: - -``` -my_agent (root, kind=internal) -├── retrieve (kind=retriever) -└── openai.chat.completions.create (kind=llm, auto-captured) -``` - -## Guides - -- [Quick Start](quickstart.md) — `@trace`, `span()`, and manual instrumentation -- [LLM Providers](providers.md) — Auto-instrument OpenAI, Anthropic, and LiteLLM -- [Agent Frameworks](frameworks.md) — LangChain and LangGraph callback handlers - -## Key Concepts - -| Concept | Description | -| ------- | ----------- | -| **Trace** | A complete execution tree, rooted at a `@trace`-decorated function | -| **Span** | A single unit of work within a trace (function call, LLM request, tool use) | -| **Kind** | Span type: `internal`, `llm`, `retriever`, `tool`, `chain` | -| **Provider adapter** | Monkey-patches an LLM SDK to emit `llm` spans automatically | -| **Framework adapter** | Callback handler that captures spans from agent frameworks | - -## No-Op Safety - -All instrumentation is no-op safe: - -- Provider adapters pass through to the original SDK method when called outside a `@trace` context -- `span()` returns a dummy context manager when called outside a `@trace` context -- No performance overhead when instrumentation is not active diff --git a/docs/instrumentation/frameworks.md b/docs/instrumentation/frameworks.md deleted file mode 100644 index 6528ca9..0000000 --- a/docs/instrumentation/frameworks.md +++ /dev/null @@ -1,170 +0,0 @@ -# Agent Framework Instrumentation - -Framework adapters plug into agent frameworks as callback handlers. Unlike provider adapters (which monkey-patch SDK methods), framework adapters receive events from the framework and build span trees from them. - -## Supported Frameworks - -| Framework | Adapter | Integration | -| --------- | ------- | ----------- | -| LangChain | `LangChainCallbackHandler` | Pass as a callback handler | -| LangGraph | `LangGraphCallbackHandler` | Pass as a callback handler | - -## LangChain - -### Installation - -```bash -pip install layerlens[langchain] -``` - -### Usage - -```python -from layerlens import Stratix -from layerlens.instrument.adapters.frameworks.langchain import LangChainCallbackHandler - -client = Stratix() -handler = LangChainCallbackHandler(client) - -# Pass the handler to any LangChain runnable -chain = prompt | llm | parser -result = chain.invoke( - {"question": "What is RAG?"}, - config={"callbacks": [handler]}, -) -``` - -The handler automatically captures: - -| Event | Span Kind | Captured Data | -| ----- | --------- | ------------- | -| Chain start/end | `chain` | Chain name, input, output | -| LLM start/end | `llm` | Model name, prompts, response, token usage | -| Tool start/end | `tool` | Tool name, input query, output | -| Retriever start/end | `retriever` | Query, retrieved documents | - -### How It Works - -LangChain provides `run_id` (UUID) and `parent_run_id` for every callback event. The handler uses these to build a span tree: - -1. `on_chain_start` — creates a root span (or child span if `parent_run_id` exists) -2. `on_llm_start` / `on_tool_start` / `on_retriever_start` — creates child spans -3. `on_*_end` — finishes the span with output data -4. `on_*_error` — finishes the span with `status="error"` -5. When the root chain ends — the full span tree is flushed as a trace - -### Example: RAG Chain - -```python -from langchain_core.prompts import ChatPromptTemplate -from langchain_openai import ChatOpenAI -from langchain_core.output_parsers import StrOutputParser - -from layerlens import Stratix -from layerlens.instrument.adapters.frameworks.langchain import LangChainCallbackHandler - -client = Stratix() -handler = LangChainCallbackHandler(client) - -prompt = ChatPromptTemplate.from_template("Answer: {question}") -llm = ChatOpenAI(model="gpt-4o") -chain = prompt | llm | StrOutputParser() - -result = chain.invoke( - {"question": "What is retrieval-augmented generation?"}, - config={"callbacks": [handler]}, -) -``` - -This produces a trace like: - -``` -RunnableSequence (kind=chain) -├── ChatPromptTemplate (kind=chain) -├── ChatOpenAI (kind=llm) -│ metadata: {model: "gpt-4o", usage: {total_tokens: 150}} -└── StrOutputParser (kind=chain) -``` - -### Error Handling - -Chain and LLM errors are captured automatically: - -```python -handler = LangChainCallbackHandler(client) - -try: - chain.invoke(input, config={"callbacks": [handler]}) -except Exception: - pass # Trace still uploads with error spans -``` - -## LangGraph - -The LangGraph adapter extends the LangChain handler with graph node awareness. - -### Installation - -```bash -pip install layerlens[langchain] -``` - -### Usage - -```python -from layerlens import Stratix -from layerlens.instrument.adapters.frameworks.langgraph import LangGraphCallbackHandler - -client = Stratix() -handler = LangGraphCallbackHandler(client) - -# Use with a LangGraph compiled graph -result = graph.invoke( - {"messages": [{"role": "user", "content": "Hello"}]}, - config={"callbacks": [handler]}, -) -``` - -### Node Name Extraction - -LangGraph attaches metadata to chain events that identifies which graph node is executing. The adapter extracts this to produce cleaner span names: - -- Checks `metadata.langgraph_node` for the node name (highest priority) -- Falls back to the first plain tag (no colon), skipping internal `graph:step:*` tags -- Uses the chain name from `serialized` if neither is present - -This means your traces show meaningful names like `agent`, `tools`, `retrieve` instead of generic `RunnableSequence` spans. - -### Example Trace Output - -``` -StateGraph (kind=chain) -├── agent (kind=chain, node) -│ └── ChatOpenAI (kind=llm) -├── tools (kind=chain, node) -│ └── search (kind=tool) -└── agent (kind=chain, node) - └── ChatOpenAI (kind=llm) -``` - -## Framework vs Provider Adapters - -You can use both together. For example, use the LangChain callback handler for span tree structure, and a provider adapter to enrich LLM spans with token usage: - -```python -from layerlens.instrument.adapters.providers.openai import instrument_openai -from layerlens.instrument.adapters.frameworks.langchain import LangChainCallbackHandler - -# Both can be active simultaneously -instrument_openai(openai_client) -handler = LangChainCallbackHandler(client) - -chain.invoke(input, config={"callbacks": [handler]}) -``` - -Note: When using both, you may get duplicate LLM spans (one from the provider adapter, one from the framework callback). In most cases, using just the framework adapter is sufficient since it captures LLM events through callbacks. - -## Next Steps - -- [LLM Providers](providers.md) — Auto-instrument OpenAI, Anthropic, and LiteLLM -- [Quick Start](quickstart.md) — Manual instrumentation with `@trace` and `span()` diff --git a/docs/instrumentation/providers.md b/docs/instrumentation/providers.md deleted file mode 100644 index 4f34d0f..0000000 --- a/docs/instrumentation/providers.md +++ /dev/null @@ -1,222 +0,0 @@ -# LLM Provider Instrumentation - -Provider adapters automatically capture LLM spans when SDK methods are called inside a `@trace` context. No changes to your LLM calling code are needed. - -## Supported Providers - -| Provider | Adapter | Wraps | -| -------- | ------- | ----- | -| OpenAI | `instrument_openai(client)` | `client.chat.completions.create` | -| Anthropic | `instrument_anthropic(client)` | `client.messages.create` | -| LiteLLM | `instrument_litellm()` | `litellm.completion`, `litellm.acompletion` | - -LiteLLM provides a unified interface to 100+ providers (Azure, Google, Cohere, Mistral, Bedrock, etc.), so `instrument_litellm()` covers all of them. - -## OpenAI - -### Installation - -```bash -pip install layerlens[openai] -``` - -### Usage - -```python -import openai -from layerlens import Stratix -from layerlens.instrument import trace -from layerlens.instrument.adapters.providers.openai import instrument_openai - -client = Stratix() -openai_client = openai.OpenAI() - -# Instrument the client instance -instrument_openai(openai_client) - -@trace(client) -def my_agent(question: str): - response = openai_client.chat.completions.create( - model="gpt-4o", - messages=[{"role": "user", "content": question}], - ) - return response.choices[0].message.content - -my_agent("What is Python?") -``` - -The adapter captures: - -- **Span name**: `openai.chat.completions.create` -- **Kind**: `llm` -- **Input**: Messages array -- **Output**: Assistant message content -- **Metadata**: `model`, `temperature`, `max_tokens`, `usage` (prompt/completion/total tokens) - -### Disconnect - -```python -from layerlens.instrument.adapters.providers.openai import OpenAIProvider - -provider = OpenAIProvider() -provider.connect_client(openai_client) - -# Later, restore original methods: -provider.disconnect() -``` - -## Anthropic - -### Installation - -```bash -pip install layerlens[anthropic] -``` - -### Usage - -```python -import anthropic -from layerlens import Stratix -from layerlens.instrument import trace -from layerlens.instrument.adapters.providers.anthropic import instrument_anthropic - -client = Stratix() -anthropic_client = anthropic.Anthropic() - -instrument_anthropic(anthropic_client) - -@trace(client) -def my_agent(question: str): - response = anthropic_client.messages.create( - model="claude-sonnet-4-20250514", - max_tokens=1024, - messages=[{"role": "user", "content": question}], - ) - return response.content[0].text - -my_agent("What is Python?") -``` - -The adapter captures: - -- **Span name**: `anthropic.messages.create` -- **Kind**: `llm` -- **Input**: Messages array -- **Output**: Response content blocks -- **Metadata**: `model`, `usage` (input/output tokens), `stop_reason` - -### Disconnect - -```python -from layerlens.instrument.adapters.providers.anthropic import AnthropicProvider - -provider = AnthropicProvider() -provider.connect_client(anthropic_client) -provider.disconnect() -``` - -## LiteLLM - -LiteLLM works differently from OpenAI/Anthropic — it patches module-level functions rather than client instances. - -### Installation - -```bash -pip install layerlens[litellm] -``` - -### Usage - -```python -import litellm -from layerlens import Stratix -from layerlens.instrument import trace -from layerlens.instrument.adapters.providers.litellm import instrument_litellm - -client = Stratix() - -# Patch litellm module (call once at startup) -instrument_litellm() - -@trace(client) -def my_agent(question: str): - response = litellm.completion( - model="gpt-4o", - messages=[{"role": "user", "content": question}], - ) - return response.choices[0].message.content - -my_agent("What is Python?") -``` - -Since LiteLLM supports 100+ providers, this single call instruments all of them: - -```python -instrument_litellm() - -@trace(client) -def multi_provider(): - # All of these generate LLM spans - litellm.completion(model="gpt-4o", messages=[...]) - litellm.completion(model="claude-sonnet-4-20250514", messages=[...]) - litellm.completion(model="gemini/gemini-pro", messages=[...]) -``` - -### Uninstrument - -```python -from layerlens.instrument.adapters.providers.litellm import uninstrument_litellm - -uninstrument_litellm() -``` - -## Captured Metadata - -All provider adapters capture these request parameters when present: - -| Parameter | Description | -| --------- | ----------- | -| `model` | Model name/ID | -| `temperature` | Sampling temperature | -| `max_tokens` | Maximum response tokens | -| `top_p` | Nucleus sampling parameter | -| `frequency_penalty` | Frequency penalty | -| `presence_penalty` | Presence penalty | -| `response_format` | Structured output format | - -Response metadata varies by provider but always includes token usage when available. - -## Passthrough Behavior - -When called **outside** a `@trace` context, all adapters pass through to the original SDK method with zero overhead. This means you can instrument at startup and leave it on — it only activates when a trace is running. - -```python -instrument_openai(openai_client) - -# No active trace — passes through directly to OpenAI -openai_client.chat.completions.create(model="gpt-4o", messages=[...]) - -@trace(client) -def traced_call(): - # Active trace — generates an LLM span - openai_client.chat.completions.create(model="gpt-4o", messages=[...]) -``` - -## Error Handling - -If an LLM call raises an exception inside a `@trace`, the adapter records the error on the span and re-raises the exception: - -```python -@trace(client) -def my_agent(): - try: - openai_client.chat.completions.create(model="gpt-4o", messages=[...]) - except openai.APIError: - pass # Span is recorded with status="error" -``` - -## Next Steps - -- [Agent Frameworks](frameworks.md) — LangChain and LangGraph callback handlers -- [Quick Start](quickstart.md) — Manual instrumentation with `@trace` and `span()` diff --git a/docs/instrumentation/quickstart.md b/docs/instrumentation/quickstart.md deleted file mode 100644 index 9c954ad..0000000 --- a/docs/instrumentation/quickstart.md +++ /dev/null @@ -1,171 +0,0 @@ -# Instrumentation Quick Start - -This guide covers the core instrumentation API: the `@trace` decorator and the `span()` context manager. - -## Installation - -The instrumentation module is included in the base SDK — no extra dependencies needed: - -```bash -pip install layerlens --extra-index-url https://sdk.layerlens.ai/package -``` - -Provider adapters require their respective SDK as an optional dependency: - -```bash -pip install layerlens[openai] # OpenAI -pip install layerlens[anthropic] # Anthropic -pip install layerlens[litellm] # LiteLLM (100+ providers) -pip install layerlens[langchain] # LangChain / LangGraph -``` - -## The `@trace` Decorator - -`@trace(client)` marks a function as the root of a trace. When the function returns (or raises), the complete span tree is serialized and uploaded. - -### Using Synchronous Client - -```python -from layerlens import Stratix -from layerlens.instrument import trace - -client = Stratix() - -@trace(client) -def my_agent(query: str): - # Everything inside here is traced - return process(query) - -my_agent("Hello") -# → Trace uploaded automatically on return -``` - -### Using Async Client - -```python -import asyncio -from layerlens import AsyncStratix -from layerlens.instrument import trace - -client = AsyncStratix() - -@trace(client) -async def my_agent(query: str): - return await process(query) - -asyncio.run(my_agent("Hello")) -``` - -### Custom Trace Names - -By default the trace is named after the function. Override with the `name` parameter: - -```python -@trace(client, name="qa-pipeline") -def run_pipeline(query: str): - ... -``` - -## The `span()` Context Manager - -Use `span()` inside a traced function to create child spans: - -```python -from layerlens.instrument import trace, span - -@trace(client) -def my_agent(query: str): - with span("retrieve", kind="retriever") as s: - docs = search(query) - s.output = docs - - with span("generate", kind="llm") as s: - answer = call_llm(query, docs) - s.output = answer - - return answer -``` - -### Span Parameters - -| Parameter | Type | Default | Description | -| --------- | ---- | ------- | ----------- | -| `name` | `str` | (required) | Display name for the span | -| `kind` | `str` | `"internal"` | Span type: `internal`, `llm`, `retriever`, `tool`, `chain` | -| `input` | `Any` | `None` | Input data for the span | -| `metadata` | `dict \| None` | `None` | Arbitrary metadata attached to the span | - -### Setting Span Data - -Inside the `with` block, you can set properties on the span object: - -```python -with span("my-step", kind="tool") as s: - s.input = {"query": query} - result = do_work(query) - s.output = result - s.metadata["custom_key"] = "custom_value" -``` - -### Nesting Spans - -Spans nest automatically — the parent-child relationship is tracked via `contextvars`: - -```python -@trace(client) -def my_agent(query: str): - with span("outer") as outer: - with span("inner") as inner: - # inner is a child of outer - ... - with span("sibling"): - # sibling is a child of root, not outer - ... -``` - -This produces: - -``` -my_agent (root) -├── outer -│ └── inner -└── sibling -``` - -## Span Data Model - -Each span captures: - -| Field | Type | Description | -| ----- | ---- | ----------- | -| `name` | `str` | Span name | -| `span_id` | `str` | Unique identifier (auto-generated) | -| `parent_id` | `str \| None` | Parent span ID | -| `start_time` | `float` | Unix timestamp when span started | -| `end_time` | `float \| None` | Unix timestamp when span ended | -| `status` | `str` | `"ok"` or `"error"` | -| `kind` | `str` | `"internal"`, `"llm"`, `"retriever"`, `"tool"`, `"chain"` | -| `input` | `Any` | Input data (set manually or captured by adapters) | -| `output` | `Any` | Output data | -| `error` | `str \| None` | Error message if status is `"error"` | -| `metadata` | `dict` | Arbitrary metadata (model name, token usage, etc.) | -| `children` | `list` | Child spans | - -## Error Handling - -Errors are captured automatically. If an exception is raised inside a traced function or span, the span's status is set to `"error"` and the error message is recorded. The exception still propagates normally. - -```python -@trace(client) -def my_agent(query: str): - with span("risky-step") as s: - raise ValueError("something broke") - # → span status="error", error="something broke" - # → trace still uploads with the error recorded - # → ValueError propagates to caller -``` - -## Next Steps - -- [LLM Providers](providers.md) — Auto-instrument OpenAI, Anthropic, and LiteLLM -- [Agent Frameworks](frameworks.md) — LangChain and LangGraph callback handlers diff --git a/examples/instrument_langchain.py b/examples/instrument_langchain.py deleted file mode 100644 index e19a515..0000000 --- a/examples/instrument_langchain.py +++ /dev/null @@ -1,30 +0,0 @@ -"""Example: Instrument a LangChain chain with automatic span capture. - -Requires: - pip install layerlens[langchain] langchain-openai - export LAYERLENS_STRATIX_API_KEY="your-api-key" - export OPENAI_API_KEY="your-openai-key" -""" - -from langchain_openai import ChatOpenAI -from langchain_core.prompts import ChatPromptTemplate -from langchain_core.output_parsers import StrOutputParser - -from layerlens import Stratix -from layerlens.instrument.adapters.frameworks.langchain import LangChainCallbackHandler - -client = Stratix() -handler = LangChainCallbackHandler(client) - -# Build a simple chain -prompt = ChatPromptTemplate.from_template("Answer this question concisely: {question}") -llm = ChatOpenAI(model="gpt-4o") -chain = prompt | llm | StrOutputParser() - -if __name__ == "__main__": - # The callback handler captures the full chain execution as a trace - result = chain.invoke( - {"question": "What is retrieval-augmented generation?"}, - config={"callbacks": [handler]}, - ) - print(f"Answer: {result}") diff --git a/examples/instrument_openai.py b/examples/instrument_openai.py deleted file mode 100644 index 92118a1..0000000 --- a/examples/instrument_openai.py +++ /dev/null @@ -1,46 +0,0 @@ -"""Example: Instrument OpenAI with automatic LLM span capture. - -Requires: - pip install layerlens[openai] - export LAYERLENS_STRATIX_API_KEY="your-api-key" - export OPENAI_API_KEY="your-openai-key" -""" - -import openai -from layerlens import Stratix -from layerlens.instrument import span, trace -from layerlens.instrument.adapters.providers.openai import instrument_openai - -client = Stratix() -openai_client = openai.OpenAI() - -# Instrument the OpenAI client — all chat.completions.create calls -# inside a @trace will generate LLM spans automatically. -instrument_openai(openai_client) - - -@trace(client) -def qa_agent(question: str): - """Simple Q&A agent with a retrieval step and an LLM call.""" - - # Manual span for a retrieval step - with span("retrieve", kind="retriever") as s: - # In a real app, this would query a vector database - docs = ["Python is a programming language.", "It was created by Guido van Rossum."] - s.output = docs - - # The OpenAI call is automatically instrumented — no span() needed - response = openai_client.chat.completions.create( - model="gpt-4o", - messages=[ - {"role": "system", "content": f"Answer using this context: {docs}"}, - {"role": "user", "content": question}, - ], - ) - - return response.choices[0].message.content - - -if __name__ == "__main__": - answer = qa_agent("What is Python and who created it?") - print(f"Answer: {answer}") diff --git a/pyproject.toml b/pyproject.toml index 16cb9a1..fc8baa6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,10 +33,6 @@ classifiers = [ [project.optional-dependencies] cli = ["click>=8.0.0"] -openai = ["openai>=1.0.0"] -anthropic = ["anthropic>=0.18.0"] -langchain = ["langchain-core>=0.1.0"] -litellm = ["litellm>=1.0.0"] [project.urls] Homepage = "https://github.com/LayerLens/stratix-python" @@ -140,11 +136,8 @@ known-first-party = ["openai", "tests"] "bin/**.py" = ["T201", "T203"] "scripts/**.py" = ["T201", "T203"] "tests/**.py" = ["T201", "T203"] -"tests/instrument/**.py" = ["T201", "T203", "ARG"] "examples/**.py" = ["T201", "T203"] "src/layerlens/cli/**" = ["T201", "T203"] -"src/layerlens/instrument/adapters/frameworks/langchain.py" = ["ARG002"] -"src/layerlens/instrument/adapters/frameworks/langgraph.py" = ["ARG002"] [tool.pyright] include = ["src", "tests"] diff --git a/requirements-dev.lock b/requirements-dev.lock index 55fc02e..81a18f2 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -10,28 +10,10 @@ # universal: false -e file:. -aiohappyeyeballs==2.6.1 - # via aiohttp -aiohttp==3.13.3 - # via litellm -aiosignal==1.4.0 - # via aiohttp -annotated-doc==0.0.4 - # via typer annotated-types==0.7.0 # via pydantic -anthropic==0.86.0 - # via layerlens anyio==4.9.0 - # via anthropic # via httpx - # via openai -async-timeout==5.0.1 - # via aiohttp -attrs==26.1.0 - # via aiohttp - # via jsonschema - # via referencing backports-tarfile==1.2.0 # via jaraco-context build==1.3.0 @@ -45,57 +27,30 @@ charset-normalizer==3.4.3 # via requests click==8.1.8 # via layerlens - # via litellm - # via typer coverage==7.10.2 # via pytest-cov cryptography==46.0.5 # via secretstorage -distro==1.9.0 - # via anthropic - # via openai -docstring-parser==0.17.0 - # via anthropic docutils==0.22 # via readme-renderer exceptiongroup==1.3.0 # via anyio # via pytest -fastuuid==0.14.0 - # via litellm -filelock==3.19.1 - # via huggingface-hub -frozenlist==1.8.0 - # via aiohttp - # via aiosignal -fsspec==2025.10.0 - # via huggingface-hub h11==0.16.0 # via httpcore -hf-xet==1.4.2 - # via huggingface-hub httpcore==1.0.9 # via httpx httpx==0.28.1 - # via anthropic - # via huggingface-hub - # via langsmith # via layerlens - # via litellm - # via openai -huggingface-hub==1.7.2 - # via tokenizers id==1.5.0 # via twine idna==3.10 # via anyio # via httpx # via requests - # via yarl importlib-metadata==8.7.0 # via build # via keyring - # via litellm # via twine iniconfig==2.1.0 # via pytest @@ -108,39 +63,15 @@ jaraco-functools==4.2.1 jeepney==0.9.0 # via keyring # via secretstorage -jinja2==3.1.6 - # via litellm -jiter==0.13.0 - # via anthropic - # via openai -jsonpatch==1.33 - # via langchain-core -jsonpointer==3.0.0 - # via jsonpatch -jsonschema==4.25.1 - # via litellm -jsonschema-specifications==2025.9.1 - # via jsonschema keyring==25.6.0 # via twine -langchain-core==0.3.83 - # via layerlens -langsmith==0.4.37 - # via langchain-core -litellm==1.82.6 - # via layerlens markdown-it-py==3.0.0 # via rich -markupsafe==3.0.3 - # via jinja2 mdurl==0.1.2 # via markdown-it-py more-itertools==10.7.0 # via jaraco-classes # via jaraco-functools -multidict==6.7.1 - # via aiohttp - # via yarl mypy==1.17.0 mypy-extensions==1.1.0 # via mypy @@ -148,16 +79,8 @@ nh3==0.3.0 # via readme-renderer nodeenv==1.9.1 # via pyright -openai==2.29.0 - # via layerlens - # via litellm -orjson==3.11.5 - # via langsmith packaging==25.0 # via build - # via huggingface-hub - # via langchain-core - # via langsmith # via pytest # via twine pathspec==0.12.1 @@ -165,18 +88,10 @@ pathspec==0.12.1 pluggy==1.6.0 # via pytest # via pytest-cov -propcache==0.4.1 - # via aiohttp - # via yarl pycparser==2.23 # via cffi pydantic==2.11.7 - # via anthropic - # via langchain-core - # via langsmith # via layerlens - # via litellm - # via openai pydantic-core==2.33.2 # via pydantic pygments==2.19.2 @@ -189,87 +104,42 @@ pyright==1.1.399 pytest==8.4.1 # via pytest-cov pytest-cov==6.2.1 -python-dotenv==1.2.1 - # via litellm -pyyaml==6.0.3 - # via huggingface-hub - # via langchain-core readme-renderer==44.0 # via twine -referencing==0.36.2 - # via jsonschema - # via jsonschema-specifications -regex==2026.1.15 - # via tiktoken requests==2.32.5 # via id - # via langsmith # via requests-toolbelt - # via tiktoken # via twine requests-toolbelt==1.0.0 - # via langsmith # via twine rfc3986==2.0.0 # via twine rich==14.1.0 # via twine - # via typer -rpds-py==0.27.1 - # via jsonschema - # via referencing ruff==0.12.7 secretstorage==3.3.3 # via keyring -shellingham==1.5.4 - # via typer sniffio==1.3.1 - # via anthropic # via anyio - # via openai -tenacity==9.1.2 - # via langchain-core -tiktoken==0.12.0 - # via litellm -tokenizers==0.22.2 - # via litellm tomli==2.2.1 # via build # via coverage # via mypy # via pytest -tqdm==4.67.3 - # via huggingface-hub - # via openai twine==6.1.0 -typer==0.23.2 - # via huggingface-hub typing-extensions==4.14.1 - # via aiosignal - # via anthropic # via anyio # via cryptography # via exceptiongroup - # via huggingface-hub - # via langchain-core - # via multidict # via mypy - # via openai # via pydantic # via pydantic-core # via pyright - # via referencing # via typing-inspection typing-inspection==0.4.1 # via pydantic urllib3==2.5.0 # via requests # via twine -uuid-utils==0.14.1 - # via langchain-core -yarl==1.22.0 - # via aiohttp zipp==3.23.0 # via importlib-metadata -zstandard==0.25.0 - # via langsmith diff --git a/requirements.lock b/requirements.lock index 5b9bb3d..1a890c9 100644 --- a/requirements.lock +++ b/requirements.lock @@ -10,187 +10,37 @@ # universal: false -e file:. -aiohappyeyeballs==2.6.1 - # via aiohttp -aiohttp==3.13.3 - # via litellm -aiosignal==1.4.0 - # via aiohttp -annotated-doc==0.0.4 - # via typer annotated-types==0.7.0 # via pydantic -anthropic==0.86.0 - # via layerlens anyio==4.9.0 - # via anthropic # via httpx - # via openai -async-timeout==5.0.1 - # via aiohttp -attrs==26.1.0 - # via aiohttp - # via jsonschema - # via referencing certifi==2025.7.14 # via httpcore # via httpx - # via requests -charset-normalizer==3.4.6 - # via requests click==8.1.8 # via layerlens - # via litellm - # via typer -distro==1.9.0 - # via anthropic - # via openai -docstring-parser==0.17.0 - # via anthropic exceptiongroup==1.3.0 # via anyio -fastuuid==0.14.0 - # via litellm -filelock==3.19.1 - # via huggingface-hub -frozenlist==1.8.0 - # via aiohttp - # via aiosignal -fsspec==2025.10.0 - # via huggingface-hub h11==0.16.0 # via httpcore -hf-xet==1.4.2 - # via huggingface-hub httpcore==1.0.9 # via httpx httpx==0.28.1 - # via anthropic - # via huggingface-hub - # via langsmith # via layerlens - # via litellm - # via openai -huggingface-hub==1.7.2 - # via tokenizers idna==3.10 # via anyio # via httpx - # via requests - # via yarl -importlib-metadata==8.7.1 - # via litellm -jinja2==3.1.6 - # via litellm -jiter==0.13.0 - # via anthropic - # via openai -jsonpatch==1.33 - # via langchain-core -jsonpointer==3.0.0 - # via jsonpatch -jsonschema==4.25.1 - # via litellm -jsonschema-specifications==2025.9.1 - # via jsonschema -langchain-core==0.3.83 - # via layerlens -langsmith==0.4.37 - # via langchain-core -litellm==1.82.6 - # via layerlens -markdown-it-py==3.0.0 - # via rich -markupsafe==3.0.3 - # via jinja2 -mdurl==0.1.2 - # via markdown-it-py -multidict==6.7.1 - # via aiohttp - # via yarl -openai==2.29.0 - # via layerlens - # via litellm -orjson==3.11.5 - # via langsmith -packaging==25.0 - # via huggingface-hub - # via langchain-core - # via langsmith -propcache==0.4.1 - # via aiohttp - # via yarl pydantic==2.11.7 - # via anthropic - # via langchain-core - # via langsmith # via layerlens - # via litellm - # via openai pydantic-core==2.33.2 # via pydantic -pygments==2.19.2 - # via rich -python-dotenv==1.2.1 - # via litellm -pyyaml==6.0.3 - # via huggingface-hub - # via langchain-core -referencing==0.36.2 - # via jsonschema - # via jsonschema-specifications -regex==2026.1.15 - # via tiktoken -requests==2.32.5 - # via langsmith - # via requests-toolbelt - # via tiktoken -requests-toolbelt==1.0.0 - # via langsmith -rich==14.3.3 - # via typer -rpds-py==0.27.1 - # via jsonschema - # via referencing -shellingham==1.5.4 - # via typer sniffio==1.3.1 - # via anthropic # via anyio - # via openai -tenacity==9.1.2 - # via langchain-core -tiktoken==0.12.0 - # via litellm -tokenizers==0.22.2 - # via litellm -tqdm==4.67.3 - # via huggingface-hub - # via openai -typer==0.23.2 - # via huggingface-hub typing-extensions==4.14.1 - # via aiosignal - # via anthropic # via anyio # via exceptiongroup - # via huggingface-hub - # via langchain-core - # via multidict - # via openai # via pydantic # via pydantic-core - # via referencing # via typing-inspection typing-inspection==0.4.1 # via pydantic -urllib3==2.6.3 - # via requests -uuid-utils==0.14.1 - # via langchain-core -yarl==1.22.0 - # via aiohttp -zipp==3.23.0 - # via importlib-metadata -zstandard==0.25.0 - # via langsmith diff --git a/src/layerlens/instrument/__init__.py b/src/layerlens/instrument/__init__.py deleted file mode 100644 index 2e11b51..0000000 --- a/src/layerlens/instrument/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -from __future__ import annotations - -from ._span import span -from ._types import SpanData -from ._recorder import TraceRecorder -from ._decorator import trace - -__all__ = [ - "SpanData", - "TraceRecorder", - "span", - "trace", -] diff --git a/src/layerlens/instrument/_context.py b/src/layerlens/instrument/_context.py deleted file mode 100644 index b4328f3..0000000 --- a/src/layerlens/instrument/_context.py +++ /dev/null @@ -1,11 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Optional -from contextvars import ContextVar - -if TYPE_CHECKING: - from ._types import SpanData - from ._recorder import TraceRecorder - -_current_recorder: ContextVar[Optional[TraceRecorder]] = ContextVar("_current_recorder", default=None) -_current_span: ContextVar[Optional[SpanData]] = ContextVar("_current_span", default=None) diff --git a/src/layerlens/instrument/_decorator.py b/src/layerlens/instrument/_decorator.py deleted file mode 100644 index 4f4644f..0000000 --- a/src/layerlens/instrument/_decorator.py +++ /dev/null @@ -1,93 +0,0 @@ -from __future__ import annotations - -import asyncio -import functools -from typing import Any, Dict, Tuple, Callable, Optional - -from ._types import SpanData -from ._context import _current_span, _current_recorder -from ._recorder import TraceRecorder - - -def trace( - client: Any, - *, - name: Optional[str] = None, - metadata: Optional[Dict[str, Any]] = None, -) -> Callable[..., Any]: - def decorator(fn: Callable[..., Any]) -> Callable[..., Any]: - span_name = name or fn.__name__ - - if asyncio.iscoroutinefunction(fn): - - @functools.wraps(fn) - async def async_wrapper(*args: Any, **kwargs: Any) -> Any: - recorder = TraceRecorder(client) - root = SpanData( - name=span_name, - kind="chain", - input=_capture_input(args, kwargs), - metadata=metadata or {}, - ) - recorder.root = root - - rec_token = _current_recorder.set(recorder) - span_token = _current_span.set(root) - try: - result = await fn(*args, **kwargs) - root.output = result - root.finish() - await recorder.async_flush() - return result - except Exception as exc: - root.finish(error=str(exc)) - await recorder.async_flush() - raise - finally: - _current_span.reset(span_token) - _current_recorder.reset(rec_token) - - return async_wrapper - else: - - @functools.wraps(fn) - def sync_wrapper(*args: Any, **kwargs: Any) -> Any: - recorder = TraceRecorder(client) - root = SpanData( - name=span_name, - kind="chain", - input=_capture_input(args, kwargs), - metadata=metadata or {}, - ) - recorder.root = root - - rec_token = _current_recorder.set(recorder) - span_token = _current_span.set(root) - try: - result = fn(*args, **kwargs) - root.output = result - root.finish() - recorder.flush() - return result - except Exception as exc: - root.finish(error=str(exc)) - recorder.flush() - raise - finally: - _current_span.reset(span_token) - _current_recorder.reset(rec_token) - - return sync_wrapper - - return decorator - - -def _capture_input(args: Tuple[Any, ...], kwargs: Dict[str, Any]) -> Any: - if args and kwargs: - return {"args": list(args), "kwargs": kwargs} - if args: - arg_list = list(args) - return arg_list if len(arg_list) > 1 else arg_list[0] - if kwargs: - return kwargs - return None diff --git a/src/layerlens/instrument/_recorder.py b/src/layerlens/instrument/_recorder.py deleted file mode 100644 index dba6a45..0000000 --- a/src/layerlens/instrument/_recorder.py +++ /dev/null @@ -1,22 +0,0 @@ -from __future__ import annotations - -from typing import Any, Optional - -from ._types import SpanData -from ._upload import upload_trace, async_upload_trace - - -class TraceRecorder: - def __init__(self, client: Any) -> None: - self._client = client - self.root: Optional[SpanData] = None - - def flush(self) -> None: - if self.root is None: - return - upload_trace(self._client, self.root.to_dict()) - - async def async_flush(self) -> None: - if self.root is None: - return - await async_upload_trace(self._client, self.root.to_dict()) diff --git a/src/layerlens/instrument/_span.py b/src/layerlens/instrument/_span.py deleted file mode 100644 index 0c929ff..0000000 --- a/src/layerlens/instrument/_span.py +++ /dev/null @@ -1,43 +0,0 @@ -from __future__ import annotations - -from typing import Any, Dict, Optional, Generator -from contextlib import contextmanager - -from ._types import SpanData -from ._context import _current_span, _current_recorder - - -@contextmanager -def span( - name: str, - *, - kind: str = "internal", - input: Any = None, - metadata: Optional[Dict[str, Any]] = None, -) -> Generator[SpanData, None, None]: - recorder = _current_recorder.get() - parent = _current_span.get() - - if recorder is None or parent is None: - yield SpanData(name=name, kind=kind, input=input, metadata=metadata or {}) - return - - s = SpanData( - name=name, - kind=kind, - parent_id=parent.span_id, - input=input, - metadata=metadata or {}, - ) - parent.children.append(s) - - token = _current_span.set(s) - try: - yield s - except Exception as exc: - s.finish(error=str(exc)) - raise - else: - s.finish() - finally: - _current_span.reset(token) diff --git a/src/layerlens/instrument/_types.py b/src/layerlens/instrument/_types.py deleted file mode 100644 index b589ef0..0000000 --- a/src/layerlens/instrument/_types.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import annotations - -import time -import uuid -from typing import Any, Dict, List, Optional -from dataclasses import field, dataclass - - -@dataclass -class SpanData: - name: str - span_id: str = field(default_factory=lambda: uuid.uuid4().hex[:16]) - parent_id: Optional[str] = None - start_time: float = field(default_factory=time.time) - end_time: Optional[float] = None - status: str = "ok" - kind: str = "internal" - input: Any = None - output: Any = None - error: Optional[str] = None - metadata: Dict[str, Any] = field(default_factory=dict) - children: List[SpanData] = field(default_factory=list) - - def finish(self, error: Optional[str] = None) -> None: - self.end_time = time.time() - if error is not None: - self.error = error - self.status = "error" - - def to_dict(self) -> Dict[str, Any]: - return { - "name": self.name, - "span_id": self.span_id, - "parent_id": self.parent_id, - "start_time": self.start_time, - "end_time": self.end_time, - "status": self.status, - "kind": self.kind, - "input": self.input, - "output": self.output, - "error": self.error, - "metadata": self.metadata, - "children": [c.to_dict() for c in self.children], - } diff --git a/src/layerlens/instrument/_upload.py b/src/layerlens/instrument/_upload.py deleted file mode 100644 index 6597970..0000000 --- a/src/layerlens/instrument/_upload.py +++ /dev/null @@ -1,35 +0,0 @@ -from __future__ import annotations - -import os -import json -import logging -import tempfile -from typing import Any, Dict - -log: logging.Logger = logging.getLogger(__name__) - - -def upload_trace(client: Any, trace_data: Dict[str, Any]) -> None: - fd, path = tempfile.mkstemp(suffix=".json", prefix="layerlens_trace_") - try: - with os.fdopen(fd, "w") as f: - json.dump([trace_data], f, default=str) - client.traces.upload(path) - finally: - try: - os.unlink(path) - except OSError: - log.debug("Failed to remove temp trace file: %s", path) - - -async def async_upload_trace(client: Any, trace_data: Dict[str, Any]) -> None: - fd, path = tempfile.mkstemp(suffix=".json", prefix="layerlens_trace_") - try: - with os.fdopen(fd, "w") as f: - json.dump([trace_data], f, default=str) - await client.traces.upload(path) - finally: - try: - os.unlink(path) - except OSError: - log.debug("Failed to remove temp trace file: %s", path) diff --git a/src/layerlens/instrument/adapters/__init__.py b/src/layerlens/instrument/adapters/__init__.py deleted file mode 100644 index 9d48db4..0000000 --- a/src/layerlens/instrument/adapters/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from __future__ import annotations diff --git a/src/layerlens/instrument/adapters/frameworks/__init__.py b/src/layerlens/instrument/adapters/frameworks/__init__.py deleted file mode 100644 index 9d48db4..0000000 --- a/src/layerlens/instrument/adapters/frameworks/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from __future__ import annotations diff --git a/src/layerlens/instrument/adapters/frameworks/_base_framework.py b/src/layerlens/instrument/adapters/frameworks/_base_framework.py deleted file mode 100644 index 3c3ea3a..0000000 --- a/src/layerlens/instrument/adapters/frameworks/_base_framework.py +++ /dev/null @@ -1,69 +0,0 @@ -from __future__ import annotations - -from uuid import UUID -from typing import Any, Dict, Optional - -from ..._types import SpanData -from ..._upload import upload_trace - - -class FrameworkTracer: - def __init__(self, client: Any) -> None: - self._client = client - self._spans: Dict[str, SpanData] = {} - self._root_run_id: Optional[str] = None - - def _get_or_create_span( - self, - run_id: UUID, - parent_run_id: Optional[UUID], - name: str, - kind: str, - input: Any = None, - ) -> SpanData: - rid = str(run_id) - if rid in self._spans: - return self._spans[rid] - - parent_span: Optional[SpanData] = None - if parent_run_id is not None: - parent_span = self._spans.get(str(parent_run_id)) - - s = SpanData( - name=name, - kind=kind, - parent_id=parent_span.span_id if parent_span else None, - input=input, - ) - self._spans[rid] = s - - if parent_span is not None: - parent_span.children.append(s) - - if self._root_run_id is None: - self._root_run_id = rid - - return s - - def _finish_span(self, run_id: UUID, output: Any = None, error: Optional[str] = None) -> None: - rid = str(run_id) - s = self._spans.get(rid) - if s is None: - return - s.output = output - s.finish(error=error) - - if rid == self._root_run_id: - self._flush() - - def _flush(self) -> None: - if self._root_run_id is None: - return - root = self._spans.get(self._root_run_id) - if root is None: - return - - upload_trace(self._client, root.to_dict()) - - self._spans.clear() - self._root_run_id = None diff --git a/src/layerlens/instrument/adapters/frameworks/langchain.py b/src/layerlens/instrument/adapters/frameworks/langchain.py deleted file mode 100644 index 1e30ee6..0000000 --- a/src/layerlens/instrument/adapters/frameworks/langchain.py +++ /dev/null @@ -1,215 +0,0 @@ -from __future__ import annotations - -from uuid import UUID -from typing import Any, Dict, List, Optional, Sequence - -from ._base_framework import FrameworkTracer - -try: - from langchain_core.callbacks import BaseCallbackHandler # pyright: ignore[reportAssignmentType] -except ImportError: - - class BaseCallbackHandler: # type: ignore[no-redef] - def __init_subclass__(cls, **kwargs: Any) -> None: - raise ImportError( - "The 'langchain-core' package is required for LangChain instrumentation. " - "Install it with: pip install layerlens[langchain]" - ) - - -class LangChainCallbackHandler(BaseCallbackHandler, FrameworkTracer): - def __init__(self, client: Any) -> None: - BaseCallbackHandler.__init__(self) - FrameworkTracer.__init__(self, client) - - # -- Chain -- - - def on_chain_start( - self, - serialized: Optional[Dict[str, Any]], - inputs: Dict[str, Any], - *, - run_id: UUID, - parent_run_id: Optional[UUID] = None, - **kwargs: Any, - ) -> None: - serialized = serialized or {} - name = serialized.get("name") or serialized.get("id", ["unknown"])[-1] - self._get_or_create_span(run_id, parent_run_id, name=name, kind="chain", input=inputs) - - def on_chain_end( - self, - outputs: Dict[str, Any], - *, - run_id: UUID, - parent_run_id: Optional[UUID] = None, - **kwargs: Any, - ) -> None: - self._finish_span(run_id, output=outputs) - - def on_chain_error( - self, - error: BaseException, - *, - run_id: UUID, - parent_run_id: Optional[UUID] = None, - **kwargs: Any, - ) -> None: - self._finish_span(run_id, error=str(error)) - - # -- LLM -- - - def on_llm_start( - self, - serialized: Optional[Dict[str, Any]], - prompts: List[str], - *, - run_id: UUID, - parent_run_id: Optional[UUID] = None, - **kwargs: Any, - ) -> None: - serialized = serialized or {} - name = serialized.get("name") or serialized.get("id", ["unknown"])[-1] - self._get_or_create_span(run_id, parent_run_id, name=name, kind="llm", input=prompts) - - def on_chat_model_start( - self, - serialized: Optional[Dict[str, Any]], - messages: List[List[Any]], - *, - run_id: UUID, - parent_run_id: Optional[UUID] = None, - **kwargs: Any, - ) -> None: - serialized = serialized or {} - name = serialized.get("name") or serialized.get("id", ["unknown"])[-1] - input_data = [[_serialize_lc_message(m) for m in batch] for batch in messages] - self._get_or_create_span(run_id, parent_run_id, name=name, kind="llm", input=input_data) - - def on_llm_end( - self, - response: Any, - *, - run_id: UUID, - parent_run_id: Optional[UUID] = None, - **kwargs: Any, - ) -> None: - output = None - try: - generations = response.generations - if generations and generations[0]: - output = generations[0][0].text - except (AttributeError, IndexError): - pass - - s = self._spans.get(str(run_id)) - if s is not None: - try: - llm_output = response.llm_output - if llm_output: - if "token_usage" in llm_output: - s.metadata["usage"] = llm_output["token_usage"] - if "model_name" in llm_output: - s.metadata["model"] = llm_output["model_name"] - except AttributeError: - pass - - self._finish_span(run_id, output=output) - - def on_llm_error( - self, - error: BaseException, - *, - run_id: UUID, - parent_run_id: Optional[UUID] = None, - **kwargs: Any, - ) -> None: - self._finish_span(run_id, error=str(error)) - - # -- Tool -- - - def on_tool_start( - self, - serialized: Optional[Dict[str, Any]], - input_str: str, - *, - run_id: UUID, - parent_run_id: Optional[UUID] = None, - **kwargs: Any, - ) -> None: - name = (serialized or {}).get("name", "tool") - self._get_or_create_span(run_id, parent_run_id, name=name, kind="tool", input=input_str) - - def on_tool_end( - self, - output: str, - *, - run_id: UUID, - parent_run_id: Optional[UUID] = None, - **kwargs: Any, - ) -> None: - self._finish_span(run_id, output=output) - - def on_tool_error( - self, - error: BaseException, - *, - run_id: UUID, - parent_run_id: Optional[UUID] = None, - **kwargs: Any, - ) -> None: - self._finish_span(run_id, error=str(error)) - - # -- Retriever -- - - def on_retriever_start( - self, - serialized: Optional[Dict[str, Any]], - query: str, - *, - run_id: UUID, - parent_run_id: Optional[UUID] = None, - **kwargs: Any, - ) -> None: - name = (serialized or {}).get("name", "retriever") - self._get_or_create_span(run_id, parent_run_id, name=name, kind="retriever", input=query) - - def on_retriever_end( - self, - documents: Sequence[Any], - *, - run_id: UUID, - parent_run_id: Optional[UUID] = None, - **kwargs: Any, - ) -> None: - output = [_serialize_lc_document(d) for d in documents] - self._finish_span(run_id, output=output) - - def on_retriever_error( - self, - error: BaseException, - *, - run_id: UUID, - parent_run_id: Optional[UUID] = None, - **kwargs: Any, - ) -> None: - self._finish_span(run_id, error=str(error)) - - # -- Text (required by base) -- - - def on_text(self, text: str, **kwargs: Any) -> None: - pass - - -def _serialize_lc_message(msg: Any) -> Any: - try: - return {"type": msg.type, "content": msg.content} - except AttributeError: - return str(msg) - - -def _serialize_lc_document(doc: Any) -> Any: - try: - return {"page_content": doc.page_content, "metadata": doc.metadata} - except AttributeError: - return str(doc) diff --git a/src/layerlens/instrument/adapters/frameworks/langgraph.py b/src/layerlens/instrument/adapters/frameworks/langgraph.py deleted file mode 100644 index 1d72bab..0000000 --- a/src/layerlens/instrument/adapters/frameworks/langgraph.py +++ /dev/null @@ -1,39 +0,0 @@ -from __future__ import annotations - -from uuid import UUID -from typing import Any, Dict, List, Optional - -from .langchain import LangChainCallbackHandler - - -class LangGraphCallbackHandler(LangChainCallbackHandler): - def on_chain_start( - self, - serialized: Optional[Dict[str, Any]], - inputs: Dict[str, Any], - *, - run_id: UUID, - parent_run_id: Optional[UUID] = None, - tags: Optional[List[str]] = None, - **kwargs: Any, - ) -> None: - serialized = serialized or {} - name = serialized.get("name") or serialized.get("id", ["unknown"])[-1] - - # Extract node name from LangGraph tags - if tags: - for tag in tags: - if isinstance(tag, str) and tag.startswith("graph:step:"): - continue - if isinstance(tag, str) and ":" not in tag: - name = tag - break - - # Check kwargs for langgraph-specific metadata - metadata = kwargs.get("metadata", {}) - if isinstance(metadata, dict): - node_name = metadata.get("langgraph_node") - if node_name: - name = node_name - - self._get_or_create_span(run_id, parent_run_id, name=name, kind="chain", input=inputs) diff --git a/src/layerlens/instrument/adapters/providers/__init__.py b/src/layerlens/instrument/adapters/providers/__init__.py deleted file mode 100644 index 9d48db4..0000000 --- a/src/layerlens/instrument/adapters/providers/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from __future__ import annotations diff --git a/src/layerlens/instrument/adapters/providers/_base_provider.py b/src/layerlens/instrument/adapters/providers/_base_provider.py deleted file mode 100644 index f01a935..0000000 --- a/src/layerlens/instrument/adapters/providers/_base_provider.py +++ /dev/null @@ -1,73 +0,0 @@ -from __future__ import annotations - -from typing import Any, Dict, Tuple, Callable, Optional - -from ..._types import SpanData -from ..._context import _current_span, _current_recorder - - -def create_llm_span( - name: str, - kwargs: Dict[str, Any], - capture_params: frozenset[str], -) -> Tuple[Optional[SpanData], Any]: - recorder = _current_recorder.get() - parent = _current_span.get() - - if recorder is None or parent is None: - return None, None - - meta = {k: kwargs[k] for k in capture_params if k in kwargs} - - s = SpanData( - name=name, - kind="llm", - parent_id=parent.span_id, - input=_extract_messages(kwargs), - metadata=meta, - ) - parent.children.append(s) - token = _current_span.set(s) - return s, token - - -def finish_llm_span( - span: SpanData, - token: Any, - response: Any, - extract_output: Callable[[Any], Any], - extract_meta: Callable[[Any], Dict[str, Any]], -) -> None: - try: - span.output = extract_output(response) - span.metadata.update(extract_meta(response)) - span.finish() - finally: - _current_span.reset(token) - - -def fail_llm_span(span: SpanData, token: Any, error: Exception) -> None: - try: - span.finish(error=str(error)) - finally: - _current_span.reset(token) - - -def _extract_messages(kwargs: Dict[str, Any]) -> Any: - messages = kwargs.get("messages") - if messages is not None: - return [_serialize_message(m) for m in messages] - for key in ("prompt", "contents", "input"): - val = kwargs.get(key) - if val is not None: - return val - return None - - -def _serialize_message(msg: Any) -> Any: - if isinstance(msg, dict): - return msg - try: - return {"role": msg.role, "content": msg.content} - except AttributeError: - return str(msg) diff --git a/src/layerlens/instrument/adapters/providers/anthropic.py b/src/layerlens/instrument/adapters/providers/anthropic.py deleted file mode 100644 index 72be2c9..0000000 --- a/src/layerlens/instrument/adapters/providers/anthropic.py +++ /dev/null @@ -1,120 +0,0 @@ -from __future__ import annotations - -import logging -from typing import Any, Dict, Optional - -from ._base_provider import fail_llm_span, create_llm_span, finish_llm_span - -log: logging.Logger = logging.getLogger(__name__) - -_CAPTURE_PARAMS = frozenset( - { - "model", - "max_tokens", - "temperature", - "top_p", - "top_k", - "system", - "tool_choice", - } -) - - -class AnthropicProvider: - def __init__(self) -> None: - self._client: Any = None - self._originals: Dict[str, Any] = {} - - def connect_client(self, client: Any) -> Any: - self._client = client - - if hasattr(client, "messages"): - orig = client.messages.create - self._originals["messages.create"] = orig - client.messages.create = self._wrap_sync(orig) - - return client - - def disconnect(self) -> None: - if self._client is None: - return - for key, orig in self._originals.items(): - try: - parts = key.split(".") - obj = self._client - for part in parts[:-1]: - obj = getattr(obj, part) - setattr(obj, parts[-1], orig) - except Exception: - log.warning("Could not restore %s", key) - self._client = None - self._originals.clear() - - def _wrap_sync(self, original: Any) -> Any: - def wrapped(*args: Any, **kwargs: Any) -> Any: - span, token = create_llm_span("anthropic.messages.create", kwargs, _CAPTURE_PARAMS) - if span is None: - return original(*args, **kwargs) - try: - response = original(*args, **kwargs) - finish_llm_span(span, token, response, _extract_output, _extract_response_meta) - return response - except Exception as exc: - fail_llm_span(span, token, exc) - raise - - return wrapped - - -def _extract_output(response: Any) -> Any: - try: - content = response.content - if content: - block = content[0] - return {"type": block.type, "text": getattr(block, "text", None)} - except (AttributeError, IndexError): - pass - return None - - -def _extract_response_meta(response: Any) -> Dict[str, Any]: - meta: Dict[str, Any] = {} - try: - usage = response.usage - if usage is not None: - meta["usage"] = { - "input_tokens": usage.input_tokens, - "output_tokens": usage.output_tokens, - } - except AttributeError: - pass - try: - meta["response_model"] = response.model - except AttributeError: - pass - try: - meta["stop_reason"] = response.stop_reason - except AttributeError: - pass - return meta - - -# --- Convenience API --- - -_provider_instance: Optional[AnthropicProvider] = None - - -def instrument_anthropic(client: Any) -> AnthropicProvider: - global _provider_instance - if _provider_instance is not None: - _provider_instance.disconnect() - _provider_instance = AnthropicProvider() - _provider_instance.connect_client(client) - return _provider_instance - - -def uninstrument_anthropic() -> None: - global _provider_instance - if _provider_instance is not None: - _provider_instance.disconnect() - _provider_instance = None diff --git a/src/layerlens/instrument/adapters/providers/litellm.py b/src/layerlens/instrument/adapters/providers/litellm.py deleted file mode 100644 index f84497c..0000000 --- a/src/layerlens/instrument/adapters/providers/litellm.py +++ /dev/null @@ -1,83 +0,0 @@ -from __future__ import annotations - -from typing import Any - -from .openai import _extract_output, _extract_response_meta -from ._base_provider import fail_llm_span, create_llm_span, finish_llm_span - -_CAPTURE_PARAMS = frozenset( - { - "model", - "temperature", - "max_tokens", - "top_p", - "frequency_penalty", - "presence_penalty", - "response_format", - } -) - -_original_completion: Any = None -_original_acompletion: Any = None - - -def instrument_litellm() -> None: - try: - import litellm - except ImportError as err: - raise ImportError( - "The 'litellm' package is required for LiteLLM instrumentation. Install it with: pip install litellm" - ) from err - - global _original_completion, _original_acompletion - - if _original_completion is None: - _original_completion = litellm.completion - orig_sync = _original_completion - - def patched_completion(*args: Any, **kwargs: Any) -> Any: - span, token = create_llm_span("litellm.completion", kwargs, _CAPTURE_PARAMS) - if span is None: - return orig_sync(*args, **kwargs) - try: - response = orig_sync(*args, **kwargs) - finish_llm_span(span, token, response, _extract_output, _extract_response_meta) - return response - except Exception as exc: - fail_llm_span(span, token, exc) - raise - - litellm.completion = patched_completion - - if _original_acompletion is None: - _original_acompletion = litellm.acompletion - orig_async = _original_acompletion - - async def patched_acompletion(*args: Any, **kwargs: Any) -> Any: - span, token = create_llm_span("litellm.acompletion", kwargs, _CAPTURE_PARAMS) - if span is None: - return await orig_async(*args, **kwargs) - try: - response = await orig_async(*args, **kwargs) - finish_llm_span(span, token, response, _extract_output, _extract_response_meta) - return response - except Exception as exc: - fail_llm_span(span, token, exc) - raise - - litellm.acompletion = patched_acompletion - - -def uninstrument_litellm() -> None: - global _original_completion, _original_acompletion - try: - import litellm - except ImportError: - return - - if _original_completion is not None: - litellm.completion = _original_completion - _original_completion = None - if _original_acompletion is not None: - litellm.acompletion = _original_acompletion - _original_acompletion = None diff --git a/src/layerlens/instrument/adapters/providers/openai.py b/src/layerlens/instrument/adapters/providers/openai.py deleted file mode 100644 index 2ccd331..0000000 --- a/src/layerlens/instrument/adapters/providers/openai.py +++ /dev/null @@ -1,138 +0,0 @@ -from __future__ import annotations - -import logging -from typing import Any, Dict, Optional - -from ._base_provider import fail_llm_span, create_llm_span, finish_llm_span - -log: logging.Logger = logging.getLogger(__name__) - -_CAPTURE_PARAMS = frozenset( - { - "model", - "temperature", - "max_tokens", - "top_p", - "frequency_penalty", - "presence_penalty", - "response_format", - "tool_choice", - } -) - - -class OpenAIProvider: - def __init__(self) -> None: - self._client: Any = None - self._originals: Dict[str, Any] = {} - - def connect_client(self, client: Any) -> Any: - self._client = client - - if hasattr(client, "chat") and hasattr(client.chat, "completions"): - orig = client.chat.completions.create - self._originals["chat.completions.create"] = orig - client.chat.completions.create = self._wrap_sync(orig) - - if hasattr(client.chat.completions, "acreate"): - async_orig = client.chat.completions.acreate - self._originals["chat.completions.acreate"] = async_orig - client.chat.completions.acreate = self._wrap_async(async_orig) - - return client - - def disconnect(self) -> None: - if self._client is None: - return - for key, orig in self._originals.items(): - try: - parts = key.split(".") - obj = self._client - for part in parts[:-1]: - obj = getattr(obj, part) - setattr(obj, parts[-1], orig) - except Exception: - log.warning("Could not restore %s", key) - self._client = None - self._originals.clear() - - def _wrap_sync(self, original: Any) -> Any: - def wrapped(*args: Any, **kwargs: Any) -> Any: - span, token = create_llm_span("openai.chat.completions.create", kwargs, _CAPTURE_PARAMS) - if span is None: - return original(*args, **kwargs) - try: - response = original(*args, **kwargs) - finish_llm_span(span, token, response, _extract_output, _extract_response_meta) - return response - except Exception as exc: - fail_llm_span(span, token, exc) - raise - - return wrapped - - def _wrap_async(self, original: Any) -> Any: - async def wrapped(*args: Any, **kwargs: Any) -> Any: - span, token = create_llm_span("openai.chat.completions.create", kwargs, _CAPTURE_PARAMS) - if span is None: - return await original(*args, **kwargs) - try: - response = await original(*args, **kwargs) - finish_llm_span(span, token, response, _extract_output, _extract_response_meta) - return response - except Exception as exc: - fail_llm_span(span, token, exc) - raise - - return wrapped - - -def _extract_output(response: Any) -> Any: - try: - choices = response.choices - if choices: - msg = choices[0].message - return {"role": msg.role, "content": msg.content} - except (AttributeError, IndexError): - pass - return None - - -def _extract_response_meta(response: Any) -> Dict[str, Any]: - meta: Dict[str, Any] = {} - try: - usage = response.usage - if usage is not None: - meta["usage"] = { - "prompt_tokens": usage.prompt_tokens, - "completion_tokens": usage.completion_tokens, - "total_tokens": usage.total_tokens, - } - except AttributeError: - pass - try: - meta["response_model"] = response.model - except AttributeError: - pass - return meta - - -# --- Convenience API --- - -_provider_instance: Optional[OpenAIProvider] = None - - -def instrument_openai(client: Any) -> OpenAIProvider: - global _provider_instance - if _provider_instance is not None: - _provider_instance.disconnect() - _provider_instance = OpenAIProvider() - _provider_instance.connect_client(client) - return _provider_instance - - -def uninstrument_openai() -> None: - global _provider_instance - if _provider_instance is not None: - _provider_instance.disconnect() - _provider_instance = None diff --git a/tests/instrument/__init__.py b/tests/instrument/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/instrument/conftest.py b/tests/instrument/conftest.py deleted file mode 100644 index 0dda669..0000000 --- a/tests/instrument/conftest.py +++ /dev/null @@ -1,26 +0,0 @@ -from __future__ import annotations - -import json -from unittest.mock import Mock - -import pytest - - -@pytest.fixture -def mock_client(): - client = Mock() - client.traces = Mock() - client.traces.upload = Mock() - return client - - -@pytest.fixture -def capture_trace(mock_client): - uploaded = {} - - def _capture(path): - with open(path) as f: - uploaded["trace"] = json.load(f) - - mock_client.traces.upload.side_effect = _capture - return uploaded diff --git a/tests/instrument/test_adapters.py b/tests/instrument/test_adapters.py deleted file mode 100644 index 4a430d9..0000000 --- a/tests/instrument/test_adapters.py +++ /dev/null @@ -1,130 +0,0 @@ -from __future__ import annotations - -import sys -import types -import importlib -from uuid import uuid4 -from unittest.mock import Mock - - -class TestLangChainAdapter: - def _setup_langchain_mock(self): - mock_lc_core = types.ModuleType("langchain_core") - mock_lc_callbacks = types.ModuleType("langchain_core.callbacks") - - class FakeBaseCallbackHandler: - def __init__(self): - pass - - mock_lc_callbacks.BaseCallbackHandler = FakeBaseCallbackHandler - mock_lc_core.callbacks = mock_lc_callbacks - - sys.modules["langchain_core"] = mock_lc_core - sys.modules["langchain_core.callbacks"] = mock_lc_callbacks - - def _teardown_langchain_mock(self): - for key in list(sys.modules.keys()): - if key.startswith("langchain_core"): - del sys.modules[key] - - def _get_handler(self, mock_client, capture_trace): - from layerlens.instrument.adapters.frameworks import langchain as lc_mod - - importlib.reload(lc_mod) - return lc_mod.LangChainCallbackHandler(mock_client) - - def test_builds_span_tree(self, mock_client, capture_trace): - self._setup_langchain_mock() - try: - handler = self._get_handler(mock_client, capture_trace) - - chain_run_id = uuid4() - llm_run_id = uuid4() - - handler.on_chain_start( - {"name": "RunnableSequence", "id": ["RunnableSequence"]}, - {"question": "What is AI?"}, - run_id=chain_run_id, - ) - handler.on_llm_start( - {"name": "ChatOpenAI", "id": ["ChatOpenAI"]}, - ["What is AI?"], - run_id=llm_run_id, - parent_run_id=chain_run_id, - ) - - llm_response = Mock() - llm_response.generations = [[Mock(text="AI is...")]] - llm_response.llm_output = {"token_usage": {"total_tokens": 50}, "model_name": "gpt-4"} - handler.on_llm_end(llm_response, run_id=llm_run_id) - handler.on_chain_end({"output": "AI is..."}, run_id=chain_run_id) - - root = capture_trace["trace"][0] - assert root["name"] == "RunnableSequence" - assert root["kind"] == "chain" - assert len(root["children"]) == 1 - - llm = root["children"][0] - assert llm["name"] == "ChatOpenAI" - assert llm["kind"] == "llm" - assert llm["output"] == "AI is..." - assert llm["metadata"]["model"] == "gpt-4" - assert llm["metadata"]["usage"]["total_tokens"] == 50 - finally: - self._teardown_langchain_mock() - - def test_tracks_tools_and_retrievers(self, mock_client, capture_trace): - self._setup_langchain_mock() - try: - handler = self._get_handler(mock_client, capture_trace) - - chain_id = uuid4() - tool_id = uuid4() - retriever_id = uuid4() - - handler.on_chain_start({"name": "Agent"}, {"input": "test"}, run_id=chain_id) - handler.on_tool_start({"name": "search"}, "query", run_id=tool_id, parent_run_id=chain_id) - handler.on_tool_end("results", run_id=tool_id) - handler.on_retriever_start({"name": "vectorstore"}, "query", run_id=retriever_id, parent_run_id=chain_id) - - docs = [Mock(page_content="doc1", metadata={"source": "a"})] - handler.on_retriever_end(docs, run_id=retriever_id) - handler.on_chain_end({"output": "done"}, run_id=chain_id) - - root = capture_trace["trace"][0] - assert root["name"] == "Agent" - assert len(root["children"]) == 2 - assert root["children"][0]["kind"] == "tool" - assert root["children"][1]["kind"] == "retriever" - finally: - self._teardown_langchain_mock() - - def test_error_on_chain(self, mock_client, capture_trace): - self._setup_langchain_mock() - try: - handler = self._get_handler(mock_client, capture_trace) - - chain_id = uuid4() - handler.on_chain_start({"name": "FailChain"}, {"input": "x"}, run_id=chain_id) - handler.on_chain_error(ValueError("broke"), run_id=chain_id) - - root = capture_trace["trace"][0] - assert root["status"] == "error" - assert root["error"] == "broke" - finally: - self._teardown_langchain_mock() - - def test_null_serialized_handled(self, mock_client, capture_trace): - self._setup_langchain_mock() - try: - handler = self._get_handler(mock_client, capture_trace) - - run_id = uuid4() - handler.on_chain_start(None, {"input": "x"}, run_id=run_id) - handler.on_chain_end({"output": "done"}, run_id=run_id) - - root = capture_trace["trace"][0] - assert root["name"] == "unknown" - assert root["status"] == "ok" - finally: - self._teardown_langchain_mock() diff --git a/tests/instrument/test_core.py b/tests/instrument/test_core.py deleted file mode 100644 index 2b1fc00..0000000 --- a/tests/instrument/test_core.py +++ /dev/null @@ -1,163 +0,0 @@ -from __future__ import annotations - -import os - -import pytest - -from layerlens.instrument import SpanData, span, trace -from layerlens.instrument._context import _current_span, _current_recorder -from layerlens.instrument._recorder import TraceRecorder - - -class TestTraceDecorator: - def test_basic_trace(self, mock_client): - @trace(mock_client) - def my_func(x): - return x * 2 - - result = my_func(5) - assert result == 10 - mock_client.traces.upload.assert_called_once() - - def test_trace_with_custom_name(self, mock_client, capture_trace): - @trace(mock_client, name="custom_name") - def my_func(): - return "ok" - - my_func() - assert capture_trace["trace"][0]["name"] == "custom_name" - - def test_trace_captures_input(self, mock_client, capture_trace): - @trace(mock_client) - def my_func(query): - return "result" - - my_func("hello") - assert capture_trace["trace"][0]["input"] == "hello" - - def test_trace_captures_output(self, mock_client, capture_trace): - @trace(mock_client) - def my_func(): - return {"answer": 42} - - my_func() - assert capture_trace["trace"][0]["output"] == {"answer": 42} - - def test_trace_on_error(self, mock_client, capture_trace): - @trace(mock_client) - def my_func(): - raise ValueError("boom") - - with pytest.raises(ValueError, match="boom"): - my_func() - - assert capture_trace["trace"][0]["status"] == "error" - assert capture_trace["trace"][0]["error"] == "boom" - - def test_trace_cleans_up_context(self, mock_client): - @trace(mock_client) - def my_func(): - return "ok" - - my_func() - assert _current_recorder.get() is None - assert _current_span.get() is None - - def test_trace_cleans_up_context_on_error(self, mock_client): - @trace(mock_client) - def my_func(): - raise RuntimeError("fail") - - with pytest.raises(RuntimeError): - my_func() - - assert _current_recorder.get() is None - assert _current_span.get() is None - - -class TestSpanContextManager: - def test_span_creates_child(self, mock_client, capture_trace): - @trace(mock_client) - def my_func(): - with span("child_span", kind="llm") as s: - s.output = "child output" - return "done" - - my_func() - root = capture_trace["trace"][0] - assert len(root["children"]) == 1 - child = root["children"][0] - assert child["name"] == "child_span" - assert child["kind"] == "llm" - assert child["output"] == "child output" - assert child["parent_id"] == root["span_id"] - - def test_nested_spans(self, mock_client, capture_trace): - @trace(mock_client) - def my_func(): - with span("outer", kind="chain") as s1: - s1.output = "outer" - with span("inner", kind="llm") as s2: - s2.output = "inner" - return "done" - - my_func() - root = capture_trace["trace"][0] - outer = root["children"][0] - assert outer["name"] == "outer" - inner = outer["children"][0] - assert inner["name"] == "inner" - assert inner["parent_id"] == outer["span_id"] - - def test_span_on_error(self, mock_client, capture_trace): - @trace(mock_client) - def my_func(): - try: - with span("failing") as s: - raise ValueError("span error") - except ValueError: - pass - return "recovered" - - my_func() - child = capture_trace["trace"][0]["children"][0] - assert child["status"] == "error" - assert child["error"] == "span error" - - def test_span_without_trace_noops(self): - with span("orphan", kind="llm") as s: - s.output = "test" - assert s.output == "test" - - def test_multiple_sibling_spans(self, mock_client, capture_trace): - @trace(mock_client) - def my_func(): - with span("retrieve", kind="retriever") as s: - s.output = ["doc1", "doc2"] - with span("generate", kind="llm") as s: - s.output = "answer" - return "done" - - my_func() - root = capture_trace["trace"][0] - assert len(root["children"]) == 2 - assert root["children"][0]["name"] == "retrieve" - assert root["children"][1]["name"] == "generate" - - -class TestTraceRecorder: - def test_flush_calls_upload(self, mock_client): - recorder = TraceRecorder(mock_client) - recorder.root = SpanData(name="root") - recorder.root.finish() - - recorder.flush() - mock_client.traces.upload.assert_called_once() - - path = mock_client.traces.upload.call_args[0][0] - assert not os.path.exists(path) - - def test_flush_noop_without_root(self, mock_client): - recorder = TraceRecorder(mock_client) - recorder.flush() - mock_client.traces.upload.assert_not_called() diff --git a/tests/instrument/test_providers.py b/tests/instrument/test_providers.py deleted file mode 100644 index fceeb1d..0000000 --- a/tests/instrument/test_providers.py +++ /dev/null @@ -1,217 +0,0 @@ -from __future__ import annotations - -import sys -import types -from unittest.mock import Mock - -from layerlens.instrument import trace - - -def _openai_response(): - r = Mock() - r.choices = [Mock()] - r.choices[0].message = Mock() - r.choices[0].message.role = "assistant" - r.choices[0].message.content = "Hello!" - r.usage = Mock() - r.usage.prompt_tokens = 10 - r.usage.completion_tokens = 5 - r.usage.total_tokens = 15 - r.model = "gpt-4" - return r - - -def _anthropic_response(): - r = Mock() - block = Mock() - block.type = "text" - block.text = "I'm Claude!" - r.content = [block] - r.usage = Mock() - r.usage.input_tokens = 20 - r.usage.output_tokens = 10 - r.model = "claude-3-opus" - r.stop_reason = "end_turn" - return r - - -class TestOpenAIProvider: - def test_instrument_creates_span(self, mock_client, capture_trace): - from layerlens.instrument.adapters.providers.openai import OpenAIProvider - - openai_client = Mock() - openai_client.chat.completions.create = Mock(return_value=_openai_response()) - - provider = OpenAIProvider() - provider.connect_client(openai_client) - - @trace(mock_client) - def my_agent(): - return ( - openai_client.chat.completions.create(model="gpt-4", messages=[{"role": "user", "content": "Hi"}]) - .choices[0] - .message.content - ) - - my_agent() - llm = capture_trace["trace"][0]["children"][0] - assert llm["kind"] == "llm" - assert llm["name"] == "openai.chat.completions.create" - assert llm["metadata"]["model"] == "gpt-4" - assert llm["metadata"]["usage"]["total_tokens"] == 15 - assert llm["output"]["content"] == "Hello!" - - def test_passthrough_without_trace(self): - from layerlens.instrument.adapters.providers.openai import OpenAIProvider - - openai_client = Mock() - openai_client.chat.completions.create = Mock(return_value=_openai_response()) - - provider = OpenAIProvider() - provider.connect_client(openai_client) - - result = openai_client.chat.completions.create(model="gpt-4", messages=[]) - assert result.choices[0].message.content == "Hello!" - - def test_disconnect_restores(self): - from layerlens.instrument.adapters.providers.openai import OpenAIProvider - - openai_client = Mock() - original = openai_client.chat.completions.create - - provider = OpenAIProvider() - provider.connect_client(openai_client) - assert openai_client.chat.completions.create is not original - - provider.disconnect() - assert openai_client.chat.completions.create is original - - def test_instrument_convenience_function(self): - from layerlens.instrument.adapters.providers.openai import instrument_openai, uninstrument_openai - - openai_client = Mock() - original = openai_client.chat.completions.create - instrument_openai(openai_client) - assert openai_client.chat.completions.create is not original - uninstrument_openai() - - -class TestAnthropicProvider: - def test_instrument_creates_span(self, mock_client, capture_trace): - from layerlens.instrument.adapters.providers.anthropic import AnthropicProvider - - anthropic_client = Mock() - anthropic_client.messages.create = Mock(return_value=_anthropic_response()) - - provider = AnthropicProvider() - provider.connect_client(anthropic_client) - - @trace(mock_client) - def my_agent(): - return ( - anthropic_client.messages.create( - model="claude-3-opus", max_tokens=1024, messages=[{"role": "user", "content": "Hi"}] - ) - .content[0] - .text - ) - - my_agent() - llm = capture_trace["trace"][0]["children"][0] - assert llm["kind"] == "llm" - assert llm["name"] == "anthropic.messages.create" - assert llm["output"]["text"] == "I'm Claude!" - assert llm["metadata"]["usage"]["input_tokens"] == 20 - assert llm["metadata"]["response_model"] == "claude-3-opus" - assert llm["metadata"]["stop_reason"] == "end_turn" - - def test_disconnect_restores(self): - from layerlens.instrument.adapters.providers.anthropic import AnthropicProvider - - anthropic_client = Mock() - original = anthropic_client.messages.create - - provider = AnthropicProvider() - provider.connect_client(anthropic_client) - provider.disconnect() - assert anthropic_client.messages.create is original - - -class TestLiteLLMProvider: - def setup_method(self): - self.mock_litellm = types.ModuleType("litellm") - self.mock_litellm.completion = Mock(return_value=_openai_response()) - self.mock_litellm.acompletion = Mock() - sys.modules["litellm"] = self.mock_litellm - - def teardown_method(self): - for key in list(sys.modules.keys()): - if key.startswith("litellm"): - del sys.modules[key] - from layerlens.instrument.adapters.providers import litellm as litellm_adapter - - litellm_adapter._original_completion = None - litellm_adapter._original_acompletion = None - - def test_instrument_creates_span(self, mock_client, capture_trace): - from layerlens.instrument.adapters.providers.litellm import instrument_litellm - - instrument_litellm() - - @trace(mock_client) - def my_agent(): - import litellm - - return ( - litellm.completion(model="gpt-4", messages=[{"role": "user", "content": "Hi"}]) - .choices[0] - .message.content - ) - - my_agent() - llm = capture_trace["trace"][0]["children"][0] - assert llm["kind"] == "llm" - assert llm["name"] == "litellm.completion" - assert llm["metadata"]["model"] == "gpt-4" - - def test_passthrough_without_trace(self): - from layerlens.instrument.adapters.providers.litellm import instrument_litellm - - instrument_litellm() - import litellm - - result = litellm.completion(model="gpt-4", messages=[]) - assert result.choices[0].message.content == "Hello!" - - def test_uninstrument(self): - from layerlens.instrument.adapters.providers.litellm import instrument_litellm, uninstrument_litellm - - original = self.mock_litellm.completion - instrument_litellm() - assert self.mock_litellm.completion is not original - uninstrument_litellm() - assert self.mock_litellm.completion is original - - -class TestProviderErrorHandling: - def test_span_captures_error(self, mock_client, capture_trace): - from layerlens.instrument.adapters.providers.openai import OpenAIProvider - - openai_client = Mock() - openai_client.chat.completions.create = Mock(side_effect=RuntimeError("API error")) - - provider = OpenAIProvider() - provider.connect_client(openai_client) - - @trace(mock_client) - def my_agent(): - try: - openai_client.chat.completions.create(model="gpt-4", messages=[]) - except RuntimeError: - pass - return "recovered" - - my_agent() - llm = capture_trace["trace"][0]["children"][0] - assert llm["status"] == "error" - assert llm["error"] == "API error" diff --git a/tests/instrument/test_types.py b/tests/instrument/test_types.py deleted file mode 100644 index 272edb3..0000000 --- a/tests/instrument/test_types.py +++ /dev/null @@ -1,58 +0,0 @@ -from __future__ import annotations - -import time - -from layerlens.instrument._types import SpanData - - -class TestSpanData: - def test_defaults(self): - s = SpanData(name="test") - assert s.name == "test" - assert len(s.span_id) == 16 - assert s.parent_id is None - assert s.status == "ok" - assert s.kind == "internal" - assert s.input is None - assert s.output is None - assert s.error is None - assert s.metadata == {} - assert s.children == [] - assert s.end_time is None - assert s.start_time <= time.time() - - def test_finish_ok(self): - s = SpanData(name="test") - s.finish() - assert s.end_time is not None - assert s.status == "ok" - assert s.error is None - - def test_finish_error(self): - s = SpanData(name="test") - s.finish(error="something broke") - assert s.end_time is not None - assert s.status == "error" - assert s.error == "something broke" - - def test_to_dict(self): - parent = SpanData(name="parent") - child = SpanData(name="child", parent_id=parent.span_id) - parent.children.append(child) - - d = parent.to_dict() - assert d["name"] == "parent" - assert d["parent_id"] is None - assert len(d["children"]) == 1 - assert d["children"][0]["name"] == "child" - assert d["children"][0]["parent_id"] == parent.span_id - - def test_to_dict_nested(self): - root = SpanData(name="root") - child1 = SpanData(name="c1", parent_id=root.span_id) - child2 = SpanData(name="c2", parent_id=child1.span_id) - root.children.append(child1) - child1.children.append(child2) - - d = root.to_dict() - assert d["children"][0]["children"][0]["name"] == "c2"