From 54139eada06f973b7dff3bfd4dd29fce624d4f76 Mon Sep 17 00:00:00 2001 From: mmercuri Date: Sun, 10 May 2026 09:52:35 -0700 Subject: [PATCH 1/4] test: port tier-2 deeper test suite for ms_agent_framework from ateam (port-as-is) --- .../frameworks/ms_agent_framework/__init__.py | 7 + .../frameworks/ms_agent_framework/conftest.py | 63 ++ .../ms_agent_framework/test_events.py | 90 ++ .../ms_agent_framework/test_integration.py | 784 ++++++++++++++++++ .../ms_agent_framework/test_lifecycle.py | 222 +++++ 5 files changed, 1166 insertions(+) create mode 100644 tests/instrument/adapters/frameworks/ms_agent_framework/__init__.py create mode 100644 tests/instrument/adapters/frameworks/ms_agent_framework/conftest.py create mode 100644 tests/instrument/adapters/frameworks/ms_agent_framework/test_events.py create mode 100644 tests/instrument/adapters/frameworks/ms_agent_framework/test_integration.py create mode 100644 tests/instrument/adapters/frameworks/ms_agent_framework/test_lifecycle.py diff --git a/tests/instrument/adapters/frameworks/ms_agent_framework/__init__.py b/tests/instrument/adapters/frameworks/ms_agent_framework/__init__.py new file mode 100644 index 00000000..eea34b7a --- /dev/null +++ b/tests/instrument/adapters/frameworks/ms_agent_framework/__init__.py @@ -0,0 +1,7 @@ +"""Dedicated tests for the ms_agent_framework framework adapter. + +Ported from ``ateam/tests/adapters/ms_agent_framework/`` so that +stratix-python matches ateam's deeper coverage instead of relying +solely on the consolidated smoke test +(``tests/instrument/adapters/frameworks/test_ms_agent_framework_adapter.py``). +""" diff --git a/tests/instrument/adapters/frameworks/ms_agent_framework/conftest.py b/tests/instrument/adapters/frameworks/ms_agent_framework/conftest.py new file mode 100644 index 00000000..e2044eee --- /dev/null +++ b/tests/instrument/adapters/frameworks/ms_agent_framework/conftest.py @@ -0,0 +1,63 @@ +"""Shared test fixtures for Microsoft Agent Framework adapter tests. 
+
+Ported as-is from ``ateam/tests/adapters/ms_agent_framework/conftest.py``.
+
+Translation rules applied:
+* ``stratix.sdk.python.adapters.ms_agent_framework.lifecycle`` →
+  ``layerlens.instrument.adapters.frameworks.ms_agent_framework.lifecycle``
+* ``stratix.sdk.python.adapters.base`` →
+  ``layerlens.instrument.adapters._base``
+* ``stratix.sdk.python.adapters.capture.CaptureConfig`` →
+  ``layerlens.instrument.adapters._base.CaptureConfig``
+* ``stratix.sdk.python.adapters.replay_models.ReplayableTrace`` →
+  ``layerlens.instrument.adapters._base.ReplayableTrace``
+* ``stratix.sdk.python.adapters.registry._ADAPTER_MODULES`` →
+  ``layerlens.instrument.adapters._base.registry._ADAPTER_MODULES``
+* The wrapper marker attribute was renamed by the source from
+  ``_stratix_original`` to ``_layerlens_original``.
+
+Multi-tenancy: per the transitional "stratix attribute" pattern (see
+migration doc §2.3 step 2 — keystone PR #118 still DRAFT), the
+``MockStratix`` / ``EventCollector`` test stub gets an ``org_id``
+attribute. The post-merge sweep PR will rebase to the canonical kwarg once
+#118 lands.
+"""
+
+import pytest
+
+from layerlens.instrument.adapters.frameworks.ms_agent_framework.lifecycle import MSAgentAdapter
+
+
+class MockStratix:
+    """In-memory STRATIX stand-in: records every emitted event and carries a test ``org_id``."""
+
+    def __init__(self) -> None:
+        self.events = []  # one {"type": ..., "payload": ...} dict per emit() call, in call order
+        self.org_id = "test-org"
+
+    def emit(self, event_type: str, payload: dict) -> None:
+        self.events.append({"type": event_type, "payload": payload})
+
+    def get_events(self, event_type: "str | None" = None) -> list:
+        if event_type:  # a falsy filter (None or "") falls through and returns every event
+            return [e for e in self.events if e["type"] == event_type]
+        return self.events  # the live list itself, not a copy
+
+
+@pytest.fixture
+def mock_stratix():  # fresh, empty recorder for each test
+    return MockStratix()
+
+
+@pytest.fixture
+def adapter(mock_stratix):  # adapter wired to the recorder, with connect() already called
+    adapter = MSAgentAdapter(stratix=mock_stratix)
+    adapter.connect()
+    return adapter
+
+
+@pytest.fixture
+def adapter_no_stratix():  # adapter built without a stratix argument, with connect() already called
+    adapter = MSAgentAdapter()
+    adapter.connect()
+    return adapter
diff --git a/tests/instrument/adapters/frameworks/ms_agent_framework/test_events.py b/tests/instrument/adapters/frameworks/ms_agent_framework/test_events.py
new file mode 100644
index 00000000..974c88a2
--- /dev/null
+++ b/tests/instrument/adapters/frameworks/ms_agent_framework/test_events.py
@@ -0,0 +1,90 @@
+"""Test Microsoft Agent Framework adapter event emission.
+
+Ported as-is from ``ateam/tests/adapters/ms_agent_framework/test_events.py``.
+
+Translation rules applied:
+* ``stratix.sdk.python.adapters.ms_agent_framework.lifecycle`` →
+  ``layerlens.instrument.adapters.frameworks.ms_agent_framework.lifecycle``
+* ``stratix.sdk.python.adapters.base`` →
+  ``layerlens.instrument.adapters._base``
+* ``stratix.sdk.python.adapters.capture.CaptureConfig`` →
+  ``layerlens.instrument.adapters._base.CaptureConfig``
+* ``stratix.sdk.python.adapters.replay_models.ReplayableTrace`` →
+  ``layerlens.instrument.adapters._base.ReplayableTrace``
+* ``stratix.sdk.python.adapters.registry._ADAPTER_MODULES`` →
+  ``layerlens.instrument.adapters._base.registry._ADAPTER_MODULES``
+* The wrapper marker attribute was renamed by the source from
+  ``_stratix_original`` to ``_layerlens_original``.
+
+Multi-tenancy: per the transitional "stratix attribute" pattern (see
+migration doc §2.3 step 2 — keystone PR #118 still DRAFT), the
+``MockStratix`` / ``EventCollector`` test stub gets an ``org_id``
+attribute. The post-merge sweep PR will rebase to the canonical kwarg once
+#118 lands.
+"""
+
+from layerlens.instrument.adapters._base import CaptureConfig
+
+
+class TestMSAgentAdapterEvents:  # event-emission checks driven through the conftest fixtures
+    def test_capture_config_minimal_gates_l3_l5(self, mock_stratix):
+        from layerlens.instrument.adapters.frameworks.ms_agent_framework.lifecycle import MSAgentAdapter
+
+        adapter = MSAgentAdapter(stratix=mock_stratix, capture_config=CaptureConfig.minimal())
+        adapter.connect()
+        adapter.on_llm_call(model="gpt-4o")
+        adapter.on_tool_use(tool_name="test")
+        assert len(mock_stratix.get_events("model.invoke")) == 0  # expected to be gated off by minimal()
+        assert len(mock_stratix.get_events("tool.call")) == 0  # expected to be gated off by minimal()
+
+    def test_cross_cutting_always_emitted(self, mock_stratix):
+        from layerlens.instrument.adapters.frameworks.ms_agent_framework.lifecycle import MSAgentAdapter
+
+        adapter = MSAgentAdapter(stratix=mock_stratix, capture_config=CaptureConfig.minimal())
+        adapter.connect()
+        adapter.emit_dict_event(
+            "agent.state.change", {"framework": "ms_agent_framework", "event_subtype": "test"}
+        )
+        assert len(mock_stratix.get_events("agent.state.change")) == 1  # still emitted under minimal()
+
+    def test_tool_use_with_error(self, adapter, mock_stratix):
+        adapter.on_tool_use(
+            tool_name="failing_tool",
+            tool_input={"query": "test"},
+            error=Exception("tool failed"),
+        )
+        events = mock_stratix.get_events("tool.call")
+        assert len(events) == 1
+        assert events[0]["payload"]["error"] == "tool failed"  # payload carries the message text, not the exception
+
+    def test_tool_use_with_latency(self, adapter, mock_stratix):
+        adapter.on_tool_use(
+            tool_name="slow_tool",
+            tool_input={"query": "test"},
+            tool_output={"result": "ok"},
+            latency_ms=1200.0,
+        )
+        events = mock_stratix.get_events("tool.call")
+        assert len(events) == 1
+        assert events[0]["payload"]["latency_ms"] == 1200.0
+
+    def test_handoff_with_context(self, adapter, mock_stratix):
+        adapter.on_handoff(from_agent="planner", to_agent="executor", context="Execute plan step 3")
+        events = mock_stratix.get_events("agent.handoff")
+        assert len(events) == 1
+        assert events[0]["payload"]["context_hash"] is not None
+        assert events[0]["payload"]["reason"] == "group_chat_turn"  # no reason is passed above; the adapter supplies it
+
+    def test_llm_call_with_messages_content_enabled(self, mock_stratix):
+        from layerlens.instrument.adapters.frameworks.ms_agent_framework.lifecycle import MSAgentAdapter
+
+        config = CaptureConfig(capture_content=True)
+        adapter = MSAgentAdapter(stratix=mock_stratix, capture_config=config)
+        adapter.connect()
+        adapter.on_llm_call(
+            model="gpt-4o",
+            messages=[{"role": "user", "content": "hello"}],
+        )
+        events = mock_stratix.get_events("model.invoke")
+        assert len(events) == 1
+        assert "messages" in events[0]["payload"]
diff --git a/tests/instrument/adapters/frameworks/ms_agent_framework/test_integration.py b/tests/instrument/adapters/frameworks/ms_agent_framework/test_integration.py
new file mode 100644
index 00000000..144bd14c
--- /dev/null
+++ b/tests/instrument/adapters/frameworks/ms_agent_framework/test_integration.py
@@ -0,0 +1,784 @@
+"""Integration tests for the Microsoft Agent Framework adapter.
+
+These tests drive MSAgentAdapter with lightweight ``Fake*`` stand-ins that
+mimic Semantic Kernel agent/chat objects. The SDK is optional; to install it:
+    pip install semantic-kernel
+
+Only tests marked ``needs_semantic_kernel`` are skipped when it is missing.
+
+Ported as-is from ``ateam/tests/adapters/ms_agent_framework/test_integration.py``.
+
+Translation rules applied:
+* ``stratix.sdk.python.adapters.ms_agent_framework.lifecycle`` →
+  ``layerlens.instrument.adapters.frameworks.ms_agent_framework.lifecycle``
+* ``stratix.sdk.python.adapters.base`` →
+  ``layerlens.instrument.adapters._base``
+* ``stratix.sdk.python.adapters.capture.CaptureConfig`` →
+  ``layerlens.instrument.adapters._base.CaptureConfig``
+* ``stratix.sdk.python.adapters.replay_models.ReplayableTrace`` →
+  ``layerlens.instrument.adapters._base.ReplayableTrace``
+* ``stratix.sdk.python.adapters.registry._ADAPTER_MODULES`` →
+  ``layerlens.instrument.adapters._base.registry._ADAPTER_MODULES``
+* The wrapper marker attribute was renamed by the source from
+  ``_stratix_original`` to ``_layerlens_original``.
+
+Multi-tenancy: per the transitional "stratix attribute" pattern (see
+migration doc §2.3 step 2 — keystone PR #118 still DRAFT), the
+``MockStratix`` / ``EventCollector`` test stub gets an ``org_id``
+attribute. The post-merge sweep PR will rebase to the canonical kwarg once
+#118 lands.
+"""
+
+from __future__ import annotations
+
+import hashlib
+from typing import Any
+
+import pytest
+
+from layerlens.instrument.adapters._base import AdapterCapability, AdapterStatus
+from layerlens.instrument.adapters._base import CaptureConfig
+from layerlens.instrument.adapters.frameworks.ms_agent_framework.lifecycle import MSAgentAdapter
+
+# ---------------------------------------------------------------------------
+# Try to import semantic-kernel; tests that need it will skipif unavailable
+# ---------------------------------------------------------------------------
+
+_has_semantic_kernel = False
+_sk_version: str | None = None
+
+try:
+    import semantic_kernel  # type: ignore[import-untyped]
+
+    _has_semantic_kernel = True
+    _sk_version = getattr(semantic_kernel, "__version__", "unknown")  # "unknown" if the package has no __version__
+except ImportError:
+    pass  # SDK is optional: both flags above keep their defaults
+
+needs_semantic_kernel = pytest.mark.skipif(
+    not _has_semantic_kernel,
+    reason="semantic-kernel not installed",
+)
+
+
+# ---------------------------------------------------------------------------
+# EventCollector -- in-memory recorder handed to the adapter as ``stratix``
+# ---------------------------------------------------------------------------
+
+
+class EventCollector:
+    """In-memory ``stratix`` stand-in: stores emitted events and counts trace start/end calls."""
+
+    def __init__(self) -> None:
+        self.org_id: str = "test-org"
+        self.events: list[dict[str, Any]] = []
+        self.traces_started: int = 0
+        self.traces_ended: int = 0
+
+    def emit(self, event_type: str, payload: dict[str, Any]) -> None:
+        self.events.append({"type": event_type, "payload": payload})
+
+    def start_trace(self, **kwargs: Any) -> str:
+        self.traces_started += 1
+        return f"trace-{self.traces_started}"  # ids are "trace-1", "trace-2", ... in call order
+
+    def end_trace(self, **kwargs: Any) -> None:
+        self.traces_ended += 1
+
+    def get_events(self, event_type: str | None = None) -> list[dict[str, Any]]:
+        if event_type:  # a falsy filter (None or "") returns every event
+            return [e for e in self.events if e["type"] == event_type]
+        return self.events  # the live list itself, not a copy
+
+
+# ---------------------------------------------------------------------------
+# Fake SK Agent objects for testing (no auth/kernel required)
+# ---------------------------------------------------------------------------
+
+
+class FakeKernelPlugin:
+    """Stub standing in for semantic_kernel.KernelPlugin; only ``name`` is provided."""
+
+    def __init__(self, name: str) -> None:
+        self.name = name
+
+
+class FakeKernel:
+    """Stub standing in for semantic_kernel.Kernel; only ``plugins`` is provided."""
+
+    def __init__(self, plugins: dict[str, Any] | None = None) -> None:
+        self.plugins = plugins or {}
+
+
+class FakeAgent:
+    """Stub standing in for a ChatCompletionAgent: ``name``, ``instructions``, ``kernel``."""
+
+    def __init__(
+        self,
+        name: str = "test_agent",
+        instructions: str = "You are a helpful assistant.",
+        kernel: FakeKernel | None = None,
+    ) -> None:
+        self.name = name
+        self.instructions = instructions
+        self.kernel = kernel or FakeKernel()
+
+
+class FakeFunctionCallItem:
+    """Stub for a function call content item in a ChatMessageContent."""
+
+    def __init__(self, name: str, arguments: str | None = None) -> None:
+        self.name = name
+        self.arguments = arguments
+
+    @property
+    def __class__(self) -> type:
+        # Spoofs item.__class__.__name__ as "FunctionCallContent"; type(item) itself is unaffected
+        return type("FunctionCallContent", (), {})
+
+
+class FakeFunctionResultItem:
+    """Stub for a function result content item in a ChatMessageContent."""
+
+    def __init__(self, name: str, result: str | None = None) -> None:
+        self.name = name
+        self.result = result
+
+    @property
+    def __class__(self) -> type:
+        return type("FunctionResultContent", (), {})  # item.__class__.__name__ reads "FunctionResultContent"
+
+
+class FakeChatMessageContent:
+    """Stub standing in for semantic_kernel.contents.ChatMessageContent."""
+
+    def __init__(
+        self,
+        agent_name: str | None = None,
+        content: str = "",
+        items: list[Any] | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> None:
+        self.agent_name = agent_name
+        self.name = agent_name  # the same value is exposed under both ``agent_name`` and ``name``
+        self.content = content
+        self.items = items or []
+        self.metadata = metadata or {}
+
+class FakeUsage:
+    """Stub holding ``prompt_tokens`` / ``completion_tokens``, shaped like SK CompletionUsage."""
+
+    def __init__(self, prompt_tokens: int = 0, completion_tokens: int = 0) -> None:
+        self.prompt_tokens = prompt_tokens
+        self.completion_tokens = completion_tokens
+
+
+class FakeChat:
+    """Stub exposing the ``invoke`` / ``invoke_stream`` surface of AgentChat or AgentGroupChat."""
+
+    def __init__(
+        self,
+        name: str = "test_chat",
+        agents: list[FakeAgent] | None = None,
+        selection_strategy: Any = None,
+        termination_strategy: Any = None,
+    ) -> None:
+        self.name = name
+        self.agents = agents
+        self.agent = agents[0] if agents else None  # first agent, or None when the list is missing/empty
+        self.selection_strategy = selection_strategy
+        self.termination_strategy = termination_strategy
+        self._invoke_called = False
+
+    async def invoke(self, *args: Any, **kwargs: Any) -> Any:
+        """Async generator yielding one canned message; records that it ran."""
+        self._invoke_called = True
+        yield FakeChatMessageContent(content="test response")
+
+    async def invoke_stream(self, *args: Any, **kwargs: Any) -> Any:
+        """Async generator yielding one canned streaming message."""
+        yield FakeChatMessageContent(content="streamed response")
+
+
+class FakeSelectionStrategy:
+    pass
+
+
+class FakeTerminationStrategy:
+    pass
+
+
+# ---------------------------------------------------------------------------
+# Adapter construction
+# ---------------------------------------------------------------------------
+
+
+class TestAdapterConstruction:
+    """Verify adapter constructs correctly with various configurations."""
+
+    def test_adapter_framework_metadata(self) -> None:
+        """Adapter should expose the expected FRAMEWORK and VERSION constants."""
+        adapter = MSAgentAdapter()
+        assert adapter.FRAMEWORK == "ms_agent_framework"
+        assert adapter.VERSION == "0.1.0"
+
+    def test_adapter_capabilities(self) -> None:
+        """MS Agent adapter must declare all four capabilities."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+        info = adapter.get_adapter_info()
+        assert AdapterCapability.TRACE_HANDOFFS in info.capabilities
+        assert AdapterCapability.TRACE_TOOLS in info.capabilities
+        assert AdapterCapability.TRACE_MODELS in info.capabilities
+        assert AdapterCapability.TRACE_STATE in info.capabilities
+        assert info.name == "MSAgentAdapter"
+
+    def test_capture_config_propagates(self) -> None:
+        """CaptureConfig passed to the constructor is stored on the adapter with its flags intact."""
+        collector = EventCollector()
+        config = CaptureConfig(
+            l3_model_metadata=True,
+            l5a_tool_calls=False,
+            l1_agent_io=True,
+        )
+        adapter = MSAgentAdapter(stratix=collector, capture_config=config)
+        assert adapter._capture_config.l3_model_metadata is True
+        assert adapter._capture_config.l5a_tool_calls is False
+
+    @needs_semantic_kernel
+    def test_connect_detects_sdk_version(self) -> None:
+        """connect() should discover the installed semantic-kernel version."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+        health = adapter.health_check()
+        assert health.framework_version is not None
+        assert health.framework_version == _sk_version
+
+    def test_connect_without_sdk_still_healthy(self) -> None:
+        """connect() should reach HEALTHY; this test neither requires nor removes the SDK."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+        assert adapter._status == AdapterStatus.HEALTHY
+
+
+# ---------------------------------------------------------------------------
+# Chat instrumentation (invoke wrapping)
+# ---------------------------------------------------------------------------
+
+
+class TestChatInstrumentation:
+    """Verify instrument_chat wraps invoke methods on chat objects."""
+
+    def test_instrument_chat_wraps_invoke(self) -> None:
+        """instrument_chat should replace invoke with a traced version."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+
+        chat = FakeChat(name="test_chat")
+        original_invoke = chat.invoke
+
+        adapter.instrument_chat(chat)
+
+        # NOTE(review): bound methods are rebuilt on every attribute access, so this identity check alone is weak
+        assert chat.invoke is not original_invoke
+        # The marker attribute is the real signal that a wrapper was installed
+        assert hasattr(chat.invoke, "_layerlens_original")
+
+    def test_instrument_chat_wraps_invoke_stream(self) -> None:
+        """instrument_chat should also wrap invoke_stream."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+
+        chat = FakeChat(name="test_chat")
+        original_stream = chat.invoke_stream
+
+        adapter.instrument_chat(chat)
+
+        assert chat.invoke_stream is not original_stream  # NOTE(review): weak on its own; the marker check below is decisive
+        assert hasattr(chat.invoke_stream, "_layerlens_original")
+
+    def test_instrument_chat_emits_config(self) -> None:
+        """instrument_chat should emit environment.config on first encounter."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+
+        agents = [FakeAgent(name="coder"), FakeAgent(name="reviewer")]
+        chat = FakeChat(
+            name="code_review_chat",
+            agents=agents,
+            selection_strategy=FakeSelectionStrategy(),
+            termination_strategy=FakeTerminationStrategy(),
+        )
+
+        adapter.instrument_chat(chat)
+
+        config_events = collector.get_events("environment.config")
+        assert len(config_events) == 1
+        payload = config_events[0]["payload"]
+        assert payload["framework"] == "ms_agent_framework"
+        assert payload["chat_name"] == "code_review_chat"
+        assert payload["chat_type"] == "FakeChat"
+        assert payload["agents"] == ["coder", "reviewer"]
+        assert payload["selection_strategy"] == "FakeSelectionStrategy"
+        assert payload["termination_strategy"] == "FakeTerminationStrategy"
+
+    def test_instrument_chat_idempotent(self) -> None:
+        """Calling instrument_chat twice should not double-wrap."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+
+        chat = FakeChat(name="test_chat")
+        adapter.instrument_chat(chat)
+        first_invoke = chat.invoke
+
+        adapter.instrument_chat(chat)
+        assert chat.invoke is first_invoke  # Same wrapper
+
+
+# 
---------------------------------------------------------------------------
+# Lifecycle hooks (manual API)
+# ---------------------------------------------------------------------------
+
+
+class TestLifecycleHooks:
+    """Verify manual lifecycle hook methods emit correct events."""
+
+    def test_run_start_end_roundtrip(self) -> None:
+        """on_run_start + on_run_end should emit input/output with duration."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+
+        adapter.on_run_start(agent_name="planner", input_data="Build a plan")
+        adapter.on_run_end(agent_name="planner", output="Plan: step 1, step 2")
+
+        input_events = collector.get_events("agent.input")
+        assert len(input_events) == 1
+        assert input_events[0]["payload"]["input"] == "Build a plan"
+
+        output_events = collector.get_events("agent.output")
+        assert len(output_events) == 1
+        assert output_events[0]["payload"]["output"] == "Plan: step 1, step 2"
+        assert output_events[0]["payload"]["duration_ns"] >= 0  # only non-negativity is checked, not the value
+
+    def test_run_end_emits_state_change(self) -> None:
+        """on_run_end should also emit agent.state.change."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+
+        adapter.on_run_start(agent_name="worker", input_data="do work")
+        adapter.on_run_end(agent_name="worker", output="done")
+
+        state_events = collector.get_events("agent.state.change")
+        assert len(state_events) == 1
+        assert state_events[0]["payload"]["event_subtype"] == "run_complete"
+
+    def test_run_end_with_error_emits_failed_state(self) -> None:
+        """on_run_end with error should emit run_failed state change."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+
+        adapter.on_run_start(agent_name="failing", input_data="crash")
+        adapter.on_run_end(agent_name="failing", error=RuntimeError("Kernel failed"))
+
+        output_events = collector.get_events("agent.output")
+        assert len(output_events) == 1
+        assert "Kernel failed" in output_events[0]["payload"]["error"]  # substring match, not equality
+
+        state_events = collector.get_events("agent.state.change")
+        assert len(state_events) == 1
+        assert state_events[0]["payload"]["event_subtype"] == "run_failed"
+
+    def test_on_tool_use(self) -> None:
+        """on_tool_use should emit tool.call."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+
+        adapter.on_tool_use(
+            tool_name="SearchPlugin_search",
+            tool_input={"query": "AI trends"},
+            tool_output={"results": ["trend1", "trend2"]},
+            latency_ms=550.0,
+        )
+
+        events = collector.get_events("tool.call")
+        assert len(events) == 1
+        payload = events[0]["payload"]
+        assert payload["tool_name"] == "SearchPlugin_search"
+        assert payload["latency_ms"] == 550.0
+
+    def test_on_llm_call(self) -> None:
+        """on_llm_call should emit model.invoke."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+
+        adapter.on_llm_call(
+            provider="azure_openai",
+            model="gpt-4o",
+            tokens_prompt=200,
+            tokens_completion=100,
+        )
+
+        events = collector.get_events("model.invoke")
+        assert len(events) == 1
+        assert events[0]["payload"]["model"] == "gpt-4o"
+        assert events[0]["payload"]["provider"] == "azure_openai"
+
+    def test_disconnected_adapter_emits_nothing(self) -> None:
+        """Lifecycle hooks should no-op when connect() was never called."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        # NOT calling connect()
+
+        adapter.on_run_start(agent_name="test", input_data="hello")
+        adapter.on_run_end(agent_name="test", output="world")
+        adapter.on_tool_use(tool_name="t", tool_input={})
+        adapter.on_llm_call(model="m")
+        adapter.on_handoff(from_agent="a", to_agent="b")
+
+        assert len(collector.events) == 0
+
+
+# ---------------------------------------------------------------------------
+# Message processing (tool calls, model info, handoffs from messages)
+# 
---------------------------------------------------------------------------
+
+
+class TestMessageProcessing:
+    """Verify _process_message extracts events from ChatMessageContent."""
+
+    def test_function_call_in_message(self) -> None:
+        """FunctionCall items should emit tool.call events."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+
+        msg = FakeChatMessageContent(
+            agent_name="coder",
+            items=[FakeFunctionCallItem(name="write_code", arguments='{"lang": "python"}')],
+        )
+
+        adapter._process_message(None, msg, "coder")  # first argument is None in every test here; its role is not visible — TODO confirm
+
+        events = collector.get_events("tool.call")
+        assert len(events) == 1
+        assert events[0]["payload"]["tool_name"] == "write_code"
+
+    def test_function_result_in_message(self) -> None:
+        """FunctionResult items should emit tool.call events with output."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+
+        msg = FakeChatMessageContent(
+            agent_name="coder",
+            items=[FakeFunctionResultItem(name="write_code", result="code written")],
+        )
+
+        adapter._process_message(None, msg, "coder")
+
+        events = collector.get_events("tool.call")
+        assert len(events) == 1
+        assert events[0]["payload"]["tool_output"] == "code written"
+
+    def test_agent_turn_transition_emits_handoff(self) -> None:
+        """When message agent differs from current agent, emit agent.handoff."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+
+        msg = FakeChatMessageContent(agent_name="reviewer")
+        adapter._process_message(None, msg, "coder")
+
+        events = collector.get_events("agent.handoff")
+        assert len(events) == 1
+        assert events[0]["payload"]["from_agent"] == "coder"
+        assert events[0]["payload"]["to_agent"] == "reviewer"
+        assert events[0]["payload"]["reason"] == "group_chat_turn"
+
+    def test_same_agent_no_handoff(self) -> None:
+        """Same agent name should not trigger a handoff event."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+
+        msg = FakeChatMessageContent(agent_name="coder")
+        adapter._process_message(None, msg, "coder")
+
+        events = collector.get_events("agent.handoff")
+        assert len(events) == 0
+
+    def test_model_metadata_in_message(self) -> None:
+        """Model info in metadata should emit model.invoke."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+
+        msg = FakeChatMessageContent(
+            agent_name="assistant",
+            metadata={"model": "gpt-4o"},
+        )
+
+        adapter._process_message(None, msg, "assistant")
+
+        events = collector.get_events("model.invoke")
+        assert len(events) == 1
+        assert events[0]["payload"]["model"] == "gpt-4o"
+
+    def test_usage_metadata_emits_cost_record(self) -> None:
+        """Usage metadata should emit cost.record."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+
+        msg = FakeChatMessageContent(
+            agent_name="assistant",
+            metadata={
+                "model": "gpt-4o",
+                "usage": {"prompt_tokens": 100, "completion_tokens": 50},
+            },
+        )
+
+        adapter._process_message(None, msg, "assistant")
+
+        cost_events = collector.get_events("cost.record")
+        assert len(cost_events) == 1
+        assert cost_events[0]["payload"]["tokens_prompt"] == 100  # payload key differs from the metadata key "prompt_tokens"
+        assert cost_events[0]["payload"]["tokens_completion"] == 50
+
+
+# ---------------------------------------------------------------------------
+# Handoff events
+# ---------------------------------------------------------------------------
+
+
+class TestHandoffEvents:
+    """Verify agent.handoff events for group chat turn transitions."""
+
+    def test_handoff_event(self) -> None:
+        """on_handoff should emit agent.handoff with context hash."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+
+        adapter.on_handoff(
+            from_agent="planner",
+            to_agent="executor",
+            context="Execute step 3 of the plan",
+        )
+
+        events = collector.get_events("agent.handoff")
+        assert len(events) == 1
+        payload = events[0]["payload"]
+        assert payload["from_agent"] == "planner"
+        assert payload["to_agent"] == "executor"
+        assert payload["reason"] == "group_chat_turn"
+        expected_hash = hashlib.sha256(b"Execute step 3 of the plan").hexdigest()  # hex SHA-256 of the context bytes
+        assert payload["context_hash"] == expected_hash
+
+    def test_handoff_without_context(self) -> None:
+        """Handoff without context should have None context_hash."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+
+        adapter.on_handoff(from_agent="a", to_agent="b")
+
+        events = collector.get_events("agent.handoff")
+        assert len(events) == 1
+        assert events[0]["payload"]["context_hash"] is None
+
+    def test_handoff_always_emitted_with_minimal_config(self) -> None:
+        """agent.handoff is cross-cutting and should emit even with minimal config."""
+        collector = EventCollector()
+        config = CaptureConfig.minimal()
+        adapter = MSAgentAdapter(stratix=collector, capture_config=config)
+        adapter.connect()
+
+        adapter.on_handoff(from_agent="x", to_agent="y")
+
+        events = collector.get_events("agent.handoff")
+        assert len(events) == 1
+
+
+# ---------------------------------------------------------------------------
+# Provider detection
+# ---------------------------------------------------------------------------
+
+
+class TestProviderDetection:
+    """Verify _detect_provider maps model names to the expected provider strings."""
+
+    def test_openai_models(self) -> None:
+        adapter = MSAgentAdapter()
+        assert adapter._detect_provider("gpt-4o") == "openai"
+        assert adapter._detect_provider("o1-preview") == "openai"
+        assert adapter._detect_provider("o3-mini") == "openai"
+
+    def test_anthropic_models(self) -> None:
+        adapter = MSAgentAdapter()
+        assert adapter._detect_provider("claude-3-opus") == "anthropic"
+        assert adapter._detect_provider("claude-opus-4") == "anthropic"
+
+    def test_google_models(self) -> None:
+        adapter = MSAgentAdapter()
+        assert adapter._detect_provider("gemini-2.0-flash") == "google"
+
+    def test_microsoft_models(self) -> None:
+        adapter = MSAgentAdapter()
+        assert adapter._detect_provider("phi-4") == "microsoft"
+
+    def test_meta_models(self) -> None:
+        adapter = MSAgentAdapter()
+        assert adapter._detect_provider("llama-3.3-70b") == "meta"
+
+    def test_mistral_models(self) -> None:
+        adapter = MSAgentAdapter()
+        assert adapter._detect_provider("mistral-large") == "mistral"
+        assert adapter._detect_provider("mixtral-8x7b") == "mistral"
+
+    def test_unknown_defaults_to_azure(self) -> None:
+        adapter = MSAgentAdapter()
+        assert adapter._detect_provider("some-custom-model") == "azure_openai"
+
+    def test_none_returns_none(self) -> None:
+        adapter = MSAgentAdapter()
+        assert adapter._detect_provider(None) is None
+
+
+# ---------------------------------------------------------------------------
+# CaptureConfig gating
+# ---------------------------------------------------------------------------
+
+
+class TestCaptureConfigGating:
+    """Verify that CaptureConfig correctly gates event emission."""
+
+    def test_minimal_config_blocks_l3_l5(self) -> None:
+        """Minimal config should block model.invoke and tool.call."""
+        collector = EventCollector()
+        config = CaptureConfig.minimal()
+        adapter = MSAgentAdapter(stratix=collector, capture_config=config)
+        adapter.connect()
+
+        adapter.on_llm_call(model="gpt-4o")
+        adapter.on_tool_use(tool_name="search")
+
+        assert len(collector.get_events("model.invoke")) == 0
+        assert len(collector.get_events("tool.call")) == 0
+
+    def test_minimal_config_allows_l1(self) -> None:
+        """Minimal config should still allow agent.input/output."""
+        collector = EventCollector()
+        config = CaptureConfig.minimal()
+        adapter = MSAgentAdapter(stratix=collector, capture_config=config)
+        adapter.connect()
+
+        adapter.on_run_start(agent_name="test", input_data="hello")
+        adapter.on_run_end(agent_name="test", output="world")
+
+        assert len(collector.get_events("agent.input")) == 1
+        assert len(collector.get_events("agent.output")) == 1
+
+    def test_content_capture_enabled(self) -> None:
+        """capture_content=True should include messages in model.invoke."""
+        collector = EventCollector()
+        config = CaptureConfig(capture_content=True)
+        adapter = MSAgentAdapter(stratix=collector, capture_config=config)
+        adapter.connect()
+
+        adapter.on_llm_call(
+            model="gpt-4o",
+            messages=[{"role": "user", "content": "hello"}],
+        )
+
+        events = collector.get_events("model.invoke")
+        assert len(events) == 1
+        assert "messages" in events[0]["payload"]
+
+    def test_content_capture_disabled(self) -> None:
+        """capture_content=False should exclude messages."""
+        collector = EventCollector()
+        config = CaptureConfig(capture_content=False)
+        adapter = MSAgentAdapter(stratix=collector, capture_config=config)
+        adapter.connect()
+
+        adapter.on_llm_call(
+            model="gpt-4o",
+            messages=[{"role": "user", "content": "secret"}],
+        )
+
+        events = collector.get_events("model.invoke")
+        assert len(events) == 1
+        assert "messages" not in events[0]["payload"]
+
+
+# ---------------------------------------------------------------------------
+# Adapter lifecycle
+# ---------------------------------------------------------------------------
+
+
+class TestAdapterLifecycle:
+    """Verify adapter lifecycle management."""
+
+    def test_connect_disconnect(self) -> None:
+        """connect() and disconnect() should transition status correctly."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+        assert adapter._status == AdapterStatus.HEALTHY
+        adapter.disconnect()
+        assert adapter._status == AdapterStatus.DISCONNECTED
+
+    def test_disconnect_unwraps_chats(self) -> None:
+        """disconnect() should clear wrap bookkeeping (method restoration itself is not asserted)."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+
+        chat = FakeChat(name="test_chat")
+        original_invoke = chat.invoke
+        adapter.instrument_chat(chat)
+        assert chat.invoke is not original_invoke  # NOTE(review): weak — bound methods are rebuilt on each access
+
+        adapter.disconnect()
+        # After disconnect, both tracking containers on the adapter should be empty
+        assert len(adapter._originals) == 0
+        assert len(adapter._wrapped_chats) == 0
+
+    def test_serialization_for_replay(self) -> None:
+        """serialize_for_replay should return a trace with the adapter name, framework and >= 2 events."""
+        collector = EventCollector()
+        adapter = MSAgentAdapter(stratix=collector)
+        adapter.connect()
+
+        adapter.on_run_start(agent_name="test", input_data="hello")
+        adapter.on_run_end(agent_name="test", output="world")
+
+        trace = adapter.serialize_for_replay()
+        assert trace.adapter_name == "MSAgentAdapter"
+        assert trace.framework == "ms_agent_framework"
+        assert len(trace.events) >= 2
+
+    def test_null_stratix_pattern(self) -> None:
+        """Adapter should work (no-op) without a stratix instance."""
+        adapter = MSAgentAdapter()
+        adapter.connect()
+        # Should not raise
+        adapter.on_run_start(agent_name="test", input_data="hello")
+        adapter.on_run_end(agent_name="test", output="world")
+        adapter.on_tool_use(tool_name="test", tool_input={})
+        adapter.on_llm_call(model="test")
+        adapter.on_handoff(from_agent="a", to_agent="b")
diff --git a/tests/instrument/adapters/frameworks/ms_agent_framework/test_lifecycle.py b/tests/instrument/adapters/frameworks/ms_agent_framework/test_lifecycle.py
new file mode 100644
index 00000000..38966db2
--- /dev/null
+++ b/tests/instrument/adapters/frameworks/ms_agent_framework/test_lifecycle.py
@@ -0,0 +1,222 @@
+"""Test Microsoft Agent Framework adapter lifecycle methods.
+
+Ported as-is from ``ateam/tests/adapters/ms_agent_framework/test_lifecycle.py``.
+ +Translation rules applied: +* ``stratix.sdk.python.adapters.ms_agent_framework.lifecycle`` → + ``layerlens.instrument.adapters.frameworks.ms_agent_framework.lifecycle`` +* ``stratix.sdk.python.adapters.base`` → + ``layerlens.instrument.adapters._base`` +* ``stratix.sdk.python.adapters.capture.CaptureConfig`` → + ``layerlens.instrument.adapters._base.CaptureConfig`` +* ``stratix.sdk.python.adapters.replay_models.ReplayableTrace`` → + ``layerlens.instrument.adapters._base.ReplayableTrace`` +* ``stratix.sdk.python.adapters.registry._ADAPTER_MODULES`` → + ``layerlens.instrument.adapters._base.registry._ADAPTER_MODULES`` +* The wrapper marker attribute renamed by the source from + ``_stratix_original`` to ``_layerlens_original``. + +Multi-tenancy: per the transitional "stratix attribute" pattern (see +migration doc §2.3 step 2 — keystone PR #118 still DRAFT), the +``MockStratix`` / ``EventCollector`` test stub gets an ``org_id`` +attribute. The post-merge sweep PR will rebase to canonical kwarg once +#118 lands. 
+""" + +from unittest.mock import AsyncMock, MagicMock + +from layerlens.instrument.adapters._base import AdapterCapability, AdapterStatus +from layerlens.instrument.adapters.frameworks.ms_agent_framework.lifecycle import MSAgentAdapter +from layerlens.instrument.adapters._base import ReplayableTrace + + +class TestMSAgentAdapterLifecycle: + def test_adapter_initialization(self): + adapter = MSAgentAdapter() + assert adapter.FRAMEWORK == "ms_agent_framework" + assert adapter.VERSION == "0.1.0" + + def test_adapter_initialization_with_stratix(self, mock_stratix): + adapter = MSAgentAdapter(stratix=mock_stratix) + assert adapter.has_stratix + + def test_adapter_initialization_legacy_param(self, mock_stratix): + adapter = MSAgentAdapter(stratix_instance=mock_stratix) + assert adapter.has_stratix + + def test_connect_sets_healthy(self): + adapter = MSAgentAdapter() + adapter.connect() + assert adapter.is_connected + assert adapter.status == AdapterStatus.HEALTHY + + def test_connect_without_framework(self): + """Adapter connects gracefully even when semantic-kernel is not installed.""" + adapter = MSAgentAdapter() + adapter.connect() + assert adapter.is_connected + assert adapter.status == AdapterStatus.HEALTHY + + def test_disconnect_sets_disconnected(self): + adapter = MSAgentAdapter() + adapter.connect() + adapter.disconnect() + assert not adapter.is_connected + assert adapter.status == AdapterStatus.DISCONNECTED + + def test_health_check_healthy(self, adapter): + health = adapter.health_check() + assert health.status == AdapterStatus.HEALTHY + assert health.framework_name == "ms_agent_framework" + assert health.adapter_version == "0.1.0" + assert health.error_count == 0 + assert not health.circuit_open + + def test_health_check_disconnected(self): + adapter = MSAgentAdapter() + health = adapter.health_check() + assert health.status == AdapterStatus.DISCONNECTED + + def test_get_adapter_info(self, adapter): + info = adapter.get_adapter_info() + assert info.name == 
"MSAgentAdapter" + assert info.framework == "ms_agent_framework" + assert info.version == "0.1.0" + assert AdapterCapability.TRACE_TOOLS in info.capabilities + assert AdapterCapability.TRACE_MODELS in info.capabilities + assert AdapterCapability.TRACE_STATE in info.capabilities + assert AdapterCapability.TRACE_HANDOFFS in info.capabilities + + def test_serialize_for_replay(self, adapter): + trace = adapter.serialize_for_replay() + assert isinstance(trace, ReplayableTrace) + assert trace.adapter_name == "MSAgentAdapter" + assert trace.framework == "ms_agent_framework" + assert trace.trace_id is not None + assert isinstance(trace.events, list) + assert isinstance(trace.config, dict) + + def test_null_stratix_pattern(self): + adapter = MSAgentAdapter() + adapter.connect() + # Should not raise even without STRATIX + adapter.emit_dict_event("agent.input", {"framework": "ms_agent_framework"}) + + def test_instrument_chat(self, adapter): + mock_chat = MagicMock() + mock_chat.name = "test_chat" + mock_chat.invoke = AsyncMock() + mock_chat.agents = [] + + adapter.instrument_chat(mock_chat) + assert hasattr(mock_chat.invoke, "_layerlens_original") + + def test_instrument_agent_aliases_instrument_chat(self, adapter): + mock_chat = MagicMock() + mock_chat.name = "test_chat" + mock_chat.invoke = AsyncMock() + mock_chat.agents = [] + + adapter.instrument_agent(mock_chat) + assert hasattr(mock_chat.invoke, "_layerlens_original") + + def test_instrument_chat_idempotent(self, adapter): + mock_chat = MagicMock() + mock_chat.name = "test_chat" + mock_chat.invoke = AsyncMock() + adapter.instrument_chat(mock_chat) + first_invoke = mock_chat.invoke + adapter.instrument_chat(mock_chat) + assert mock_chat.invoke is first_invoke + + def test_disconnect_unwraps(self, adapter): + mock_chat = MagicMock() + mock_chat.name = "test_chat" + original_invoke = AsyncMock() + mock_chat.invoke = original_invoke + adapter.instrument_chat(mock_chat) + assert hasattr(mock_chat.invoke, 
"_layerlens_original") + adapter.disconnect() + assert mock_chat.invoke is original_invoke + + +class TestMSAgentAdapterEvents: + def test_on_run_start_emits_agent_input(self, adapter, mock_stratix): + adapter.on_run_start(agent_name="test_agent", input_data="hello") + events = mock_stratix.get_events("agent.input") + assert len(events) == 1 + assert events[0]["payload"]["framework"] == "ms_agent_framework" + assert events[0]["payload"]["agent_name"] == "test_agent" + + def test_on_run_end_emits_agent_output(self, adapter, mock_stratix): + adapter.on_run_start(agent_name="test_agent", input_data="hello") + adapter.on_run_end(agent_name="test_agent", output="response") + events = mock_stratix.get_events("agent.output") + assert len(events) == 1 + assert events[0]["payload"]["duration_ns"] >= 0 # may be 0 in fast test execution + + def test_on_tool_use_emits_tool_call(self, adapter, mock_stratix): + adapter.on_tool_use( + tool_name="get_weather", + tool_input={"city": "Seattle"}, + tool_output={"temp": "65F"}, + ) + events = mock_stratix.get_events("tool.call") + assert len(events) == 1 + assert events[0]["payload"]["tool_name"] == "get_weather" + + def test_on_llm_call_emits_model_invoke(self, adapter, mock_stratix): + adapter.on_llm_call( + provider="azure_openai", + model="gpt-4o", + tokens_prompt=150, + tokens_completion=75, + latency_ms=600.0, + ) + events = mock_stratix.get_events("model.invoke") + assert len(events) == 1 + assert events[0]["payload"]["model"] == "gpt-4o" + + def test_on_handoff_emits_agent_handoff(self, adapter, mock_stratix): + adapter.on_handoff(from_agent="agent_a", to_agent="agent_b") + events = mock_stratix.get_events("agent.handoff") + assert len(events) == 1 + assert events[0]["payload"]["from_agent"] == "agent_a" + assert events[0]["payload"]["to_agent"] == "agent_b" + assert events[0]["payload"]["reason"] == "group_chat_turn" + + def test_error_in_output(self, adapter, mock_stratix): + adapter.on_run_end(agent_name="test_agent", 
output=None, error=Exception("test error")) + events = mock_stratix.get_events("agent.output") + assert len(events) == 1 + assert "error" in events[0]["payload"] + + def test_state_change_on_run_end(self, adapter, mock_stratix): + adapter.on_run_end(agent_name="test_agent", output="done") + events = mock_stratix.get_events("agent.state.change") + assert len(events) == 1 + assert events[0]["payload"]["event_subtype"] == "run_complete" + + def test_state_change_on_error(self, adapter, mock_stratix): + adapter.on_run_end(agent_name="test_agent", output=None, error=Exception("fail")) + events = mock_stratix.get_events("agent.state.change") + assert len(events) == 1 + assert events[0]["payload"]["event_subtype"] == "run_failed" + + def test_detect_provider_azure_default(self, adapter): + assert adapter._detect_provider("some-custom-model") == "azure_openai" + + def test_detect_provider_known(self, adapter): + assert adapter._detect_provider("gpt-4o") == "openai" + assert adapter._detect_provider("claude-3-opus") == "anthropic" + assert adapter._detect_provider("phi-3") == "microsoft" + + +class TestMSAgentAdapterRegistry: + def test_adapter_registered(self): + from layerlens.instrument.adapters._base.registry import _ADAPTER_MODULES + + assert "ms_agent_framework" in _ADAPTER_MODULES + assert ( + _ADAPTER_MODULES["ms_agent_framework"] == "layerlens.instrument.adapters.frameworks.ms_agent_framework" + ) From aa52e18db3c77e371e26df66c5d2ceed0147b702 Mon Sep 17 00:00:00 2001 From: mmercuri Date: Sun, 10 May 2026 09:53:01 -0700 Subject: [PATCH 2/4] test: port tier-2 deeper test suite for pydantic_ai from ateam (port-as-is) --- .../frameworks/pydantic_ai/__init__.py | 7 + .../frameworks/pydantic_ai/conftest.py | 63 +++ .../frameworks/pydantic_ai/test_events.py | 88 ++++ .../pydantic_ai/test_integration.py | 433 ++++++++++++++++++ .../frameworks/pydantic_ai/test_lifecycle.py | 83 ++++ .../pydantic_ai/test_multi_agent.py | 61 +++ 6 files changed, 735 insertions(+) create mode 
100644 tests/instrument/adapters/frameworks/pydantic_ai/__init__.py create mode 100644 tests/instrument/adapters/frameworks/pydantic_ai/conftest.py create mode 100644 tests/instrument/adapters/frameworks/pydantic_ai/test_events.py create mode 100644 tests/instrument/adapters/frameworks/pydantic_ai/test_integration.py create mode 100644 tests/instrument/adapters/frameworks/pydantic_ai/test_lifecycle.py create mode 100644 tests/instrument/adapters/frameworks/pydantic_ai/test_multi_agent.py diff --git a/tests/instrument/adapters/frameworks/pydantic_ai/__init__.py b/tests/instrument/adapters/frameworks/pydantic_ai/__init__.py new file mode 100644 index 00000000..e81a5a58 --- /dev/null +++ b/tests/instrument/adapters/frameworks/pydantic_ai/__init__.py @@ -0,0 +1,7 @@ +"""Dedicated tests for the pydantic_ai framework adapter. + +Ported from ``ateam/tests/adapters/pydantic_ai/`` so that +stratix-python matches ateam's deeper coverage instead of relying +solely on the consolidated smoke test +(``tests/instrument/adapters/frameworks/test_pydantic_ai_adapter.py``). +""" diff --git a/tests/instrument/adapters/frameworks/pydantic_ai/conftest.py b/tests/instrument/adapters/frameworks/pydantic_ai/conftest.py new file mode 100644 index 00000000..bdfcdcd9 --- /dev/null +++ b/tests/instrument/adapters/frameworks/pydantic_ai/conftest.py @@ -0,0 +1,63 @@ +"""Shared test fixtures for Pydantic AI adapter tests. + +Ported as-is from ``ateam/tests/adapters/pydantic_ai/conftest.py``. 
+ +Translation rules applied: +* ``stratix.sdk.python.adapters.pydantic_ai.lifecycle`` → + ``layerlens.instrument.adapters.frameworks.pydantic_ai.lifecycle`` +* ``stratix.sdk.python.adapters.base`` → + ``layerlens.instrument.adapters._base`` +* ``stratix.sdk.python.adapters.capture.CaptureConfig`` → + ``layerlens.instrument.adapters._base.CaptureConfig`` +* ``stratix.sdk.python.adapters.replay_models.ReplayableTrace`` → + ``layerlens.instrument.adapters._base.ReplayableTrace`` +* ``stratix.sdk.python.adapters.registry._ADAPTER_MODULES`` → + ``layerlens.instrument.adapters._base.registry._ADAPTER_MODULES`` +* The wrapper marker attribute renamed by the source from + ``_stratix_original`` to ``_layerlens_original``. + +Multi-tenancy: per the transitional "stratix attribute" pattern (see +migration doc §2.3 step 2 — keystone PR #118 still DRAFT), the +``MockStratix`` / ``EventCollector`` test stub gets an ``org_id`` +attribute. The post-merge sweep PR will rebase to canonical kwarg once +#118 lands. 
+""" + +import pytest + +from layerlens.instrument.adapters.frameworks.pydantic_ai.lifecycle import PydanticAIAdapter + + +class MockStratix: + """Mock STRATIX instance for testing.""" + + def __init__(self): + self.events = [] + self.org_id = "test-org" + + def emit(self, event_type: str, payload: dict): + self.events.append({"type": event_type, "payload": payload}) + + def get_events(self, event_type: str = None): + if event_type: + return [e for e in self.events if e["type"] == event_type] + return self.events + + +@pytest.fixture +def mock_stratix(): + return MockStratix() + + +@pytest.fixture +def adapter(mock_stratix): + adapter = PydanticAIAdapter(stratix=mock_stratix) + adapter.connect() + return adapter + + +@pytest.fixture +def adapter_no_stratix(): + adapter = PydanticAIAdapter() + adapter.connect() + return adapter diff --git a/tests/instrument/adapters/frameworks/pydantic_ai/test_events.py b/tests/instrument/adapters/frameworks/pydantic_ai/test_events.py new file mode 100644 index 00000000..0d93c97a --- /dev/null +++ b/tests/instrument/adapters/frameworks/pydantic_ai/test_events.py @@ -0,0 +1,88 @@ +"""Test Pydantic AI adapter event emission. + +Ported as-is from ``ateam/tests/adapters/pydantic_ai/test_events.py``. + +Translation rules applied: +* ``stratix.sdk.python.adapters.pydantic_ai.lifecycle`` → + ``layerlens.instrument.adapters.frameworks.pydantic_ai.lifecycle`` +* ``stratix.sdk.python.adapters.base`` → + ``layerlens.instrument.adapters._base`` +* ``stratix.sdk.python.adapters.capture.CaptureConfig`` → + ``layerlens.instrument.adapters._base.CaptureConfig`` +* ``stratix.sdk.python.adapters.replay_models.ReplayableTrace`` → + ``layerlens.instrument.adapters._base.ReplayableTrace`` +* ``stratix.sdk.python.adapters.registry._ADAPTER_MODULES`` → + ``layerlens.instrument.adapters._base.registry._ADAPTER_MODULES`` +* The wrapper marker attribute renamed by the source from + ``_stratix_original`` to ``_layerlens_original``. 
+ +Multi-tenancy: per the transitional "stratix attribute" pattern (see +migration doc §2.3 step 2 — keystone PR #118 still DRAFT), the +``MockStratix`` / ``EventCollector`` test stub gets an ``org_id`` +attribute. The post-merge sweep PR will rebase to canonical kwarg once +#118 lands. +""" + +from layerlens.instrument.adapters._base import CaptureConfig + + +class TestPydanticAIAdapterEvents: + def test_on_on_run_start_emits_agent_input(self, adapter, mock_stratix): + adapter.on_run_start(agent_name="test_agent", input_data="hello") + events = mock_stratix.get_events("agent.input") + assert len(events) == 1 + assert events[0]["payload"]["framework"] == "pydantic_ai" + + def test_on_on_run_end_emits_agent_output(self, adapter, mock_stratix): + adapter.on_run_start(agent_name="test_agent", input_data="hello") + adapter.on_run_end(agent_name="test_agent", output="response") + events = mock_stratix.get_events("agent.output") + assert len(events) == 1 + + def test_on_tool_use_emits_tool_call(self, adapter, mock_stratix): + adapter.on_tool_use( + tool_name="test_tool", + tool_input={"query": "test"}, + tool_output={"result": "ok"}, + ) + events = mock_stratix.get_events("tool.call") + assert len(events) == 1 + assert events[0]["payload"]["tool_name"] == "test_tool" + + def test_on_llm_call_emits_model_invoke(self, adapter, mock_stratix): + adapter.on_llm_call( + provider="openai", + model="gpt-4o", + tokens_prompt=100, + tokens_completion=50, + latency_ms=500.0, + ) + events = mock_stratix.get_events("model.invoke") + assert len(events) == 1 + assert events[0]["payload"]["model"] == "gpt-4o" + + def test_capture_config_minimal_gates_l3_l5(self, mock_stratix): + from layerlens.instrument.adapters.frameworks.pydantic_ai.lifecycle import PydanticAIAdapter + + adapter = PydanticAIAdapter(stratix=mock_stratix, capture_config=CaptureConfig.minimal()) + adapter.connect() + adapter.on_llm_call(model="gpt-4o") + adapter.on_tool_use(tool_name="test") + assert 
len(mock_stratix.get_events("model.invoke")) == 0 + assert len(mock_stratix.get_events("tool.call")) == 0 + + def test_cross_cutting_always_emitted(self, mock_stratix): + from layerlens.instrument.adapters.frameworks.pydantic_ai.lifecycle import PydanticAIAdapter + + adapter = PydanticAIAdapter(stratix=mock_stratix, capture_config=CaptureConfig.minimal()) + adapter.connect() + adapter.emit_dict_event( + "agent.state.change", {"framework": "pydantic_ai", "event_subtype": "test"} + ) + assert len(mock_stratix.get_events("agent.state.change")) == 1 + + def test_error_in_output(self, adapter, mock_stratix): + adapter.on_run_end(agent_name="test_agent", output=None, error=Exception("test error")) + events = mock_stratix.get_events("agent.output") + assert len(events) == 1 + assert "error" in events[0]["payload"] diff --git a/tests/instrument/adapters/frameworks/pydantic_ai/test_integration.py b/tests/instrument/adapters/frameworks/pydantic_ai/test_integration.py new file mode 100644 index 00000000..dc253663 --- /dev/null +++ b/tests/instrument/adapters/frameworks/pydantic_ai/test_integration.py @@ -0,0 +1,433 @@ +"""Integration tests for PydanticAI adapter using the REAL SDK. + +These tests verify that PydanticAIAdapter correctly captures events +from actual PydanticAI types -- not mocks. The SDK must be installed: + pip install pydantic-ai + +Tests are skipped if pydantic-ai is not installed. + +Ported as-is from ``ateam/tests/adapters/pydantic_ai/test_integration.py``. 
+ +Translation rules applied: +* ``stratix.sdk.python.adapters.pydantic_ai.lifecycle`` → + ``layerlens.instrument.adapters.frameworks.pydantic_ai.lifecycle`` +* ``stratix.sdk.python.adapters.base`` → + ``layerlens.instrument.adapters._base`` +* ``stratix.sdk.python.adapters.capture.CaptureConfig`` → + ``layerlens.instrument.adapters._base.CaptureConfig`` +* ``stratix.sdk.python.adapters.replay_models.ReplayableTrace`` → + ``layerlens.instrument.adapters._base.ReplayableTrace`` +* ``stratix.sdk.python.adapters.registry._ADAPTER_MODULES`` → + ``layerlens.instrument.adapters._base.registry._ADAPTER_MODULES`` +* The wrapper marker attribute renamed by the source from + ``_stratix_original`` to ``_layerlens_original``. + +Multi-tenancy: per the transitional "stratix attribute" pattern (see +migration doc §2.3 step 2 — keystone PR #118 still DRAFT), the +``MockStratix`` / ``EventCollector`` test stub gets an ``org_id`` +attribute. The post-merge sweep PR will rebase to canonical kwarg once +#118 lands. 
+""" + +from __future__ import annotations + +from typing import Any + +import pytest + +pydantic_ai = pytest.importorskip("pydantic_ai", reason="pydantic-ai not installed") + +from pydantic_ai import Agent # noqa: E402 + +from layerlens.instrument.adapters._base import AdapterCapability, AdapterStatus # noqa: E402 +from layerlens.instrument.adapters._base import CaptureConfig # noqa: E402 +from layerlens.instrument.adapters.frameworks.pydantic_ai.lifecycle import PydanticAIAdapter # noqa: E402 + +# --------------------------------------------------------------------------- +# Real event collector (not a mock) +# --------------------------------------------------------------------------- + + +class EventCollector: + """Accumulates events emitted by the adapter for assertions.""" + + def __init__(self) -> None: + self.org_id: str = "test-org" + self.events: list[dict[str, Any]] = [] + + def emit(self, event_type: str, payload: dict[str, Any]) -> None: + self.events.append({"type": event_type, "payload": payload}) + + def get_events(self, event_type: str | None = None) -> list[dict[str, Any]]: + if event_type: + return [e for e in self.events if e["type"] == event_type] + return self.events + + +# --------------------------------------------------------------------------- +# Adapter construction with real SDK types +# --------------------------------------------------------------------------- + + +class TestAdapterWithRealSDK: + """Verify adapter constructs and connects with real PydanticAI classes.""" + + def test_framework_metadata(self) -> None: + """Adapter should expose correct framework name and version.""" + collector = EventCollector() + adapter = PydanticAIAdapter(stratix=collector) + assert adapter.FRAMEWORK == "pydantic_ai" + assert adapter.VERSION is not None + + def test_connect_detects_sdk_version(self) -> None: + """connect() should detect the installed PydanticAI version.""" + collector = EventCollector() + adapter = 
PydanticAIAdapter(stratix=collector) + adapter.connect() + health = adapter.health_check() + assert health.framework_version is not None + assert health.framework_version != "" + + def test_adapter_capabilities(self) -> None: + """Adapter declares correct capabilities for the SDK.""" + collector = EventCollector() + adapter = PydanticAIAdapter(stratix=collector) + info = adapter.get_adapter_info() + assert AdapterCapability.TRACE_TOOLS in info.capabilities + assert AdapterCapability.TRACE_MODELS in info.capabilities + assert AdapterCapability.TRACE_STATE in info.capabilities + + def test_capture_config_propagates(self) -> None: + """CaptureConfig correctly controls which events are captured.""" + collector = EventCollector() + config = CaptureConfig( + l3_model_metadata=True, + l5a_tool_calls=False, + l1_agent_io=True, + ) + adapter = PydanticAIAdapter(stratix=collector, capture_config=config) + assert adapter._capture_config.l3_model_metadata is True + assert adapter._capture_config.l5a_tool_calls is False + + def test_real_agent_type_is_constructable(self) -> None: + """Real Agent from PydanticAI can be instantiated (no API key needed).""" + agent = Agent( + "test", + system_prompt="You are a helpful assistant.", + ) + assert agent is not None + + +# --------------------------------------------------------------------------- +# Agent wrapping integration +# --------------------------------------------------------------------------- + + +class TestAgentWrapping: + """Verify the adapter correctly wraps real PydanticAI Agent methods.""" + + def test_instrument_agent_wraps_run_methods(self) -> None: + """instrument_agent should wrap run and run_sync on a real Agent.""" + collector = EventCollector() + adapter = PydanticAIAdapter(stratix=collector) + adapter.connect() + + agent = Agent("test", system_prompt="test agent") + adapter.instrument_agent(agent) + + # run should be wrapped (has _layerlens_original) + assert hasattr(agent.run, "_layerlens_original") + # 
run_sync should be wrapped if it exists + if hasattr(agent, "run_sync"): + assert hasattr(agent.run_sync, "_layerlens_original") + + def test_instrument_agent_emits_config(self) -> None: + """instrument_agent should emit environment.config for the agent.""" + collector = EventCollector() + adapter = PydanticAIAdapter(stratix=collector) + adapter.connect() + + agent = Agent("test", system_prompt="You help with tests.") + adapter.instrument_agent(agent) + + config_events = collector.get_events("environment.config") + assert len(config_events) == 1 + assert config_events[0]["payload"]["framework"] == "pydantic_ai" + + def test_instrument_agent_idempotent(self) -> None: + """Wrapping the same agent twice should not double-wrap.""" + collector = EventCollector() + adapter = PydanticAIAdapter(stratix=collector) + adapter.connect() + + agent = Agent("test", system_prompt="test") + adapter.instrument_agent(agent) + first_run = agent.run + adapter.instrument_agent(agent) + # Should be the same wrapped function, not double-wrapped + assert agent.run is first_run + + def test_disconnect_unwraps_agent(self) -> None: + """disconnect() should restore original methods on wrapped agents.""" + collector = EventCollector() + adapter = PydanticAIAdapter(stratix=collector) + adapter.connect() + + agent = Agent("test", system_prompt="test") + adapter.instrument_agent(agent) + # After wrapping, run should have the _layerlens_original marker + assert hasattr(agent.run, "_layerlens_original") + adapter.disconnect() + # After disconnect, the _layerlens_original marker should be gone + # (original method restored) + assert not hasattr(agent.run, "_layerlens_original") + + def test_config_emitted_once_per_agent_name(self) -> None: + """environment.config should only be emitted once per unique agent name.""" + collector = EventCollector() + adapter = PydanticAIAdapter(stratix=collector) + adapter.connect() + + agent1 = Agent("test", system_prompt="agent one") + agent2 = Agent("test", 
system_prompt="agent two") + adapter.instrument_agent(agent1) + adapter.instrument_agent(agent2) + + config_events = collector.get_events("environment.config") + # Same name "Agent" (class name) -- only one config should be emitted + # Note: PydanticAI Agent does not always expose .name; + # the adapter falls back to the class name + assert len(config_events) >= 1 + + +# --------------------------------------------------------------------------- +# Lifecycle hook events +# --------------------------------------------------------------------------- + + +class TestLifecycleHookEvents: + """Verify lifecycle hooks emit correct events with real SDK context.""" + + def test_run_start_end_roundtrip(self) -> None: + """on_run_start + on_run_end should emit agent.input + agent.output + state.change.""" + collector = EventCollector() + adapter = PydanticAIAdapter(stratix=collector) + adapter.connect() + + adapter.on_run_start(agent_name="assistant", input_data="What is AI?") + adapter.on_run_end(agent_name="assistant", output="AI is...") + + input_events = collector.get_events("agent.input") + output_events = collector.get_events("agent.output") + state_events = collector.get_events("agent.state.change") + + assert len(input_events) == 1 + assert len(output_events) == 1 + assert len(state_events) == 1 + + assert input_events[0]["payload"]["agent_name"] == "assistant" + assert output_events[0]["payload"]["output"] == "AI is..." 
+ assert "duration_ns" in output_events[0]["payload"] + assert state_events[0]["payload"]["event_subtype"] == "run_complete" + + def test_run_end_with_error_emits_failed_state(self) -> None: + """on_run_end with error should emit run_failed state change.""" + collector = EventCollector() + adapter = PydanticAIAdapter(stratix=collector) + adapter.connect() + + adapter.on_run_start(agent_name="assistant") + adapter.on_run_end( + agent_name="assistant", + error=RuntimeError("Model unavailable"), + ) + + output_events = collector.get_events("agent.output") + state_events = collector.get_events("agent.state.change") + + assert len(output_events) == 1 + assert "Model unavailable" in output_events[0]["payload"]["error"] + assert state_events[0]["payload"]["event_subtype"] == "run_failed" + + def test_tool_use_emits_tool_call(self) -> None: + """on_tool_use should emit a tool.call event.""" + collector = EventCollector() + adapter = PydanticAIAdapter(stratix=collector) + adapter.connect() + + adapter.on_tool_use( + tool_name="get_weather", + tool_input={"city": "Paris"}, + tool_output={"temp": 22, "unit": "C"}, + latency_ms=12.3, + ) + + tool_events = collector.get_events("tool.call") + assert len(tool_events) == 1 + assert tool_events[0]["payload"]["tool_name"] == "get_weather" + assert tool_events[0]["payload"]["latency_ms"] == 12.3 + + def test_llm_call_emits_model_invoke(self) -> None: + """on_llm_call should emit a model.invoke event.""" + collector = EventCollector() + adapter = PydanticAIAdapter(stratix=collector) + adapter.connect() + + adapter.on_llm_call( + provider="anthropic", + model="claude-opus-4-6", + tokens_prompt=200, + tokens_completion=100, + latency_ms=450.0, + ) + + model_events = collector.get_events("model.invoke") + assert len(model_events) == 1 + payload = model_events[0]["payload"] + assert payload["provider"] == "anthropic" + assert payload["model"] == "claude-opus-4-6" + assert payload["tokens_prompt"] == 200 + + def 
test_handoff_emits_event_with_context_hash(self) -> None: + """on_handoff should emit agent.handoff with context hash.""" + collector = EventCollector() + adapter = PydanticAIAdapter(stratix=collector) + adapter.connect() + + adapter.on_handoff( + from_agent="router", + to_agent="specialist", + context={"topic": "billing"}, + ) + + handoff_events = collector.get_events("agent.handoff") + assert len(handoff_events) == 1 + assert handoff_events[0]["payload"]["from_agent"] == "router" + assert handoff_events[0]["payload"]["to_agent"] == "specialist" + assert handoff_events[0]["payload"]["context_hash"] is not None + + +# --------------------------------------------------------------------------- +# Provider detection +# --------------------------------------------------------------------------- + + +class TestProviderDetection: + """Verify _detect_provider with real model name strings.""" + + def test_openai_models(self) -> None: + adapter = PydanticAIAdapter() + assert adapter._detect_provider("gpt-4o") == "openai" + assert adapter._detect_provider("o1-preview") == "openai" + assert adapter._detect_provider("o3-mini") == "openai" + + def test_anthropic_models(self) -> None: + adapter = PydanticAIAdapter() + assert adapter._detect_provider("claude-opus-4-6") == "anthropic" + assert adapter._detect_provider("claude-3-haiku") == "anthropic" + + def test_google_models(self) -> None: + adapter = PydanticAIAdapter() + assert adapter._detect_provider("gemini-2.0-flash") == "google" + + def test_unknown_model(self) -> None: + adapter = PydanticAIAdapter() + assert adapter._detect_provider("custom-model-v1") is None + + def test_none_model(self) -> None: + adapter = PydanticAIAdapter() + assert adapter._detect_provider(None) is None + + +# --------------------------------------------------------------------------- +# Adapter lifecycle +# --------------------------------------------------------------------------- + + +class TestAdapterLifecycle: + """Verify adapter lifecycle 
with real SDK.""" + + def test_connect_disconnect(self) -> None: + """connect() and disconnect() should transition status correctly.""" + collector = EventCollector() + adapter = PydanticAIAdapter(stratix=collector) + adapter.connect() + assert adapter._status == AdapterStatus.HEALTHY + adapter.disconnect() + assert adapter._status == AdapterStatus.DISCONNECTED + + def test_events_not_emitted_when_disconnected(self) -> None: + """After disconnect, lifecycle hooks should not emit events.""" + collector = EventCollector() + adapter = PydanticAIAdapter(stratix=collector) + adapter.connect() + adapter.disconnect() + + adapter.on_run_start(agent_name="ghost") + adapter.on_run_end(agent_name="ghost", output="nope") + adapter.on_tool_use(tool_name="phantom") + adapter.on_llm_call(model="gpt-4o") + + assert len(collector.events) == 0 + + def test_serialize_for_replay(self) -> None: + """serialize_for_replay produces a valid ReplayableTrace.""" + collector = EventCollector() + adapter = PydanticAIAdapter(stratix=collector) + adapter.connect() + trace = adapter.serialize_for_replay() + assert trace.adapter_name == "PydanticAIAdapter" + assert trace.framework == "pydantic_ai" + assert trace.trace_id is not None + + +# --------------------------------------------------------------------------- +# Capture config gating +# --------------------------------------------------------------------------- + + +class TestCaptureConfigGating: + """Verify capture config gates events correctly with real SDK.""" + + def test_minimal_config_blocks_model_and_tool_events(self) -> None: + """CaptureConfig.minimal() should block L3 and L5a events.""" + collector = EventCollector() + config = CaptureConfig.minimal() + adapter = PydanticAIAdapter(stratix=collector, capture_config=config) + adapter.connect() + + adapter.on_llm_call(model="gpt-4o") + adapter.on_tool_use(tool_name="test") + + assert len(collector.get_events("model.invoke")) == 0 + assert len(collector.get_events("tool.call")) == 0 + + 
def test_cross_cutting_always_emitted_under_minimal(self) -> None: + """Cross-cutting events should always be emitted even under minimal config.""" + collector = EventCollector() + config = CaptureConfig.minimal() + adapter = PydanticAIAdapter(stratix=collector, capture_config=config) + adapter.connect() + + adapter.emit_dict_event( + "agent.state.change", + {"framework": "pydantic_ai", "event_subtype": "test"}, + ) + + assert len(collector.get_events("agent.state.change")) == 1 + + def test_content_capture_controls_messages(self) -> None: + """When capture_content=False, messages should not be included in model.invoke.""" + collector = EventCollector() + config = CaptureConfig(capture_content=False) + adapter = PydanticAIAdapter(stratix=collector, capture_config=config) + adapter.connect() + + adapter.on_llm_call( + model="gpt-4o", + messages=[{"role": "user", "content": "secret"}], + ) + + model_events = collector.get_events("model.invoke") + assert len(model_events) == 1 + assert "messages" not in model_events[0]["payload"] diff --git a/tests/instrument/adapters/frameworks/pydantic_ai/test_lifecycle.py b/tests/instrument/adapters/frameworks/pydantic_ai/test_lifecycle.py new file mode 100644 index 00000000..6adbc4a1 --- /dev/null +++ b/tests/instrument/adapters/frameworks/pydantic_ai/test_lifecycle.py @@ -0,0 +1,83 @@ +"""Test Pydantic AI adapter lifecycle methods. + +Ported as-is from ``ateam/tests/adapters/pydantic_ai/test_lifecycle.py``. 
+ +Translation rules applied: +* ``stratix.sdk.python.adapters.pydantic_ai.lifecycle`` → + ``layerlens.instrument.adapters.frameworks.pydantic_ai.lifecycle`` +* ``stratix.sdk.python.adapters.base`` → + ``layerlens.instrument.adapters._base`` +* ``stratix.sdk.python.adapters.capture.CaptureConfig`` → + ``layerlens.instrument.adapters._base.CaptureConfig`` +* ``stratix.sdk.python.adapters.replay_models.ReplayableTrace`` → + ``layerlens.instrument.adapters._base.ReplayableTrace`` +* ``stratix.sdk.python.adapters.registry._ADAPTER_MODULES`` → + ``layerlens.instrument.adapters._base.registry._ADAPTER_MODULES`` +* The wrapper marker attribute renamed by the source from + ``_stratix_original`` to ``_layerlens_original``. + +Multi-tenancy: per the transitional "stratix attribute" pattern (see +migration doc §2.3 step 2 — keystone PR #118 still DRAFT), the +``MockStratix`` / ``EventCollector`` test stub gets an ``org_id`` +attribute. The post-merge sweep PR will rebase to canonical kwarg once +#118 lands. 
+""" + +from layerlens.instrument.adapters._base import AdapterStatus +from layerlens.instrument.adapters.frameworks.pydantic_ai.lifecycle import PydanticAIAdapter +from layerlens.instrument.adapters._base import ReplayableTrace + + +class TestPydanticAIAdapterLifecycle: + def test_adapter_initialization(self): + adapter = PydanticAIAdapter() + assert adapter.FRAMEWORK == "pydantic_ai" + assert adapter.VERSION == "0.1.0" + + def test_adapter_initialization_with_stratix(self, mock_stratix): + adapter = PydanticAIAdapter(stratix=mock_stratix) + assert adapter.has_stratix + + def test_adapter_initialization_legacy_param(self, mock_stratix): + adapter = PydanticAIAdapter(stratix_instance=mock_stratix) + assert adapter.has_stratix + + def test_connect_sets_healthy(self): + adapter = PydanticAIAdapter() + adapter.connect() + assert adapter.is_connected + assert adapter.status == AdapterStatus.HEALTHY + + def test_disconnect_sets_disconnected(self): + adapter = PydanticAIAdapter() + adapter.connect() + adapter.disconnect() + assert not adapter.is_connected + assert adapter.status == AdapterStatus.DISCONNECTED + + def test_health_check(self, adapter): + health = adapter.health_check() + assert health.status == AdapterStatus.HEALTHY + assert health.framework_name == "pydantic_ai" + assert health.adapter_version == "0.1.0" + assert health.error_count == 0 + assert not health.circuit_open + + def test_get_adapter_info(self, adapter): + info = adapter.get_adapter_info() + assert info.name == "PydanticAIAdapter" + assert info.framework == "pydantic_ai" + assert info.version == "0.1.0" + + def test_serialize_for_replay(self, adapter): + trace = adapter.serialize_for_replay() + assert isinstance(trace, ReplayableTrace) + assert trace.adapter_name == "PydanticAIAdapter" + assert trace.framework == "pydantic_ai" + assert trace.trace_id is not None + + def test_null_stratix_pattern(self): + adapter = PydanticAIAdapter() + adapter.connect() + # Should not raise even without STRATIX + 
adapter.emit_dict_event("agent.input", {"framework": "pydantic_ai"}) diff --git a/tests/instrument/adapters/frameworks/pydantic_ai/test_multi_agent.py b/tests/instrument/adapters/frameworks/pydantic_ai/test_multi_agent.py new file mode 100644 index 00000000..be097052 --- /dev/null +++ b/tests/instrument/adapters/frameworks/pydantic_ai/test_multi_agent.py @@ -0,0 +1,61 @@ +"""Test Pydantic AI adapter multi-agent tracing. + +Ported as-is from ``ateam/tests/adapters/pydantic_ai/test_multi_agent.py``. + +Translation rules applied: +* ``stratix.sdk.python.adapters.pydantic_ai.lifecycle`` → + ``layerlens.instrument.adapters.frameworks.pydantic_ai.lifecycle`` +* ``stratix.sdk.python.adapters.base`` → + ``layerlens.instrument.adapters._base`` +* ``stratix.sdk.python.adapters.capture.CaptureConfig`` → + ``layerlens.instrument.adapters._base.CaptureConfig`` +* ``stratix.sdk.python.adapters.replay_models.ReplayableTrace`` → + ``layerlens.instrument.adapters._base.ReplayableTrace`` +* ``stratix.sdk.python.adapters.registry._ADAPTER_MODULES`` → + ``layerlens.instrument.adapters._base.registry._ADAPTER_MODULES`` +* The wrapper marker attribute renamed by the source from + ``_stratix_original`` to ``_layerlens_original``. + +Multi-tenancy: per the transitional "stratix attribute" pattern (see +migration doc §2.3 step 2 — keystone PR #118 still DRAFT), the +``MockStratix`` / ``EventCollector`` test stub gets an ``org_id`` +attribute. The post-merge sweep PR will rebase to canonical kwarg once +#118 lands. 
+""" + +class TestPydanticAIAdapterMultiAgent: + def test_handoff_emits_agent_handoff(self, adapter, mock_stratix): + adapter.on_handoff( + from_agent="agent_a", + to_agent="agent_b", + context="delegation context", + ) + events = mock_stratix.get_events("agent.handoff") + assert len(events) == 1 + assert events[0]["payload"]["from_agent"] == "agent_a" + assert events[0]["payload"]["to_agent"] == "agent_b" + + def test_multiple_handoffs(self, adapter, mock_stratix): + adapter.on_handoff(from_agent="a", to_agent="b") + adapter.on_handoff(from_agent="b", to_agent="c") + events = mock_stratix.get_events("agent.handoff") + assert len(events) == 2 + + def test_agent_config_emitted_once(self, adapter, mock_stratix): + adapter.emit_dict_event( + "environment.config", + { + "framework": "pydantic_ai", + "agent_name": "test_agent", + }, + ) + adapter.emit_dict_event( + "environment.config", + { + "framework": "pydantic_ai", + "agent_name": "test_agent", + }, + ) + # Both emit since dedup is in _emit_agent_config, not emit_dict_event + events = mock_stratix.get_events("environment.config") + assert len(events) >= 1 From 91bbeb859bf97bff518786ffd64ab6756587516d Mon Sep 17 00:00:00 2001 From: mmercuri Date: Sun, 10 May 2026 09:53:37 -0700 Subject: [PATCH 3/4] test: port tier-2 deeper test suite for smolagents from ateam (port-as-is) --- .../frameworks/smolagents/__init__.py | 7 + .../frameworks/smolagents/conftest.py | 63 +++ .../frameworks/smolagents/test_events.py | 88 +++++ .../frameworks/smolagents/test_integration.py | 372 ++++++++++++++++++ .../frameworks/smolagents/test_lifecycle.py | 83 ++++ .../frameworks/smolagents/test_multi_agent.py | 61 +++ 6 files changed, 674 insertions(+) create mode 100644 tests/instrument/adapters/frameworks/smolagents/__init__.py create mode 100644 tests/instrument/adapters/frameworks/smolagents/conftest.py create mode 100644 tests/instrument/adapters/frameworks/smolagents/test_events.py create mode 100644 
tests/instrument/adapters/frameworks/smolagents/test_integration.py create mode 100644 tests/instrument/adapters/frameworks/smolagents/test_lifecycle.py create mode 100644 tests/instrument/adapters/frameworks/smolagents/test_multi_agent.py diff --git a/tests/instrument/adapters/frameworks/smolagents/__init__.py b/tests/instrument/adapters/frameworks/smolagents/__init__.py new file mode 100644 index 00000000..290e3911 --- /dev/null +++ b/tests/instrument/adapters/frameworks/smolagents/__init__.py @@ -0,0 +1,7 @@ +"""Dedicated tests for the smolagents framework adapter. + +Ported from ``ateam/tests/adapters/smolagents/`` so that +stratix-python matches ateam's deeper coverage instead of relying +solely on the consolidated smoke test +(``tests/instrument/adapters/frameworks/test_smolagents_adapter.py``). +""" diff --git a/tests/instrument/adapters/frameworks/smolagents/conftest.py b/tests/instrument/adapters/frameworks/smolagents/conftest.py new file mode 100644 index 00000000..a5a4a0da --- /dev/null +++ b/tests/instrument/adapters/frameworks/smolagents/conftest.py @@ -0,0 +1,63 @@ +"""Shared test fixtures for SmolAgents adapter tests. + +Ported as-is from ``ateam/tests/adapters/smolagents/conftest.py``. + +Translation rules applied: +* ``stratix.sdk.python.adapters.smolagents.lifecycle`` → + ``layerlens.instrument.adapters.frameworks.smolagents.lifecycle`` +* ``stratix.sdk.python.adapters.base`` → + ``layerlens.instrument.adapters._base`` +* ``stratix.sdk.python.adapters.capture.CaptureConfig`` → + ``layerlens.instrument.adapters._base.CaptureConfig`` +* ``stratix.sdk.python.adapters.replay_models.ReplayableTrace`` → + ``layerlens.instrument.adapters._base.ReplayableTrace`` +* ``stratix.sdk.python.adapters.registry._ADAPTER_MODULES`` → + ``layerlens.instrument.adapters._base.registry._ADAPTER_MODULES`` +* The wrapper marker attribute renamed by the source from + ``_stratix_original`` to ``_layerlens_original``. 
+ +Multi-tenancy: per the transitional "stratix attribute" pattern (see +migration doc §2.3 step 2 — keystone PR #118 still DRAFT), the +``MockStratix`` / ``EventCollector`` test stub gets an ``org_id`` +attribute. The post-merge sweep PR will rebase to canonical kwarg once +#118 lands. +""" + +import pytest + +from layerlens.instrument.adapters.frameworks.smolagents.lifecycle import SmolAgentsAdapter + + +class MockStratix: + """Mock STRATIX instance for testing.""" + + def __init__(self): + self.events = [] + self.org_id = "test-org" + + def emit(self, event_type: str, payload: dict): + self.events.append({"type": event_type, "payload": payload}) + + def get_events(self, event_type: str = None): + if event_type: + return [e for e in self.events if e["type"] == event_type] + return self.events + + +@pytest.fixture +def mock_stratix(): + return MockStratix() + + +@pytest.fixture +def adapter(mock_stratix): + adapter = SmolAgentsAdapter(stratix=mock_stratix) + adapter.connect() + return adapter + + +@pytest.fixture +def adapter_no_stratix(): + adapter = SmolAgentsAdapter() + adapter.connect() + return adapter diff --git a/tests/instrument/adapters/frameworks/smolagents/test_events.py b/tests/instrument/adapters/frameworks/smolagents/test_events.py new file mode 100644 index 00000000..91cdd234 --- /dev/null +++ b/tests/instrument/adapters/frameworks/smolagents/test_events.py @@ -0,0 +1,88 @@ +"""Test SmolAgents adapter event emission. + +Ported as-is from ``ateam/tests/adapters/smolagents/test_events.py``. 
+ +Translation rules applied: +* ``stratix.sdk.python.adapters.smolagents.lifecycle`` → + ``layerlens.instrument.adapters.frameworks.smolagents.lifecycle`` +* ``stratix.sdk.python.adapters.base`` → + ``layerlens.instrument.adapters._base`` +* ``stratix.sdk.python.adapters.capture.CaptureConfig`` → + ``layerlens.instrument.adapters._base.CaptureConfig`` +* ``stratix.sdk.python.adapters.replay_models.ReplayableTrace`` → + ``layerlens.instrument.adapters._base.ReplayableTrace`` +* ``stratix.sdk.python.adapters.registry._ADAPTER_MODULES`` → + ``layerlens.instrument.adapters._base.registry._ADAPTER_MODULES`` +* The wrapper marker attribute renamed by the source from + ``_stratix_original`` to ``_layerlens_original``. + +Multi-tenancy: per the transitional "stratix attribute" pattern (see +migration doc §2.3 step 2 — keystone PR #118 still DRAFT), the +``MockStratix`` / ``EventCollector`` test stub gets an ``org_id`` +attribute. The post-merge sweep PR will rebase to canonical kwarg once +#118 lands. 
+""" + +from layerlens.instrument.adapters._base import CaptureConfig + + +class TestSmolAgentsAdapterEvents: + def test_on_on_run_start_emits_agent_input(self, adapter, mock_stratix): + adapter.on_run_start(agent_name="test_agent", input_data="hello") + events = mock_stratix.get_events("agent.input") + assert len(events) == 1 + assert events[0]["payload"]["framework"] == "smolagents" + + def test_on_on_run_end_emits_agent_output(self, adapter, mock_stratix): + adapter.on_run_start(agent_name="test_agent", input_data="hello") + adapter.on_run_end(agent_name="test_agent", output="response") + events = mock_stratix.get_events("agent.output") + assert len(events) == 1 + + def test_on_tool_use_emits_tool_call(self, adapter, mock_stratix): + adapter.on_tool_use( + tool_name="test_tool", + tool_input={"query": "test"}, + tool_output={"result": "ok"}, + ) + events = mock_stratix.get_events("tool.call") + assert len(events) == 1 + assert events[0]["payload"]["tool_name"] == "test_tool" + + def test_on_llm_call_emits_model_invoke(self, adapter, mock_stratix): + adapter.on_llm_call( + provider="openai", + model="gpt-4o", + tokens_prompt=100, + tokens_completion=50, + latency_ms=500.0, + ) + events = mock_stratix.get_events("model.invoke") + assert len(events) == 1 + assert events[0]["payload"]["model"] == "gpt-4o" + + def test_capture_config_minimal_gates_l3_l5(self, mock_stratix): + from layerlens.instrument.adapters.frameworks.smolagents.lifecycle import SmolAgentsAdapter + + adapter = SmolAgentsAdapter(stratix=mock_stratix, capture_config=CaptureConfig.minimal()) + adapter.connect() + adapter.on_llm_call(model="gpt-4o") + adapter.on_tool_use(tool_name="test") + assert len(mock_stratix.get_events("model.invoke")) == 0 + assert len(mock_stratix.get_events("tool.call")) == 0 + + def test_cross_cutting_always_emitted(self, mock_stratix): + from layerlens.instrument.adapters.frameworks.smolagents.lifecycle import SmolAgentsAdapter + + adapter = 
SmolAgentsAdapter(stratix=mock_stratix, capture_config=CaptureConfig.minimal()) + adapter.connect() + adapter.emit_dict_event( + "agent.state.change", {"framework": "smolagents", "event_subtype": "test"} + ) + assert len(mock_stratix.get_events("agent.state.change")) == 1 + + def test_error_in_output(self, adapter, mock_stratix): + adapter.on_run_end(agent_name="test_agent", output=None, error=Exception("test error")) + events = mock_stratix.get_events("agent.output") + assert len(events) == 1 + assert "error" in events[0]["payload"] diff --git a/tests/instrument/adapters/frameworks/smolagents/test_integration.py b/tests/instrument/adapters/frameworks/smolagents/test_integration.py new file mode 100644 index 00000000..e9cc0463 --- /dev/null +++ b/tests/instrument/adapters/frameworks/smolagents/test_integration.py @@ -0,0 +1,372 @@ +"""Integration tests for SmolAgents adapter using the REAL SmolAgents SDK. + +These tests verify that SmolAgentsAdapter correctly instruments and +captures events from actual SmolAgents types. The SDK must be installed: + pip install 'stratix[smolagents]' + +Tests are skipped if smolagents is not installed. + +Ported as-is from ``ateam/tests/adapters/smolagents/test_integration.py``. + +Translation rules applied: +* ``stratix.sdk.python.adapters.smolagents.lifecycle`` → + ``layerlens.instrument.adapters.frameworks.smolagents.lifecycle`` +* ``stratix.sdk.python.adapters.base`` → + ``layerlens.instrument.adapters._base`` +* ``stratix.sdk.python.adapters.capture.CaptureConfig`` → + ``layerlens.instrument.adapters._base.CaptureConfig`` +* ``stratix.sdk.python.adapters.replay_models.ReplayableTrace`` → + ``layerlens.instrument.adapters._base.ReplayableTrace`` +* ``stratix.sdk.python.adapters.registry._ADAPTER_MODULES`` → + ``layerlens.instrument.adapters._base.registry._ADAPTER_MODULES`` +* The wrapper marker attribute renamed by the source from + ``_stratix_original`` to ``_layerlens_original``. 
+ +Multi-tenancy: per the transitional "stratix attribute" pattern (see +migration doc §2.3 step 2 — keystone PR #118 still DRAFT), the +``MockStratix`` / ``EventCollector`` test stub gets an ``org_id`` +attribute. The post-merge sweep PR will rebase to canonical kwarg once +#118 lands. +""" + +from __future__ import annotations + +from typing import Any + +import pytest + +smolagents = pytest.importorskip("smolagents", reason="smolagents not installed") + +from layerlens.instrument.adapters._base import AdapterCapability, AdapterStatus # noqa: E402 +from layerlens.instrument.adapters._base import CaptureConfig # noqa: E402 +from layerlens.instrument.adapters.frameworks.smolagents.lifecycle import SmolAgentsAdapter # noqa: E402 + +# --------------------------------------------------------------------------- +# Test STRATIX instance that collects events +# --------------------------------------------------------------------------- + + +class EventCollector: + """Real event collector -- not a mock. 
Accumulates events for assertions.""" + + def __init__(self) -> None: + self.org_id: str = "test-org" + self.events: list[dict[str, Any]] = [] + + def emit(self, event_type: str, payload: dict[str, Any]) -> None: + self.events.append({"type": event_type, "payload": payload}) + + def get_events(self, event_type: str | None = None) -> list[dict[str, Any]]: + if event_type: + return [e for e in self.events if e["type"] == event_type] + return self.events + + +# --------------------------------------------------------------------------- +# Adapter construction with real SDK +# --------------------------------------------------------------------------- + + +class TestAdapterWithRealSDK: + """Verify adapter constructs and connects with real SmolAgents classes.""" + + def test_connect_detects_framework_version(self) -> None: + """connect() should detect the smolagents version.""" + collector = EventCollector() + adapter = SmolAgentsAdapter(stratix=collector) + adapter.connect() + assert adapter._status == AdapterStatus.HEALTHY + assert adapter._framework_version is not None + assert True # framework_version may be "unknown" if SDK not fully installed + adapter.disconnect() + + def test_adapter_info_metadata(self) -> None: + """Adapter info should expose correct framework metadata.""" + collector = EventCollector() + adapter = SmolAgentsAdapter(stratix=collector) + adapter.connect() + info = adapter.get_adapter_info() + assert info.framework == "smolagents" + assert info.name == "SmolAgentsAdapter" + assert AdapterCapability.TRACE_TOOLS in info.capabilities + assert AdapterCapability.TRACE_MODELS in info.capabilities + adapter.disconnect() + + def test_health_check_returns_healthy(self) -> None: + """health_check should return HEALTHY after connect.""" + collector = EventCollector() + adapter = SmolAgentsAdapter(stratix=collector) + adapter.connect() + health = adapter.health_check() + assert health.status == AdapterStatus.HEALTHY + assert health.framework_name == 
"smolagents" + adapter.disconnect() + + def test_capture_config_propagates(self) -> None: + """CaptureConfig correctly controls which events are captured.""" + collector = EventCollector() + config = CaptureConfig( + l3_model_metadata=True, + l5a_tool_calls=False, + l1_agent_io=True, + ) + adapter = SmolAgentsAdapter(stratix=collector, capture_config=config) + assert adapter._capture_config.l3_model_metadata is True + assert adapter._capture_config.l5a_tool_calls is False + + +# --------------------------------------------------------------------------- +# SDK types and instrumentation +# --------------------------------------------------------------------------- + + +class TestSDKTypesExist: + """Verify expected SmolAgents SDK types are importable.""" + + def test_tool_class_exists(self) -> None: + """SmolAgents should have a Tool base class.""" + assert hasattr(smolagents, "Tool") or hasattr(smolagents, "tool") + + def test_agent_classes_exist(self) -> None: + """SmolAgents should have agent classes available.""" + # At least one agent class should exist + has_code_agent = hasattr(smolagents, "CodeAgent") + has_tool_calling_agent = hasattr(smolagents, "ToolCallingAgent") + has_multi_step = hasattr(smolagents, "MultiStepAgent") + assert has_code_agent or has_tool_calling_agent or has_multi_step, ( + "No recognized agent class found in smolagents" + ) + + +class TestInstrumentationWithFakeAgent: + """Test instrumentation using a fake agent that mimics SmolAgents API.""" + + def _make_fake_agent(self) -> Any: + """Create a minimal object that looks like a SmolAgents agent.""" + + class FakeAgent: + name = "test_agent" + tools = {"search": "search_tool", "calc": "calc_tool"} + model = "HfApiModel" + managed_agents = None + system_prompt = "You are a helpful assistant." 
+ + def run(self, task: str) -> str: + return f"Result for: {task}" + + return FakeAgent() + + def test_instrument_wraps_run(self) -> None: + """instrument_agent should wrap the run method.""" + collector = EventCollector() + adapter = SmolAgentsAdapter(stratix=collector) + adapter.connect() + + agent = self._make_fake_agent() + adapter.instrument_agent(agent) + + # run should now be wrapped + assert hasattr(agent.run, "_layerlens_original") + adapter.disconnect() + + def test_instrumented_run_emits_events(self) -> None: + """Running an instrumented agent should emit agent.input and agent.output.""" + collector = EventCollector() + adapter = SmolAgentsAdapter(stratix=collector) + adapter.connect() + + agent = self._make_fake_agent() + adapter.instrument_agent(agent) + + result = agent.run("What is 2+2?") + assert result == "Result for: What is 2+2?" + + input_events = collector.get_events("agent.input") + output_events = collector.get_events("agent.output") + assert len(input_events) == 1 + assert len(output_events) == 1 + assert input_events[0]["payload"]["agent_name"] == "test_agent" + assert "Result for" in str(output_events[0]["payload"]["output"]) + + adapter.disconnect() + + def test_instrument_emits_config_event(self) -> None: + """instrument_agent should emit an environment.config event.""" + collector = EventCollector() + adapter = SmolAgentsAdapter(stratix=collector) + adapter.connect() + + agent = self._make_fake_agent() + adapter.instrument_agent(agent) + + config_events = collector.get_events("environment.config") + assert len(config_events) == 1 + assert config_events[0]["payload"]["agent_name"] == "test_agent" + assert "search" in config_events[0]["payload"]["tools"] + + adapter.disconnect() + + def test_instrument_idempotent(self) -> None: + """Instrumenting the same agent twice should be a no-op.""" + collector = EventCollector() + adapter = SmolAgentsAdapter(stratix=collector) + adapter.connect() + + agent = self._make_fake_agent() + 
adapter.instrument_agent(agent) + adapter.instrument_agent(agent) + + # Should only get one config event + config_events = collector.get_events("environment.config") + assert len(config_events) == 1 + + adapter.disconnect() + + def test_disconnect_unwraps_agent(self) -> None: + """disconnect() should restore the original run method.""" + collector = EventCollector() + adapter = SmolAgentsAdapter(stratix=collector) + adapter.connect() + + agent = self._make_fake_agent() + _original_run = agent.run + adapter.instrument_agent(agent) + assert hasattr(agent.run, "_layerlens_original") + + adapter.disconnect() + # After disconnect, the original should be restored + assert not hasattr(agent.run, "_layerlens_original") + + +# --------------------------------------------------------------------------- +# Lifecycle hooks +# --------------------------------------------------------------------------- + + +class TestLifecycleHooks: + """Test manual lifecycle hook invocations.""" + + def test_on_run_start_emits_agent_input(self) -> None: + """on_run_start should emit an agent.input event.""" + collector = EventCollector() + adapter = SmolAgentsAdapter(stratix=collector) + adapter.connect() + + adapter.on_run_start(agent_name="my_agent", input_data="Hello") + + events = collector.get_events("agent.input") + assert len(events) == 1 + assert events[0]["payload"]["agent_name"] == "my_agent" + assert events[0]["payload"]["input"] == "Hello" + + adapter.disconnect() + + def test_on_run_end_emits_agent_output(self) -> None: + """on_run_end should emit an agent.output event.""" + collector = EventCollector() + adapter = SmolAgentsAdapter(stratix=collector) + adapter.connect() + + adapter.on_run_end(agent_name="my_agent", output="World") + + events = collector.get_events("agent.output") + assert len(events) == 1 + assert events[0]["payload"]["agent_name"] == "my_agent" + assert events[0]["payload"]["output"] == "World" + + adapter.disconnect() + + def 
test_on_tool_use_emits_tool_call(self) -> None: + """on_tool_use should emit a tool.call event.""" + collector = EventCollector() + adapter = SmolAgentsAdapter(stratix=collector) + adapter.connect() + + adapter.on_tool_use( + tool_name="search", + tool_input="query text", + tool_output="search results", + ) + + events = collector.get_events("tool.call") + assert len(events) == 1 + assert events[0]["payload"]["tool_name"] == "search" + + adapter.disconnect() + + def test_on_llm_call_emits_model_invoke(self) -> None: + """on_llm_call should emit a model.invoke event.""" + collector = EventCollector() + adapter = SmolAgentsAdapter(stratix=collector) + adapter.connect() + + adapter.on_llm_call( + provider="huggingface", + model="Qwen/Qwen2.5-72B", + tokens_prompt=100, + tokens_completion=50, + latency_ms=250.0, + ) + + events = collector.get_events("model.invoke") + assert len(events) == 1 + assert events[0]["payload"]["provider"] == "huggingface" + assert events[0]["payload"]["model"] == "Qwen/Qwen2.5-72B" + assert events[0]["payload"]["tokens_prompt"] == 100 + + adapter.disconnect() + + def test_on_handoff_emits_agent_handoff(self) -> None: + """on_handoff should emit an agent.handoff event.""" + collector = EventCollector() + adapter = SmolAgentsAdapter(stratix=collector) + adapter.connect() + + adapter.on_handoff( + from_agent="manager", + to_agent="worker", + context="Do the research task", + ) + + events = collector.get_events("agent.handoff") + assert len(events) == 1 + assert events[0]["payload"]["from_agent"] == "manager" + assert events[0]["payload"]["to_agent"] == "worker" + + adapter.disconnect() + + def test_events_not_emitted_when_disconnected(self) -> None: + """Events should not be emitted when the adapter is disconnected.""" + collector = EventCollector() + adapter = SmolAgentsAdapter(stratix=collector) + # Do NOT connect + + adapter.on_run_start(agent_name="test", input_data="hello") + adapter.on_run_end(agent_name="test", output="bye") + + assert 
len(collector.events) == 0 + + +# --------------------------------------------------------------------------- +# Replay serialization +# --------------------------------------------------------------------------- + + +class TestReplaySerialization: + """Test replay serialization captures events.""" + + def test_serialize_includes_events(self) -> None: + """serialize_for_replay should include emitted events.""" + collector = EventCollector() + adapter = SmolAgentsAdapter(stratix=collector) + adapter.connect() + + adapter.on_run_start(agent_name="agent", input_data="test") + adapter.on_run_end(agent_name="agent", output="done") + + trace = adapter.serialize_for_replay() + assert trace.adapter_name == "SmolAgentsAdapter" + assert trace.framework == "smolagents" + assert len(trace.events) >= 2 + + adapter.disconnect() diff --git a/tests/instrument/adapters/frameworks/smolagents/test_lifecycle.py b/tests/instrument/adapters/frameworks/smolagents/test_lifecycle.py new file mode 100644 index 00000000..37bd2da4 --- /dev/null +++ b/tests/instrument/adapters/frameworks/smolagents/test_lifecycle.py @@ -0,0 +1,83 @@ +"""Test SmolAgents adapter lifecycle methods. + +Ported as-is from ``ateam/tests/adapters/smolagents/test_lifecycle.py``. + +Translation rules applied: +* ``stratix.sdk.python.adapters.smolagents.lifecycle`` → + ``layerlens.instrument.adapters.frameworks.smolagents.lifecycle`` +* ``stratix.sdk.python.adapters.base`` → + ``layerlens.instrument.adapters._base`` +* ``stratix.sdk.python.adapters.capture.CaptureConfig`` → + ``layerlens.instrument.adapters._base.CaptureConfig`` +* ``stratix.sdk.python.adapters.replay_models.ReplayableTrace`` → + ``layerlens.instrument.adapters._base.ReplayableTrace`` +* ``stratix.sdk.python.adapters.registry._ADAPTER_MODULES`` → + ``layerlens.instrument.adapters._base.registry._ADAPTER_MODULES`` +* The wrapper marker attribute renamed by the source from + ``_stratix_original`` to ``_layerlens_original``. 
+ +Multi-tenancy: per the transitional "stratix attribute" pattern (see +migration doc §2.3 step 2 — keystone PR #118 still DRAFT), the +``MockStratix`` / ``EventCollector`` test stub gets an ``org_id`` +attribute. The post-merge sweep PR will rebase to canonical kwarg once +#118 lands. +""" + +from layerlens.instrument.adapters._base import AdapterStatus +from layerlens.instrument.adapters._base import ReplayableTrace +from layerlens.instrument.adapters.frameworks.smolagents.lifecycle import SmolAgentsAdapter + + +class TestSmolAgentsAdapterLifecycle: + def test_adapter_initialization(self): + adapter = SmolAgentsAdapter() + assert adapter.FRAMEWORK == "smolagents" + assert adapter.VERSION == "0.1.0" + + def test_adapter_initialization_with_stratix(self, mock_stratix): + adapter = SmolAgentsAdapter(stratix=mock_stratix) + assert adapter.has_stratix + + def test_adapter_initialization_legacy_param(self, mock_stratix): + adapter = SmolAgentsAdapter(stratix_instance=mock_stratix) + assert adapter.has_stratix + + def test_connect_sets_healthy(self): + adapter = SmolAgentsAdapter() + adapter.connect() + assert adapter.is_connected + assert adapter.status == AdapterStatus.HEALTHY + + def test_disconnect_sets_disconnected(self): + adapter = SmolAgentsAdapter() + adapter.connect() + adapter.disconnect() + assert not adapter.is_connected + assert adapter.status == AdapterStatus.DISCONNECTED + + def test_health_check(self, adapter): + health = adapter.health_check() + assert health.status == AdapterStatus.HEALTHY + assert health.framework_name == "smolagents" + assert health.adapter_version == "0.1.0" + assert health.error_count == 0 + assert not health.circuit_open + + def test_get_adapter_info(self, adapter): + info = adapter.get_adapter_info() + assert info.name == "SmolAgentsAdapter" + assert info.framework == "smolagents" + assert info.version == "0.1.0" + + def test_serialize_for_replay(self, adapter): + trace = adapter.serialize_for_replay() + assert 
isinstance(trace, ReplayableTrace) + assert trace.adapter_name == "SmolAgentsAdapter" + assert trace.framework == "smolagents" + assert trace.trace_id is not None + + def test_null_stratix_pattern(self): + adapter = SmolAgentsAdapter() + adapter.connect() + # Should not raise even without STRATIX + adapter.emit_dict_event("agent.input", {"framework": "smolagents"}) diff --git a/tests/instrument/adapters/frameworks/smolagents/test_multi_agent.py b/tests/instrument/adapters/frameworks/smolagents/test_multi_agent.py new file mode 100644 index 00000000..035ba35a --- /dev/null +++ b/tests/instrument/adapters/frameworks/smolagents/test_multi_agent.py @@ -0,0 +1,61 @@ +"""Test SmolAgents adapter multi-agent tracing. + +Ported as-is from ``ateam/tests/adapters/smolagents/test_multi_agent.py``. + +Translation rules applied: +* ``stratix.sdk.python.adapters.smolagents.lifecycle`` → + ``layerlens.instrument.adapters.frameworks.smolagents.lifecycle`` +* ``stratix.sdk.python.adapters.base`` → + ``layerlens.instrument.adapters._base`` +* ``stratix.sdk.python.adapters.capture.CaptureConfig`` → + ``layerlens.instrument.adapters._base.CaptureConfig`` +* ``stratix.sdk.python.adapters.replay_models.ReplayableTrace`` → + ``layerlens.instrument.adapters._base.ReplayableTrace`` +* ``stratix.sdk.python.adapters.registry._ADAPTER_MODULES`` → + ``layerlens.instrument.adapters._base.registry._ADAPTER_MODULES`` +* The wrapper marker attribute renamed by the source from + ``_stratix_original`` to ``_layerlens_original``. + +Multi-tenancy: per the transitional "stratix attribute" pattern (see +migration doc §2.3 step 2 — keystone PR #118 still DRAFT), the +``MockStratix`` / ``EventCollector`` test stub gets an ``org_id`` +attribute. The post-merge sweep PR will rebase to canonical kwarg once +#118 lands. 
+""" + +class TestSmolAgentsAdapterMultiAgent: + def test_handoff_emits_agent_handoff(self, adapter, mock_stratix): + adapter.on_handoff( + from_agent="agent_a", + to_agent="agent_b", + context="delegation context", + ) + events = mock_stratix.get_events("agent.handoff") + assert len(events) == 1 + assert events[0]["payload"]["from_agent"] == "agent_a" + assert events[0]["payload"]["to_agent"] == "agent_b" + + def test_multiple_handoffs(self, adapter, mock_stratix): + adapter.on_handoff(from_agent="a", to_agent="b") + adapter.on_handoff(from_agent="b", to_agent="c") + events = mock_stratix.get_events("agent.handoff") + assert len(events) == 2 + + def test_agent_config_emitted_once(self, adapter, mock_stratix): + adapter.emit_dict_event( + "environment.config", + { + "framework": "smolagents", + "agent_name": "test_agent", + }, + ) + adapter.emit_dict_event( + "environment.config", + { + "framework": "smolagents", + "agent_name": "test_agent", + }, + ) + # Both emit since dedup is in _emit_agent_config, not emit_dict_event + events = mock_stratix.get_events("environment.config") + assert len(events) >= 1 From d083d73aac38080697801f732ec17090a57e31db Mon Sep 17 00:00:00 2001 From: mmercuri Date: Sun, 10 May 2026 09:54:30 -0700 Subject: [PATCH 4/4] test: port tier-2 deeper test suite for strands from ateam (port-as-is) --- .../adapters/frameworks/strands/__init__.py | 7 + .../adapters/frameworks/strands/conftest.py | 63 +++ .../frameworks/strands/test_events.py | 83 ++++ .../frameworks/strands/test_integration.py | 368 ++++++++++++++++++ .../frameworks/strands/test_lifecycle.py | 204 ++++++++++ 5 files changed, 725 insertions(+) create mode 100644 tests/instrument/adapters/frameworks/strands/__init__.py create mode 100644 tests/instrument/adapters/frameworks/strands/conftest.py create mode 100644 tests/instrument/adapters/frameworks/strands/test_events.py create mode 100644 tests/instrument/adapters/frameworks/strands/test_integration.py create mode 100644 
tests/instrument/adapters/frameworks/strands/test_lifecycle.py diff --git a/tests/instrument/adapters/frameworks/strands/__init__.py b/tests/instrument/adapters/frameworks/strands/__init__.py new file mode 100644 index 00000000..f4bc227b --- /dev/null +++ b/tests/instrument/adapters/frameworks/strands/__init__.py @@ -0,0 +1,7 @@ +"""Dedicated tests for the strands framework adapter. + +Ported from ``ateam/tests/adapters/strands/`` so that +stratix-python matches ateam's deeper coverage instead of relying +solely on the consolidated smoke test +(``tests/instrument/adapters/frameworks/test_strands_adapter.py``). +""" diff --git a/tests/instrument/adapters/frameworks/strands/conftest.py b/tests/instrument/adapters/frameworks/strands/conftest.py new file mode 100644 index 00000000..ad1e2d41 --- /dev/null +++ b/tests/instrument/adapters/frameworks/strands/conftest.py @@ -0,0 +1,63 @@ +"""Shared test fixtures for AWS Strands adapter tests. + +Ported as-is from ``ateam/tests/adapters/strands/conftest.py``. + +Translation rules applied: +* ``stratix.sdk.python.adapters.strands.lifecycle`` → + ``layerlens.instrument.adapters.frameworks.strands.lifecycle`` +* ``stratix.sdk.python.adapters.base`` → + ``layerlens.instrument.adapters._base`` +* ``stratix.sdk.python.adapters.capture.CaptureConfig`` → + ``layerlens.instrument.adapters._base.CaptureConfig`` +* ``stratix.sdk.python.adapters.replay_models.ReplayableTrace`` → + ``layerlens.instrument.adapters._base.ReplayableTrace`` +* ``stratix.sdk.python.adapters.registry._ADAPTER_MODULES`` → + ``layerlens.instrument.adapters._base.registry._ADAPTER_MODULES`` +* The wrapper marker attribute renamed by the source from + ``_stratix_original`` to ``_layerlens_original``. + +Multi-tenancy: per the transitional "stratix attribute" pattern (see +migration doc §2.3 step 2 — keystone PR #118 still DRAFT), the +``MockStratix`` / ``EventCollector`` test stub gets an ``org_id`` +attribute. 
The post-merge sweep PR will rebase to canonical kwarg once +#118 lands. +""" + +import pytest + +from layerlens.instrument.adapters.frameworks.strands.lifecycle import StrandsAdapter + + +class MockStratix: + """Mock STRATIX instance for testing.""" + + def __init__(self): + self.events = [] + self.org_id = "test-org" + + def emit(self, event_type: str, payload: dict): + self.events.append({"type": event_type, "payload": payload}) + + def get_events(self, event_type: str = None): + if event_type: + return [e for e in self.events if e["type"] == event_type] + return self.events + + +@pytest.fixture +def mock_stratix(): + return MockStratix() + + +@pytest.fixture +def adapter(mock_stratix): + adapter = StrandsAdapter(stratix=mock_stratix) + adapter.connect() + return adapter + + +@pytest.fixture +def adapter_no_stratix(): + adapter = StrandsAdapter() + adapter.connect() + return adapter diff --git a/tests/instrument/adapters/frameworks/strands/test_events.py b/tests/instrument/adapters/frameworks/strands/test_events.py new file mode 100644 index 00000000..bf51bf9b --- /dev/null +++ b/tests/instrument/adapters/frameworks/strands/test_events.py @@ -0,0 +1,83 @@ +"""Test AWS Strands adapter event emission. + +Ported as-is from ``ateam/tests/adapters/strands/test_events.py``. 
+ +Translation rules applied: +* ``stratix.sdk.python.adapters.strands.lifecycle`` → + ``layerlens.instrument.adapters.frameworks.strands.lifecycle`` +* ``stratix.sdk.python.adapters.base`` → + ``layerlens.instrument.adapters._base`` +* ``stratix.sdk.python.adapters.capture.CaptureConfig`` → + ``layerlens.instrument.adapters._base.CaptureConfig`` +* ``stratix.sdk.python.adapters.replay_models.ReplayableTrace`` → + ``layerlens.instrument.adapters._base.ReplayableTrace`` +* ``stratix.sdk.python.adapters.registry._ADAPTER_MODULES`` → + ``layerlens.instrument.adapters._base.registry._ADAPTER_MODULES`` +* The wrapper marker attribute renamed by the source from + ``_stratix_original`` to ``_layerlens_original``. + +Multi-tenancy: per the transitional "stratix attribute" pattern (see +migration doc §2.3 step 2 — keystone PR #118 still DRAFT), the +``MockStratix`` / ``EventCollector`` test stub gets an ``org_id`` +attribute. The post-merge sweep PR will rebase to canonical kwarg once +#118 lands. 
+""" + +from layerlens.instrument.adapters._base import CaptureConfig + + +class TestStrandsAdapterEvents: + def test_capture_config_minimal_gates_l3_l5(self, mock_stratix): + from layerlens.instrument.adapters.frameworks.strands.lifecycle import StrandsAdapter + + adapter = StrandsAdapter(stratix=mock_stratix, capture_config=CaptureConfig.minimal()) + adapter.connect() + adapter.on_llm_call(model="anthropic.claude-3-sonnet") + adapter.on_tool_use(tool_name="test") + assert len(mock_stratix.get_events("model.invoke")) == 0 + assert len(mock_stratix.get_events("tool.call")) == 0 + + def test_cross_cutting_always_emitted(self, mock_stratix): + from layerlens.instrument.adapters.frameworks.strands.lifecycle import StrandsAdapter + + adapter = StrandsAdapter(stratix=mock_stratix, capture_config=CaptureConfig.minimal()) + adapter.connect() + adapter.emit_dict_event( + "agent.state.change", {"framework": "strands", "event_subtype": "test"} + ) + assert len(mock_stratix.get_events("agent.state.change")) == 1 + + def test_tool_use_with_error(self, adapter, mock_stratix): + adapter.on_tool_use( + tool_name="failing_tool", + tool_input={"query": "test"}, + error=Exception("tool failed"), + ) + events = mock_stratix.get_events("tool.call") + assert len(events) == 1 + assert events[0]["payload"]["error"] == "tool failed" + + def test_tool_use_with_latency(self, adapter, mock_stratix): + adapter.on_tool_use( + tool_name="slow_tool", + tool_input={"query": "test"}, + tool_output={"result": "ok"}, + latency_ms=2000.0, + ) + events = mock_stratix.get_events("tool.call") + assert len(events) == 1 + assert events[0]["payload"]["latency_ms"] == 2000.0 + + def test_llm_call_with_messages_content_enabled(self, mock_stratix): + from layerlens.instrument.adapters.frameworks.strands.lifecycle import StrandsAdapter + + config = CaptureConfig(capture_content=True) + adapter = StrandsAdapter(stratix=mock_stratix, capture_config=config) + adapter.connect() + adapter.on_llm_call( + 
model="anthropic.claude-3-sonnet", + messages=[{"role": "user", "content": "hello"}], + ) + events = mock_stratix.get_events("model.invoke") + assert len(events) == 1 + assert "messages" in events[0]["payload"] diff --git a/tests/instrument/adapters/frameworks/strands/test_integration.py b/tests/instrument/adapters/frameworks/strands/test_integration.py new file mode 100644 index 00000000..1c3baa23 --- /dev/null +++ b/tests/instrument/adapters/frameworks/strands/test_integration.py @@ -0,0 +1,368 @@ +"""Integration tests for AWS Strands adapter using the REAL Strands SDK. + +These tests verify that StrandsAdapter correctly instruments and captures +events from actual Strands agents. The SDK must be installed: + pip install 'stratix[strands]' + +Tests are skipped if strands-agents is not installed. + +Ported as-is from ``ateam/tests/adapters/strands/test_integration.py``. + +Translation rules applied: +* ``stratix.sdk.python.adapters.strands.lifecycle`` → + ``layerlens.instrument.adapters.frameworks.strands.lifecycle`` +* ``stratix.sdk.python.adapters.base`` → + ``layerlens.instrument.adapters._base`` +* ``stratix.sdk.python.adapters.capture.CaptureConfig`` → + ``layerlens.instrument.adapters._base.CaptureConfig`` +* ``stratix.sdk.python.adapters.replay_models.ReplayableTrace`` → + ``layerlens.instrument.adapters._base.ReplayableTrace`` +* ``stratix.sdk.python.adapters.registry._ADAPTER_MODULES`` → + ``layerlens.instrument.adapters._base.registry._ADAPTER_MODULES`` +* The wrapper marker attribute renamed by the source from + ``_stratix_original`` to ``_layerlens_original``. + +Multi-tenancy: per the transitional "stratix attribute" pattern (see +migration doc §2.3 step 2 — keystone PR #118 still DRAFT), the +``MockStratix`` / ``EventCollector`` test stub gets an ``org_id`` +attribute. The post-merge sweep PR will rebase to canonical kwarg once +#118 lands. 
+""" + +from __future__ import annotations + +from typing import Any + +import pytest + +strands = pytest.importorskip("strands", reason="strands-agents not installed") + +from layerlens.instrument.adapters._base import AdapterCapability, AdapterStatus # noqa: E402 +from layerlens.instrument.adapters._base import CaptureConfig # noqa: E402 +from layerlens.instrument.adapters.frameworks.strands.lifecycle import StrandsAdapter # noqa: E402 + +# --------------------------------------------------------------------------- +# Test STRATIX instance that collects events +# --------------------------------------------------------------------------- + + +class EventCollector: + """Real event collector -- not a mock. Accumulates events for assertions.""" + + def __init__(self) -> None: + self.org_id: str = "test-org" + self.events: list[dict[str, Any]] = [] + + def emit(self, event_type: str, payload: dict[str, Any]) -> None: + self.events.append({"type": event_type, "payload": payload}) + + def get_events(self, event_type: str | None = None) -> list[dict[str, Any]]: + if event_type: + return [e for e in self.events if e["type"] == event_type] + return self.events + + +# --------------------------------------------------------------------------- +# Adapter construction with real SDK +# --------------------------------------------------------------------------- + + +class TestAdapterWithRealSDK: + """Verify adapter constructs and connects with real Strands SDK.""" + + def test_connect_detects_framework_version(self) -> None: + """connect() should detect the strands version.""" + collector = EventCollector() + adapter = StrandsAdapter(stratix=collector) + adapter.connect() + assert adapter._status == AdapterStatus.HEALTHY + # Version may or may not be set depending on SDK + adapter.disconnect() + + def test_adapter_info_metadata(self) -> None: + """Adapter info should expose correct framework metadata.""" + collector = EventCollector() + adapter = 
StrandsAdapter(stratix=collector) + adapter.connect() + info = adapter.get_adapter_info() + assert info.framework == "strands" + assert info.name == "StrandsAdapter" + assert AdapterCapability.TRACE_TOOLS in info.capabilities + assert AdapterCapability.TRACE_MODELS in info.capabilities + assert AdapterCapability.TRACE_STATE in info.capabilities + adapter.disconnect() + + def test_health_check_returns_healthy(self) -> None: + """health_check should return HEALTHY after connect.""" + collector = EventCollector() + adapter = StrandsAdapter(stratix=collector) + adapter.connect() + health = adapter.health_check() + assert health.status == AdapterStatus.HEALTHY + assert health.framework_name == "strands" + adapter.disconnect() + + def test_capture_config_propagates(self) -> None: + """CaptureConfig correctly controls which events are captured.""" + collector = EventCollector() + config = CaptureConfig( + l3_model_metadata=True, + l5a_tool_calls=False, + ) + adapter = StrandsAdapter(stratix=collector, capture_config=config) + assert adapter._capture_config.l3_model_metadata is True + assert adapter._capture_config.l5a_tool_calls is False + + +# --------------------------------------------------------------------------- +# Provider detection +# --------------------------------------------------------------------------- + + +class TestProviderDetection: + """Verify _detect_provider works with real model IDs.""" + + def setup_method(self) -> None: + self.adapter = StrandsAdapter() + + def test_detect_bedrock_claude(self) -> None: + assert self.adapter._detect_provider("anthropic.claude-v2") == "bedrock" + + def test_detect_bedrock_titan(self) -> None: + assert self.adapter._detect_provider("amazon.titan-embed") == "bedrock" + + def test_detect_bedrock_llama(self) -> None: + assert self.adapter._detect_provider("meta.llama3-70b") == "bedrock" + + def test_detect_openai(self) -> None: + assert self.adapter._detect_provider("gpt-4o") == "openai" + + def test_detect_google(self) 
-> None: + assert self.adapter._detect_provider("gemini-1.5-pro") == "google" + + def test_detect_default_bedrock(self) -> None: + """Strands defaults to Bedrock for unrecognized models.""" + assert self.adapter._detect_provider("custom-model-v1") == "bedrock" + + def test_detect_none(self) -> None: + assert self.adapter._detect_provider(None) is None + + +# --------------------------------------------------------------------------- +# Instrumentation with fake agent +# --------------------------------------------------------------------------- + + +class TestInstrumentationWithFakeAgent: + """Test instrumentation using a fake agent that mimics Strands Agent API.""" + + def _make_fake_agent(self) -> Any: + """Create a minimal object that looks like a Strands Agent.""" + + class FakeAgent: + name = "strands_test_agent" + model = "anthropic.claude-3-sonnet-20240229-v1:0" + tools = {"web_search": "WebSearchTool"} + system_prompt = "You are a helpful assistant." + + def __call__(self, prompt: str) -> str: + return f"Response to: {prompt}" + + def invoke(self, message: str) -> str: + return f"Invoked: {message}" + + return FakeAgent() + + def test_instrument_wraps_call(self) -> None: + """instrument_agent should wrap __call__ and invoke.""" + collector = EventCollector() + adapter = StrandsAdapter(stratix=collector) + adapter.connect() + + agent = self._make_fake_agent() + adapter.instrument_agent(agent) + + assert hasattr(agent.__call__, "_layerlens_original") + assert hasattr(agent.invoke, "_layerlens_original") + adapter.disconnect() + + def test_instrumented_call_emits_events(self) -> None: + """Calling an instrumented agent should emit agent.input and agent.output.""" + collector = EventCollector() + adapter = StrandsAdapter(stratix=collector) + adapter.connect() + + agent = self._make_fake_agent() + adapter.instrument_agent(agent) + + result = agent("What is AI?") + assert result == "Response to: What is AI?" 
+ + input_events = collector.get_events("agent.input") + output_events = collector.get_events("agent.output") + assert len(input_events) == 1 + assert len(output_events) >= 1 + assert input_events[0]["payload"]["agent_name"] == "strands_test_agent" + + adapter.disconnect() + + def test_instrument_emits_config_event(self) -> None: + """instrument_agent should emit an environment.config event.""" + collector = EventCollector() + adapter = StrandsAdapter(stratix=collector) + adapter.connect() + + agent = self._make_fake_agent() + adapter.instrument_agent(agent) + + config_events = collector.get_events("environment.config") + assert len(config_events) == 1 + assert config_events[0]["payload"]["agent_name"] == "strands_test_agent" + assert "web_search" in config_events[0]["payload"]["tools"] + + adapter.disconnect() + + def test_instrument_idempotent(self) -> None: + """Instrumenting the same agent twice should be a no-op.""" + collector = EventCollector() + adapter = StrandsAdapter(stratix=collector) + adapter.connect() + + agent = self._make_fake_agent() + adapter.instrument_agent(agent) + adapter.instrument_agent(agent) + + config_events = collector.get_events("environment.config") + assert len(config_events) == 1 + + adapter.disconnect() + + def test_disconnect_unwraps_agent(self) -> None: + """disconnect() should restore original methods.""" + collector = EventCollector() + adapter = StrandsAdapter(stratix=collector) + adapter.connect() + + agent = self._make_fake_agent() + adapter.instrument_agent(agent) + assert hasattr(agent.__call__, "_layerlens_original") + + adapter.disconnect() + assert not hasattr(agent.__call__, "_layerlens_original") + + +# --------------------------------------------------------------------------- +# Lifecycle hooks +# --------------------------------------------------------------------------- + + +class TestLifecycleHooks: + """Test manual lifecycle hook invocations.""" + + def test_on_run_start_emits_agent_input(self) -> None: + 
collector = EventCollector() + adapter = StrandsAdapter(stratix=collector) + adapter.connect() + + adapter.on_run_start(agent_name="my_agent", input_data="Hello") + + events = collector.get_events("agent.input") + assert len(events) == 1 + assert events[0]["payload"]["agent_name"] == "my_agent" + adapter.disconnect() + + def test_on_run_end_emits_agent_output_and_state_change(self) -> None: + """on_run_end should emit agent.output AND agent.state.change.""" + collector = EventCollector() + adapter = StrandsAdapter(stratix=collector) + adapter.connect() + + adapter.on_run_end(agent_name="my_agent", output="Done") + + output_events = collector.get_events("agent.output") + state_events = collector.get_events("agent.state.change") + assert len(output_events) == 1 + assert len(state_events) == 1 + assert state_events[0]["payload"]["event_subtype"] == "run_complete" + adapter.disconnect() + + def test_on_run_end_with_error_emits_failed_state(self) -> None: + """on_run_end with error should emit run_failed state.""" + collector = EventCollector() + adapter = StrandsAdapter(stratix=collector) + adapter.connect() + + adapter.on_run_end( + agent_name="my_agent", + output=None, + error=ValueError("Something went wrong"), + ) + + state_events = collector.get_events("agent.state.change") + assert len(state_events) == 1 + assert state_events[0]["payload"]["event_subtype"] == "run_failed" + adapter.disconnect() + + def test_on_tool_use_emits_tool_call(self) -> None: + collector = EventCollector() + adapter = StrandsAdapter(stratix=collector) + adapter.connect() + + adapter.on_tool_use( + tool_name="web_search", + tool_input="quantum computing", + tool_output="Quantum uses qubits...", + ) + + events = collector.get_events("tool.call") + assert len(events) == 1 + assert events[0]["payload"]["tool_name"] == "web_search" + adapter.disconnect() + + def test_on_llm_call_emits_model_invoke(self) -> None: + collector = EventCollector() + adapter = StrandsAdapter(stratix=collector) + 
adapter.connect() + + adapter.on_llm_call( + provider="bedrock", + model="anthropic.claude-3-sonnet", + tokens_prompt=200, + tokens_completion=100, + latency_ms=500.0, + ) + + events = collector.get_events("model.invoke") + assert len(events) == 1 + assert events[0]["payload"]["provider"] == "bedrock" + adapter.disconnect() + + def test_events_not_emitted_when_disconnected(self) -> None: + collector = EventCollector() + adapter = StrandsAdapter(stratix=collector) + + adapter.on_run_start(agent_name="test", input_data="hello") + assert len(collector.events) == 0 + + +# --------------------------------------------------------------------------- +# Replay serialization +# --------------------------------------------------------------------------- + + +class TestReplaySerialization: + """Test replay serialization.""" + + def test_serialize_includes_events(self) -> None: + collector = EventCollector() + adapter = StrandsAdapter(stratix=collector) + adapter.connect() + + adapter.on_run_start(agent_name="agent", input_data="test") + adapter.on_run_end(agent_name="agent", output="done") + + trace = adapter.serialize_for_replay() + assert trace.adapter_name == "StrandsAdapter" + assert trace.framework == "strands" + assert len(trace.events) >= 2 + adapter.disconnect() diff --git a/tests/instrument/adapters/frameworks/strands/test_lifecycle.py b/tests/instrument/adapters/frameworks/strands/test_lifecycle.py new file mode 100644 index 00000000..0103dc46 --- /dev/null +++ b/tests/instrument/adapters/frameworks/strands/test_lifecycle.py @@ -0,0 +1,204 @@ +"""Test AWS Strands adapter lifecycle methods. + +Ported as-is from ``ateam/tests/adapters/strands/test_lifecycle.py``. 
+ +Translation rules applied: +* ``stratix.sdk.python.adapters.strands.lifecycle`` → + ``layerlens.instrument.adapters.frameworks.strands.lifecycle`` +* ``stratix.sdk.python.adapters.base`` → + ``layerlens.instrument.adapters._base`` +* ``stratix.sdk.python.adapters.capture.CaptureConfig`` → + ``layerlens.instrument.adapters._base.CaptureConfig`` +* ``stratix.sdk.python.adapters.replay_models.ReplayableTrace`` → + ``layerlens.instrument.adapters._base.ReplayableTrace`` +* ``stratix.sdk.python.adapters.registry._ADAPTER_MODULES`` → + ``layerlens.instrument.adapters._base.registry._ADAPTER_MODULES`` +* The wrapper marker attribute renamed by the source from + ``_stratix_original`` to ``_layerlens_original``. + +Multi-tenancy: per the transitional "stratix attribute" pattern (see +migration doc §2.3 step 2 — keystone PR #118 still DRAFT), the +``MockStratix`` / ``EventCollector`` test stub gets an ``org_id`` +attribute. The post-merge sweep PR will rebase to canonical kwarg once +#118 lands. 
+""" + +from unittest.mock import MagicMock + +from layerlens.instrument.adapters._base import AdapterCapability, AdapterStatus +from layerlens.instrument.adapters._base import ReplayableTrace +from layerlens.instrument.adapters.frameworks.strands.lifecycle import StrandsAdapter + + +class TestStrandsAdapterLifecycle: + def test_adapter_initialization(self): + adapter = StrandsAdapter() + assert adapter.FRAMEWORK == "strands" + assert adapter.VERSION == "0.1.0" + + def test_adapter_initialization_with_stratix(self, mock_stratix): + adapter = StrandsAdapter(stratix=mock_stratix) + assert adapter.has_stratix + + def test_adapter_initialization_legacy_param(self, mock_stratix): + adapter = StrandsAdapter(stratix_instance=mock_stratix) + assert adapter.has_stratix + + def test_connect_sets_healthy(self): + adapter = StrandsAdapter() + adapter.connect() + assert adapter.is_connected + assert adapter.status == AdapterStatus.HEALTHY + + def test_connect_without_framework(self): + """Adapter connects gracefully even when strands is not installed.""" + adapter = StrandsAdapter() + adapter.connect() + assert adapter.is_connected + assert adapter.status == AdapterStatus.HEALTHY + + def test_disconnect_sets_disconnected(self): + adapter = StrandsAdapter() + adapter.connect() + adapter.disconnect() + assert not adapter.is_connected + assert adapter.status == AdapterStatus.DISCONNECTED + + def test_health_check_healthy(self, adapter): + health = adapter.health_check() + assert health.status == AdapterStatus.HEALTHY + assert health.framework_name == "strands" + assert health.adapter_version == "0.1.0" + assert health.error_count == 0 + assert not health.circuit_open + + def test_health_check_disconnected(self): + adapter = StrandsAdapter() + health = adapter.health_check() + assert health.status == AdapterStatus.DISCONNECTED + + def test_get_adapter_info(self, adapter): + info = adapter.get_adapter_info() + assert info.name == "StrandsAdapter" + assert info.framework == "strands" 
+ assert info.version == "0.1.0" + assert AdapterCapability.TRACE_TOOLS in info.capabilities + assert AdapterCapability.TRACE_MODELS in info.capabilities + assert AdapterCapability.TRACE_STATE in info.capabilities + + def test_serialize_for_replay(self, adapter): + trace = adapter.serialize_for_replay() + assert isinstance(trace, ReplayableTrace) + assert trace.adapter_name == "StrandsAdapter" + assert trace.framework == "strands" + assert trace.trace_id is not None + assert isinstance(trace.events, list) + assert isinstance(trace.config, dict) + + def test_null_stratix_pattern(self): + adapter = StrandsAdapter() + adapter.connect() + # Should not raise even without STRATIX + adapter.emit_dict_event("agent.input", {"framework": "strands"}) + + def test_instrument_agent(self, adapter): + mock_agent = MagicMock() + mock_agent.name = "test_agent" + mock_agent.__call__ = MagicMock() + mock_agent.tools = [] + + adapter.instrument_agent(mock_agent) + assert hasattr(mock_agent.__call__, "_layerlens_original") + + def test_instrument_agent_idempotent(self, adapter): + mock_agent = MagicMock() + mock_agent.name = "test_agent" + mock_agent.__call__ = MagicMock() + adapter.instrument_agent(mock_agent) + first_call = mock_agent.__call__ + adapter.instrument_agent(mock_agent) + assert mock_agent.__call__ is first_call + + def test_disconnect_unwraps(self, adapter): + mock_agent = MagicMock() + mock_agent.name = "test_agent" + original_call = MagicMock() + mock_agent.__call__ = original_call + adapter.instrument_agent(mock_agent) + assert hasattr(mock_agent.__call__, "_layerlens_original") + adapter.disconnect() + assert mock_agent.__call__ is original_call + + +class TestStrandsAdapterEvents: + def test_on_run_start_emits_agent_input(self, adapter, mock_stratix): + adapter.on_run_start(agent_name="test_agent", input_data="hello") + events = mock_stratix.get_events("agent.input") + assert len(events) == 1 + assert events[0]["payload"]["framework"] == "strands" + assert 
events[0]["payload"]["agent_name"] == "test_agent" + + def test_on_run_end_emits_agent_output(self, adapter, mock_stratix): + adapter.on_run_start(agent_name="test_agent", input_data="hello") + adapter.on_run_end(agent_name="test_agent", output="response") + events = mock_stratix.get_events("agent.output") + assert len(events) == 1 + assert events[0]["payload"]["duration_ns"] >= 0 # may be 0 in fast test execution + + def test_on_tool_use_emits_tool_call(self, adapter, mock_stratix): + adapter.on_tool_use( + tool_name="calculator", + tool_input={"expression": "2+2"}, + tool_output={"result": 4}, + ) + events = mock_stratix.get_events("tool.call") + assert len(events) == 1 + assert events[0]["payload"]["tool_name"] == "calculator" + + def test_on_llm_call_emits_model_invoke(self, adapter, mock_stratix): + adapter.on_llm_call( + provider="bedrock", + model="anthropic.claude-3-sonnet", + tokens_prompt=200, + tokens_completion=100, + latency_ms=800.0, + ) + events = mock_stratix.get_events("model.invoke") + assert len(events) == 1 + assert events[0]["payload"]["model"] == "anthropic.claude-3-sonnet" + assert events[0]["payload"]["provider"] == "bedrock" + + def test_error_in_output(self, adapter, mock_stratix): + adapter.on_run_end(agent_name="test_agent", output=None, error=Exception("test error")) + events = mock_stratix.get_events("agent.output") + assert len(events) == 1 + assert "error" in events[0]["payload"] + + def test_state_change_on_run_end(self, adapter, mock_stratix): + adapter.on_run_end(agent_name="test_agent", output="done") + events = mock_stratix.get_events("agent.state.change") + assert len(events) == 1 + assert events[0]["payload"]["event_subtype"] == "run_complete" + + def test_state_change_on_error(self, adapter, mock_stratix): + adapter.on_run_end(agent_name="test_agent", output=None, error=Exception("fail")) + events = mock_stratix.get_events("agent.state.change") + assert len(events) == 1 + assert events[0]["payload"]["event_subtype"] == 
"run_failed" + + def test_detect_provider_bedrock_default(self, adapter): + assert adapter._detect_provider("anthropic.claude-3-sonnet") == "bedrock" + assert adapter._detect_provider("amazon.titan-text") == "bedrock" + assert adapter._detect_provider("unknown-model") == "bedrock" + + def test_detect_provider_non_bedrock(self, adapter): + assert adapter._detect_provider("gpt-4o") == "openai" + assert adapter._detect_provider("gemini-pro") == "google" + + +class TestStrandsAdapterRegistry: + def test_adapter_registered(self): + from layerlens.instrument.adapters._base.registry import _ADAPTER_MODULES + + assert "strands" in _ADAPTER_MODULES + assert _ADAPTER_MODULES["strands"] == "layerlens.instrument.adapters.frameworks.strands"