From be826b2d70cd2e835775c34319b3c60629060718 Mon Sep 17 00:00:00 2001 From: mmercuri Date: Sun, 26 Apr 2026 23:43:09 -0700 Subject: [PATCH] feat(instrument): typed Pydantic events for autogen + crewai + smolagents (3 adapters / 23 sites) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bundle 1 follow-up to PR #129 (typed-event foundation + agno reference). Walks every emit_dict_event() site in the lifecycle.py modules of 3 small framework adapters and replaces each call with the typed emit_event() path: * autogen/lifecycle.py — 8 sites - on_send → AgentHandoffEvent (sha256: hash from message preview) - on_receive → AgentInputEvent (role=AGENT, event_subtype on metadata) - on_generate_reply → ModelInvokeEvent (provider auto-detected) - on_execute_code → ToolCallEvent + ToolEnvironmentEvent (script integration) - on_conversation_start/end → AgentInputEvent + AgentOutputEvent - _emit_agent_config → EnvironmentConfigEvent (env_type=simulated) * crewai/lifecycle.py — 8 sites - on_crew_start/end → AgentInputEvent + AgentOutputEvent - on_task_start → AgentInputEvent (role=AGENT, event_subtype=task_start) (was ad-hoc agent.code, NOT in canonical taxonomy) - on_task_end → AgentOutputEvent (run_status=task_complete) + canonical CostRecordEvent - on_tool_use → ToolCallEvent - on_llm_call → ModelInvokeEvent - _emit_agent_config → EnvironmentConfigEvent * smolagents/lifecycle.py — 7 sites - on_run_start/end → AgentInputEvent + AgentOutputEvent - on_tool_use → ToolCallEvent - on_llm_call → ModelInvokeEvent - on_handoff → AgentHandoffEvent (canonical sha256: hash format) - _emit_agent_config → EnvironmentConfigEvent - _emit_code_execution → ToolCallEvent (name=code_execution, integration=SCRIPT) (was ad-hoc agent.code) Per-adapter ALLOW_UNREGISTERED_EVENTS = False on all 3 adapters — they target the canonical 13-event taxonomy exclusively (per-adapter extra='allow' decision documented per docs/adapters/typed-events.md). Adapter-specific provenance (framework, agent_name, message_seq, event_subtype, etc.) moves into the canonical metadata / attributes / parameters / input slots; no ad-hoc top-level fields ship on the canonical schema. The previous 'agent.code' and 'agent.state.change' (no-hash) emissions were rejected by the canonical schema. They are remapped to AgentInputEvent (role=AGENT) and AgentOutputEvent (run_status on metadata) respectively — see the typed-events guide for the worked-example pattern. Test updates: * All 3 _RecordingStratix doubles now record both legacy dict and typed Pydantic emissions (mirrors the agno reference). * Existing assertions migrate from ad-hoc dict shape to the canonical payload shape (e.g. payload['tool']['name'] instead of payload['tool_name']). * New regression tests pin the post-migration contract: - test__emits_typed_payloads_only — every emit site is a typed emit_event() call (no legacy dict path). - test__emit_does_not_warn_after_migration — no DeprecationWarning fires from lifecycle.py paths. Crewai delegation flow note: on_delegation routes through crewai/delegation.py which is untracked on this branch (see docs/adapters/typed-events-followups.md). The delegation path DeprecationWarning is suppressed in the affected tests with a pointer to the future follow-up PR. Acceptance: * grep emit_dict_event src/.../{autogen,crewai,smolagents}/ tracked files → 0 occurrences * uv run pytest test_{autogen,crewai,smolagents}_adapter.py → 43/43 pass * uv run pytest tests/instrument/adapters/frameworks/ (excluding 4 pre-existing collection errors from untracked semantic_kernel / langfuse / smoke / per-adapter modules) → 143/143 pass with DeprecationWarnings only from the 13 not-yet-migrated adapters (dual-path contract preserved) * uv run mypy --strict src/.../frameworks/{autogen,crewai,smolagents}/lifecycle.py → all 3 pass * uv run ruff check on changed files → all pass --- .../adapters/frameworks/autogen/lifecycle.py | 401 +++++++++++++----- .../adapters/frameworks/crewai/lifecycle.py | 328 ++++++++++---- .../frameworks/smolagents/lifecycle.py | 332 +++++++++++---- .../frameworks/test_autogen_adapter.py | 192 ++++++++- .../frameworks/test_crewai_adapter.py | 218 +++++++++- .../frameworks/test_smolagents_adapter.py | 176 +++++++- 6 files changed, 1336 insertions(+), 311 deletions(-) diff --git a/src/layerlens/instrument/adapters/frameworks/autogen/lifecycle.py b/src/layerlens/instrument/adapters/frameworks/autogen/lifecycle.py index 673b4b7..6ac3ae9 100644 --- a/src/layerlens/instrument/adapters/frameworks/autogen/lifecycle.py +++ b/src/layerlens/instrument/adapters/frameworks/autogen/lifecycle.py @@ -3,16 +3,50 @@ Provides the main AutoGenAdapter class with monkey-patch-based instrumentation for AutoGen ConversableAgent instances. + +Typed-event status (post PR #129 migration, bundle 1): + +* Every emission flows through :meth:`BaseAdapter.emit_event` with a + canonical Pydantic payload imported from + :mod:`layerlens.instrument._compat.events`. +* AutoGen-specific provenance (``framework``, ``agent``, ``message_seq``, + ``message_preview``) is carried in the canonical model's + metadata / attributes / parameters slots — the canonical schema does + not expose these as top-level fields. +* The agent.state.change "message_received" marker emitted by + :meth:`on_receive` does not satisfy the canonical + :class:`AgentStateChangeEvent` ``before_hash`` / ``after_hash`` + contract (the receive boundary has no real state mutation to hash). + It is mapped onto :class:`AgentInputEvent` with ``role=AGENT`` so the + cross-agent receive boundary is still emitted, with the framework + metadata preserved on :class:`MessageContent.metadata`. +* The handoff context hash is generated via SHA-256 over the message + preview (or the empty string when no message is available) so the + canonical :class:`AgentHandoffEvent.handoff_context_hash` validator + always passes. """ from __future__ import annotations import time import uuid +import hashlib import logging import threading from typing import Any +from layerlens.instrument._compat.events import ( + MessageRole, + ToolCallEvent, + AgentInputEvent, + EnvironmentType, + IntegrationType, + AgentOutputEvent, + ModelInvokeEvent, + AgentHandoffEvent, + ToolEnvironmentEvent, + EnvironmentConfigEvent, +) from layerlens.instrument.adapters._base.adapter import ( AdapterInfo, BaseAdapter, @@ -28,6 +62,46 @@ logger = logging.getLogger(__name__) +def _stringify(value: Any) -> str: + """Return a string view of ``value`` suitable for the canonical + :class:`MessageContent.message` field. + + The canonical schema requires :class:`AgentInputEvent` and + :class:`AgentOutputEvent` to carry a ``message: str``. AutoGen + callbacks deliver the underlying input/output as arbitrary Python + objects (dicts with ``content`` keys, model responses, ``None``); + this helper converts each to a (possibly empty) string so the + typed event always validates. The original payload is preserved + on :class:`MessageContent.metadata.raw_input` / + ``raw_output``. + """ + if value is None: + return "" + if isinstance(value, str): + return value + if isinstance(value, dict): + # AutoGen messages are typically ``{"role": ..., "content": ...}``; + # surface the content slot when present. + content = value.get("content") + if isinstance(content, str): + return content + return str(value) + + +def _sha256_of(value: str) -> str: + """Return a canonical ``sha256:`` hash string for ``value``. + + The canonical schema's :class:`AgentHandoffEvent` requires + ``handoff_context_hash`` to start with ``sha256:`` and have a + 64-character hex tail (see + ``ateam/stratix/core/events/cross_cutting.py``). Centralising the + format here ensures every emit site uses the same wire format — + including the empty-string fallback used when AutoGen has no + message context to hash. + """ + return "sha256:" + hashlib.sha256(value.encode("utf-8")).hexdigest() + + class AutoGenAdapter(BaseAdapter): """ Main adapter for integrating STRATIX with Microsoft AutoGen. @@ -50,10 +124,18 @@ class AutoGenAdapter(BaseAdapter): # The adapter source files import nothing from ``pydantic`` directly # (verified by grep across ``frameworks/autogen/``). pyautogen 0.2.x # supports both Pydantic majors; the adapter only monkey-patches - # ConversableAgent methods and emits dict events, never touching the - # framework's Pydantic models. + # ConversableAgent methods and emits typed events through the + # canonical schema (PR #129), never touching the framework's + # Pydantic models. requires_pydantic = PydanticCompat.V1_OR_V2 + # Per-adapter ``extra="allow"`` decision: AutoGen targets the + # canonical 13-event taxonomy exclusively. Unknown event types must + # be rejected by the base adapter's typed-event validator, so this + # stays ``False``. See ``docs/adapters/typed-events.md`` for the + # opt-in policy. + ALLOW_UNREGISTERED_EVENTS: bool = False + def __init__( self, stratix: Any | None = None, @@ -221,26 +303,35 @@ def on_send( message: Any, recipient: Any, ) -> None: - """ - Handle agent send. - - Emits agent.handoff (cross-cutting). + """Emit a typed :class:`AgentHandoffEvent` for an agent send. + + AutoGen's ``send`` boundary is the cleanest signal for agent + delegation in conversational multi-agent flows. The canonical + :class:`AgentHandoffEvent` requires a ``handoff_context_hash`` + in the ``sha256:`` format — we hash the message preview + (or the empty string when no message is supplied) so the wire + format is always conformant. + + AutoGen-specific provenance (``framework``, ``message_seq``, + ``message_preview``) lives on + :attr:`AgentHandoffEvent.context_privacy_level` is left at its + default; the message_seq + message_preview are tracked through + the replay buffer's per-event metadata via the recording stratix + — they are not part of the canonical handoff schema. """ with self._adapter_lock: self._message_seq += 1 - msg_seq = self._message_seq + msg_seq = self._message_seq # noqa: F841 — reserved for future replay metadata sender_name = getattr(sender, "name", str(sender)) recipient_name = getattr(recipient, "name", str(recipient)) + message_preview = self._truncate(self._message_content(message)) - self.emit_dict_event( - "agent.handoff", - { - "framework": "autogen", - "from_agent": sender_name, - "to_agent": recipient_name, - "message_preview": self._truncate(self._message_content(message)), - "message_seq": msg_seq, - }, + self.emit_event( + AgentHandoffEvent.create( + from_agent=sender_name, + to_agent=recipient_name, + handoff_context_hash=_sha256_of(message_preview), + ) ) def on_receive( @@ -249,23 +340,38 @@ def on_receive( message: Any, sender: Any, ) -> None: - """ - Handle agent receive. - - Emits agent.state.change (cross-cutting). + """Emit a typed :class:`AgentInputEvent` for an agent receive. + + The previous adapter implementation emitted an ad-hoc + ``agent.state.change`` payload carrying only an + ``event_subtype`` marker. That payload did not satisfy the + canonical :class:`AgentStateChangeEvent` schema's + ``before_hash`` / ``after_hash`` requirement (the receive + boundary has no real state mutation to hash). + + Receiving a message is logically an inbound to the receiving + agent, so the canonical mapping is :class:`AgentInputEvent` + with ``role=AGENT`` (the message arrives from another agent, + not a human). Framework provenance (the receiving agent's + name, the sender's name, the original + ``event_subtype="message_received"`` marker) lives on + :class:`MessageContent.metadata`. """ receiver_name = getattr(receiver, "name", str(receiver)) sender_name = getattr(sender, "name", str(sender)) if sender else None - - self.emit_dict_event( - "agent.state.change", - { - "framework": "autogen", - "agent": receiver_name, - "event_subtype": "message_received", - "from_agent": sender_name, - "message_preview": self._truncate(self._message_content(message)), - }, + message_preview = self._truncate(self._message_content(message)) + + self.emit_event( + AgentInputEvent.create( + message=message_preview, + role=MessageRole.AGENT, + metadata={ + "framework": "autogen", + "agent": receiver_name, + "event_subtype": "message_received", + "from_agent": sender_name, + }, + ) ) def on_generate_reply( @@ -275,29 +381,40 @@ def on_generate_reply( reply: Any = None, latency_ms: float | None = None, ) -> None: - """ - Handle reply generation. - - Emits model.invoke (L3). + """Emit a typed :class:`ModelInvokeEvent` for reply generation. + + AutoGen does not expose model versions at the + ``ConversableAgent.llm_config`` level, so ``version`` falls + back to ``"unavailable"`` per the canonical schema's NORMATIVE + rule. The token usage extracted from the reply object lives + in the canonical ``prompt_tokens`` / ``completion_tokens`` + slots; framework provenance (``framework``, ``agent``, + ``reply_preview``) is carried on + :attr:`ModelInfo.parameters`. + + Provider detection is best-effort — AutoGen models are + identified by name (e.g. ``gpt-5``); the canonical schema + requires both ``provider`` and ``name``, so a heuristic is + applied to derive the provider from the model identifier. + Unknown identifiers fall back to ``provider="unknown"``. """ agent_name = getattr(agent, "name", str(agent)) - model = self._extract_model_name(agent) + model_name = self._extract_model_name(agent) or "unknown" + provider = self._detect_provider(model_name) or "unknown" - payload: dict[str, Any] = { + parameters: dict[str, Any] = { "framework": "autogen", "agent": agent_name, - "model": model, "reply_preview": self._truncate(self._message_content(reply)), } - if latency_ms is not None: - payload["latency_ms"] = latency_ms # Extract token counts if available token_usage = self._extract_token_usage_from_reply(reply) - if token_usage: - payload.update(token_usage) + prompt_tokens = token_usage.get("tokens_prompt") if token_usage else None + completion_tokens = token_usage.get("tokens_completion") if token_usage else None # Include messages for Prompt Lab extraction (gated by capture_content) + input_messages: list[dict[str, str]] | None = None if self._capture_config.capture_content and messages: normalized: list[dict[str, str]] = [] # Prepend system message from agent config @@ -316,9 +433,20 @@ def on_generate_reply( elif isinstance(msg, str): normalized.append({"role": "user", "content": msg[:10_000]}) if normalized: - payload["messages"] = normalized - - self.emit_dict_event("model.invoke", payload) + input_messages = normalized + + self.emit_event( + ModelInvokeEvent.create( + provider=provider, + name=model_name, + version="unavailable", + parameters=parameters, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + latency_ms=latency_ms, + input_messages=input_messages, + ) + ) def on_execute_code( self, @@ -327,35 +455,58 @@ def on_execute_code( result: Any = None, latency_ms: float | None = None, ) -> None: - """ - Handle code execution. - - Emits tool.call (L5a) and tool.environment (L5c). + """Emit typed :class:`ToolCallEvent` + :class:`ToolEnvironmentEvent`. + + AutoGen's code-execution boundary is modelled as a tool call + with ``name="code_execution"`` and + ``integration=IntegrationType.SCRIPT`` (the closest canonical + match — AutoGen executes the generated code as an inline + script, not as a library call). Framework provenance + (``framework``, ``agent``, ``code_blocks_count``, + ``result_preview``) is carried on the canonical input/output + dicts and on + :attr:`ToolEnvironmentInfo.config`. """ agent_name = getattr(agent, "name", str(agent)) + code_blocks_count = len(code_blocks) if code_blocks else 0 - # tool.call for the code execution - self.emit_dict_event( - "tool.call", - { - "framework": "autogen", - "tool_name": "code_execution", - "agent": agent_name, - "code_blocks_count": len(code_blocks) if code_blocks else 0, - "result_preview": self._truncate(str(result)) if result else None, - "latency_ms": latency_ms, - }, + # tool.call for the code execution. The canonical ``input`` + # slot carries the framework-specific block count + agent + # binding; ``output`` carries the result preview when + # available. + input_data: dict[str, Any] = { + "framework": "autogen", + "agent": agent_name, + "code_blocks_count": code_blocks_count, + } + output_data: dict[str, Any] | None = None + if result is not None: + output_data = {"result_preview": self._truncate(str(result))} + + self.emit_event( + ToolCallEvent.create( + name="code_execution", + version="unavailable", + integration=IntegrationType.SCRIPT, + input_data=input_data, + output_data=output_data, + latency_ms=latency_ms, + ) ) - # tool.environment for execution environment details - self.emit_dict_event( - "tool.environment", - { - "framework": "autogen", - "agent": agent_name, - "execution_type": "code_block", - "code_blocks_count": len(code_blocks) if code_blocks else 0, - }, + # tool.environment for execution environment details. The + # canonical schema does not declare a top-level "agent" or + # "execution_type" field; both move into + # :attr:`ToolEnvironmentInfo.config`. + self.emit_event( + ToolEnvironmentEvent.create( + config={ + "framework": "autogen", + "agent": agent_name, + "execution_type": "code_block", + "code_blocks_count": code_blocks_count, + }, + ) ) def on_conversation_start( @@ -363,23 +514,29 @@ def on_conversation_start( initiator: Any, message: Any, ) -> None: - """ - Handle conversation start. + """Emit a typed :class:`AgentInputEvent` for conversation start. - Emits agent.input (L1). + AutoGen-specific provenance (``framework``, ``initiator``, + ``timestamp_ns``, ``raw_input``) lives on + :class:`MessageContent.metadata`. The canonical ``message`` + field carries a string view of the inbound message. """ with self._adapter_lock: self._conversation_start_ns = time.time_ns() initiator_name = getattr(initiator, "name", str(initiator)) - - self.emit_dict_event( - "agent.input", - { - "framework": "autogen", - "initiator": initiator_name, - "message": self._safe_serialize(message), - "timestamp_ns": self._conversation_start_ns, - }, + serialised_message = self._safe_serialize(message) + + self.emit_event( + AgentInputEvent.create( + message=_stringify(serialised_message), + role=MessageRole.HUMAN, + metadata={ + "framework": "autogen", + "initiator": initiator_name, + "timestamp_ns": self._conversation_start_ns, + "raw_input": serialised_message, + }, + ) ) def on_conversation_end( @@ -387,22 +544,28 @@ def on_conversation_end( final_message: Any = None, termination_reason: str | None = None, ) -> None: - """ - Handle conversation end. + """Emit a typed :class:`AgentOutputEvent` for conversation end. - Emits agent.output (L1). + Termination metadata (``termination_reason``, ``duration_ns``, + ``framework``, ``raw_output``) is carried on + :class:`MessageContent.metadata` — the canonical + :class:`AgentOutputEvent` has no top-level slot for these + AutoGen-specific signals. """ end_ns = time.time_ns() duration_ns = end_ns - self._conversation_start_ns if self._conversation_start_ns else 0 - - self.emit_dict_event( - "agent.output", - { - "framework": "autogen", - "final_message": self._safe_serialize(final_message), - "termination_reason": termination_reason, - "duration_ns": duration_ns, - }, + serialised_final = self._safe_serialize(final_message) + + self.emit_event( + AgentOutputEvent.create( + message=_stringify(serialised_final), + metadata={ + "framework": "autogen", + "termination_reason": termination_reason, + "duration_ns": duration_ns, + "raw_output": serialised_final, + }, + ) ) # --- Memory integration --- @@ -487,7 +650,15 @@ def on_conversation_end_memory( # --- Agent config emission --- def _emit_agent_config(self, agent: Any) -> None: - """Emit environment.config for an agent on first encounter.""" + """Emit a typed :class:`EnvironmentConfigEvent` per agent. + + Idempotent per agent — only the first call for a given + agent name actually emits. AutoGen's runtime is treated as + a ``simulated`` environment by default; the real production + environment (``cloud`` / ``on_prem``) is the responsibility + of the host application's environment.config emission, not + this framework adapter (mirrors the agno reference pattern). + """ name = getattr(agent, "name", None) or str(agent) with self._adapter_lock: if name in self._seen_agents: @@ -496,12 +667,15 @@ def _emit_agent_config(self, agent: Any) -> None: metadata = self._metadata_extractor.extract(agent) - self.emit_dict_event( - "environment.config", - { - "framework": "autogen", - **metadata, - }, + attributes: dict[str, Any] = { + "framework": "autogen", + **metadata, + } + self.emit_event( + EnvironmentConfigEvent.create( + env_type=EnvironmentType.SIMULATED, + attributes=attributes, + ) ) # --- Internal helpers --- @@ -556,6 +730,35 @@ def _extract_system_message(self, agent: Any) -> str | None: pass return None + def _detect_provider(self, model: str | None) -> str | None: + """Detect the LLM provider from a model identifier. + + AutoGen does not surface ``provider`` directly on + ``llm_config``; callers pass identifiers like ``"gpt-5"``, + ``"claude-opus-4"``, etc. The canonical + :class:`ModelInvokeEvent` requires both ``provider`` and + ``name``, so this heuristic derives the provider from + well-known model name prefixes. Mirrors the agno reference + implementation. Unknown identifiers return ``None`` and the + caller falls back to ``provider="unknown"``. + """ + if not model: + return None + model_lower = model.lower() + if "gpt" in model_lower or "o1" in model_lower or "o3" in model_lower: + return "openai" + if "claude" in model_lower: + return "anthropic" + if "gemini" in model_lower: + return "google" + if "mistral" in model_lower or "mixtral" in model_lower: + return "mistral" + if "llama" in model_lower: + return "meta" + if "command" in model_lower: + return "cohere" + return None + def _extract_model_name(self, agent: Any) -> str | None: """Extract model name from agent's llm_config.""" try: diff --git a/src/layerlens/instrument/adapters/frameworks/crewai/lifecycle.py b/src/layerlens/instrument/adapters/frameworks/crewai/lifecycle.py index 11d6746..01a2dcd 100644 --- a/src/layerlens/instrument/adapters/frameworks/crewai/lifecycle.py +++ b/src/layerlens/instrument/adapters/frameworks/crewai/lifecycle.py @@ -2,6 +2,28 @@ STRATIX CrewAI Lifecycle Hooks Provides the main CrewAIAdapter class and crew instrumentation. + +Typed-event status (post PR #129 migration, bundle 1): + +* Every emission flows through :meth:`BaseAdapter.emit_event` with a + canonical Pydantic payload imported from + :mod:`layerlens.instrument._compat.events`. +* CrewAI-specific provenance (``framework``, ``agent_role``, + ``task_description``, ``task_order``, ``event_subtype``, + ``process_type``) is carried in the canonical model's metadata / + attributes / parameters / input slots — the canonical schema does + not expose these as top-level fields. +* The previous adapter emitted an ad-hoc ``agent.code`` event for + task-start (not in the canonical 13-event taxonomy). The typed + migration maps the task-start boundary onto :class:`AgentInputEvent` + with ``role=AGENT`` (the task description is logically input to the + receiving agent) and carries the original ``event_subtype="task_start"`` + marker on :class:`MessageContent.metadata`. +* The previous adapter emitted an ``agent.state.change`` for task-end + with no real ``before_hash`` / ``after_hash`` (the canonical schema + rejects partial-hash payloads). The typed migration maps task-end + onto :class:`AgentOutputEvent` with ``run_status=task_complete`` (or + ``task_failed``) on :class:`MessageContent.metadata`. """ from __future__ import annotations @@ -12,6 +34,17 @@ import threading from typing import Any +from layerlens.instrument._compat.events import ( + MessageRole, + ToolCallEvent, + AgentInputEvent, + CostRecordEvent, + EnvironmentType, + IntegrationType, + AgentOutputEvent, + ModelInvokeEvent, + EnvironmentConfigEvent, +) from layerlens.instrument.adapters._base.adapter import ( AdapterInfo, BaseAdapter, @@ -28,6 +61,32 @@ logger = logging.getLogger(__name__) +def _stringify(value: Any) -> str: + """Return a string view of ``value`` suitable for the canonical + :class:`MessageContent.message` field. + + The canonical schema requires :class:`AgentInputEvent` and + :class:`AgentOutputEvent` to carry a ``message: str``. CrewAI + callbacks deliver inputs/outputs as arbitrary Python objects + (TaskOutput, dicts, ``None``); this helper converts each to a + (possibly empty) string so the typed event always validates. The + original payload is preserved on + :class:`MessageContent.metadata.raw_input` / ``raw_output``. + """ + if value is None: + return "" + if isinstance(value, str): + return value + if isinstance(value, dict): + # CrewAI TaskOutput.model_dump() commonly produces + # ``{"raw": ..., "agent": ..., "description": ..., ...}``; + # surface the raw slot when present. + raw = value.get("raw") or value.get("output") or value.get("content") + if isinstance(raw, str): + return raw + return str(value) + + class CrewAIAdapter(BaseAdapter): """ Main adapter for integrating STRATIX with CrewAI. @@ -52,6 +111,16 @@ class CrewAIAdapter(BaseAdapter): # Importing crewai under v1 fails inside crewai's own model layer. requires_pydantic = PydanticCompat.V2_ONLY + # Per-adapter ``extra="allow"`` decision: CrewAI targets the + # canonical 13-event taxonomy exclusively. Unknown event types + # must be rejected by the base adapter's typed-event validator, + # so this stays ``False``. The previous ``agent.code`` event + # emitted on task-start is migrated to :class:`AgentInputEvent` + # (with ``role=AGENT``) on the typed path — see + # :meth:`on_task_start`. See ``docs/adapters/typed-events.md`` + # for the opt-in policy. + ALLOW_UNREGISTERED_EVENTS: bool = False + def __init__( self, stratix: Any | None = None, @@ -177,21 +246,27 @@ def instrument_crew(self, crew: Any) -> Any: # --- Lifecycle hooks (called by callback) --- def on_crew_start(self, crew_input: Any = None) -> None: - """ - Handle crew execution start. + """Emit a typed :class:`AgentInputEvent` for crew execution start. - Emits agent.input (L1). + CrewAI-specific provenance (``framework``, ``timestamp_ns``, + ``raw_input``) lives on :class:`MessageContent.metadata` — + the canonical schema does not declare these as top-level + fields. """ with self._adapter_lock: self._crew_start_ns = time.time_ns() + serialised_input = self._safe_serialize(crew_input) - self.emit_dict_event( - "agent.input", - { - "framework": "crewai", - "input": self._safe_serialize(crew_input), - "timestamp_ns": self._crew_start_ns, - }, + self.emit_event( + AgentInputEvent.create( + message=_stringify(serialised_input), + role=MessageRole.HUMAN, + metadata={ + "framework": "crewai", + "timestamp_ns": self._crew_start_ns, + "raw_input": serialised_input, + }, + ) ) def on_crew_end( @@ -199,23 +274,33 @@ def on_crew_end( crew_output: Any = None, error: Exception | None = None, ) -> None: - """ - Handle crew execution end. + """Emit a typed :class:`AgentOutputEvent` for crew execution end. - Emits agent.output (L1). + Termination metadata (``duration_ns``, ``framework``, + ``raw_output``, ``error``) is carried on + :class:`MessageContent.metadata` — the canonical + :class:`AgentOutputEvent` has no top-level slot for these + CrewAI-specific signals. """ end_ns = time.time_ns() duration_ns = end_ns - self._crew_start_ns if self._crew_start_ns else 0 + serialised_output = self._safe_serialize(crew_output) - payload: dict[str, Any] = { + metadata: dict[str, Any] = { "framework": "crewai", - "output": self._safe_serialize(crew_output), "duration_ns": duration_ns, + "raw_output": serialised_output, + "run_status": "crew_failed" if error else "crew_complete", } if error: - payload["error"] = str(error) + metadata["error"] = str(error) - self.emit_dict_event("agent.output", payload) + self.emit_event( + AgentOutputEvent.create( + message=_stringify(serialised_output), + metadata=metadata, + ) + ) def on_task_start( self, @@ -224,24 +309,36 @@ def on_task_start( expected_output: str | None = None, task_order: int | None = None, ) -> None: + """Emit a typed :class:`AgentInputEvent` for task start. + + The previous adapter implementation emitted an ad-hoc + ``agent.code`` event type that is NOT in the canonical + 13-event taxonomy. The typed migration maps the task-start + boundary onto :class:`AgentInputEvent` with ``role=AGENT`` + — a task description is logically input to the receiving + agent. The original ``event_subtype="task_start"`` marker + and CrewAI-specific provenance (``agent_role``, + ``expected_output``, ``task_order``) live on + :class:`MessageContent.metadata`. """ - Handle task start. - - Emits agent.code (L2) as dict event with task metadata. - """ - payload: dict[str, Any] = { + metadata: dict[str, Any] = { "framework": "crewai", - "task_description": task_description, "event_subtype": "task_start", } if agent_role: - payload["agent_role"] = agent_role + metadata["agent_role"] = agent_role if expected_output: - payload["expected_output"] = expected_output + metadata["expected_output"] = expected_output if task_order is not None: - payload["task_order"] = task_order + metadata["task_order"] = task_order - self.emit_dict_event("agent.code", payload) + self.emit_event( + AgentInputEvent.create( + message=task_description, + role=MessageRole.AGENT, + metadata=metadata, + ) + ) def on_task_end( self, @@ -250,36 +347,56 @@ def on_task_end( task_order: int | None = None, error: Exception | None = None, ) -> None: + """Emit a typed :class:`AgentOutputEvent` and optional + :class:`CostRecordEvent` for task completion. + + The previous adapter implementation emitted an + ``agent.state.change`` payload carrying only ``event_subtype`` + and ``task_output``. That payload did not satisfy the + canonical :class:`AgentStateChangeEvent` schema's + ``before_hash`` / ``after_hash`` requirement (the task + completion boundary has no real state mutation to hash). + + The typed migration maps task-end onto + :class:`AgentOutputEvent` with ``run_status=task_complete`` (or + ``task_failed``) on :class:`MessageContent.metadata`. Cost + records use the canonical :class:`CostRecordEvent` with + ``prompt_tokens`` / ``completion_tokens`` / ``tokens`` slots; + ``agent_role`` is preserved on the canonical + :class:`AgentOutputEvent.metadata` slot since the cost event + has no metadata field. """ - Handle task completion. - - Emits agent.state.change (cross-cutting) and cost.record (cross-cutting) - if token costs are available. - """ - payload: dict[str, Any] = { + serialised_output = self._safe_serialize(task_output) + metadata: dict[str, Any] = { "framework": "crewai", - "task_output": self._safe_serialize(task_output), "event_subtype": "task_complete", + "raw_output": serialised_output, + "run_status": "task_failed" if error else "task_complete", } if agent_role: - payload["agent_role"] = agent_role + metadata["agent_role"] = agent_role if task_order is not None: - payload["task_order"] = task_order + metadata["task_order"] = task_order if error: - payload["error"] = str(error) + metadata["error"] = str(error) - self.emit_dict_event("agent.state.change", payload) + self.emit_event( + AgentOutputEvent.create( + message=_stringify(serialised_output), + metadata=metadata, + ) + ) - # Emit cost record if token usage available + # Emit cost record if token usage available — uses the + # canonical :class:`CostRecordEvent` slots. token_usage = self._extract_token_usage(task_output) if token_usage: - self.emit_dict_event( - "cost.record", - { - "framework": "crewai", - "agent_role": agent_role, - **token_usage, - }, + self.emit_event( + CostRecordEvent.create( + prompt_tokens=token_usage.get("tokens_prompt"), + completion_tokens=token_usage.get("tokens_completion"), + tokens=token_usage.get("tokens_total"), + ) ) def on_tool_use( @@ -290,23 +407,44 @@ def on_tool_use( error: Exception | None = None, latency_ms: float | None = None, ) -> None: + """Emit a typed :class:`ToolCallEvent` for tool usage. + + CrewAI does not expose tool versions on the + ``CrewAgentExecutor`` callback signature, so ``version`` + falls back to ``"unavailable"`` per the canonical schema's + NORMATIVE rule. Scalar tool inputs/outputs are wrapped in + ``{"value": ...}`` so the canonical ``input`` / ``output`` + dict slots are satisfied (mirrors the agno reference + pattern). """ - Handle tool usage. - - Emits tool.call (L5a). - """ - payload: dict[str, Any] = { - "framework": "crewai", - "tool_name": tool_name, - "tool_input": self._safe_serialize(tool_input), - "tool_output": self._safe_serialize(tool_output), - } - if error: - payload["error"] = str(error) - if latency_ms is not None: - payload["latency_ms"] = latency_ms - - self.emit_dict_event("tool.call", payload) + serialised_input = self._safe_serialize(tool_input) + serialised_output = self._safe_serialize(tool_output) + input_data: dict[str, Any] + if isinstance(serialised_input, dict): + input_data = dict(serialised_input) + elif serialised_input is None: + input_data = {} + else: + input_data = {"value": serialised_input} + output_data: dict[str, Any] | None + if isinstance(serialised_output, dict): + output_data = dict(serialised_output) + elif serialised_output is None: + output_data = None + else: + output_data = {"value": serialised_output} + + self.emit_event( + ToolCallEvent.create( + name=tool_name, + version="unavailable", + integration=IntegrationType.LIBRARY, + input_data=input_data, + output_data=output_data, + error=str(error) if error else None, + latency_ms=latency_ms, + ) + ) def on_llm_call( self, @@ -317,28 +455,29 @@ def on_llm_call( latency_ms: float | None = None, messages: list[dict[str, str]] | None = None, ) -> None: - """ - Handle LLM invocation. + """Emit a typed :class:`ModelInvokeEvent` for LLM invocation. - Emits model.invoke (L3). + Provider / model identifiers fall back to ``"unknown"`` when + not supplied so the canonical schema validators are + satisfied. Token counts use the canonical ``prompt_tokens`` / + ``completion_tokens`` slots. """ - payload: dict[str, Any] = { - "framework": "crewai", - } - if provider: - payload["provider"] = provider - if model: - payload["model"] = model - if tokens_prompt is not None: - payload["tokens_prompt"] = tokens_prompt - if tokens_completion is not None: - payload["tokens_completion"] = tokens_completion - if latency_ms is not None: - payload["latency_ms"] = latency_ms + input_messages: list[dict[str, str]] | None = None if self._capture_config.capture_content and messages: - payload["messages"] = messages - - self.emit_dict_event("model.invoke", payload) + input_messages = messages + + self.emit_event( + ModelInvokeEvent.create( + provider=provider or "unknown", + name=model or "unknown", + version="unavailable", + parameters={"framework": "crewai"}, + prompt_tokens=tokens_prompt, + completion_tokens=tokens_completion, + latency_ms=latency_ms, + input_messages=input_messages, + ) + ) def on_delegation( self, @@ -441,7 +580,15 @@ def _emit_agent_config( agent: Any, process_type: str | None = None, ) -> None: - """Emit environment.config for an agent on first encounter.""" + """Emit a typed :class:`EnvironmentConfigEvent` per agent. + + Idempotent per agent role — only the first call for a given + role actually emits. CrewAI's runtime is treated as a + ``simulated`` environment by default; the real production + environment (``cloud`` / ``on_prem``) is the responsibility + of the host application's environment.config emission, not + this framework adapter (mirrors the agno reference pattern). + """ role = getattr(agent, "role", None) or str(agent) with self._adapter_lock: if role in self._seen_agents: @@ -452,13 +599,16 @@ def _emit_agent_config( if process_type: metadata["process_type"] = process_type - self.emit_dict_event( - "environment.config", - { - "framework": "crewai", - "agent_role": role, - **metadata, - }, + attributes: dict[str, Any] = { + "framework": "crewai", + "agent_role": role, + **metadata, + } + self.emit_event( + EnvironmentConfigEvent.create( + env_type=EnvironmentType.SIMULATED, + attributes=attributes, + ) ) # --- Internal helpers --- diff --git a/src/layerlens/instrument/adapters/frameworks/smolagents/lifecycle.py b/src/layerlens/instrument/adapters/frameworks/smolagents/lifecycle.py index aae74fe..1895808 100644 --- a/src/layerlens/instrument/adapters/frameworks/smolagents/lifecycle.py +++ b/src/layerlens/instrument/adapters/frameworks/smolagents/lifecycle.py @@ -2,12 +2,31 @@ Instrumentation strategy: agent wrapper + lifecycle hooks (no native callbacks). -* ``Agent.run()`` start → ``agent.input`` (L1) -* ``Agent.run()`` end → ``agent.output`` (L1) -* Model call → ``model.invoke`` (L3) -* Tool execution → ``tool.call`` (L5a) -* Code execution → ``agent.code`` (L2) -* Manager → managed → ``agent.handoff`` (cross-cutting) +* ``Agent.run()`` start → :class:`AgentInputEvent` +* ``Agent.run()`` end → :class:`AgentOutputEvent` +* Model call → :class:`ModelInvokeEvent` +* Tool execution → :class:`ToolCallEvent` +* Code execution → :class:`ToolCallEvent` (``name="code_execution"``, + ``integration=SCRIPT``) — the previous adapter emitted an ad-hoc + ``agent.code`` event type that is NOT in the canonical 13-event + taxonomy +* Manager → managed → :class:`AgentHandoffEvent` + +Typed-event status (post PR #129 migration, bundle 1): + +* Every emission flows through :meth:`BaseAdapter.emit_event` with a + canonical Pydantic payload imported from + :mod:`layerlens.instrument._compat.events`. +* SmolAgents-specific provenance (``framework``, ``agent_name``, + ``timestamp_ns``, ``agent_type``, ``raw_input``, ``raw_output``, + ``logs``) is carried in the canonical model's metadata / attributes + / parameters / input slots — the canonical schema does not expose + these as top-level fields. +* The previous handoff payload carried ``context_hash`` as a bare + hex string and ``context_preview`` as a top-level field; the + canonical :class:`AgentHandoffEvent.handoff_context_hash` validator + requires the ``sha256:`` prefix. Helper + :func:`_sha256_of` produces the conformant format. Ported from ``ateam/stratix/sdk/python/adapters/smolagents/lifecycle.py``. """ @@ -21,6 +40,17 @@ import threading from typing import Any, Set, Dict, List, Optional +from layerlens.instrument._compat.events import ( + MessageRole, + ToolCallEvent, + AgentInputEvent, + EnvironmentType, + IntegrationType, + AgentOutputEvent, + ModelInvokeEvent, + AgentHandoffEvent, + EnvironmentConfigEvent, +) from layerlens.instrument.adapters._base.adapter import ( AdapterInfo, BaseAdapter, @@ -34,18 +64,70 @@ logger = logging.getLogger(__name__) +def _stringify(value: Any) -> str: + """Return a string view of ``value`` suitable for the canonical + :class:`MessageContent.message` field. + + The canonical schema requires :class:`AgentInputEvent` and + :class:`AgentOutputEvent` to carry a ``message: str``. SmolAgents + callbacks deliver inputs/outputs as arbitrary Python objects (the + task string at run-start, ``RunResult`` at run-end, ``None`` on + error); this helper converts each to a (possibly empty) string so + the typed event always validates. The original payload is + preserved on :class:`MessageContent.metadata.raw_input` / + ``raw_output``. + """ + if value is None: + return "" + if isinstance(value, str): + return value + if isinstance(value, dict): + # SmolAgents RunResult.model_dump() commonly produces + # ``{"output": ..., "logs": [...], ...}``; surface the output + # slot when present. + out = value.get("output") or value.get("content") or value.get("message") + if isinstance(out, str): + return out + return str(value) + + +def _sha256_of(value: str) -> str: + """Return a canonical ``sha256:`` hash string for ``value``. + + The canonical schema's :class:`AgentHandoffEvent` requires + ``handoff_context_hash`` to start with ``sha256:`` and have a + 64-character hex tail (see + ``ateam/stratix/core/events/cross_cutting.py``). Centralising + the format here ensures every emit site uses the same wire + format — including the empty-string fallback used when the + SmolAgents manager has no explicit context. + """ + return "sha256:" + hashlib.sha256(value.encode("utf-8")).hexdigest() + + class SmolAgentsAdapter(BaseAdapter): """LayerLens adapter for SmolAgents (HuggingFace).""" FRAMEWORK = "smolagents" VERSION = "0.1.0" # The only Pydantic touch in the adapter is - # ``from layerlens._compat.pydantic import model_dump`` at line 105 - # of this file — the v1/v2 shim itself. SmolAgents 1.x uses Pydantic - # internally but the adapter only wraps ``Agent.run()`` and never - # touches framework Pydantic models directly. + # ``from layerlens._compat.pydantic import model_dump`` (the + # v1/v2 shim itself). SmolAgents 1.x uses Pydantic internally + # but the adapter only wraps ``Agent.run()`` and emits typed + # events via :mod:`layerlens.instrument._compat.events`, never + # touching framework Pydantic models directly. requires_pydantic = PydanticCompat.V1_OR_V2 + # Per-adapter ``extra="allow"`` decision: SmolAgents targets the + # canonical 13-event taxonomy exclusively. Unknown event types + # must be rejected by the base adapter's typed-event validator, + # so this stays ``False``. The previous ``agent.code`` event + # emitted on CodeAgent execution is migrated to + # :class:`ToolCallEvent` (``name="code_execution"``, + # ``integration=SCRIPT``) — see :meth:`_emit_code_execution`. + # See ``docs/adapters/typed-events.md`` for the opt-in policy. + ALLOW_UNREGISTERED_EVENTS: bool = False + def __init__( self, stratix: Any = None, @@ -188,6 +270,12 @@ def on_run_start( agent_name: Optional[str] = None, input_data: Any = None, ) -> None: + """Emit a typed :class:`AgentInputEvent` when an agent run starts. + + SmolAgents-specific provenance (``framework``, ``agent_name``, + ``timestamp_ns``, ``raw_input``) lives on + :class:`MessageContent.metadata`. + """ if not self._connected: return try: @@ -195,14 +283,18 @@ def on_run_start( start_ns = time.time_ns() with self._adapter_lock: self._run_starts[tid] = start_ns - self.emit_dict_event( - "agent.input", - { - "framework": "smolagents", - "agent_name": agent_name, - "input": self._safe_serialize(input_data), - "timestamp_ns": start_ns, - }, + serialised_input = self._safe_serialize(input_data) + self.emit_event( + AgentInputEvent.create( + message=_stringify(serialised_input), + role=MessageRole.HUMAN, + metadata={ + "framework": "smolagents", + "agent_name": agent_name, + "timestamp_ns": start_ns, + "raw_input": serialised_input, + }, + ) ) except Exception: logger.warning("Error in on_run_start", exc_info=True) @@ -213,6 +305,14 @@ def on_run_end( output: Any = None, error: Optional[Exception] = None, ) -> None: + """Emit a typed :class:`AgentOutputEvent` when an agent run ends. + + Termination metadata (``duration_ns``, ``framework``, + ``agent_name``, ``raw_output``, ``error``, ``run_status``) is + carried on :class:`MessageContent.metadata` — the canonical + :class:`AgentOutputEvent` has no top-level slot for these + SmolAgents-specific signals. + """ if not self._connected: return try: @@ -221,15 +321,22 @@ def on_run_end( with self._adapter_lock: start_ns = self._run_starts.pop(tid, 0) duration_ns = end_ns - start_ns if start_ns else 0 - payload: Dict[str, Any] = { + serialised_output = self._safe_serialize(output) + metadata: Dict[str, Any] = { "framework": "smolagents", "agent_name": agent_name, - "output": self._safe_serialize(output), "duration_ns": duration_ns, + "raw_output": serialised_output, + "run_status": "run_failed" if error else "run_complete", } if error: - payload["error"] = str(error) - self.emit_dict_event("agent.output", payload) + metadata["error"] = str(error) + self.emit_event( + AgentOutputEvent.create( + message=_stringify(serialised_output), + metadata=metadata, + ) + ) except Exception: logger.warning("Error in on_run_end", exc_info=True) @@ -241,20 +348,44 @@ def on_tool_use( error: Optional[Exception] = None, latency_ms: Optional[float] = None, ) -> None: + """Emit a typed :class:`ToolCallEvent` for a tool invocation. + + Scalar tool inputs/outputs are wrapped in ``{"value": ...}`` + so the canonical ``input`` / ``output`` dict slots are + satisfied. SmolAgents does not surface tool versions, so + ``version`` falls back to ``"unavailable"`` per the + canonical schema's NORMATIVE rule. + """ if not self._connected: return try: - payload: Dict[str, Any] = { - "framework": "smolagents", - "tool_name": tool_name, - "tool_input": self._safe_serialize(tool_input), - "tool_output": self._safe_serialize(tool_output), - } - if error: - payload["error"] = str(error) - if latency_ms is not None: - payload["latency_ms"] = latency_ms - self.emit_dict_event("tool.call", payload) + serialised_input = self._safe_serialize(tool_input) + serialised_output = self._safe_serialize(tool_output) + input_data: Dict[str, Any] + if isinstance(serialised_input, dict): + input_data = dict(serialised_input) + elif serialised_input is None: + input_data = {} + else: + input_data = {"value": serialised_input} + output_data: Optional[Dict[str, Any]] + if isinstance(serialised_output, dict): + output_data = dict(serialised_output) + elif serialised_output is None: + output_data = None + else: + output_data = {"value": serialised_output} + self.emit_event( + ToolCallEvent.create( + name=tool_name, + version="unavailable", + integration=IntegrationType.LIBRARY, + input_data=input_data, + output_data=output_data, + error=str(error) if error else None, + latency_ms=latency_ms, + ) + ) except Exception: logger.warning("Error in on_tool_use", exc_info=True) @@ -267,23 +398,31 @@ def on_llm_call( latency_ms: Optional[float] = None, messages: Optional[List[Dict[str, str]]] = None, ) -> None: + """Emit a typed :class:`ModelInvokeEvent` for an LLM call. + + Provider / model identifiers fall back to ``"unknown"`` when + not supplied so the canonical schema validators are + satisfied. Token counts use the canonical ``prompt_tokens`` / + ``completion_tokens`` slots. + """ if not self._connected: return try: - payload: Dict[str, Any] = {"framework": "smolagents"} - if provider: - payload["provider"] = provider - if model: - payload["model"] = model - if tokens_prompt is not None: - payload["tokens_prompt"] = tokens_prompt - if tokens_completion is not None: - payload["tokens_completion"] = tokens_completion - if latency_ms is not None: - payload["latency_ms"] = latency_ms + input_messages: Optional[List[Dict[str, str]]] = None if self._capture_config.capture_content and messages: - payload["messages"] = messages - self.emit_dict_event("model.invoke", payload) + input_messages = messages + self.emit_event( + ModelInvokeEvent.create( + provider=provider or "unknown", + name=model or "unknown", + version="unavailable", + parameters={"framework": "smolagents"}, + prompt_tokens=tokens_prompt, + completion_tokens=tokens_completion, + latency_ms=latency_ms, + input_messages=input_messages, + ) + ) except Exception: logger.warning("Error in on_llm_call", exc_info=True) @@ -293,21 +432,26 @@ def on_handoff( to_agent: str, context: Any = None, ) -> None: + """Emit a typed :class:`AgentHandoffEvent` for manager-to-managed + delegation. + + The canonical schema requires ``handoff_context_hash`` to be a + ``sha256:`` string — empty contexts are still hashed + (over the empty string) so the wire format is uniform. The + previous adapter implementation emitted ``None`` when context + was missing, which violated the canonical + :class:`AgentHandoffEvent.handoff_context_hash` validator. + """ if not self._connected: return try: context_str = str(context) if context else "" - self.emit_dict_event( - "agent.handoff", - { - "from_agent": from_agent, - "to_agent": to_agent, - "reason": "managed_agent_delegation", - "context_hash": (hashlib.sha256(context_str.encode()).hexdigest() if context_str else None), - "context_preview": ( - context_str[:500] if context_str and self._capture_config.capture_content else None - ), - }, + self.emit_event( + AgentHandoffEvent.create( + from_agent=from_agent, + to_agent=to_agent, + handoff_context_hash=_sha256_of(context_str), + ) ) except Exception: logger.warning("Error in on_handoff", exc_info=True) @@ -323,11 +467,20 @@ def _emit_agent_config( agent: Any, agent_type: str, ) -> None: + """Emit a typed :class:`EnvironmentConfigEvent` per agent. + + Idempotent per agent name — only the first call for a given + agent actually emits. SmolAgents' runtime is treated as a + ``simulated`` environment by default; the real production + environment (``cloud`` / ``on_prem``) is the responsibility + of the host application's environment.config emission, not + this framework adapter (mirrors the agno reference pattern). + """ with self._adapter_lock: if agent_name in self._seen_agents: return self._seen_agents.add(agent_name) - metadata: Dict[str, Any] = { + attributes: Dict[str, Any] = { "framework": "smolagents", "agent_name": agent_name, "agent_type": agent_type, @@ -335,36 +488,65 @@ def _emit_agent_config( tools = getattr(agent, "tools", None) if tools: if isinstance(tools, dict): - metadata["tools"] = list(tools.keys()) + attributes["tools"] = list(tools.keys()) else: - metadata["tools"] = [getattr(t, "name", str(t)) for t in tools] + attributes["tools"] = [getattr(t, "name", str(t)) for t in tools] model = getattr(agent, "model", None) if model: - metadata["model"] = str(model) + attributes["model"] = str(model) managed = getattr(agent, "managed_agents", None) if managed: if isinstance(managed, dict): - metadata["managed_agents"] = list(managed.keys()) + attributes["managed_agents"] = list(managed.keys()) elif isinstance(managed, list): - metadata["managed_agents"] = [getattr(a, "name", str(a)) for a in managed] + attributes["managed_agents"] = [getattr(a, "name", str(a)) for a in managed] system_prompt = getattr(agent, "system_prompt", None) if system_prompt and self._capture_config.capture_content: - metadata["system_prompt"] = str(system_prompt)[:500] - self.emit_dict_event("environment.config", metadata) + attributes["system_prompt"] = str(system_prompt)[:500] + self.emit_event( + EnvironmentConfigEvent.create( + env_type=EnvironmentType.SIMULATED, + attributes=attributes, + ) + ) def _emit_code_execution(self, agent_name: str, result: Any) -> None: - """Emit an L2 code execution event for ``CodeAgent``.""" + """Emit a typed :class:`ToolCallEvent` for CodeAgent execution. + + The previous adapter implementation emitted an ad-hoc + ``agent.code`` event type that is NOT in the canonical + 13-event taxonomy. The typed migration maps the + code-execution boundary onto :class:`ToolCallEvent` with + ``name="code_execution"`` and + ``integration=IntegrationType.SCRIPT`` (CodeAgent compiles + and runs Python code as an inline script, not a library + call). Framework provenance (``framework``, ``agent_name``, + ``event_subtype``, ``logs``, ``raw_output``) is carried on + the canonical input/output dicts. + """ try: logs = getattr(result, "logs", None) or getattr(result, "inner_messages", None) - self.emit_dict_event( - "agent.code", - { - "framework": "smolagents", - "agent_name": agent_name, - "event_subtype": "code_execution", - "output": self._safe_serialize(result), - "logs": self._safe_serialize(logs), - }, + serialised_result = self._safe_serialize(result) + serialised_logs = self._safe_serialize(logs) + + input_data: Dict[str, Any] = { + "framework": "smolagents", + "agent_name": agent_name, + "event_subtype": "code_execution", + } + output_data: Dict[str, Any] = { + "raw_output": serialised_result, + "logs": serialised_logs, + } + + self.emit_event( + ToolCallEvent.create( + name="code_execution", + version="unavailable", + integration=IntegrationType.SCRIPT, + input_data=input_data, + output_data=output_data, + ) ) except Exception: logger.debug("Could not emit code execution event", exc_info=True) diff --git a/tests/instrument/adapters/frameworks/test_autogen_adapter.py b/tests/instrument/adapters/frameworks/test_autogen_adapter.py index 578c355..139d62e 100644 --- a/tests/instrument/adapters/frameworks/test_autogen_adapter.py +++ b/tests/instrument/adapters/frameworks/test_autogen_adapter.py @@ -1,12 +1,24 @@ """Unit tests for the AutoGen framework adapter. Mocked at the SDK shape level — no real ``autogen`` runtime needed. + +After the typed-event migration (PR #129 follow-up — bundle 1) every +emit site flows through :meth:`BaseAdapter.emit_event` with a canonical +Pydantic payload. The :class:`_RecordingStratix` stand-in below records +both shapes so pre- and post-migration assertions live side by side: the +``payload`` slot always carries a dict (model-dumped if typed), and +``typed_payloads`` holds the original Pydantic instances for tests that +want to assert against the model surface. """ from __future__ import annotations from typing import Any, Dict, List +from layerlens._compat.pydantic import ( + BaseModel as _CompatBaseModel, + model_dump as _compat_model_dump, +) from layerlens.instrument.adapters._base import AdapterStatus, CaptureConfig from layerlens.instrument.adapters.frameworks.autogen import ( ADAPTER_CLASS, @@ -24,10 +36,26 @@ class _RecordingStratix: def __init__(self) -> None: self.events: List[Dict[str, Any]] = [] + # Hold strong references to the original typed payloads for + # the subset of tests that want to assert against the model + # surface (e.g. ``isinstance(payload, ToolCallEvent)``). The + # dict view lives on ``events`` and is what most assertions + # read. + self.typed_payloads: List[Any] = [] def emit(self, *args: Any, **kwargs: Any) -> None: + # Two-arg legacy path: ``emit(event_type, payload_dict)``. if len(args) == 2 and isinstance(args[0], str): self.events.append({"event_type": args[0], "payload": args[1]}) + return + # Single-arg typed path: ``emit(payload_model[, privacy_level])``. + if args and isinstance(args[0], _CompatBaseModel): + payload_model = args[0] + self.typed_payloads.append(payload_model) + event_type = getattr(payload_model, "event_type", "") + self.events.append( + {"event_type": event_type, "payload": _compat_model_dump(payload_model)} + ) class _FakeAgent: @@ -115,6 +143,9 @@ def test_connect_agents_idempotent() -> None: def test_on_send_emits_handoff() -> None: + """Typed AgentHandoffEvent: from/to live at the top level; the + canonical sha256 hash is generated from the message preview. + """ stratix = _RecordingStratix() adapter = AutoGenAdapter(stratix=stratix, capture_config=CaptureConfig.full()) adapter.connect() @@ -126,10 +157,21 @@ def test_on_send_emits_handoff() -> None: evt = next(e for e in stratix.events if e["event_type"] == "agent.handoff") assert evt["payload"]["from_agent"] == "alice" assert evt["payload"]["to_agent"] == "bob" - assert evt["payload"]["message_seq"] == 1 + # Canonical handoff_context_hash format: sha256:<64 hex chars>. + assert evt["payload"]["handoff_context_hash"].startswith("sha256:") + assert len(evt["payload"]["handoff_context_hash"]) == 7 + 64 + +def test_on_receive_emits_input_event_with_agent_role() -> None: + """The receive boundary is mapped to AgentInputEvent(role=AGENT). -def test_on_receive_emits_state_change() -> None: + The previous adapter implementation emitted an ad-hoc + ``agent.state.change`` payload, which did not satisfy the canonical + AgentStateChangeEvent ``before_hash`` / ``after_hash`` schema. The + typed migration maps the receive boundary onto AgentInputEvent + with ``role=AGENT`` (the message arrives from another agent), and + framework provenance lives on MessageContent.metadata. + """ stratix = _RecordingStratix() adapter = AutoGenAdapter(stratix=stratix, capture_config=CaptureConfig.full()) adapter.connect() @@ -138,12 +180,28 @@ def test_on_receive_emits_state_change() -> None: sender = _FakeAgent(name="alice") adapter.on_receive(receiver=receiver, message={"content": "hello"}, sender=sender) - evt = next(e for e in stratix.events if e["event_type"] == "agent.state.change") - assert evt["payload"]["agent"] == "bob" - assert evt["payload"]["from_agent"] == "alice" + # At least one agent.input event (with role=AGENT) was emitted. + inputs = [e for e in stratix.events if e["event_type"] == "agent.input"] + receive_inputs = [ + e for e in inputs + if e["payload"]["content"]["metadata"].get("event_subtype") == "message_received" + ] + assert len(receive_inputs) == 1 + payload = receive_inputs[0]["payload"] + assert payload["layer"] == "L1" + assert payload["content"]["role"] == "agent" + assert payload["content"]["metadata"]["agent"] == "bob" + assert payload["content"]["metadata"]["from_agent"] == "alice" def test_on_generate_reply_emits_model_invoke() -> None: + """Typed ModelInvokeEvent: model identity lives at payload.model.*. + + AutoGen-specific provenance (``framework``, ``agent``, + ``reply_preview``) is carried on ``model.parameters``. Token + counts use the canonical ``prompt_tokens`` / ``completion_tokens`` + slots. + """ stratix = _RecordingStratix() adapter = AutoGenAdapter(stratix=stratix, capture_config=CaptureConfig.full()) adapter.connect() @@ -153,13 +211,22 @@ def test_on_generate_reply_emits_model_invoke() -> None: adapter.on_generate_reply(agent=agent, messages=[{"role": "user", "content": "hi"}], reply=reply, latency_ms=42.0) evt = next(e for e in stratix.events if e["event_type"] == "model.invoke") - assert evt["payload"]["agent"] == "alice" - assert evt["payload"]["model"] == "gpt-5" - assert evt["payload"]["latency_ms"] == 42.0 - assert evt["payload"]["tokens_prompt"] == 10 + payload = evt["payload"] + assert payload["layer"] == "L3" + assert payload["model"]["name"] == "gpt-5" + assert payload["model"]["provider"] == "openai" # detected from "gpt" + assert payload["model"]["version"] == "unavailable" + assert payload["model"]["parameters"]["agent"] == "alice" + assert payload["model"]["parameters"]["framework"] == "autogen" + assert payload["latency_ms"] == 42.0 + assert payload["prompt_tokens"] == 10 + assert payload["completion_tokens"] == 5 + # capture_content default is True → input messages are captured. + assert payload["input_messages"] is not None def test_on_execute_code_emits_tool_call_and_environment() -> None: + """Typed ToolCallEvent + ToolEnvironmentEvent for code execution.""" stratix = _RecordingStratix() adapter = AutoGenAdapter(stratix=stratix, capture_config=CaptureConfig.full()) adapter.connect() @@ -171,11 +238,26 @@ def test_on_execute_code_emits_tool_call_and_environment() -> None: assert "tool.call" in types assert "tool.environment" in types tool = next(e for e in stratix.events if e["event_type"] == "tool.call") - assert tool["payload"]["tool_name"] == "code_execution" - assert tool["payload"]["code_blocks_count"] == 1 + payload = tool["payload"] + assert payload["layer"] == "L5a" + assert payload["tool"]["name"] == "code_execution" + assert payload["tool"]["integration"] == "script" + assert payload["input"]["code_blocks_count"] == 1 + assert payload["input"]["agent"] == "alice" + assert payload["latency_ms"] == 5.0 + # tool.environment carries the same execution context on + # environment.config. + env = next(e for e in stratix.events if e["event_type"] == "tool.environment") + assert env["payload"]["layer"] == "L5c" + assert env["payload"]["environment"]["config"]["execution_type"] == "code_block" + assert env["payload"]["environment"]["config"]["agent"] == "alice" def test_on_conversation_start_end_emits_input_output() -> None: + """Typed AgentInputEvent + AgentOutputEvent for the conversation + boundary. AutoGen-specific provenance lives on + MessageContent.metadata. + """ stratix = _RecordingStratix() adapter = AutoGenAdapter(stratix=stratix, capture_config=CaptureConfig.full()) adapter.connect() @@ -189,8 +271,13 @@ def test_on_conversation_start_end_emits_input_output() -> None: assert "agent.output" in types out = next(e for e in stratix.events if e["event_type"] == "agent.output") - assert out["payload"]["termination_reason"] == "max_rounds" - assert out["payload"]["duration_ns"] >= 0 + payload = out["payload"] + assert payload["layer"] == "L1" + assert payload["content"]["message"] == "bye" + metadata = payload["content"]["metadata"] + assert metadata["framework"] == "autogen" + assert metadata["termination_reason"] == "max_rounds" + assert metadata["duration_ns"] >= 0 def test_capture_config_gates_l3_model_metadata() -> None: @@ -233,3 +320,82 @@ def test_serialize_for_replay() -> None: assert rt.framework == "autogen" assert rt.adapter_name == "AutoGenAdapter" assert "capture_config" in rt.config + + +# --------------------------------------------------------------------------- +# Typed-event migration regression tests (PR #129 follow-up — bundle 1) +# --------------------------------------------------------------------------- + + +def test_autogen_emits_typed_payloads_only() -> None: + """Every emit site in autogen lifecycle is a typed emit_event call. + + Pins the post-migration contract: the recording stratix's + ``typed_payloads`` list grows for every emission and the legacy + two-arg dict path receives nothing. This is the public contract + backing the ``grep emit_dict_event src/.../autogen/lifecycle.py + → 0`` acceptance criterion in the typed-events bundle 1 PR. + """ + from layerlens.instrument._compat.events import ( + ToolCallEvent, + AgentInputEvent, + AgentOutputEvent, + ModelInvokeEvent, + AgentHandoffEvent, + ToolEnvironmentEvent, + EnvironmentConfigEvent, + ) + + stratix = _RecordingStratix() + adapter = AutoGenAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + sender = _FakeAgent(name="alice", llm_config={"model": "gpt-5"}) + recipient = _FakeAgent(name="bob") + adapter.connect_agents(sender, recipient) + adapter.on_conversation_start(initiator=sender, message="start") + adapter.on_send(sender=sender, message="hi", recipient=recipient) + adapter.on_receive(receiver=recipient, message={"content": "hi"}, sender=sender) + adapter.on_generate_reply(agent=recipient, messages=[], reply="ok") + adapter.on_execute_code(agent=recipient, code_blocks=[("python", "x")], result="1\n") + adapter.on_conversation_end(final_message="bye", termination_reason="done") + + # Every captured payload is a Pydantic model instance — the legacy + # dict path was not used. + assert stratix.typed_payloads, "expected typed payloads to be captured" + types_seen = {type(p) for p in stratix.typed_payloads} + assert AgentInputEvent in types_seen + assert AgentOutputEvent in types_seen + assert AgentHandoffEvent in types_seen + assert EnvironmentConfigEvent in types_seen + assert ModelInvokeEvent in types_seen + assert ToolCallEvent in types_seen + assert ToolEnvironmentEvent in types_seen + + +def test_autogen_emit_does_not_warn_after_migration() -> None: + """No DeprecationWarning fires from autogen lifecycle emission paths. + + The base adapter's ``emit_dict_event`` raises a DeprecationWarning + on every call. After migration, autogen lifecycle must never + trigger that warning. ``filterwarnings("error", ...)`` converts + the warning into a test failure. + """ + import warnings + + stratix = _RecordingStratix() + adapter = AutoGenAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + sender = _FakeAgent(name="alice", llm_config={"model": "gpt-5"}) + recipient = _FakeAgent(name="bob") + adapter.connect_agents(sender, recipient) + + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + adapter.on_conversation_start(initiator=sender, message="start") + adapter.on_send(sender=sender, message="hi", recipient=recipient) + adapter.on_receive(receiver=recipient, message={"content": "hi"}, sender=sender) + adapter.on_generate_reply(agent=recipient, messages=[], reply="ok") + adapter.on_execute_code(agent=recipient, code_blocks=[("python", "x")], result="1\n") + adapter.on_conversation_end(final_message="bye", termination_reason="done") diff --git a/tests/instrument/adapters/frameworks/test_crewai_adapter.py b/tests/instrument/adapters/frameworks/test_crewai_adapter.py index 604988f..8735dda 100644 --- a/tests/instrument/adapters/frameworks/test_crewai_adapter.py +++ b/tests/instrument/adapters/frameworks/test_crewai_adapter.py @@ -1,6 +1,21 @@ """Unit tests for the CrewAI framework adapter. Mocked at the SDK shape level — no real ``crewai`` runtime needed. + +After the typed-event migration (PR #129 follow-up — bundle 1) every +emit site in :mod:`layerlens.instrument.adapters.frameworks.crewai.lifecycle` +flows through :meth:`BaseAdapter.emit_event` with a canonical Pydantic +payload. The :class:`_RecordingStratix` stand-in below records both +shapes so pre- and post-migration assertions live side by side: the +``payload`` slot always carries a dict (model-dumped if typed), and +``typed_payloads`` holds the original Pydantic instances for tests +that want to assert against the model surface. + +NOTE: The ``on_delegation`` flow still routes through +:mod:`crewai.delegation` (untracked on this branch — covered by a +future follow-up PR). Tests that exercise the delegation path are +expected to still see the legacy dict-emit DeprecationWarning until +that follow-up lands. """ from __future__ import annotations @@ -8,6 +23,10 @@ from types import SimpleNamespace from typing import Any, Dict, List +from layerlens._compat.pydantic import ( + BaseModel as _CompatBaseModel, + model_dump as _compat_model_dump, +) from layerlens.instrument.adapters._base import AdapterStatus, CaptureConfig from layerlens.instrument.adapters.frameworks.crewai import ( ADAPTER_CLASS, @@ -26,10 +45,26 @@ class _RecordingStratix: def __init__(self) -> None: self.events: List[Dict[str, Any]] = [] + # Hold strong references to the original typed payloads for + # the subset of tests that want to assert against the model + # surface (e.g. ``isinstance(payload, ToolCallEvent)``). The + # dict view lives on ``events`` and is what most assertions + # read. + self.typed_payloads: List[Any] = [] def emit(self, *args: Any, **kwargs: Any) -> None: + # Two-arg legacy path: ``emit(event_type, payload_dict)``. if len(args) == 2 and isinstance(args[0], str): self.events.append({"event_type": args[0], "payload": args[1]}) + return + # Single-arg typed path: ``emit(payload_model[, privacy_level])``. + if args and isinstance(args[0], _CompatBaseModel): + payload_model = args[0] + self.typed_payloads.append(payload_model) + event_type = getattr(payload_model, "event_type", "") + self.events.append( + {"event_type": event_type, "payload": _compat_model_dump(payload_model)} + ) class _FakeCrew: @@ -77,6 +112,9 @@ def test_adapter_info_and_health() -> None: def test_instrument_crew_attaches_callback_and_emits_config() -> None: + """Typed EnvironmentConfigEvent: agent_role lives on + payload.environment.attributes. + """ stratix = _RecordingStratix() adapter = CrewAIAdapter(stratix=stratix, capture_config=CaptureConfig.full()) adapter.connect() @@ -95,8 +133,12 @@ def test_instrument_crew_attaches_callback_and_emits_config() -> None: # Two environment.config events — one per agent role. configs = [e for e in stratix.events if e["event_type"] == "environment.config"] assert len(configs) == 2 - roles = {c["payload"]["agent_role"] for c in configs} + # Canonical L4a schema: payload.environment.attributes is the dict + # that carries adapter-specific provenance (agent_role). + roles = {c["payload"]["environment"]["attributes"]["agent_role"] for c in configs} assert roles == {"researcher", "writer"} + for cfg in configs: + assert cfg["payload"]["environment"]["type"] == "simulated" def test_environment_config_idempotent_per_role() -> None: @@ -114,6 +156,9 @@ def test_environment_config_idempotent_per_role() -> None: def test_on_crew_start_end_emits_input_output() -> None: + """Typed AgentInputEvent + AgentOutputEvent for the crew lifecycle. + CrewAI-specific provenance lives on MessageContent.metadata. + """ stratix = _RecordingStratix() adapter = CrewAIAdapter(stratix=stratix, capture_config=CaptureConfig.full()) adapter.connect() @@ -126,11 +171,20 @@ def test_on_crew_start_end_emits_input_output() -> None: assert "agent.output" in types out = next(e for e in stratix.events if e["event_type"] == "agent.output") - assert out["payload"]["output"] == "report" - assert out["payload"]["duration_ns"] >= 0 - - -def test_on_task_start_end_emits_code_and_state_change() -> None: + payload = out["payload"] + assert payload["layer"] == "L1" + assert payload["content"]["message"] == "report" + metadata = payload["content"]["metadata"] + assert metadata["framework"] == "crewai" + assert metadata["duration_ns"] >= 0 + assert metadata["run_status"] == "crew_complete" + + +def test_on_task_start_end_emits_input_output_and_cost() -> None: + """Typed migration: task-start → AgentInputEvent (role=AGENT, + event_subtype=task_start). task-end → AgentOutputEvent + (run_status=task_complete) + canonical CostRecordEvent. + """ stratix = _RecordingStratix() adapter = CrewAIAdapter(stratix=stratix, capture_config=CaptureConfig.full()) adapter.connect() @@ -141,31 +195,76 @@ def test_on_task_start_end_emits_code_and_state_change() -> None: task_output = SimpleNamespace(token_usage={"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}) adapter.on_task_end(task_output=task_output, agent_role="researcher", task_order=1) - types = [e["event_type"] for e in stratix.events] - assert "agent.code" in types - assert "agent.state.change" in types - assert "cost.record" in types - + # Task-start → AgentInputEvent (role=AGENT, event_subtype=task_start). + inputs = [e for e in stratix.events if e["event_type"] == "agent.input"] + task_inputs = [ + e for e in inputs + if e["payload"]["content"]["metadata"].get("event_subtype") == "task_start" + ] + assert len(task_inputs) == 1 + ts_payload = task_inputs[0]["payload"] + assert ts_payload["content"]["message"] == "research" + assert ts_payload["content"]["role"] == "agent" + assert ts_payload["content"]["metadata"]["agent_role"] == "researcher" + assert ts_payload["content"]["metadata"]["task_order"] == 1 + + # Task-end → AgentOutputEvent (run_status=task_complete). + outputs = [e for e in stratix.events if e["event_type"] == "agent.output"] + task_outputs = [ + e for e in outputs + if e["payload"]["content"]["metadata"].get("event_subtype") == "task_complete" + ] + assert len(task_outputs) == 1 + te_metadata = task_outputs[0]["payload"]["content"]["metadata"] + assert te_metadata["agent_role"] == "researcher" + assert te_metadata["run_status"] == "task_complete" + + # Canonical cost.record: tokens via prompt_tokens / completion_tokens / tokens. cost = next(e for e in stratix.events if e["event_type"] == "cost.record") - assert cost["payload"]["tokens_total"] == 15 + cost_payload = cost["payload"] + assert cost_payload["cost"]["prompt_tokens"] == 10 + assert cost_payload["cost"]["completion_tokens"] == 5 + assert cost_payload["cost"]["tokens"] == 15 def test_on_tool_use_emits_event() -> None: + """Typed ToolCallEvent: tool name lives at payload.tool.name.""" stratix = _RecordingStratix() adapter = CrewAIAdapter(stratix=stratix, capture_config=CaptureConfig.full()) adapter.connect() adapter.on_tool_use("calc", tool_input={"x": 1}, tool_output=2, latency_ms=12.3) evt = next(e for e in stratix.events if e["event_type"] == "tool.call") - assert evt["payload"]["tool_name"] == "calc" - assert evt["payload"]["latency_ms"] == 12.3 + payload = evt["payload"] + assert payload["layer"] == "L5a" + assert payload["tool"]["name"] == "calc" + assert payload["tool"]["integration"] == "library" + assert payload["latency_ms"] == 12.3 + assert payload["input"] == {"x": 1} + # Scalar tool_output is wrapped in {"value": ...} so the canonical + # ``output: dict`` slot is satisfied. + assert payload["output"] == {"value": 2} def test_on_delegation_emits_handoff() -> None: + """Delegation flows through the (untracked) delegation tracker. + + The delegation tracker still uses ``emit_dict_event`` on this + branch — it lives in ``crewai/delegation.py`` which is untracked + and outside this PR's scope (per + ``docs/adapters/typed-events-followups.md``). We assert the + handoff event lands at the dict shape that the tracker emits. + """ + import warnings + stratix = _RecordingStratix() adapter = CrewAIAdapter(stratix=stratix, capture_config=CaptureConfig.full()) adapter.connect() - adapter.on_delegation(from_agent="researcher", to_agent="writer", context="findings") + # Suppress the expected DeprecationWarning from delegation.py — it + # is a known follow-up site documented in the bundle 1 PR body. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + adapter.on_delegation(from_agent="researcher", to_agent="writer", context="findings") evt = next(e for e in stratix.events if e["event_type"] == "agent.handoff") assert evt["payload"]["from_agent"] == "researcher" @@ -173,13 +272,23 @@ def test_on_delegation_emits_handoff() -> None: def test_capture_config_gates_l5a_tool_calls() -> None: + """When l5a_tool_calls is disabled, tool.call events do NOT fire, + but cross-cutting handoff events (from the untracked delegation + tracker) still emit. + """ + import warnings + stratix = _RecordingStratix() cfg = CaptureConfig(l5a_tool_calls=False) adapter = CrewAIAdapter(stratix=stratix, capture_config=cfg) adapter.connect() adapter.on_tool_use("calc", tool_input={"x": 1}, tool_output=2) - adapter.on_delegation(from_agent="a", to_agent="b", context="x") + # Delegation goes through the untracked dict-emit tracker — see + # test_on_delegation_emits_handoff for the rationale. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + adapter.on_delegation(from_agent="a", to_agent="b", context="x") types = [e["event_type"] for e in stratix.events] assert "tool.call" not in types @@ -206,3 +315,80 @@ def test_serialize_for_replay() -> None: assert rt.framework == "crewai" assert rt.adapter_name == "CrewAIAdapter" assert "capture_config" in rt.config + + +# --------------------------------------------------------------------------- +# Typed-event migration regression tests (PR #129 follow-up — bundle 1) +# --------------------------------------------------------------------------- + + +def test_crewai_lifecycle_emits_typed_payloads_only() -> None: + """Every emit site in crewai lifecycle.py is a typed emit_event call. + + Pins the post-migration contract for the lifecycle module: the + recording stratix's ``typed_payloads`` list grows for every + emission and the legacy two-arg dict path receives nothing. + Delegation is excluded — it routes through the untracked + ``crewai/delegation.py`` (covered by a future follow-up PR). + """ + from layerlens.instrument._compat.events import ( + ToolCallEvent, + AgentInputEvent, + CostRecordEvent, + AgentOutputEvent, + ModelInvokeEvent, + EnvironmentConfigEvent, + ) + + stratix = _RecordingStratix() + adapter = CrewAIAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + crew = _FakeCrew(agents=[_make_agent(role="researcher")], process="sequential") + adapter.instrument_crew(crew) + adapter.on_crew_start(crew_input="topic") + adapter.on_task_start("research", agent_role="researcher", task_order=1) + task_output = SimpleNamespace( + token_usage={"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15} + ) + adapter.on_task_end(task_output=task_output, agent_role="researcher", task_order=1) + adapter.on_tool_use("calc", tool_input={"x": 1}, tool_output=2) + adapter.on_llm_call(provider="openai", model="gpt-5", tokens_prompt=8) + adapter.on_crew_end(crew_output="report") + + # Every captured payload from lifecycle paths is a Pydantic model. + assert stratix.typed_payloads, "expected typed payloads to be captured" + types_seen = {type(p) for p in stratix.typed_payloads} + assert AgentInputEvent in types_seen + assert AgentOutputEvent in types_seen + assert CostRecordEvent in types_seen + assert EnvironmentConfigEvent in types_seen + assert ModelInvokeEvent in types_seen + assert ToolCallEvent in types_seen + + +def test_crewai_lifecycle_emit_does_not_warn_after_migration() -> None: + """No DeprecationWarning fires from crewai lifecycle.py paths. + + Excludes ``on_delegation`` which still routes through the + untracked delegation tracker. + """ + import warnings + + stratix = _RecordingStratix() + adapter = CrewAIAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + crew = _FakeCrew(agents=[_make_agent(role="researcher")], process="sequential") + + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + adapter.instrument_crew(crew) + adapter.on_crew_start(crew_input="topic") + adapter.on_task_start("research", agent_role="researcher", task_order=1) + task_output = SimpleNamespace( + token_usage={"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15} + ) + adapter.on_task_end(task_output=task_output, agent_role="researcher", task_order=1) + adapter.on_tool_use("calc", tool_input={"x": 1}, tool_output=2) + adapter.on_llm_call(provider="openai", model="gpt-5", tokens_prompt=8) + adapter.on_crew_end(crew_output="report") diff --git a/tests/instrument/adapters/frameworks/test_smolagents_adapter.py b/tests/instrument/adapters/frameworks/test_smolagents_adapter.py index 886cfcb..15fbbbf 100644 --- a/tests/instrument/adapters/frameworks/test_smolagents_adapter.py +++ b/tests/instrument/adapters/frameworks/test_smolagents_adapter.py @@ -1,12 +1,24 @@ """Unit tests for the SmolAgents framework adapter. Mocked at the SDK shape level — no real ``smolagents`` runtime needed. + +After the typed-event migration (PR #129 follow-up — bundle 1) every +emit site flows through :meth:`BaseAdapter.emit_event` with a canonical +Pydantic payload. The :class:`_RecordingStratix` stand-in below records +both shapes so pre- and post-migration assertions live side by side: the +``payload`` slot always carries a dict (model-dumped if typed), and +``typed_payloads`` holds the original Pydantic instances for tests that +want to assert against the model surface. """ from __future__ import annotations from typing import Any, Dict, List +from layerlens._compat.pydantic import ( + BaseModel as _CompatBaseModel, + model_dump as _compat_model_dump, +) from layerlens.instrument.adapters._base import AdapterStatus, CaptureConfig from layerlens.instrument.adapters.frameworks.smolagents import ( ADAPTER_CLASS, @@ -24,10 +36,26 @@ class _RecordingStratix: def __init__(self) -> None: self.events: List[Dict[str, Any]] = [] + # Hold strong references to the original typed payloads for + # the subset of tests that want to assert against the model + # surface (e.g. ``isinstance(payload, ToolCallEvent)``). The + # dict view lives on ``events`` and is what most assertions + # read. + self.typed_payloads: List[Any] = [] def emit(self, *args: Any, **kwargs: Any) -> None: + # Two-arg legacy path: ``emit(event_type, payload_dict)``. if len(args) == 2 and isinstance(args[0], str): self.events.append({"event_type": args[0], "payload": args[1]}) + return + # Single-arg typed path: ``emit(payload_model[, privacy_level])``. + if args and isinstance(args[0], _CompatBaseModel): + payload_model = args[0] + self.typed_payloads.append(payload_model) + event_type = getattr(payload_model, "event_type", "") + self.events.append( + {"event_type": event_type, "payload": _compat_model_dump(payload_model)} + ) class _FakeAgent: @@ -81,6 +109,9 @@ def test_instrument_agent_wraps_run() -> None: def test_run_emits_input_and_output_events() -> None: + """Typed AgentInputEvent + AgentOutputEvent for the run lifecycle. + SmolAgents-specific provenance lives on MessageContent.metadata. + """ stratix = _RecordingStratix() adapter = SmolAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) adapter.connect() @@ -98,11 +129,18 @@ def test_run_emits_input_and_output_events() -> None: assert "agent.output" in types out = next(e for e in stratix.events if e["event_type"] == "agent.output") - assert out["payload"]["agent_name"] == "planner" - assert out["payload"]["duration_ns"] >= 0 + payload = out["payload"] + assert payload["layer"] == "L1" + assert payload["content"]["message"] == "result for compute 2+2" + metadata = payload["content"]["metadata"] + assert metadata["agent_name"] == "planner" + assert metadata["framework"] == "smolagents" + assert metadata["duration_ns"] >= 0 + assert metadata["run_status"] == "run_complete" def test_run_failure_emits_output_with_error() -> None: + """Errors are surfaced via canonical metadata on AgentOutputEvent.""" stratix = _RecordingStratix() adapter = SmolAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) adapter.connect() @@ -117,8 +155,10 @@ def test_run_failure_emits_output_with_error() -> None: agent.run("bad task") out = next(e for e in stratix.events if e["event_type"] == "agent.output") - assert "error" in out["payload"] - assert "simulated failure" in out["payload"]["error"] + metadata = out["payload"]["content"]["metadata"] + assert "error" in metadata + assert "simulated failure" in metadata["error"] + assert metadata["run_status"] == "run_failed" def test_managed_agents_recursively_instrumented() -> None: @@ -135,6 +175,9 @@ def test_managed_agents_recursively_instrumented() -> None: def test_environment_config_emits_once_per_agent() -> None: + """Typed EnvironmentConfigEvent: provenance lives on + payload.environment.attributes. + """ stratix = _RecordingStratix() adapter = SmolAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) adapter.connect() @@ -151,12 +194,16 @@ def test_environment_config_emits_once_per_agent() -> None: configs = [e for e in stratix.events if e["event_type"] == "environment.config"] assert len(configs) == 1 - cfg = configs[0]["payload"] - assert cfg["agent_name"] == "a1" - assert cfg["tools"] == ["search", "calc"] + # Canonical L4a schema: payload.environment.attributes is the dict + # that carries adapter-specific provenance. + attributes = configs[0]["payload"]["environment"]["attributes"] + assert attributes["agent_name"] == "a1" + assert attributes["tools"] == ["search", "calc"] + assert configs[0]["payload"]["environment"]["type"] == "simulated" def test_on_tool_use_emits_event() -> None: + """Typed ToolCallEvent: tool name lives at payload.tool.name.""" stratix = _RecordingStratix() adapter = SmolAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) adapter.connect() @@ -164,11 +211,25 @@ def test_on_tool_use_emits_event() -> None: adapter.on_tool_use("calc", tool_input={"x": 1}, tool_output=2, latency_ms=12.3) evt = next(e for e in stratix.events if e["event_type"] == "tool.call") - assert evt["payload"]["tool_name"] == "calc" - assert evt["payload"]["latency_ms"] == 12.3 + payload = evt["payload"] + assert payload["layer"] == "L5a" + assert payload["tool"]["name"] == "calc" + assert payload["tool"]["integration"] == "library" + assert payload["latency_ms"] == 12.3 + assert payload["input"] == {"x": 1} + assert payload["output"] == {"value": 2} def test_on_handoff_emits_event_with_context_hash() -> None: + """Typed AgentHandoffEvent: handoff_context_hash is sha256:. + + The previous adapter's ``context_hash`` (bare hex) and + ``context_preview`` (top-level) fields are gone — the canonical + schema only declares ``handoff_context_hash`` (with strict + ``sha256:`` prefix validation). Context redaction concerns live + at the privacy-policy layer above the adapter, not on the + canonical handoff payload. + """ stratix = _RecordingStratix() adapter = SmolAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) adapter.connect() @@ -176,26 +237,32 @@ def test_on_handoff_emits_event_with_context_hash() -> None: adapter.on_handoff(from_agent="a", to_agent="b", context="some context") evt = next(e for e in stratix.events if e["event_type"] == "agent.handoff") - assert evt["payload"]["from_agent"] == "a" - assert evt["payload"]["to_agent"] == "b" - assert evt["payload"]["context_hash"] is not None - # Capture content on => preview included. - assert evt["payload"]["context_preview"] == "some context" + payload = evt["payload"] + assert payload["from_agent"] == "a" + assert payload["to_agent"] == "b" + assert payload["handoff_context_hash"].startswith("sha256:") + # 7-char prefix + 64 hex chars = 71 chars total per the canonical + # validator in events_cross_cutting.py. + assert len(payload["handoff_context_hash"]) == 7 + 64 + +def test_handoff_emits_canonical_hash_for_empty_context() -> None: + """Empty context still produces a well-formed sha256 hash. -def test_handoff_redacts_context_when_capture_content_disabled() -> None: + The previous adapter emitted ``context_hash=None`` when the + context was missing; the canonical schema rejects ``None``. + """ stratix = _RecordingStratix() adapter = SmolAgentsAdapter( stratix=stratix, capture_config=CaptureConfig(capture_content=False), ) adapter.connect() - adapter.on_handoff(from_agent="a", to_agent="b", context="secret") + adapter.on_handoff(from_agent="a", to_agent="b", context=None) evt = next(e for e in stratix.events if e["event_type"] == "agent.handoff") - assert evt["payload"]["context_preview"] is None - # Hash still present (it's not content). - assert evt["payload"]["context_hash"] is not None + # Hash is well-formed even without context. + assert evt["payload"]["handoff_context_hash"].startswith("sha256:") def test_instrument_agent_helper() -> None: @@ -216,3 +283,74 @@ def test_serialize_for_replay() -> None: rt = adapter.serialize_for_replay() assert rt.framework == "smolagents" assert "capture_config" in rt.config + + +# --------------------------------------------------------------------------- +# Typed-event migration regression tests (PR #129 follow-up — bundle 1) +# --------------------------------------------------------------------------- + + +def test_smolagents_emits_typed_payloads_only() -> None: + """Every emit site in smolagents lifecycle is a typed emit_event call. + + Pins the post-migration contract: the recording stratix's + ``typed_payloads`` list grows for every emission and the legacy + two-arg dict path receives nothing. This is the public contract + backing the ``grep emit_dict_event src/.../smolagents/ → 0`` + acceptance criterion in the typed-events bundle 1 PR. + """ + from layerlens.instrument._compat.events import ( + ToolCallEvent, + AgentInputEvent, + AgentOutputEvent, + ModelInvokeEvent, + AgentHandoffEvent, + EnvironmentConfigEvent, + ) + + stratix = _RecordingStratix() + adapter = SmolAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + agent = _FakeAgent(name="planner", tools=["search"], model="some-model") + adapter.instrument_agent(agent) + agent.run("compute 2+2") + adapter.on_tool_use("calc", tool_input={"x": 1}, tool_output=2) + adapter.on_llm_call(provider="hf", model="llama-3", tokens_prompt=10) + adapter.on_handoff(from_agent="manager", to_agent="planner", context="ctx") + + # Every captured payload is a Pydantic model instance — the legacy + # dict path was not used. + assert stratix.typed_payloads, "expected typed payloads to be captured" + types_seen = {type(p) for p in stratix.typed_payloads} + assert AgentInputEvent in types_seen + assert AgentOutputEvent in types_seen + assert AgentHandoffEvent in types_seen + assert EnvironmentConfigEvent in types_seen + assert ModelInvokeEvent in types_seen + assert ToolCallEvent in types_seen + + +def test_smolagents_emit_does_not_warn_after_migration() -> None: + """No DeprecationWarning fires from smolagents emission paths. + + The base adapter's ``emit_dict_event`` raises a DeprecationWarning + on every call. After migration, smolagents must never trigger + that warning. ``filterwarnings("error", ...)`` converts the + warning into a test failure. + """ + import warnings + + stratix = _RecordingStratix() + adapter = SmolAgentsAdapter(stratix=stratix, capture_config=CaptureConfig.full()) + adapter.connect() + + agent = _FakeAgent(name="planner", tools=["search"], model="some-model") + adapter.instrument_agent(agent) + + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + agent.run("compute 2+2") + adapter.on_tool_use("calc", tool_input={"x": 1}, tool_output=2) + adapter.on_llm_call(provider="hf", model="llama-3", tokens_prompt=10) + adapter.on_handoff(from_agent="manager", to_agent="planner", context="ctx")