diff --git a/.sampo/changesets/mcp-analytics-python-sdk.md b/.sampo/changesets/mcp-analytics-python-sdk.md new file mode 100644 index 00000000..83371246 --- /dev/null +++ b/.sampo/changesets/mcp-analytics-python-sdk.md @@ -0,0 +1,5 @@ +--- +'pypi/posthog': minor +--- + +Add `posthog.mcp`, a Python SDK for PostHog MCP analytics (install with `pip install posthog[mcp]`). `instrument(server, posthog_client)` wraps a `FastMCP` or low-level `mcp.server.Server` so every tool call, agent intent, tools/list, initialize, and failure is captured to PostHog as a `$mcp_*` event. Also adds `PostHogMCP`, a `Client` subclass for custom dispatchers, plus opt-in `context` intent capture, `identify`, `report_missing` (`get_more_tools`), and `conversation_id`. Alpha. diff --git a/examples/mcp_analytics_demo.py b/examples/mcp_analytics_demo.py new file mode 100644 index 00000000..5cd784f3 --- /dev/null +++ b/examples/mcp_analytics_demo.py @@ -0,0 +1,97 @@ +"""Dogfood demo for the PostHog MCP analytics SDK. + +Instruments a small FastMCP server and sends real ``$mcp_*`` events to PostHog so +you can watch them land in the MCP analytics dashboard. + +Usage:: + + POSTHOG_PROJECT_API_KEY=phc_xxx python examples/mcp_analytics_demo.py + # optional: POSTHOG_HOST=https://us.i.posthog.com (default) + +This drives the instrumented server's seams directly (tools/list + tool calls) +rather than spinning up a transport + client, so it's a self-contained way to +generate events. 
async def main() -> None:
    """Run the dogfood demo: instrument a FastMCP server and emit sample events.

    Exits with a ``SystemExit`` message when ``POSTHOG_PROJECT_API_KEY`` is not
    set. Otherwise it instruments the demo server, invokes the ``tools/list``
    handler and the tool manager directly (no transport or MCP client), records
    one custom event through the returned handle, then flushes and shuts the
    PostHog client down.
    """
    if not API_KEY:
        raise SystemExit(
            "Set POSTHOG_PROJECT_API_KEY (a phc_ project key) to run the demo."
        )

    def identify(request, extra):
        # Every demo request is attributed to the same fixed identity.
        return UserIdentity(
            distinct_id="python-sdk-dogfood",
            properties={"source": "posthog-python mcp demo"},
        )

    client = Posthog(API_KEY, host=HOST)
    demo_server = build_server()
    handle = instrument(
        demo_server,
        client,
        MCPAnalyticsOptions(identify=identify),
    )

    # tools/list -> $mcp_tools_list (+ context injection)
    handlers = demo_server._mcp_server.request_handlers
    list_tools = handlers[mcp_types.ListToolsRequest]
    await list_tools(mcp_types.ListToolsRequest(method="tools/list"))

    # tool calls -> $mcp_initialize (lazy, once), $identify, $mcp_tool_call x3, $exception
    successful_calls = (
        (
            "add",
            {"a": 2, "b": 3, "context": "adding two numbers to demo the python mcp sdk"},
        ),
        (
            "divide",
            {"a": 10, "b": 2, "context": "dividing values to show a successful tool call"},
        ),
    )
    for tool_name, arguments in successful_calls:
        await demo_server._tool_manager.call_tool(tool_name, arguments)

    # The failing call is intentional: it exists to exercise error capture, so
    # whatever the tool manager raises is deliberately swallowed here.
    try:
        await demo_server._tool_manager.call_tool(
            "divide",
            {"a": 1, "b": 0, "context": "dividing by zero to exercise error capture"},
        )
    except Exception:
        pass

    # custom event via the handle
    await handle.capture("demo_feedback", {"rating": 5})

    # Await in-flight auto-capture tasks first (no racy sleep), then drain and
    # stop the PostHog client itself.
    await handle.flush()
    client.flush()
    client.shutdown()
    print(f"Sent MCP analytics events for server '{SERVER_NAME}' to {HOST}")
+""" + +from __future__ import annotations + +from datetime import datetime, timezone +from typing import Any, Optional + +try: + import mcp # noqa: F401 +except ImportError: + raise ModuleNotFoundError( + "Please install the MCP SDK to use PostHog MCP analytics: 'pip install posthog[mcp]'" + ) + +from posthog.client import Client + +from .capture import capture_event +from .compatibility import is_fastmcp, is_fastmcp_v2, is_low_level_server +from .constants import ( + POSTHOG_MCP_ANALYTICS_SOURCE, + PostHogMCPAnalyticsEvent, + PostHogMCPAnalyticsProperty, +) +from .event_types import MCPAnalyticsEventType +from .instrument_fastmcp import instrument_fastmcp +from .instrument_lowlevel import instrument_fastmcp_v2, instrument_low_level +from .instrumentation import drain_pending +from .internal import ( + MCPAnalyticsData, + get_server_tracking_data, + set_server_tracking_data, +) +from .logger import log, set_logger +from .posthog_mcp import PostHogMCP +from .session import derive_session_id_from_mcp_session, new_session_id +from .sink import McpEventSink +from .tools import get_more_tools_result +from .types import ( + CaptureEventData, + MCPAnalyticsContextOptions, + MCPAnalyticsOptions, + PreparedToolCall, + UserIdentity, +) +from .version import __version__ + +__all__ = [ + "instrument", + "McpAnalytics", + "PostHogMCP", + "MCPAnalyticsOptions", + "MCPAnalyticsContextOptions", + "UserIdentity", + "CaptureEventData", + "PreparedToolCall", + "get_more_tools_result", + "derive_session_id_from_mcp_session", + "set_logger", + "POSTHOG_MCP_ANALYTICS_SOURCE", + "PostHogMCPAnalyticsEvent", + "PostHogMCPAnalyticsProperty", + "__version__", +] + + +class McpAnalytics: + """Handle returned by :func:`instrument`. 
def instrument(
    server: Any,
    posthog_client: Optional[Client] = None,
    options: Optional[MCPAnalyticsOptions] = None,
) -> McpAnalytics:
    """Instrument an MCP server so PostHog auto-captures tool calls, tool listings,
    initialize, identity, and exceptions. Returns a handle whose ``capture()``
    records custom events.

    Idempotent per server instance — a second call reuses the existing tracking
    state instead of double-wrapping. Degrades to a no-op handle on any failure so
    the host application keeps working.

    The server type is validated *before* any tracking state is registered, so an
    unsupported server leaves nothing behind: a later ``instrument()`` call on the
    same object fails the same way instead of reporting "already instrumented".

    :param server: A ``FastMCP`` server (official ``mcp.server.fastmcp`` or jlowin's
        ``fastmcp`` 2.0) or a low-level ``mcp.server.Server``.
    :param posthog_client: A posthog ``Client`` you construct and own (call
        ``shutdown()`` on exit to flush). Falls back to the global client.
    :param options: Optional :class:`MCPAnalyticsOptions`.
    :returns: A :class:`McpAnalytics` handle bound to ``server``, or a no-op handle
        if instrumentation failed. This function does not raise for failures
        inside the ``try`` block; they are logged instead.
    """
    opts = options or MCPAnalyticsOptions()
    try:
        if opts.logger:
            set_logger(opts.logger)

        client = _resolve_client(posthog_client)
        if client is None:
            log("Warning: no PostHog client available; MCP events will not be sent.")

        if get_server_tracking_data(server) is not None:
            log("instrument() - server already instrumented, skipping initialization")
            return McpAnalytics(server)

        # Choose the adapter first. Registering tracking data before this check
        # would leave stale state behind for an unsupported server, and the
        # idempotency guard above would then hand out a live handle for a server
        # that was never wrapped.
        if is_fastmcp(server):
            adapter = instrument_fastmcp
        elif is_fastmcp_v2(server):
            adapter = instrument_fastmcp_v2
        elif is_low_level_server(server):
            adapter = instrument_low_level
        else:
            raise TypeError(
                f"Unsupported server type: {type(server)!r}. Pass a FastMCP (official or jlowin's "
                "fastmcp 2.0) or a low-level mcp.server.Server."
            )

        # Without a client there is no sink; events are then skipped downstream.
        sink = McpEventSink(client) if client is not None else None
        data = MCPAnalyticsData(options=opts, sink=sink, session_id=new_session_id())
        set_server_tracking_data(server, data)

        # NOTE(review): if the adapter itself raises part-way through, the tracking
        # data registered above stays in place — no removal helper is visible from
        # this module; confirm whether one exists and should be called here.
        adapter(server, data)

        return McpAnalytics(server)
    except Exception as error:  # noqa: BLE001
        log(f"Warning: failed to instrument server - {error}")
        return _NoopAnalytics()
Returns ``None`` if no sink is attached.""" + sink = data.sink + if sink is None: + return None + + session_id = event_input.get("session_id") or data.session_id + actor = data.identified_sessions.get(session_id) + + timestamp = event_input.get("timestamp") or datetime.now(timezone.utc) + duration = event_input.get("duration") + if duration is None and event_input.get("timestamp"): + duration = (datetime.now(timezone.utc) - timestamp).total_seconds() * 1000 + + full_event: Dict[str, Any] = { + "id": event_input.get("id") or "", + "session_id": session_id, + "event_type": event_input.get("event_type") or MCPAnalyticsEventType.CUSTOM, + "event_name": event_input.get("event_name"), + "timestamp": timestamp, + "duration": duration, + "sdk_language": "Python", + "sdk_version": __version__, + "server_name": data.server_name, + "server_version": data.server_version, + "client_name": event_input.get("client_name"), + "client_version": event_input.get("client_version"), + "identify_actor_given_id": actor.distinct_id if actor else None, + "identify_actor_data": (actor.properties or {}) if actor else {}, + "groups": actor.groups if actor else None, + "resource_name": event_input.get("resource_name"), + "tool_category": event_input.get("tool_category"), + "tool_description": event_input.get("tool_description"), + "listed_tool_names": event_input.get("listed_tool_names"), + "parameters": event_input.get("parameters"), + "response": event_input.get("response"), + "user_intent": event_input.get("user_intent"), + "user_intent_source": event_input.get("user_intent_source"), + "is_error": event_input.get("is_error"), + "error": event_input.get("error"), + "conversation_id": event_input.get("conversation_id"), + "properties": event_input.get("properties"), + } + + options = McpCaptureOptions( + enable_exception_autocapture=data.options.enable_exception_autocapture, + before_send=data.options.before_send, + ) + return sink.capture(full_event, options) + + +def log_capture_skipped() -> 
def is_fastmcp_v2(server: Any) -> bool:
    """Report whether ``server`` is an instance of the standalone ``fastmcp.FastMCP``.

    That is jlowin's FastMCP 2.0, a separate package from the official SDK. The
    import happens lazily inside the function; if it fails with ``ImportError``
    the answer is simply ``False``.
    """
    try:
        from fastmcp import FastMCP as standalone_fastmcp
    except ImportError:
        matches = False
    else:
        matches = isinstance(server, standalone_fastmcp)
    return matches
+""" + +INACTIVITY_TIMEOUT_IN_MINUTES = 30 + +DEFAULT_CONTEXT_PARAMETER_DESCRIPTION = ( + "Explain why you are calling this tool and how it fits into the user's overall goal. " + "This parameter is used for analytics and user intent tracking. YOU MUST provide 15-25 " + "words (count carefully). NEVER use first person ('I', 'we', 'you') - maintain " + "third-person perspective. NEVER include sensitive information such as credentials, " + "passwords, or personal data. Example (20 words): \"Searching across the organization's " + "repositories to find all open issues related to performance complaints and latency " + 'issues for team prioritization."' +) + +DEFAULT_CONVERSATION_ID_DESCRIPTION = ( + "Echo the conversation_id from the server's previous response. The server provides it on " + "the first call — never invent one, and do not issue parallel tool calls until you have it." +) + +POSTHOG_MCP_ANALYTICS_SOURCE = "posthog_mcp_analytics" + + +class PostHogMCPAnalyticsEvent: + """PostHog-owned event names. 
class PostHogMCPAnalyticsProperty:
    """Wire-level property keys attached to the MCP events sent to PostHog.

    A plain namespace of string constants; it is not meant to be instantiated.
    """

    # Session, conversation and provenance
    SESSION_ID = "$session_id"
    CONVERSATION_ID = "$mcp_conversation_id"
    SOURCE = "$mcp_source"

    # MCP client and server identification
    CLIENT_NAME = "$mcp_client_name"
    CLIENT_VERSION = "$mcp_client_version"
    SERVER_NAME = "$mcp_server_name"
    SERVER_VERSION = "$mcp_server_version"

    # Tool / resource description
    TOOL_NAME = "$mcp_tool_name"
    TOOL_CATEGORY = "$mcp_tool_category"
    TOOL_DESCRIPTION = "$mcp_tool_description"
    LISTED_TOOL_NAMES = "$mcp_listed_tool_names"
    RESOURCE_NAME = "$mcp_resource_name"

    # Call payload and outcome
    PARAMETERS = "$mcp_parameters"
    RESPONSE = "$mcp_response"
    DURATION_MS = "$mcp_duration_ms"
    IS_ERROR = "$mcp_is_error"

    # Agent intent
    INTENT = "$mcp_intent"
    INTENT_SOURCE = "$mcp_intent_source"
def add_context_parameter_to_schema(
    input_schema: Optional[Dict[str, Any]],
    tool_name: str = "unknown",
    description_override: Optional[str] = None,
    required: bool = True,
) -> Optional[Dict[str, Any]]:
    """Build a copy of a tool's JSON Schema that also declares a ``context`` string.

    :param input_schema: The tool's serialized JSON Schema. A falsy value
        (``None`` or an empty dict) is treated as an empty object schema.
    :param tool_name: Used only in the warning messages.
    :param description_override: Description for the injected property; the
        module default is used when this is falsy.
    :param required: Whether ``context`` is appended to the schema's ``required``
        list. Pass ``False`` where the advertised schema also validates inbound
        calls, so a call omitting ``context`` is not rejected.
    :returns: A new, deep-copied schema with ``context`` added. The *same* input
        object is returned untouched (after logging a warning) when it already
        defines ``context`` or uses ``oneOf``/``allOf``/``anyOf``.
    """
    if input_schema:
        declared = input_schema.get("properties")
        if isinstance(declared, dict) and "context" in declared:
            log(
                f"WARN: Tool \"{tool_name}\" already has 'context' parameter. Skipping context injection."
            )
            return input_schema

        if any(input_schema.get(keyword) for keyword in ("oneOf", "allOf", "anyOf")):
            log(
                f'WARN: Tool "{tool_name}" has complex schema (oneOf/allOf/anyOf). Skipping context injection.'
            )
            return input_schema

        # Work on a deep copy so the tool's stored schema is never mutated.
        augmented = copy.deepcopy(input_schema)
    else:
        augmented = {"type": "object", "properties": {}, "required": []}

    if not isinstance(augmented.get("properties"), dict):
        augmented["properties"] = {}

    # ``additionalProperties: false`` would reject the injected parameter, so an
    # explicit ``False`` is dropped; any other value is left alone.
    if augmented.get("additionalProperties") is False:
        del augmented["additionalProperties"]

    augmented["properties"]["context"] = {
        "type": "string",
        "description": description_override or DEFAULT_CONTEXT_PARAMETER_DESCRIPTION,
    }

    if required:
        existing = augmented.get("required")
        if not isinstance(existing, list):
            # Missing or malformed ``required`` is replaced outright.
            augmented["required"] = ["context"]
        elif "context" not in existing:
            existing.append("context")

    return augmented
def extract_conversation_id(args: Any) -> Optional[str]:
    """Read the agent-supplied conversation id out of a tool call's arguments.

    Returns the stripped string stored under ``CONVERSATION_ID_PARAM_NAME``, or
    ``None`` when ``args`` is not a dict, the value is not a string, or it is
    empty after stripping whitespace.
    """
    candidate = args.get(CONVERSATION_ID_PARAM_NAME) if isinstance(args, dict) else None
    if isinstance(candidate, str):
        # A whitespace-only value strips to "" and is reported as absent.
        return candidate.strip() or None
    return None
def can_inject_prompt_back(result: Any) -> bool:
    """Tell whether a tool result can carry an appended prompt-back entry.

    Only a dict whose ``content`` is a list and whose ``isError`` is not exactly
    ``True`` qualifies.
    """
    return (
        isinstance(result, dict)
        and result.get("isError") is not True
        and isinstance(result.get("content"), list)
    )


def build_prompt_back(conversation_id: str) -> Dict[str, Any]:
    """Build the text content entry asking the agent to echo ``conversation_id``."""
    message = (
        f"[SERVER]: Reuse conversation_id={conversation_id} on every subsequent tool call in this "
        "conversation. Required for the server to correlate calls and provide context-aware results."
    )
    return {"type": "text", "text": message}


def inject_prompt_back(result: Any, conversation_id: str) -> Any:
    """Return ``result`` with the prompt-back appended to its ``content`` list.

    A new dict with a new ``content`` list is produced; the input is not mutated.
    Results that cannot take the entry (see :func:`can_inject_prompt_back`) are
    returned as the very same object, which callers can detect by identity.
    """
    if can_inject_prompt_back(result):
        extended = [*result["content"], build_prompt_back(conversation_id)]
        return dict(result, content=extended)
    return result
class MCPAnalyticsEventType:
    """Internal dispatch keys naming the protocol-shaped events the SDK observes.

    A plain namespace of string constants; it is not meant to be instantiated.
    """

    # SDK-originated events
    CUSTOM = "posthog:custom"
    IDENTIFY = "posthog:identify"

    # MCP lifecycle and capability signals
    MCP_INITIALIZE = "mcp:initialize"
    MCP_MISSING_CAPABILITY = "mcp:missing_capability"

    # MCP tools
    MCP_TOOLS_LIST = "mcp:tools/list"
    MCP_TOOLS_CALL = "mcp:tools/call"

    # MCP prompts
    MCP_PROMPTS_LIST = "mcp:prompts/list"
    MCP_PROMPTS_GET = "mcp:prompts/get"

    # MCP resources
    MCP_RESOURCES_LIST = "mcp:resources/list"
    MCP_RESOURCES_READ = "mcp:resources/read"
def _from_message(message: str) -> ErrorProperties:
    """Wrap a bare message in the single-entry ``$exception_list`` shape."""
    entry = {
        "mechanism": {"type": "generic", "handled": True},
        "type": "Error",
        "value": message,
    }
    return {"$exception_list": [entry], "$exception_level": "error"}


def _is_call_tool_result(value: Any) -> bool:
    """Detect a CallToolResult-shaped value, given as a dict or as an object.

    The value must expose ``isError`` (as a key for dicts, as an attribute
    otherwise) and a ``content`` that is a list. Only the presence of
    ``isError`` is checked, not its value.
    """
    if isinstance(value, dict):
        if "isError" not in value:
            return False
        content = value.get("content")
    else:
        if not hasattr(value, "isError"):
            return False
        content = getattr(value, "content", None)
    return isinstance(content, list)


def _extract_call_tool_result_message(result: Any) -> str:
    """Join the text parts of a CallToolResult into one message.

    Parts may be dicts or attribute-style objects. Only parts whose ``type`` is
    ``"text"`` and whose ``text`` is a string are kept; they are joined with a
    space and stripped. Falls back to ``"Unknown error"`` when nothing remains.
    """

    def read(part: Any, field: str) -> Any:
        # Same field access for dict parts and attribute-style parts.
        if isinstance(part, dict):
            return part.get(field)
        return getattr(part, field, None)

    if isinstance(result, dict):
        parts = result.get("content")
    else:
        parts = getattr(result, "content", [])

    fragments: List[str] = []
    for part in parts or ():
        kind, text = read(part, "type"), read(part, "text")
        if kind == "text" and isinstance(text, str):
            fragments.append(text)

    message = " ".join(fragments).strip()
    return message if message else "Unknown error"


def _safe_str(value: Any) -> str:
    """Stringify ``value``, returning ``"Unknown error"`` if ``str()`` raises."""
    try:
        rendered = str(value)
    except Exception:
        rendered = "Unknown error"
    return rendered
MCPAnalyticsIDPrefix = Literal["evt", "ses"]


def _uuid7() -> str:
    """Mint a time-ordered UUIDv7 string using only the standard library.

    Bit layout, most significant first: 48-bit Unix timestamp in milliseconds,
    4-bit version (7), 12 random bits, 2-bit variant (``0b10``), 62 random bits.
    """
    millis = int(time.time() * 1000) & ((1 << 48) - 1)
    random_high = int.from_bytes(os.urandom(2), "big") & 0x0FFF  # keep 12 bits
    random_low = int.from_bytes(os.urandom(8), "big") & ((1 << 62) - 1)  # keep 62 bits

    packed = (
        (millis << 80)
        | (0x7 << 76)  # version 7
        | (random_high << 64)
        | (0b10 << 62)  # RFC 4122 variant
        | random_low
    )
    return str(uuid.UUID(int=packed))


def new_prefixed_id(prefix: MCPAnalyticsIDPrefix) -> str:
    """Return ``<prefix>_<uuid7>`` with a freshly generated UUIDv7."""
    return "{}_{}".format(prefix, _uuid7())


def deterministic_prefixed_id(prefix: MCPAnalyticsIDPrefix, value: str) -> str:
    """Derive a stable ``<prefix>_<32 hex chars>`` id from an arbitrary string.

    The hex part is the hash of ``value`` followed by the hash of
    ``value + "::salt"``. The hash is not cryptographic; the goal is only a
    stable, low-collision mapping from input to output.
    """
    primary = _fnv1a_hex(value)
    salted = _fnv1a_hex(f"{value}::salt")
    return f"{prefix}_{primary}{salted}"


def _fnv1a_hex(value: str) -> str:
    """Hash ``value`` into 16 lowercase hex characters.

    Two independent 32-bit FNV-1a-style accumulators are run over the string's
    code points and concatenated. The seeds and multipliers must stay exactly as
    they are so ids remain stable and match the hash this was ported from.
    """
    mask = 0xFFFFFFFF
    first, second = 0x84222325, 0xCBF29CE4
    for code_point in map(ord, value):
        first = ((first ^ code_point) * 0x000001B3) & mask
        second = ((second ^ code_point) * 0x00000193) & mask
    return f"{first:08x}{second:08x}"
def instrument_fastmcp(server: Any, data: MCPAnalyticsData) -> None:
    """Attach analytics to an official-SDK FastMCP server.

    Records the server's name and version on ``data``, then wraps the two seams
    this adapter relies on: the tool manager's ``call_tool`` and the low-level
    ``tools/list`` request handler.

    :param server: The FastMCP server to instrument.
    :param data: Per-server tracking data; ``server_name`` and ``server_version``
        are written onto it.
    """
    low_level = getattr(server, "_mcp_server", None)

    # Prefer the FastMCP-level name; fall back to the wrapped low-level server's.
    own_name = getattr(server, "name", None)
    data.server_name = own_name or getattr(low_level, "name", None)
    data.server_version = getattr(low_level, "version", None)

    _wrap_tool_manager_call(server, data)
    _wrap_list_tools_handler(server, data)
+ ) + return + + original = tool_manager.call_tool + if getattr(original, _WRAPPED_FLAG, False): + return + + async def wrapped( + name: str, + arguments: Dict[str, Any], + context: Any = None, + convert_result: bool = False, + ) -> Any: + client_name, client_version = _client_info(context) + mcp_session_id = _mcp_session_id(context) + request = build_tool_call_request(name, arguments) + extra: Dict[str, Any] = {"session_id": mcp_session_id} + + session_id = await prepare_request( + data, + mcp_session_id=mcp_session_id, + client_name=client_name, + client_version=client_version, + request=request, + extra=extra, + ) + + missing_name = resolve_missing_capability_tool_name(data.options) + if data.options.report_missing and name == missing_name: + await record_missing_capability( + data, + session_id, + tool_name=missing_name, + context=(arguments or {}).get("context"), + arguments=arguments, + client_name=client_name, + client_version=client_version, + extra=extra, + ) + return [ + mcp_types.TextContent(type="text", text=get_more_tools_result_text()) + ] + + conversation_id, minted = resolve_conversation_id( + data.options.enable_conversation_id, arguments, name, missing_name + ) + + call_arguments = arguments + if isinstance(arguments, dict) and not _tool_owns_context(server, name): + call_arguments = { + k: v for k, v in arguments.items() if k not in _INJECTED_KEYS + } + + start = time.monotonic() + try: + result = await original( + name, call_arguments, context=context, convert_result=convert_result + ) + except Exception as error: + # The minted prompt-back was never delivered to the agent — don't stamp + # an orphan conversation_id it can't echo (an agent-supplied id is kept). 
+ await record_tool_call( + data, + session_id, + name=name, + arguments=arguments, + error=error, + duration_ms=(time.monotonic() - start) * 1000, + client_name=client_name, + client_version=client_version, + conversation_id=None if minted else conversation_id, + extra=extra, + ) + raise + + # Inject the prompt-back first, then capture the delivered result. Only stamp + # a minted conversation_id when it was actually appended to what the agent got. + delivered_conversation_id = conversation_id + if minted and conversation_id: + injected = _inject_prompt_back(result, conversation_id) + if injected is result: + delivered_conversation_id = ( + None # not injectable (e.g. tuple/scalar result) + ) + result = injected + + await record_tool_call( + data, + session_id, + name=name, + arguments=arguments, + result=result, + duration_ms=(time.monotonic() - start) * 1000, + client_name=client_name, + client_version=client_version, + conversation_id=delivered_conversation_id, + extra=extra, + ) + return result + + setattr(wrapped, _WRAPPED_FLAG, True) + tool_manager.call_tool = wrapped + + +# --- tools/list seam --------------------------------------------------------- + + +def _wrap_list_tools_handler(server: Any, data: MCPAnalyticsData) -> None: + low_level = getattr(server, "_mcp_server", None) + if low_level is None: + return + handlers = low_level.request_handlers + original = handlers.get(mcp_types.ListToolsRequest) + if original is None or getattr(original, _WRAPPED_FLAG, False): + return + + async def list_handler(req: Any) -> Any: + # The low-level server calls the handler with None to populate its tool + # cache; don't capture or inject on that internal pass. 
+ if req is None: + return await original(req) + + result = await original(req) + tools = extract_tools(result) + + names = [] + for tool in tools: + names.append(tool.name) + if getattr(tool, "description", None): + data.tool_descriptions[tool.name] = tool.description + category = read_tool_category(tool) + if category: + data.tool_categories[tool.name] = category + + context_enabled = is_context_enabled(data.options.context) + description = get_context_description(data.options.context) + for tool in tools: + if tool.name == _GET_MORE_TOOLS_NAME: + continue + owns_context = _tool_owns_context(server, tool.name) + schema = getattr(tool, "inputSchema", None) + if context_enabled and not owns_context: + schema = add_context_parameter_to_schema(schema, tool.name, description) + if data.options.enable_conversation_id: + schema = add_conversation_id_to_schema(schema, tool.name) + if schema is not getattr(tool, "inputSchema", None): + try: + tool.inputSchema = schema + except Exception: # noqa: BLE001 - some schema attrs may be read-only + log(f"WARN: could not set inputSchema on tool {tool.name}") + + if data.options.report_missing: + missing_name = resolve_missing_capability_tool_name(data.options) + if not any(t.name == missing_name for t in tools): + append_get_more_tools(result, missing_name) + names.append(missing_name) + + session_id = await resolve_session_id(data, None) + await record_tools_list( + data, session_id, names=names, request=request_to_dict(req) + ) + + return result + + setattr(list_handler, _WRAPPED_FLAG, True) + handlers[mcp_types.ListToolsRequest] = list_handler + + +# --- helpers ----------------------------------------------------------------- + + +def _inject_prompt_back(result: Any, conversation_id: str) -> Any: + """Append the conversation_id prompt-back to a tool result so the agent echoes + it on later calls. 
Handles every shape ToolManager.call_tool can return: + a ``(content_list, structured)`` tuple (the convert_result=True production path), + a bare content list, or a ``{content: [...]}`` dict. Returns the result unchanged + (so the caller can detect non-delivery) for shapes we can't append to.""" + block = mcp_types.TextContent( + type="text", text=build_prompt_back(conversation_id)["text"] + ) + if isinstance(result, tuple) and len(result) == 2 and isinstance(result[0], list): + return ([*result[0], block], result[1]) + if isinstance(result, list): + return [*result, block] + if ( + isinstance(result, dict) + and isinstance(result.get("content"), list) + and not result.get("isError") + ): + return {**result, "content": [*result["content"], block]} + return result + + +def _tool_owns_context(server: Any, name: str) -> bool: + """True when the tool's own function declares a ``context`` parameter — then we + must not inject or strip our analytics ``context``.""" + tool_manager = getattr(server, "_tool_manager", None) + if tool_manager is None: + return False + tool = tool_manager.get_tool(name) + fn = getattr(tool, "fn", None) + if fn is None: + return False + try: + return "context" in inspect.signature(fn).parameters + except (TypeError, ValueError): + return False + + +def _client_info(context: Any) -> Tuple[Optional[str], Optional[str]]: + try: + client_params = context.request_context.session.client_params + if client_params and client_params.clientInfo: + return client_params.clientInfo.name, client_params.clientInfo.version + except Exception: # noqa: BLE001 + pass + return None, None + + +def _mcp_session_id(context: Any) -> Optional[str]: + """Best-effort transport session id (e.g. the ``Mcp-Session-Id`` header on the + streamable-HTTP transport). 
Returns ``None`` for stdio, where the SDK-generated + session is used instead.""" + try: + request = getattr(context.request_context, "request", None) + headers = getattr(request, "headers", None) + if headers is not None: + return headers.get("mcp-session-id") + except Exception: # noqa: BLE001 + pass + return None diff --git a/posthog/mcp/instrument_lowlevel.py b/posthog/mcp/instrument_lowlevel.py new file mode 100644 index 00000000..9a69d2ae --- /dev/null +++ b/posthog/mcp/instrument_lowlevel.py @@ -0,0 +1,290 @@ +# Portions of this package are derived from MCPCat/mcpcat-typescript-sdk +# Copyright (c) 2025 MCPcat +# Licensed under the MIT License: https://github.com/MCPCat/mcpcat-typescript-sdk/blob/main/LICENSE + +"""Low-level ``mcp.server.Server`` adapter. + +The low-level server keeps its handlers in a public ``request_handlers`` dict, so +we wrap the ``CallToolRequest`` and ``ListToolsRequest`` entries directly. Unlike +FastMCP, the low-level ``call_tool`` handler catches exceptions and returns a +``CallToolResult`` with ``isError=True`` rather than raising — so we detect errors +from the result, not a ``try/except``. Session and client info are read from the +server's ``request_context`` contextvar (the handler receives only the request). 
+""" + +from __future__ import annotations + +import time +from typing import Any, Optional, Tuple + +import mcp.types as mcp_types + +from .context_parameters import ( + add_context_parameter_to_schema, + get_context_description, + is_context_enabled, +) +from .conversation_id import ( + add_conversation_id_to_schema, + build_prompt_back, + resolve_conversation_id, +) +from .instrumentation import ( + append_get_more_tools, + build_tool_call_request, + extract_tools, + prepare_request, + read_tool_category, + record_missing_capability, + record_tool_call, + record_tools_list, + request_to_dict, +) +from .internal import MCPAnalyticsData +from .logger import log +from .session import resolve_session_id +from .tools import ( + GET_MORE_TOOLS_NAME as _GET_MORE_TOOLS_NAME, + get_more_tools_result_text, + resolve_missing_capability_tool_name, +) + +_INJECTED_KEYS = ("context", "conversation_id") +_WRAPPED_FLAG = "__posthog_mcp_wrapped__" + + +def instrument_low_level(server: Any, data: MCPAnalyticsData) -> None: + """Instrument a raw ``mcp.server.Server``. ``context`` is injected as an + optional schema property and NOT stripped — that schema is also the call's + validation schema, and a typical ``(name, arguments)`` handler ignores extra keys.""" + data.server_name = getattr(server, "name", None) + data.server_version = getattr(server, "version", None) + _wrap_call_tool(server, data, strip_injected=False) + _wrap_list_tools(server, data, context_required=False) + + +def instrument_fastmcp_v2(server: Any, data: MCPAnalyticsData) -> None: + """Instrument jlowin's standalone ``fastmcp.FastMCP`` (FastMCP 2.0). 
It exposes a + ``_mcp_server`` (a subclass of the official low-level Server) with the same + ``request_handlers`` seam, but validates tool args against the function + signature and rejects unexpected kwargs — so we STRIP the injected + ``context``/``conversation_id`` before dispatch (like the official FastMCP path).""" + low_level = getattr(server, "_mcp_server", None) + if low_level is None: + log("Warning: fastmcp.FastMCP has no _mcp_server; cannot instrument.") + return + data.server_name = getattr(server, "name", None) or getattr(low_level, "name", None) + data.server_version = getattr(server, "version", None) or getattr( + low_level, "version", None + ) + _wrap_call_tool(low_level, data, strip_injected=True) + _wrap_list_tools(low_level, data, context_required=True) + + +def _wrap_call_tool( + server: Any, data: MCPAnalyticsData, *, strip_injected: bool +) -> None: + handlers = server.request_handlers + original = handlers.get(mcp_types.CallToolRequest) + if original is None or getattr(original, _WRAPPED_FLAG, False): + return + + async def handler(req: Any) -> Any: + name = req.params.name + arguments = dict(req.params.arguments or {}) + client_name, client_version = _client_info(server) + mcp_session_id = _mcp_session_id(server) + request = build_tool_call_request(name, arguments) + extra = {"session_id": mcp_session_id} + + session_id = await prepare_request( + data, + mcp_session_id=mcp_session_id, + client_name=client_name, + client_version=client_version, + request=request, + extra=extra, + ) + + missing_name = resolve_missing_capability_tool_name(data.options) + if data.options.report_missing and name == missing_name: + await record_missing_capability( + data, + session_id, + tool_name=missing_name, + context=arguments.get("context"), + arguments=arguments, + client_name=client_name, + client_version=client_version, + extra=extra, + ) + return mcp_types.ServerResult( + mcp_types.CallToolResult( + content=[ + mcp_types.TextContent( + type="text", 
text=get_more_tools_result_text() + ) + ], + isError=False, + ) + ) + + conversation_id, minted = resolve_conversation_id( + data.options.enable_conversation_id, arguments, name, missing_name + ) + + # On raw low-level servers `context`/`conversation_id` are injected as + # *optional* schema properties and left in place (a (name, arguments) + # handler ignores extra keys). FastMCP 2.0 validates against the function + # signature and rejects unexpected kwargs, so strip them before dispatch. + if strip_injected and req.params.arguments: + for key in _INJECTED_KEYS: + req.params.arguments.pop(key, None) + + start = time.monotonic() + result = await original(req) + duration_ms = (time.monotonic() - start) * 1000 + + # The low-level handler already converted any exception to a + # CallToolResult(isError=True); record_tool_call detects that from the result. + call_result = getattr(result, "root", result) + + # Inject the prompt-back before capture; only stamp a minted conversation_id + # when it was actually delivered (not on isError / non-list results), so we + # don't record an orphan id the agent never received. 
+ delivered_conversation_id = conversation_id + if minted and conversation_id: + content = getattr(call_result, "content", None) + if not getattr(call_result, "isError", False) and isinstance(content, list): + content.append( + mcp_types.TextContent( + type="text", text=build_prompt_back(conversation_id)["text"] + ) + ) + else: + delivered_conversation_id = None + + await record_tool_call( + data, + session_id, + name=name, + arguments=arguments, + result=call_result, + duration_ms=duration_ms, + client_name=client_name, + client_version=client_version, + conversation_id=delivered_conversation_id, + extra=extra, + ) + return result + + setattr(handler, _WRAPPED_FLAG, True) + handlers[mcp_types.CallToolRequest] = handler + + +def _wrap_list_tools( + server: Any, data: MCPAnalyticsData, *, context_required: bool +) -> None: + handlers = server.request_handlers + original = handlers.get(mcp_types.ListToolsRequest) + if original is None or getattr(original, _WRAPPED_FLAG, False): + return + + async def handler(req: Any) -> Any: + # The server calls the handler with None to populate its tool cache; + # don't capture or inject on that internal pass. + if req is None: + return await original(req) + + result = await original(req) + tools = extract_tools(result) + + names = [] + for tool in tools: + names.append(tool.name) + if getattr(tool, "description", None): + data.tool_descriptions[tool.name] = tool.description + category = read_tool_category(tool) + if category: + data.tool_categories[tool.name] = category + + context_enabled = is_context_enabled(data.options.context) + description = get_context_description(data.options.context) + for tool in tools: + if tool.name == _GET_MORE_TOOLS_NAME: + continue + schema = getattr(tool, "inputSchema", None) + # required follows the path: raw low-level validates the call against + # this same schema (optional), FastMCP 2.0 strips it first (required-advisory). 
+ if context_enabled and not _schema_has_context(schema): + schema = add_context_parameter_to_schema( + schema, tool.name, description, required=context_required + ) + if data.options.enable_conversation_id: + schema = add_conversation_id_to_schema(schema, tool.name) + if schema is not getattr(tool, "inputSchema", None): + try: + tool.inputSchema = schema + except Exception: # noqa: BLE001 + log(f"WARN: could not set inputSchema on tool {tool.name}") + + if data.options.report_missing: + missing_name = resolve_missing_capability_tool_name(data.options) + if not any(t.name == missing_name for t in tools): + append_get_more_tools(result, missing_name) + names.append(missing_name) + + client_name, client_version = _client_info(server) + session_id = await resolve_session_id(data, _mcp_session_id(server)) + await record_tools_list( + data, + session_id, + names=names, + request=request_to_dict(req), + client_name=client_name, + client_version=client_version, + extra={"session_id": _mcp_session_id(server)}, + ) + + return result + + setattr(handler, _WRAPPED_FLAG, True) + handlers[mcp_types.ListToolsRequest] = handler + + +def _schema_has_context(schema: Any) -> bool: + return ( + isinstance(schema, dict) + and isinstance(schema.get("properties"), dict) + and "context" in schema["properties"] + ) + + +def _request_context(server: Any) -> Any: + try: + return server.request_context + except (LookupError, AttributeError): + return None + + +def _client_info(server: Any) -> Tuple[Optional[str], Optional[str]]: + ctx = _request_context(server) + try: + client_params = ctx.session.client_params + if client_params and client_params.clientInfo: + return client_params.clientInfo.name, client_params.clientInfo.version + except Exception: # noqa: BLE001 + pass + return None, None + + +def _mcp_session_id(server: Any) -> Optional[str]: + ctx = _request_context(server) + try: + request = getattr(ctx, "request", None) + headers = getattr(request, "headers", None) + if headers is not 
None: + return headers.get("mcp-session-id") + except Exception: # noqa: BLE001 + pass + return None diff --git a/posthog/mcp/instrumentation.py b/posthog/mcp/instrumentation.py new file mode 100644 index 00000000..f94907eb --- /dev/null +++ b/posthog/mcp/instrumentation.py @@ -0,0 +1,342 @@ +# Portions of this package are derived from MCPCat/mcpcat-typescript-sdk +# Copyright (c) 2025 MCPcat +# Licensed under the MIT License: https://github.com/MCPCat/mcpcat-typescript-sdk/blob/main/LICENSE + +"""Shared tool-call / tools-list / initialize lifecycle used by both the FastMCP +and low-level server adapters. The adapters resolve transport-specific details +(client info, session id, raw result shape) and delegate the analytics flow here +so both stay in sync.""" + +from __future__ import annotations + +import asyncio +import threading +from datetime import datetime, timezone +from typing import Any, Dict, List, Optional, Set + +from .capture import capture_event +from .event_types import MCPAnalyticsEventType +from .exceptions import capture_exception +from .intent import resolve_tool_call_intent, set_event_intent +from .internal import MCPAnalyticsData, handle_identify, resolve_event_properties +from .logger import log +from .sanitization import build_captured_mcp_parameters +from .session import resolve_session_id + +# Keep strong refs to in-flight capture tasks/futures so they aren't GC'd mid-flight, +# and so the asyncio ones can be awaited via drain_pending() before shutdown. Holds +# asyncio.Task (running-loop path) or concurrent.futures.Future (sync background-loop path). +_BACKGROUND_TASKS: Set[Any] = set() + +# A single daemon event loop for hosts with no running loop (sync dispatchers +# like PostHogMCP). Created lazily and reused, so we never leak a loop per call. 
+_bg_loop: Optional[asyncio.AbstractEventLoop] = None +_bg_loop_lock = threading.Lock() + + +def _get_background_loop() -> asyncio.AbstractEventLoop: + global _bg_loop + if _bg_loop is None: + with _bg_loop_lock: + if _bg_loop is None: + loop = asyncio.new_event_loop() + threading.Thread( + target=loop.run_forever, name="posthog-mcp-capture", daemon=True + ).start() + _bg_loop = loop + return _bg_loop + + +def _on_task_done(task: Any) -> None: + _BACKGROUND_TASKS.discard(task) + try: + if not task.cancelled() and task.exception() is not None: + log(f"background capture task failed: {task.exception()}") + except Exception: # noqa: BLE001 - never let bookkeeping raise + pass + + +def fire_and_forget(coro: Optional[Any]) -> None: + """Schedule a capture coroutine without blocking the tool path. No-ops if the + coroutine is ``None`` (no sink). Runs on the current loop when there is one, + otherwise on a shared daemon loop (sync hosts) — never creates a throwaway loop.""" + if coro is None: + return + try: + asyncio.get_running_loop() + except RuntimeError: + # No running loop (sync host) — schedule on the shared background loop. + future = asyncio.run_coroutine_threadsafe(coro, _get_background_loop()) + _BACKGROUND_TASKS.add(future) + future.add_done_callback(_on_task_done) + return + task = asyncio.ensure_future(coro) + _BACKGROUND_TASKS.add(task) + task.add_done_callback(_on_task_done) + + +async def drain_pending() -> None: + """Await in-flight capture tasks scheduled on the current event loop. 
Lets a host + flush analytics before ``posthog.shutdown()`` instead of racing a sleep.""" + pending = [ + t + for t in list(_BACKGROUND_TASKS) + if isinstance(t, asyncio.Task) and not t.done() + ] + if pending: + await asyncio.gather(*pending, return_exceptions=True) + + +def is_tool_result_error(result: Any) -> bool: + """MCP tool results signal errors via ``isError: true`` rather than raising.""" + if isinstance(result, dict): + return result.get("isError") is True + return getattr(result, "isError", None) is True + + +def build_tool_call_request( + name: str, arguments: Optional[Dict[str, Any]] +) -> Dict[str, Any]: + return { + "method": "tools/call", + "params": {"name": name, "arguments": arguments or {}}, + } + + +def _to_jsonable(obj: Any) -> Any: + if hasattr(obj, "model_dump"): + try: + return obj.model_dump(mode="json") + except Exception: # noqa: BLE001 + return str(obj) + if isinstance(obj, (list, tuple)): + return [_to_jsonable(item) for item in obj] + if isinstance(obj, dict): + return {key: _to_jsonable(value) for key, value in obj.items()} + return obj + + +def _wrap_response(result: Any) -> Any: + """Shape a tool result into the ``{content: [...]}`` form the sanitizer + understands (so image/audio/blob blocks get redacted).""" + serialized = _to_jsonable(result) + if isinstance(serialized, list): + return {"content": serialized} + return serialized + + +async def _maybe_emit_initialize( + data: MCPAnalyticsData, + session_id: str, + client_name: Optional[str], + client_version: Optional[str], + extra: Optional[Dict[str, Any]], +) -> None: + """Lazily emit ``$mcp_initialize`` once per session. 
The Python MCP SDK handles + ``InitializeRequest`` inside the session layer (not ``request_handlers``), so we + synthesize the event from the first instrumented request that carries client info.""" + if session_id in data.initialized_sessions: + return + data.mark_session_initialized(session_id) + event: Dict[str, Any] = { + "event_type": MCPAnalyticsEventType.MCP_INITIALIZE, + "session_id": session_id, + "client_name": client_name, + "client_version": client_version, + "timestamp": datetime.now(timezone.utc), + } + await _apply_event_properties( + data, event, {"method": "initialize", "params": {}}, extra + ) + fire_and_forget(capture_event(data, event)) + + +async def _apply_event_properties( + data: MCPAnalyticsData, + event: Dict[str, Any], + request: Dict[str, Any], + extra: Optional[Dict[str, Any]], +) -> None: + """Resolve the customer's ``event_properties`` callback and stamp it onto the + event — applied to every auto-captured event type, matching the TS SDK.""" + props = await resolve_event_properties(data, request, extra) + if props is not None: + event["properties"] = props + + +async def prepare_request( + data: MCPAnalyticsData, + *, + mcp_session_id: Optional[str], + client_name: Optional[str], + client_version: Optional[str], + request: Dict[str, Any], + extra: Optional[Dict[str, Any]], +) -> str: + """Resolve the session id, lazily emit initialize, and run identify. 
Returns + the session id to stamp on the event for this request.""" + session_id = await resolve_session_id(data, mcp_session_id) + await _maybe_emit_initialize(data, session_id, client_name, client_version, extra) + identify_event = await handle_identify(data, session_id, request, extra) + if identify_event: + fire_and_forget(capture_event(data, identify_event)) + return session_id + + +async def record_tool_call( + data: MCPAnalyticsData, + session_id: str, + *, + name: str, + arguments: Optional[Dict[str, Any]], + result: Any = None, + error: Any = None, + duration_ms: Optional[float] = None, + client_name: Optional[str] = None, + client_version: Optional[str] = None, + conversation_id: Optional[str] = None, + extra: Optional[Dict[str, Any]] = None, +) -> None: + # Analytics must never change what the tool returns or raises: any failure + # building/publishing the event is logged and swallowed here. + try: + request = build_tool_call_request(name, arguments) + event: Dict[str, Any] = { + "event_type": MCPAnalyticsEventType.MCP_TOOLS_CALL, + "session_id": session_id, + "resource_name": name, + "tool_description": data.tool_descriptions.get(name), + "tool_category": data.tool_categories.get(name), + "parameters": build_captured_mcp_parameters(request), + "duration": duration_ms, + "client_name": client_name, + "client_version": client_version, + "conversation_id": conversation_id, + "is_error": False, + } + set_event_intent(event, await resolve_tool_call_intent(data, request, extra)) + + if error is not None: + event["is_error"] = True + event["error"] = capture_exception(error) + elif result is not None: + event["response"] = _wrap_response(result) + if is_tool_result_error(result): + event["is_error"] = True + event["error"] = capture_exception(result) + + props = await resolve_event_properties(data, request, extra) + if props is not None: + event["properties"] = props + + fire_and_forget(capture_event(data, event)) + except Exception as err: # noqa: BLE001 - 
isolate analytics from the tool path + log(f"record_tool_call failed (event dropped, tool unaffected): {err}") + + +def extract_tools(result: Any) -> list: + """Pull the tool list out of a ListTools ServerResult (a copy — to MUTATE the + real list use ``append_get_more_tools``).""" + root = getattr(result, "root", result) + return list(getattr(root, "tools", []) or []) + + +def append_get_more_tools(result: Any, name: str) -> None: + """Append the get_more_tools virtual tool to the real ListToolsResult.tools list.""" + import mcp.types as mcp_types + + from .tools import build_report_missing_descriptor + + descriptor = build_report_missing_descriptor(name) + tool = mcp_types.Tool( + name=descriptor["name"], + description=descriptor["description"], + inputSchema=descriptor["inputSchema"], + annotations=descriptor["annotations"], + ) + root = getattr(result, "root", result) + tools_list = getattr(root, "tools", None) + if isinstance(tools_list, list): + tools_list.append(tool) + + +def read_tool_category(tool: Any) -> Optional[str]: + """Read a tool's product category from its ``_meta.category``.""" + meta = getattr(tool, "meta", None) + if isinstance(meta, dict): + category = meta.get("category") + if isinstance(category, str): + return category + return None + + +def request_to_dict(req: Any) -> Dict[str, Any]: + """Shape a request object into the JSON-RPC-ish dict the sanitizer expects.""" + method = getattr(req, "method", None) or "tools/list" + params = getattr(req, "params", None) + params_dict: Any = {} + if params is not None and hasattr(params, "model_dump"): + try: + params_dict = params.model_dump(mode="json") + except Exception: # noqa: BLE001 + params_dict = {} + return {"method": method, "params": params_dict} + + +async def record_missing_capability( + data: MCPAnalyticsData, + session_id: str, + *, + tool_name: str, + context: Optional[str], + arguments: Optional[Dict[str, Any]], + client_name: Optional[str] = None, + client_version: Optional[str] = 
None, + extra: Optional[Dict[str, Any]] = None, +) -> None: + """Record a ``get_more_tools`` call as ``$mcp_missing_capability``, with the + agent's stated need as ``$mcp_intent``.""" + try: + request = build_tool_call_request(tool_name, arguments) + event: Dict[str, Any] = { + "event_type": MCPAnalyticsEventType.MCP_MISSING_CAPABILITY, + "session_id": session_id, + "resource_name": tool_name, + "parameters": build_captured_mcp_parameters(request), + "client_name": client_name, + "client_version": client_version, + } + if isinstance(context, str) and context.strip(): + event["user_intent"] = context.strip() + event["user_intent_source"] = "context_parameter" + await _apply_event_properties(data, event, request, extra) + fire_and_forget(capture_event(data, event)) + except Exception as err: # noqa: BLE001 - isolate analytics from the tool path + log(f"record_missing_capability failed (event dropped): {err}") + + +async def record_tools_list( + data: MCPAnalyticsData, + session_id: str, + *, + names: List[str], + request: Dict[str, Any], + client_name: Optional[str] = None, + client_version: Optional[str] = None, + extra: Optional[Dict[str, Any]] = None, +) -> None: + try: + event: Dict[str, Any] = { + "event_type": MCPAnalyticsEventType.MCP_TOOLS_LIST, + "session_id": session_id, + "listed_tool_names": names, + "parameters": build_captured_mcp_parameters(request), + "client_name": client_name, + "client_version": client_version, + "is_error": False, + "timestamp": datetime.now(timezone.utc), + } + await _apply_event_properties(data, event, request, extra) + fire_and_forget(capture_event(data, event)) + except Exception as err: # noqa: BLE001 - isolate analytics from the tool path + log(f"record_tools_list failed (event dropped): {err}") diff --git a/posthog/mcp/intent.py b/posthog/mcp/intent.py new file mode 100644 index 00000000..e7b751f1 --- /dev/null +++ b/posthog/mcp/intent.py @@ -0,0 +1,73 @@ +# Portions of this package are derived from 
MCPCat/mcpcat-typescript-sdk +# Copyright (c) 2025 MCPcat +# Licensed under the MIT License: https://github.com/MCPCat/mcpcat-typescript-sdk/blob/main/LICENSE + +"""Resolve ``$mcp_intent`` from the agent-supplied ``context`` argument (source +``context_parameter``) or the customer's ``intent_fallback`` callback (source +``inferred``).""" + +from __future__ import annotations + +import asyncio +from typing import Any, Dict, Optional, Tuple + +from .context_parameters import is_context_enabled +from .internal import MCPAnalyticsData, _maybe_await +from .logger import log + +# (intent, source) +ResolvedIntent = Tuple[str, str] + + +def _get_context_argument(request: Dict[str, Any]) -> Optional[str]: + params = request.get("params") or {} + arguments = params.get("arguments") or {} + context = arguments.get("context") + if isinstance(context, str) and context.strip(): + return context + return None + + +def _normalize_intent(intent: Any) -> Optional[str]: + if not isinstance(intent, str): + return None + trimmed = intent.strip() + return trimmed or None + + +async def _run_intent_fallback( + data: MCPAnalyticsData, request: Dict[str, Any], extra: Optional[Dict[str, Any]] +) -> Optional[ResolvedIntent]: + if not data.options.intent_fallback: + return None + try: + result = data.options.intent_fallback(request, extra) + if asyncio.iscoroutine(result): + result = await _maybe_await(result) + intent = _normalize_intent(result) + return (intent, "inferred") if intent else None + except Exception as error: # noqa: BLE001 + log(f"intent_fallback callback error: {error}") + return None + + +async def resolve_tool_call_intent( + data: MCPAnalyticsData, + request: Dict[str, Any], + extra: Optional[Dict[str, Any]] = None, +) -> Optional[ResolvedIntent]: + context_argument = _get_context_argument(request) + name = (request.get("params") or {}).get("name") + if ( + is_context_enabled(data.options.context) + and name != "get_more_tools" + and context_argument + ): + return 
class IdentityCache:
    """Bounded LRU of session identities, isolated per server so identities never
    bleed across server instances.

    Entries are kept in an ``OrderedDict`` ordered from least to most recently
    used; both ``get`` hits and ``set`` refresh an entry's recency.
    """

    def __init__(self, max_size: int = 1000) -> None:
        """Create an empty cache holding at most ``max_size`` identities.

        A ``max_size`` of zero or less disables caching: ``set`` stores nothing.
        """
        self._cache: "OrderedDict[str, UserIdentity]" = OrderedDict()
        self._max_size = max_size

    def get(self, session_id: str) -> Optional[UserIdentity]:
        """Return the identity cached for ``session_id`` and mark it most
        recently used, or ``None`` when nothing is cached."""
        identity = self._cache.get(session_id)
        if identity is None:
            return None
        self._cache.move_to_end(session_id)
        return identity

    def set(self, session_id: str, identity: UserIdentity) -> None:
        """Store ``identity`` as the most recently used entry, evicting the
        least recently used entries while the cache is over capacity."""
        self._cache[session_id] = identity
        self._cache.move_to_end(session_id)
        # Evict after inserting: the loop simply stops on an empty cache, so a
        # non-positive max_size can no longer raise KeyError from popitem().
        while len(self._cache) > self._max_size:
            self._cache.popitem(last=False)

    def has(self, session_id: str) -> bool:
        """Return whether ``session_id`` is cached, without touching recency."""
        return session_id in self._cache

    def size(self) -> int:
        """Return the number of cached identities."""
        return len(self._cache)
async def _maybe_await(value: Any) -> Any:
    """Await ``value`` when it is awaitable, otherwise return it unchanged.

    Lets customer callbacks be written either sync or async. Every awaitable is
    honoured (coroutines, futures/tasks, and any object defining ``__await__``),
    not just coroutines and futures.
    """
    # Imported locally so this helper stays self-contained.
    import inspect

    if inspect.isawaitable(value):
        return await value
    return value
async def resolve_event_properties(
    data: MCPAnalyticsData,
    request: Dict[str, Any],
    extra: Optional[Dict[str, Any]] = None,
) -> Optional[Dict[str, Any]]:
    """Run the optional ``event_properties`` callback for a request.

    Returns the callback's (possibly awaited) result, or ``None`` when no
    callback is configured, the result is falsy, or the callback raises. Errors
    are logged rather than propagated.
    """
    callback = data.options.event_properties
    if not callback:
        return None
    try:
        resolved = await _maybe_await(callback(request, extra))
        return resolved or None
    except Exception as e:  # noqa: BLE001
        log(f"event_properties callback error: {e}")
        return None


def _get_request_resource_name(request: Any) -> str:
    """Return ``request["params"]["name"]`` when it is a string, else
    ``"Unknown"`` (also used when ``request`` or ``params`` is not a dict)."""
    params = request.get("params") if isinstance(request, dict) else None
    name = params.get("name") if isinstance(params, dict) else None
    return name if isinstance(name, str) else "Unknown"
LoggerFn = Callable[[str], None]

# The currently installed log callable; None means log calls are dropped.
_active_logger: Optional[LoggerFn] = None


def set_logger(logger: Optional[LoggerFn]) -> None:
    """Install ``logger`` as the module-wide log callable (``None`` clears it)."""
    global _active_logger
    _active_logger = logger


def log(message: str) -> None:
    """Send ``message`` to the installed logger, if any.

    Nothing is ever printed, and an exception raised by the logger is
    swallowed so logging can never break the tracking pipeline.
    """
    target = _active_logger
    if target is None:
        return
    try:
        target(message)
    except Exception:
        # deliberate best-effort: a faulty logger must not surface to callers
        return
def _get_distinct_id(event: Event) -> str:
    """Pick the distinct id for a capture payload: the identified actor id if
    set, else the session id, else the literal ``"anonymous"``."""
    for key in ("identify_actor_given_id", "session_id"):
        candidate = event.get(key)
        if candidate:
            return candidate
    return "anonymous"


def _get_timestamp(event: Event) -> datetime:
    """Return the event's own timestamp, or the current UTC time when the
    event carries none."""
    stamp = event.get("timestamp")
    if stamp:
        return stamp
    return datetime.now(timezone.utc)
def _add_person_processing(event: Event, properties: Dict[str, Any]) -> None:
    """Disable person-profile processing for events without a resolved identity.

    Without an identity the distinct id is just the session id, so processing a
    person profile would mint one anonymous person per session.
    """
    if event.get("identify_actor_given_id"):
        return
    properties["$process_person_profile"] = False
def _add_custom_properties(event: Event, properties: Dict[str, Any]) -> None:
    """Copy the event's custom ``properties`` mapping into ``properties``.

    Custom keys overwrite any key already present in ``properties``; nothing
    happens when the event carries no custom properties.
    """
    extra = event.get("properties")
    if not extra:
        return
    properties.update(extra.items())
class PostHogMCP(Client):
    """posthog ``Client`` subclass with first-class MCP analytics capture.

    Adds ``capture_tool_call`` / ``capture_initialize`` / ``capture_tools_list`` /
    ``capture_missing_capability`` plus the ``prepare_tool_list`` and
    ``prepare_tool_call`` helpers. No inherited ``Client`` method is overridden
    apart from ``__init__``, which forwards its remaining arguments unchanged.
    """

    def __init__(
        self,
        api_key: str,
        missing_capability_tool_name: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Build the client and the MCP event sink that wraps it.

        Args:
            api_key: Project API key, forwarded to ``Client``.
            missing_capability_tool_name: Name of the virtual "missing
                capability" tool; falls back to ``get_more_tools`` when falsy.
            **kwargs: Forwarded to ``Client.__init__`` untouched.
        """
        super().__init__(api_key, **kwargs)
        self._mcp_sink = McpEventSink(self)
        if missing_capability_tool_name:
            virtual_tool_name = missing_capability_tool_name
        else:
            virtual_tool_name = _GET_MORE_TOOLS_NAME
        self._missing_capability_tool_name = virtual_tool_name

    # --- capture methods -----------------------------------------------------

    def capture_tool_call(
        self,
        tool_name: str,
        *,
        intent: Optional[str] = None,
        intent_source: Optional[str] = None,
        parameters: Any = None,
        response: Any = None,
        duration_ms: Optional[float] = None,
        is_error: bool = False,
        error: Any = None,
        category: Optional[str] = None,
        tool_description: Optional[str] = None,
        distinct_id: Optional[str] = None,
        session_id: Optional[str] = None,
        set_properties: Optional[JsonRecord] = None,
        groups: Optional[Dict[str, str]] = None,
        properties: Optional[JsonRecord] = None,
        timestamp: Optional[datetime] = None,
    ) -> None:
        """Capture a tool invocation. Emits ``$mcp_tool_call`` (+ ``$exception``
        on error).

        ``error`` is only consulted when ``is_error`` is true; when it is
        ``None`` a generic "Tool <name> returned an error" message is used.
        """
        event = self._base_event(
            MCPAnalyticsEventType.MCP_TOOLS_CALL,
            distinct_id,
            session_id,
            set_properties,
            groups,
            properties,
            timestamp,
        )
        event.update(
            {
                "resource_name": tool_name,
                "tool_description": tool_description,
                "tool_category": category,
                "parameters": parameters,
                "response": response,
                "duration": duration_ms,
                "is_error": is_error,
            }
        )
        _apply_intent(event, intent, intent_source)
        if is_error:
            failure = f"Tool {tool_name} returned an error" if error is None else error
            event["error"] = capture_exception(failure)
        self._emit(event)

    def capture_initialize(
        self,
        *,
        client_name: Optional[str] = None,
        client_version: Optional[str] = None,
        parameters: Any = None,
        response: Any = None,
        duration_ms: Optional[float] = None,
        distinct_id: Optional[str] = None,
        session_id: Optional[str] = None,
        set_properties: Optional[JsonRecord] = None,
        groups: Optional[Dict[str, str]] = None,
        properties: Optional[JsonRecord] = None,
        timestamp: Optional[datetime] = None,
    ) -> None:
        """Capture the connection handshake. Emits ``$mcp_initialize``."""
        event = self._base_event(
            MCPAnalyticsEventType.MCP_INITIALIZE,
            distinct_id,
            session_id,
            set_properties,
            groups,
            properties,
            timestamp,
        )
        event.update(
            {
                "client_name": client_name,
                "client_version": client_version,
                "parameters": parameters,
                "response": response,
                "duration": duration_ms,
            }
        )
        self._emit(event)

    def capture_tools_list(
        self,
        *,
        tool_names: Optional[List[str]] = None,
        parameters: Any = None,
        response: Any = None,
        duration_ms: Optional[float] = None,
        is_error: bool = False,
        error: Any = None,
        distinct_id: Optional[str] = None,
        session_id: Optional[str] = None,
        set_properties: Optional[JsonRecord] = None,
        groups: Optional[Dict[str, str]] = None,
        properties: Optional[JsonRecord] = None,
        timestamp: Optional[datetime] = None,
    ) -> None:
        """Capture a ``tools/list`` response. Emits ``$mcp_tools_list`` with the
        advertised tool names (``$mcp_listed_tool_names``).

        When ``is_error`` is true an error payload is attached, built from
        ``error`` or from the message "tools/list failed" if ``error`` is
        ``None``.
        """
        event = self._base_event(
            MCPAnalyticsEventType.MCP_TOOLS_LIST,
            distinct_id,
            session_id,
            set_properties,
            groups,
            properties,
            timestamp,
        )
        event.update(
            {
                "listed_tool_names": tool_names,
                "parameters": parameters,
                "response": response,
                "duration": duration_ms,
                "is_error": is_error,
            }
        )
        if is_error:
            failure = "tools/list failed" if error is None else error
            event["error"] = capture_exception(failure)
        self._emit(event)

    def capture_missing_capability(
        self,
        *,
        context: Optional[str] = None,
        parameters: Any = None,
        distinct_id: Optional[str] = None,
        session_id: Optional[str] = None,
        set_properties: Optional[JsonRecord] = None,
        groups: Optional[Dict[str, str]] = None,
        properties: Optional[JsonRecord] = None,
        timestamp: Optional[datetime] = None,
    ) -> None:
        """Capture a ``get_more_tools`` call as a missing-capability report.
        Emits ``$mcp_missing_capability`` with the agent's description as
        ``$mcp_intent``."""
        event = self._base_event(
            MCPAnalyticsEventType.MCP_MISSING_CAPABILITY,
            distinct_id,
            session_id,
            set_properties,
            groups,
            properties,
            timestamp,
        )
        event.update(
            {
                "resource_name": self._missing_capability_tool_name,
                "parameters": parameters,
            }
        )
        _apply_intent(event, context, "context_parameter")
        self._emit(event)

    # --- prepare helpers -----------------------------------------------------

    def prepare_tool_list(
        self,
        tools: List[Any],
        context: Union[bool, MCPAnalyticsContextOptions] = True,
        report_missing: bool = False,
    ) -> List[Any]:
        """Prepare a tool list for advertising to agents.

        Injects the ``context`` argument into every tool so agents state their
        intent (captured as ``$mcp_intent``), and optionally appends the
        ``get_more_tools`` virtual tool (``report_missing=True``) unless a tool
        with that name is already present. Returns a new list; dict tools are
        copied, tool objects are mutated in place.
        """
        if not is_context_enabled(context):
            prepared = list(tools)
        else:
            description = get_context_description(context)
            prepared = [self._inject_context(tool, description) for tool in tools]

        if not report_missing:
            return prepared

        virtual_tool_name = self._missing_capability_tool_name
        for candidate in prepared:
            if _tool_name(candidate) == virtual_tool_name:
                # Already advertised by the host; do not add a duplicate.
                return prepared
        prepared.append(build_report_missing_descriptor(virtual_tool_name))
        return prepared

    def prepare_tool_call(
        self, name: str, args: Optional[JsonRecord] = None
    ) -> PreparedToolCall:
        """Pull the agent's intent off the injected ``context`` argument, strip
        ``context`` from the arguments, and flag the ``get_more_tools`` virtual
        tool."""
        supplied = args.get("context") if args else None
        intent = None
        if isinstance(supplied, str):
            # A blank or whitespace-only context counts as no intent at all.
            intent = supplied.strip() or None
        source = "context_parameter" if intent else None
        return PreparedToolCall(
            args=_strip_context(args),
            intent=intent,
            intent_source=source,
            is_missing_capability=name == self._missing_capability_tool_name,
        )

    # --- internals -----------------------------------------------------------

    def _base_event(
        self,
        event_type: str,
        distinct_id: Optional[str],
        session_id: Optional[str],
        set_properties: Optional[JsonRecord],
        groups: Optional[Dict[str, str]],
        properties: Optional[JsonRecord],
        timestamp: Optional[datetime],
    ) -> Dict[str, Any]:
        """Build the fields shared by every MCP event.

        ``timestamp`` defaults to the current UTC time. The identity keys are
        only added when ``distinct_id`` / ``set_properties`` are truthy.
        """
        event: Dict[str, Any] = dict(
            event_type=event_type,
            session_id=session_id,
            timestamp=timestamp or datetime.now(timezone.utc),
            properties=properties,
            groups=groups,
        )
        if distinct_id:
            event["identify_actor_given_id"] = distinct_id
        if set_properties:
            event["identify_actor_data"] = set_properties
        return event

    def _emit(self, event: Dict[str, Any]) -> None:
        """Hand the event to the sink without waiting for the result."""
        # Fire-and-forget, mirroring posthog-node: never block or raise into the host.
        fire_and_forget(
            self._mcp_sink.capture(
                event, McpCaptureOptions(enable_exception_autocapture=True)
            )
        )

    def _inject_context(self, tool: Any, description: Optional[str]) -> Any:
        """Return ``tool`` with the ``context`` parameter added to its input schema.

        Dict tools are returned as a shallow copy with a new ``inputSchema``;
        other tool objects get ``inputSchema`` reassigned in place (a failing
        assignment is ignored). The missing-capability tool is returned
        untouched.
        """
        is_mapping = isinstance(tool, dict)
        if is_mapping:
            name = tool.get("name", "unknown")
        else:
            name = getattr(tool, "name", "unknown")
        if name == self._missing_capability_tool_name:
            return tool

        if is_mapping:
            current_schema = tool.get("inputSchema")
        else:
            current_schema = getattr(tool, "inputSchema", None)
        new_schema = add_context_parameter_to_schema(current_schema, name, description)

        if is_mapping:
            return {**tool, "inputSchema": new_schema}
        try:
            tool.inputSchema = new_schema
        except Exception:  # noqa: BLE001
            pass
        return tool
def _tool_name(tool: Any) -> Optional[str]:
    """Return a tool's name from either representation: the ``"name"`` key of a
    dict tool, or the ``name`` attribute of a tool object (``None`` if absent)."""
    if not isinstance(tool, dict):
        return getattr(tool, "name", None)
    return tool.get("name")
_INJECTED_ARGUMENT_NAMES = ("context", "conversation_id")
_REDACTED_VALUE = "[redacted]"
_BASE64_PATTERN = re.compile(r"^[A-Za-z0-9+/\n\r]+=*$")
# Strings shorter than this are never treated as binary payloads.
_SIZE_GATE = 10_240
_POSTHOG_TOKEN_PATTERN = re.compile(r"\bph[a-z]_[A-Za-z0-9_-]{20,}\b")
_SENSITIVE_KEY_PATTERN = re.compile(
    r"^(authorization|cookie|set-cookie|x-api-key|api[-_]?key|api[-_]?token|"
    r"access[-_]?token|refresh[-_]?token|token|password|secret|client[-_]?secret|"
    r"private[-_]?key)$",
    re.IGNORECASE,
)


def _is_record(value: Any) -> bool:
    """Return whether ``value`` is a dict."""
    return isinstance(value, dict)


def _should_redact_key(key: str) -> bool:
    """Return whether ``key`` names a credential-like field, case-insensitively."""
    return bool(_SENSITIVE_KEY_PATTERN.match(key))


def _sanitize_string(value: str) -> str:
    """Redact a single string.

    A string of at least ``_SIZE_GATE`` characters made up solely of base64
    characters is replaced wholesale by a placeholder; otherwise every PostHog
    token found in it is replaced by ``[redacted]``.
    """
    if len(value) >= _SIZE_GATE and _BASE64_PATTERN.match(value):
        return "[binary data redacted - not supported by PostHog MCP analytics]"
    return _POSTHOG_TOKEN_PATTERN.sub(_REDACTED_VALUE, value)


def sanitize_captured_value(value: Any) -> Any:
    """Recursively redact secrets from a captured value without mutating it.

    Strings go through ``_sanitize_string``. Lists and tuples are rebuilt as
    lists of sanitized items, so a secret nested in a tuple cannot slip through
    untouched. Dicts are rebuilt with credential-like keys replaced by
    ``[redacted]`` and all other values sanitized recursively. Every other type
    (numbers, bools, ``None``, arbitrary objects) is returned unchanged.
    """
    if value is None:
        return value
    if isinstance(value, str):
        return _sanitize_string(value)
    if isinstance(value, (list, tuple)):
        return [sanitize_captured_value(item) for item in value]
    # bool is an int subclass; both pass through unchanged.
    if not isinstance(value, dict):
        return value

    return {
        key: (
            _REDACTED_VALUE
            if _should_redact_key(str(key))
            else sanitize_captured_value(nested)
        )
        for key, nested in value.items()
    }
def _sanitize_response(response: Any) -> Any:
    """Sanitize a tool response.

    The whole response is first run through ``sanitize_captured_value``. When
    the result is a dict, each entry of a list-valued ``content`` is further
    reduced by ``_sanitize_content_block`` and a dict/list ``structuredContent``
    is sanitized again. Non-dict responses are returned as sanitized.
    """
    sanitized = sanitize_captured_value(response)
    if not _is_record(sanitized):
        return sanitized

    cleaned = dict(sanitized)
    blocks = cleaned.get("content")
    if isinstance(blocks, list):
        cleaned["content"] = [_sanitize_content_block(item) for item in blocks]

    structured = cleaned.get("structuredContent")
    if structured is not None and isinstance(structured, (dict, list)):
        cleaned["structuredContent"] = sanitize_captured_value(structured)

    return cleaned


def _sanitize_content_block(block: Any) -> Any:
    """Reduce one response content block to something safe to capture.

    ``text`` and ``resource_link`` blocks are sanitized and kept, ``resource``
    blocks go through ``_sanitize_resource_block``, and every other block type
    (including ``image`` and ``audio``) is replaced by a text placeholder.
    Non-dict blocks are returned unchanged.
    """
    if not _is_record(block):
        return block

    kind = block.get("type")
    if kind == "text" or kind == "resource_link":
        return sanitize_captured_value(block)
    if kind == "resource":
        return _sanitize_resource_block(block)

    if kind == "image":
        notice = "[image content redacted - not supported by PostHog MCP analytics]"
    elif kind == "audio":
        notice = "[audio content redacted - not supported by PostHog MCP analytics]"
    else:
        notice = f'[unsupported content type "{kind}" redacted - not supported by PostHog MCP analytics]'
    return {"type": "text", "text": notice}


def _sanitize_resource_block(block: Dict[str, Any]) -> Any:
    """Replace a resource block carrying a ``blob`` with a text placeholder;
    any other resource block is sanitized and kept."""
    payload = block.get("resource")
    carries_blob = isinstance(payload, dict) and "blob" in payload
    if not carries_blob:
        return sanitize_captured_value(block)
    return {
        "type": "text",
        "text": "[binary resource content redacted - not supported by PostHog MCP analytics]",
    }
def _build_captured_mcp_params(params: Any) -> Any:
    """Sanitize a request's ``params`` for capture.

    ``arguments`` is handled by ``_build_captured_mcp_arguments`` so the
    SDK-injected arguments are stripped. Any other credential-like key is
    redacted outright, matching how ``sanitize_captured_value`` treats nested
    keys; the remaining values are sanitized recursively. Non-dict ``params``
    are sanitized as a plain value.
    """
    if not _is_record(params):
        return sanitize_captured_value(params)

    captured: Dict[str, Any] = {}
    for key, value in params.items():
        if key == "arguments":
            captured[key] = _build_captured_mcp_arguments(value)
        elif _should_redact_key(str(key)):
            captured[key] = _REDACTED_VALUE
        else:
            captured[key] = sanitize_captured_value(value)
    return captured


def _build_captured_mcp_arguments(arguments: Any) -> Any:
    """Sanitize tool-call ``arguments`` for capture.

    Drops the SDK-injected arguments (``_INJECTED_ARGUMENT_NAMES``) and then
    sanitizes what is left as a whole, so a top-level credential-like argument
    such as ``password`` is redacted exactly like a nested one. Non-dict
    ``arguments`` are sanitized as a plain value.
    """
    if not _is_record(arguments):
        return sanitize_captured_value(arguments)

    remaining = {
        key: value
        for key, value in arguments.items()
        if key not in _INJECTED_ARGUMENT_NAMES
    }
    return sanitize_captured_value(remaining)
async def resolve_session_id(
    data: MCPAnalyticsData, mcp_session_id: Optional[str]
) -> str:
    """Resolve the session id for a request.

    Mutates per-server state under ``data.session_lock`` so concurrent async
    requests can't race on session rotation.

    Resolution order:
      1. A transport-supplied ``mcp_session_id`` always wins; the session id is
         derived deterministically from it.
      2. Once a session is MCP-derived it is kept even if a later request
         arrives without the MCP session id, so the session doesn't fragment.
      3. Otherwise the SDK-generated id is reused, and replaced with a fresh
         one when it is still empty or the inactivity timeout has elapsed.

    Returns:
        The session id to attach to the request's events.
    """
    async with data.session_lock:
        now = datetime.now(timezone.utc)

        if mcp_session_id:
            data.session_id = derive_session_id_from_mcp_session(mcp_session_id)
            data.last_mcp_session_id = mcp_session_id
            data.session_source = "mcp"
            data.last_activity = now
            return data.session_id

        if data.session_source == "mcp" and data.last_mcp_session_id:
            data.last_activity = now
            return data.session_id

        timeout_seconds = INACTIVITY_TIMEOUT_IN_MINUTES * 60
        idle_seconds = (now - data.last_activity).total_seconds()
        # An empty id means no session was ever generated for this server;
        # returning it would emit events with no session at all until the
        # inactivity timeout finally forced a rotation.
        if not data.session_id or idle_seconds > timeout_seconds:
            data.session_id = new_session_id()
            data.session_source = "generated"
        data.last_activity = now
        return data.session_id
@dataclass
class McpCaptureOptions:
    """Per-event toggles consulted by the sink when fanning out an event."""

    # Forwarded unchanged to ``build_posthog_capture_events``.
    enable_exception_autocapture: bool = True
    # Optional hook (sync or async) run on every built payload; a falsy
    # result drops that payload.
    before_send: Optional[BeforeSendFn] = None


async def process_mcp_event(
    event: McpEvent, options: McpCaptureOptions
) -> Optional[Tuple[Event, List[PostHogCaptureEvent]]]:
    """Run an MCP event through the full transform.

    Stages: sanitize -> truncate -> assign an id if missing -> build the
    capture payloads -> ``before_send``.

    Returns ``(processed_event, payloads)``. Returns ``None`` (and logs) if
    any transform stage raises, so the event is dropped rather than partially
    sent and the failure never propagates to the caller. Payloads dropped by
    ``before_send`` are filtered out of the returned list.
    """
    processed: McpEvent = event

    try:
        processed = sanitize_event(processed)
    except Exception as err:
        log(f"Failed to sanitize event: {err}")
        return None

    try:
        processed = truncate_event(processed)
    except Exception as err:
        log(f"Failed to truncate event: {err}")
        return None

    # The build stage is guarded like the stages above: the sink awaits this
    # coroutine outside its own try-block, so an error escaping here would be
    # re-raised into tool code.
    try:
        if not processed.get("id"):
            processed["id"] = new_prefixed_id("evt")
        built = build_posthog_capture_events(
            processed, options.enable_exception_autocapture
        )
    except Exception as err:
        log(f"Failed to build PostHog events: {err}")
        return None

    captures = await _apply_before_send(built, options.before_send)
    return processed, captures


async def _apply_before_send(
    captures: List[PostHogCaptureEvent], before_send: Optional[BeforeSendFn]
) -> List[PostHogCaptureEvent]:
    """Pass each payload through ``before_send`` and keep the survivors.

    With no hook the input list is returned as-is. The hook may be sync or
    async. A falsy result drops the payload; a hook that raises drops only
    that payload (the error is logged).
    """
    if before_send is None:
        return captures

    kept: List[PostHogCaptureEvent] = []
    for capture in captures:
        try:
            result = before_send(capture)
            if inspect.isawaitable(result):
                result = await result
            if result:
                kept.append(result)
        except Exception as err:
            log(
                f"before_send threw for event {capture.get('event')}; dropping it: {err}"
            )
    return kept


class McpEventSink:
    """Wraps a user-supplied posthog ``Client`` and pushes events through the
    pipeline. Errors at any stage are logged and the event dropped, never
    re-raised into tool code."""

    def __init__(self, posthog: Client) -> None:
        # The host owns the client lifecycle; the sink only calls ``capture``.
        self._posthog = posthog

    async def capture(self, event: McpEvent, options: McpCaptureOptions) -> None:
        """Transform ``event`` and send every resulting payload to PostHog."""
        result = await process_mcp_event(event, options)
        if result is None:
            return

        full_event, captures = result
        try:
            for capture_event in captures:
                self._posthog.capture(
                    capture_event["event"],
                    distinct_id=capture_event["distinct_id"],
                    properties=capture_event["properties"],
                    timestamp=capture_event.get("timestamp"),
                    uuid=_uuid7(),
                )
            log(
                f"Captured PostHog event {full_event.get('id')} | {full_event.get('event_type')} | "
                f"{full_event.get('duration')} ms | {full_event.get('identify_actor_given_id') or 'anonymous'}"
            )
        except Exception as err:
            log(f"Failed to capture PostHog event {full_event.get('id')}: {err}")
GET_MORE_TOOLS_NAME = "get_more_tools"

_GET_MORE_TOOLS_RESULT_TEXT = (
    "Unfortunately, we have shown you the full tool list. We have noted your feedback "
    "and will work to improve the tool list in the future."
)


def resolve_missing_capability_tool_name(options: Any = None) -> str:
    """Return the virtual tool's configured name, or the default.

    ``options`` may be any object; its ``missing_capability_tool_name``
    attribute is used when present and truthy. Both the inject and the detect
    paths should resolve the name here so a custom name cannot drift.
    """
    if options is None:
        return GET_MORE_TOOLS_NAME
    configured = getattr(options, "missing_capability_tool_name", None)
    return configured or GET_MORE_TOOLS_NAME


def build_report_missing_descriptor(name: str = GET_MORE_TOOLS_NAME) -> Dict[str, Any]:
    """Build the advertised descriptor of the virtual tool as a plain dict.

    Adapters turn this dict into the framework's own Tool object.
    """
    description = (
        "Check for additional tools whenever your task might benefit from specialized "
        "capabilities - even if existing tools could work as a fallback."
    )
    context_property = {
        "type": "string",
        "description": "A description of your goal and what kind of tool would help accomplish it.",
    }
    input_schema = {
        "type": "object",
        "properties": {"context": context_property},
        "required": ["context"],
    }
    annotations = {
        "title": "Get More Tools",
        "readOnlyHint": True,
        "openWorldHint": True,
        "idempotentHint": True,
        "destructiveHint": False,
    }
    return {
        "name": name,
        "description": description,
        "inputSchema": input_schema,
        "annotations": annotations,
    }


def get_more_tools_result() -> Dict[str, Any]:
    """Return the canned acknowledgement sent back after a ``get_more_tools``
    call. Custom dispatchers reply with this; the ``instrument()`` path
    returns it automatically."""
    text_block = {"type": "text", "text": _GET_MORE_TOOLS_RESULT_TEXT}
    return {"content": [text_block]}


def get_more_tools_result_text() -> str:
    """Return just the text of the canned acknowledgement."""
    return _GET_MORE_TOOLS_RESULT_TEXT


def handle_report_missing(context: Optional[str]) -> Dict[str, Any]:
    """Log the reported gap and return the canned acknowledgement."""
    log(f"Missing tool reported: {context!r}")
    return get_more_tools_result()
# --- limits ------------------------------------------------------------------

MAX_DEPTH = 10
MAX_BREADTH = 100
MAX_STRING_LENGTH = 32_768  # 32KB
MAX_EVENT_BYTES = 102_400  # 100KB

_MAX_USER_INTENT_LENGTH = 2048
_MAX_ERROR_MESSAGE_LENGTH = 2048
_MAX_RESOURCE_NAME_LENGTH = 256
_MAX_METADATA_LENGTH = 256
_MAX_STACK_FRAMES = 50
_MAX_CONTENT_TEXT_LENGTH = 32_768

_TRUNCATION_SUFFIX = "..."

# (event key, max characters) pairs for the top-level string fields.
_METADATA_FIELDS = (
    ("user_intent", _MAX_USER_INTENT_LENGTH),
    ("resource_name", _MAX_RESOURCE_NAME_LENGTH),
    ("server_name", _MAX_METADATA_LENGTH),
    ("server_version", _MAX_METADATA_LENGTH),
    ("client_name", _MAX_METADATA_LENGTH),
    ("client_version", _MAX_METADATA_LENGTH),
)

# Event keys that are recursively normalized.
_NORMALIZED_FIELDS = ("parameters", "response", "identify_actor_data", "error")


# --- normalize ---------------------------------------------------------------


def normalize(
    value: Any,
    depth: int = MAX_DEPTH,
    max_breadth: int = MAX_BREADTH,
    max_string_length: int = MAX_STRING_LENGTH,
) -> Any:
    """Recursively normalize a value: cap strings, coerce non-serializable
    values, convert datetimes, detect cycles, and bound depth/breadth.

    ``depth`` is the number of container levels kept; containers below that
    are replaced by the ``"[Array]"`` / ``"[Object]"`` placeholders.
    """
    return _visit(value, depth, max_breadth, max_string_length, set())


def _visit(
    value: Any,
    remaining_depth: int,
    max_breadth: int,
    max_string_length: int,
    memo: set,
) -> Any:
    """Normalize one node. ``memo`` holds the ids of the containers on the
    current path and is used to detect reference cycles."""
    if value is None:
        return None
    if isinstance(value, bool):  # before int — bool is an int subclass
        return value
    if isinstance(value, (int, float)):
        if isinstance(value, float):
            # NaN / infinities are not valid JSON; emit readable markers.
            if math.isnan(value):
                return "[NaN]"
            if math.isinf(value):
                return "[Infinity]" if value > 0 else "[-Infinity]"
        return value
    if isinstance(value, str):
        if len(value) > max_string_length:
            return value[:max_string_length] + _TRUNCATION_SUFFIX
        return value
    if isinstance(value, datetime):
        return value.isoformat()
    if callable(value):
        return f"[Function: {getattr(value, '__name__', '') or ''}]"

    if isinstance(value, (list, tuple)):
        oid = id(value)
        if oid in memo:
            return "[Circular ~]"
        if remaining_depth <= 0:
            return "[Array]"
        memo.add(oid)
        result: Any = _visit_array(
            list(value), remaining_depth - 1, max_breadth, max_string_length, memo
        )
        # Only containers on the current path count as a cycle, so the id is
        # released once this subtree is done.
        memo.discard(oid)
        return result

    if isinstance(value, dict):
        oid = id(value)
        if oid in memo:
            return "[Circular ~]"
        if remaining_depth <= 0:
            return "[Object]"
        memo.add(oid)
        result = _visit_object(
            value, remaining_depth - 1, max_breadth, max_string_length, memo
        )
        memo.discard(oid)
        return result

    # Anything else is coerced to its string form.
    return str(value)


def _visit_array(
    arr: List[Any],
    remaining_depth: int,
    max_breadth: int,
    max_string_length: int,
    memo: set,
) -> List[Any]:
    """Normalize list items, keeping at most ``max_breadth`` of them followed
    by a ``"[MaxProperties ~]"`` marker when items were cut."""
    result: List[Any] = []
    for i, item in enumerate(arr):
        if i >= max_breadth:
            result.append("[MaxProperties ~]")
            break
        result.append(
            _visit(item, remaining_depth, max_breadth, max_string_length, memo)
        )
    return result


def _visit_object(
    obj: Dict[Any, Any],
    remaining_depth: int,
    max_breadth: int,
    max_string_length: int,
    memo: set,
) -> Dict[Any, Any]:
    """Normalize dict values, keeping at most ``max_breadth`` entries plus a
    ``"..."`` marker entry when entries were cut. Keys are kept as-is."""
    result: Dict[Any, Any] = {}
    count = 0
    for key, val in obj.items():
        if count >= max_breadth:
            result["..."] = "[MaxProperties ~]"
            break
        result[key] = _visit(val, remaining_depth, max_breadth, max_string_length, memo)
        count += 1
    return result


# --- field-level helpers -----------------------------------------------------


def _truncate_string(value: Optional[str], max_length: int) -> Optional[str]:
    """Cap a string at ``max_length`` characters plus the suffix; non-strings
    are returned untouched."""
    if not isinstance(value, str):
        return value
    if len(value) <= max_length:
        return value
    return value[:max_length] + _TRUNCATION_SUFFIX


def _truncate_stack_frames(frames: Optional[List[Any]]) -> Optional[List[Any]]:
    """Keep the first and last halves of an over-long frame list."""
    if not frames or len(frames) <= _MAX_STACK_FRAMES:
        return frames
    half = _MAX_STACK_FRAMES // 2
    return frames[:half] + frames[-half:]


def _truncate_exception_list(error: Dict[str, Any]) -> Dict[str, Any]:
    """Cap messages and stack frames inside ``error["$exception_list"]``.

    Returns ``error`` itself when it has no list-valued ``$exception_list``,
    otherwise a shallow copy holding the truncated list. Entries that are not
    dicts are passed through unchanged rather than raising, so one malformed
    entry cannot cause the whole event to be dropped.
    """
    exception_list = error.get("$exception_list")
    if not isinstance(exception_list, list):
        return error
    result = {**error}
    truncated = []
    for exception in exception_list:
        if not isinstance(exception, dict):
            truncated.append(exception)
            continue
        nxt = {**exception}
        if isinstance(nxt.get("value"), str):
            nxt["value"] = _truncate_string(nxt["value"], _MAX_ERROR_MESSAGE_LENGTH)
        stacktrace = nxt.get("stacktrace")
        if isinstance(stacktrace, dict) and stacktrace.get("frames"):
            nxt["stacktrace"] = {
                **stacktrace,
                "frames": _truncate_stack_frames(stacktrace["frames"]),
            }
        truncated.append(nxt)
    result["$exception_list"] = truncated
    return result


def _truncate_response_content(response: Any) -> Any:
    """Cap the ``text`` of every text block in ``response["content"]``.

    Non-dict responses are returned untouched; dict responses are shallow
    copied and blocks that need no change are reused as-is.
    """
    if not isinstance(response, dict):
        return response
    result = {**response}
    content = result.get("content")
    if isinstance(content, list):
        new_content = []
        for block in content:
            if (
                isinstance(block, dict)
                and block.get("type") == "text"
                and isinstance(block.get("text"), str)
                and len(block["text"]) > _MAX_CONTENT_TEXT_LENGTH
            ):
                new_content.append(
                    {
                        **block,
                        "text": _truncate_string(
                            block["text"], _MAX_CONTENT_TEXT_LENGTH
                        ),
                    }
                )
            else:
                new_content.append(block)
        result["content"] = new_content
    return result


# --- size-targeted truncation ------------------------------------------------


def _json_default(obj: Any) -> Any:
    """``json.dumps`` fallback: ISO-format datetimes, ``str()`` the rest."""
    if isinstance(obj, datetime):
        return obj.isoformat()
    return str(obj)


def _json_byte_size(value: Any) -> int:
    """Size in bytes of the compact JSON encoding of ``value``."""
    return len(
        json.dumps(value, default=_json_default, separators=(",", ":")).encode("utf-8")
    )


def _collect_string_paths(
    obj: Any, current_path: List[Any], results: List[Dict[str, Any]]
) -> None:
    """Append ``{"path", "length"}`` for every string longer than 100 chars.

    Path segments are the real list indices and dict keys (not their string
    form), so a value stored under a non-string key stays addressable by
    ``_get_nested_value`` / ``_set_nested_value``.
    """
    if isinstance(obj, str):
        if len(obj) > 100:
            results.append({"path": list(current_path), "length": len(obj)})
        return
    if isinstance(obj, list):
        for i, item in enumerate(obj):
            _collect_string_paths(item, current_path + [i], results)
        return
    if isinstance(obj, dict):
        for key, value in obj.items():
            _collect_string_paths(value, current_path + [key], results)


def _get_nested_value(obj: Any, path: List[Any]) -> Any:
    """Follow ``path`` through nested lists/dicts; ``None`` when the walk
    reaches something that is neither."""
    current = obj
    for key in path:
        if isinstance(current, list):
            current = current[int(key)]
        elif isinstance(current, dict):
            current = current.get(key)
        else:
            return None
    return current


def _set_nested_value(obj: Any, path: List[Any], value: Any) -> None:
    """Assign ``value`` at ``path``; silently does nothing when the parent
    cannot be reached."""
    current = obj
    for key in path[:-1]:
        if isinstance(current, list):
            current = current[int(key)]
        elif isinstance(current, dict):
            current = current.get(key)
        else:
            return
    final_key = path[-1]
    if isinstance(current, list):
        current[int(final_key)] = value
    elif isinstance(current, dict):
        current[final_key] = value


def _truncate_largest_fields(obj: Any, max_bytes: int) -> Any:
    """Trim the longest strings of a deep copy of ``obj`` until its JSON size
    fits ``max_bytes``.

    Runs at most 10 passes. Each pass cuts strings, largest first, by at most
    half their length until the excess is covered. The result may still be
    over budget when nothing more can be trimmed.
    """
    result = copy.deepcopy(obj)

    for _ in range(10):
        current_size = _json_byte_size(result)
        if current_size <= max_bytes:
            return result
        excess = current_size - max_bytes

        string_paths: List[Dict[str, Any]] = []
        _collect_string_paths(result, [], string_paths)
        string_paths.sort(key=lambda p: p["length"], reverse=True)
        if not string_paths:
            break

        remaining = excess + 200  # buffer for JSON overhead from added "..." suffixes
        truncated = False
        for entry in string_paths:
            if remaining <= 0:
                break
            length = entry["length"]
            reduction = min(remaining, length // 2)
            if reduction < 10:
                # Not worth cutting; try the next candidate.
                continue
            new_length = length - reduction
            current_value = _get_nested_value(result, entry["path"])
            if not isinstance(current_value, str):
                continue
            _set_nested_value(
                result, entry["path"], current_value[:new_length] + _TRUNCATION_SUFFIX
            )
            remaining -= reduction
            truncated = True

        if not truncated:
            break

    return result


def _truncate_to_size(event: Dict[str, Any]) -> Dict[str, Any]:
    """Shrink ``event`` until its JSON size fits ``MAX_EVENT_BYTES``.

    First re-normalizes the user-controlled fields at decreasing depth and
    returns the first variant that fits. If even depth 1 is too large, the
    depth-1 variant is handed to ``_truncate_largest_fields``.
    """
    if _json_byte_size(event) <= MAX_EVENT_BYTES:
        return event

    reduced = event
    # max(..., 1) guarantees at least the depth-1 pass, whose result is reused
    # below instead of being recomputed.
    for depth in range(max(MAX_DEPTH - 1, 1), 0, -1):
        reduced = {**event}
        for field in _NORMALIZED_FIELDS:
            if reduced.get(field) is not None:
                reduced[field] = normalize(reduced[field], depth)
        if _json_byte_size(reduced) <= MAX_EVENT_BYTES:
            return reduced

    return _truncate_largest_fields(reduced, MAX_EVENT_BYTES)


def truncate_event(event: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of ``event`` that respects all truncation limits.

    The input dict is not mutated. Layers, in order: field-level string caps,
    exception-list and response-content caps, recursive normalization of the
    user-controlled fields, then size-targeted truncation.
    """
    result = {**event}

    # Layer 1: field-level string limits
    for key, max_length in _METADATA_FIELDS:
        if isinstance(result.get(key), str):
            result[key] = _truncate_string(result[key], max_length)

    if isinstance(result.get("error"), dict):
        result["error"] = _truncate_exception_list(result["error"])

    if result.get("response") is not None:
        result["response"] = _truncate_response_content(result["response"])

    # Layer 2: recursive normalization on user-controlled fields
    for field in _NORMALIZED_FIELDS:
        if result.get(field) is not None:
            result[field] = normalize(result[field])

    # Layer 3: size-targeted normalization
    return _truncate_to_size(result)
# PostHog error-tracking properties (the ``$exception_list`` / ``$exception_level`` shape).
ErrorProperties = Dict[str, Any]

MCPAnalyticsIntentSource = str  # "context_parameter" | "inferred"

# Internal MCP event as it flows through the SDK before capture. Modeled as a
# plain dict (constructed and read with ``.get()`` throughout) to mirror the TS
# plain-object pipeline. Snake_case keys map to the ``$mcp_*`` wire keys in
# ``posthog_events``. Known keys: client_name, client_version, conversation_id,
# duration, error, event_name, event_type, groups, id, identify_actor_data,
# identify_actor_given_id, is_error, listed_tool_names, parameters, properties,
# resource_name, response, server_name, server_version, session_id, timestamp,
# tool_category, tool_description, user_intent, user_intent_source.
Event = Dict[str, Any]
McpEvent = Dict[str, Any]


class PostHogCaptureEvent(TypedDict, total=False):
    """A fully-built payload ready for ``Client.capture()``.

    Declared with ``total=False``, so every key is optional to the type
    checker.
    """

    distinct_id: str
    event: str
    properties: Dict[str, Any]
    timestamp: datetime


# Hook invoked for every event just before capture. Return the (possibly
# mutated) event to send it, or a nullish value to drop it. May be sync or async.
BeforeSendFn = Callable[
    [PostHogCaptureEvent],
    Union[Optional[PostHogCaptureEvent], Awaitable[Optional[PostHogCaptureEvent]]],
]


@dataclass
class UserIdentity:
    """Resolved identity for a session. ``distinct_id`` becomes ``distinct_id``;
    ``properties`` go to ``$set``; ``groups`` (``{group_type: group_key}``) are
    stamped on every event as ``$groups``."""

    distinct_id: str
    properties: Optional[JsonRecord] = None
    groups: Optional[Dict[str, str]] = None


@dataclass
class MCPAnalyticsContextOptions:
    """Object form of the ``context`` option of ``MCPAnalyticsOptions``."""

    # NOTE(review): presumably overrides the description advertised for the
    # injected ``context`` parameter — confirm against the adapter code.
    description: Optional[str] = None


# request is a JSON-RPC-shaped dict; extra carries session_id / headers.
IdentifyFn = Callable[
    ..., Any
]  # (request, extra) -> Optional[UserIdentity] | awaitable
IntentFallbackFn = Callable[..., Any]  # (request, extra) -> Optional[str] | awaitable
EventPropertiesFn = Callable[..., Any]  # (request, extra) -> Optional[dict] | awaitable


@dataclass
class MCPAnalyticsOptions:
    """Configuration for ``instrument()``. Mirrors the TypeScript SDK's options."""

    # NOTE(review): presumably replaces the SDK's default log sink — confirm in ``logger``.
    logger: Optional[LoggerFn] = None
    # Advertise the virtual ``get_more_tools`` tool in tools/list.
    report_missing: bool = False
    # Custom name for that virtual tool; ``None`` or empty means the default.
    missing_capability_tool_name: Optional[str] = None
    # Stamp ``$mcp_conversation_id`` on tool-call events.
    enable_conversation_id: bool = False
    # NOTE(review): presumably controls the ``$exception`` fan-out — confirm in ``posthog_events``.
    enable_exception_autocapture: bool = True
    # Inject a required `context` parameter on every tool to capture user intent.
    context: Union[bool, MCPAnalyticsContextOptions] = True
    # Identify the calling user — a callable (request, extra) -> UserIdentity|None
    # (sync or async), or a static UserIdentity.
    identify: Optional[Union[IdentifyFn, UserIdentity]] = None
    # Called when a tool is invoked without an explicit `context` argument.
    intent_fallback: Optional[IntentFallbackFn] = None
    # Inspect/modify/drop each event right before it is sent to PostHog.
    before_send: Optional[BeforeSendFn] = None
    # Extra properties merged onto every auto-captured event.
    event_properties: Optional[EventPropertiesFn] = None


@dataclass
class CaptureEventData:
    """Payload for the custom-event handle returned by ``instrument()``."""

    event: str
    properties: Optional[JsonRecord] = None


@dataclass
class PreparedToolCall:
    """Result of :meth:`PostHogMCP.prepare_tool_call`: the intent pulled off the
    call, the arguments with the injected ``context`` stripped, and whether the
    call targeted the ``get_more_tools`` virtual tool."""

    args: Optional[JsonRecord] = None
    intent: Optional[str] = None
    intent_source: Optional[str] = None
    is_missing_capability: bool = False


@dataclass
class SessionInfo:
    """Client, server, SDK and identity metadata kept for a session.

    NOTE(review): where and when these fields are populated is not visible in
    this module.
    """

    client_name: Optional[str] = None
    client_version: Optional[str] = None
    server_name: Optional[str] = None
    server_version: Optional[str] = None
    sdk_language: str = "Python"
    sdk_version: Optional[str] = None
    ip_address: Optional[str] = None
    identify_actor_given_id: Optional[str] = None
    # default_factory gives every instance its own dict.
    identify_actor_data: JsonRecord = field(default_factory=dict)
    identify_actor_groups: Optional[Dict[str, str]] = None


# --- posthog/mcp/version.py ---------------------------------------------------
# Portions of this package are derived from MCPCat/mcpcat-typescript-sdk
# Copyright (c) 2025 MCPcat
# Licensed under the MIT License: https://github.com/MCPCat/mcpcat-typescript-sdk/blob/main/LICENSE

# Version of the PostHog MCP analytics SDK surface. Informational only — it is
# not stamped onto captured events. The package itself ships inside posthog.
__version__ = "0.1.0"
"""End-to-end tests for the FastMCP adapter (Milestone 2)."""

import asyncio

import pytest

import mcp.types as mcp_types
from mcp.server.fastmcp import FastMCP

from posthog.mcp import instrument
from posthog.mcp.types import MCPAnalyticsOptions, UserIdentity


class FakeClient:
    """Records capture() calls instead of sending them."""

    def __init__(self):
        self.events = []

    def capture(
        self,
        event,
        distinct_id=None,
        properties=None,
        timestamp=None,
        uuid=None,
        **kwargs,
    ):
        # Only the fields the tests assert on are recorded.
        self.events.append(
            {"event": event, "distinct_id": distinct_id, "properties": properties or {}}
        )
        return None


def make_server():
    """Build a FastMCP server with one working tool and one that raises."""
    server = FastMCP("test-server")

    @server.tool()
    def add(a: int, b: int) -> int:
        return a + b

    @server.tool()
    def boom() -> str:
        raise ValueError("explode")

    return server


async def _flush():
    """Let fire-and-forget capture tasks run to completion."""
    import posthog.mcp.instrumentation as instr

    for _ in range(10):
        await asyncio.sleep(0)
        pending = [t for t in list(instr._BACKGROUND_TASKS) if not t.done()]
        if pending:
            await asyncio.gather(*pending, return_exceptions=True)
    await asyncio.sleep(0)


def _events(client, name):
    """Recorded events whose event name equals ``name``."""
    return [e for e in client.events if e["event"] == name]


async def _list_tools(server):
    """Invoke the server's registered tools/list handler directly."""
    handler = server._mcp_server.request_handlers[mcp_types.ListToolsRequest]
    return await handler(mcp_types.ListToolsRequest(method="tools/list"))


# --- tools/list --------------------------------------------------------------


async def test_list_tools_injects_context_and_captures():
    """tools/list adds a required ``context`` property and emits ``$mcp_tools_list``."""
    server = make_server()
    client = FakeClient()
    instrument(server, client)

    result = await _list_tools(server)
    await _flush()

    tools = result.root.tools
    add_tool = next(t for t in tools if t.name == "add")
    assert "context" in add_tool.inputSchema["properties"]
    assert "context" in add_tool.inputSchema["required"]

    listed = _events(client, "$mcp_tools_list")
    assert listed
    assert set(listed[0]["properties"]["$mcp_listed_tool_names"]) == {"add", "boom"}


async def test_context_injection_can_be_disabled():
    """With ``context=False`` the tool schema carries no ``context`` property."""
    server = make_server()
    client = FakeClient()
    instrument(server, client, MCPAnalyticsOptions(context=False))

    result = await _list_tools(server)
    add_tool = next(t for t in result.root.tools if t.name == "add")
    assert "context" not in add_tool.inputSchema.get("properties", {})


# --- tools/call --------------------------------------------------------------


async def test_tool_call_captures_intent_and_strips_context():
    """``context`` is captured as the intent and removed before the tool runs."""
    server = make_server()
    client = FakeClient()
    instrument(server, client)

    received = {}
    original_add = server._tool_manager.get_tool("add").fn

    def spy_add(a: int, b: int) -> int:
        received["args"] = {"a": a, "b": b}
        return original_add(a, b)

    # Swap in the spy so the arguments that reach the tool can be inspected.
    server._tool_manager.get_tool("add").fn = spy_add

    await server._tool_manager.call_tool(
        "add", {"a": 2, "b": 3, "context": "summing two numbers for the user's report"}
    )
    await _flush()

    # the injected `context` never reached the tool implementation
    assert received["args"] == {"a": 2, "b": 3}

    calls = _events(client, "$mcp_tool_call")
    assert len(calls) == 1
    props = calls[0]["properties"]
    assert props["$mcp_tool_name"] == "add"
    assert props["$mcp_intent"] == "summing two numbers for the user's report"
    assert props["$mcp_intent_source"] == "context_parameter"
    assert props["$mcp_is_error"] is False
    assert "$mcp_duration_ms" in props
    # context is stripped from captured parameters too
    assert "context" not in props["$mcp_parameters"]["request"]["params"]["arguments"]


async def test_initialize_emitted_once_per_session():
    """Two tool calls produce one ``$mcp_initialize`` and two ``$mcp_tool_call``."""
    server = make_server()
    client = FakeClient()
    instrument(server, client)

    await server._tool_manager.call_tool(
        "add", {"a": 1, "b": 1, "context": "first call to warm up"}
    )
    await server._tool_manager.call_tool(
        "add", {"a": 2, "b": 2, "context": "second call for the total"}
    )
    await _flush()

    assert len(_events(client, "$mcp_initialize")) == 1
    assert len(_events(client, "$mcp_tool_call")) == 2


async def test_tool_call_error_is_captured_and_reraised():
    """A raising tool still raises, and is captured with ``$exception``."""
    server = make_server()
    client = FakeClient()
    instrument(server, client)

    with pytest.raises(Exception):
        await server._tool_manager.call_tool(
            "boom", {"context": "attempting the risky operation"}
        )
    await _flush()

    calls = _events(client, "$mcp_tool_call")
    assert calls and calls[0]["properties"]["$mcp_is_error"] is True
    exceptions = _events(client, "$exception")
    assert exceptions
    assert exceptions[0]["properties"]["$exception_list"][0]["value"] == "explode"


async def test_identify_sets_distinct_id_and_groups():
    """The ``identify`` callback drives distinct_id, ``$groups`` and ``$identify``."""
    server = make_server()
    client = FakeClient()
    instrument(
        server,
        client,
        MCPAnalyticsOptions(
            identify=lambda request, extra: UserIdentity(
                distinct_id="user_42",
                properties={"plan": "pro"},
                groups={"organization": "org_7"},
            )
        ),
    )

    await server._tool_manager.call_tool(
        "add", {"a": 1, "b": 2, "context": "checking identity flows through"}
    )
    await _flush()

    calls = _events(client, "$mcp_tool_call")
    assert calls[0]["distinct_id"] == "user_42"
    assert calls[0]["properties"]["$groups"] == {"organization": "org_7"}
    assert "$process_person_profile" not in calls[0]["properties"]
    # an $identify event was emitted
    assert _events(client, "$identify")


async def test_instrument_is_idempotent():
    """Instrumenting the same server twice does not wrap ``call_tool`` again."""
    server = make_server()
    client = FakeClient()
    instrument(server, client)
    wrapped_call = server._tool_manager.call_tool
    instrument(server, client)
    assert server._tool_manager.call_tool is wrapped_call  # not double-wrapped


async def test_unsupported_server_returns_noop_handle():
    """An unsupported server object yields a handle whose capture is a no-op."""
    handle = instrument(object(), FakeClient())
    # graceful no-op: capture does nothing and does not raise
    await handle.capture("anything")
"""Tests for jlowin's standalone FastMCP 2.0 (the `fastmcp` package), distinct
from the official SDK's mcp.server.fastmcp.FastMCP."""

import asyncio

import pytest

# Skip the whole module when the optional `fastmcp` package is not installed.
pytest.importorskip("fastmcp")

import mcp.types as mcp_types  # noqa: E402
from fastmcp import FastMCP  # noqa: E402

from posthog.mcp import instrument  # noqa: E402
from posthog.mcp.types import MCPAnalyticsOptions  # noqa: E402


class FakeClient:
    """Records capture() calls in ``events`` instead of sending them."""

    def __init__(self):
        self.events = []

    def capture(
        self,
        event,
        distinct_id=None,
        properties=None,
        timestamp=None,
        uuid=None,
        **kwargs,
    ):
        self.events.append(
            {"event": event, "distinct_id": distinct_id, "properties": properties or {}}
        )
        return None


async def _flush():
    """Yield to the loop and await the SDK's pending background tasks."""
    import posthog.mcp.instrumentation as instr

    for _ in range(10):
        await asyncio.sleep(0)
        pending = [t for t in list(instr._BACKGROUND_TASKS) if not t.done()]
        if pending:
            await asyncio.gather(*pending, return_exceptions=True)
    await asyncio.sleep(0)


def _events(client, name):
    """Recorded events whose event name equals ``name``."""
    return [e for e in client.events if e["event"] == name]


def make_server():
    """Build a standalone-FastMCP server exposing a single ``add`` tool."""
    server = FastMCP("jlowin-probe")

    @server.tool
    def add(a: int, b: int) -> int:
        return a + b

    return server


async def _list(server):
    """Invoke the registered tools/list handler directly."""
    handler = server._mcp_server.request_handlers[mcp_types.ListToolsRequest]
    return await handler(mcp_types.ListToolsRequest(method="tools/list"))


async def _call(server, name, arguments):
    """Invoke the registered tools/call handler directly."""
    handler = server._mcp_server.request_handlers[mcp_types.CallToolRequest]
    return await handler(
        mcp_types.CallToolRequest(
            method="tools/call",
            params=mcp_types.CallToolRequestParams(name=name, arguments=arguments),
        )
    )


async def test_jlowin_list_injects_context():
    """tools/list advertises ``context`` and emits ``$mcp_tools_list``."""
    server = make_server()
    client = FakeClient()
    instrument(server, client)

    result = await _list(server)
    await _flush()

    add_tool = next(t for t in result.root.tools if t.name == "add")
    assert "context" in add_tool.inputSchema["properties"]
    assert _events(client, "$mcp_tools_list")


async def test_jlowin_call_strips_context_so_validation_passes():
    """The call succeeds with ``context`` supplied and the intent is captured."""
    server = make_server()
    client = FakeClient()
    instrument(server, client)

    out = await _call(
        server,
        "add",
        {"a": 2, "b": 3, "context": "summing two numbers for the monthly report"},
    )
    await _flush()

    # Without stripping, jlowin rejects the extra `context` kwarg with isError.
    assert out.root.isError is False
    calls = _events(client, "$mcp_tool_call")
    assert calls and calls[0]["properties"]["$mcp_tool_name"] == "add"
    assert (
        calls[0]["properties"]["$mcp_intent"]
        == "summing two numbers for the monthly report"
    )
    assert calls[0]["properties"]["$mcp_is_error"] is False


async def test_jlowin_report_missing_advertises_get_more_tools():
    """``report_missing=True`` lists ``get_more_tools`` and captures its calls."""
    server = make_server()
    client = FakeClient()
    instrument(server, client, MCPAnalyticsOptions(report_missing=True))

    result = await _list(server)
    assert "get_more_tools" in [t.name for t in result.root.tools]

    out = await _call(
        server, "get_more_tools", {"context": "need a tool that exports to CSV"}
    )
    await _flush()
    assert out.root.isError is False
    assert _events(client, "$mcp_missing_capability")
"""Tests for M4 parity features: get_more_tools (missing capability) + conversation_id."""

import asyncio

import mcp.types as mcp_types
from mcp.server.fastmcp import FastMCP
from mcp.server.lowlevel import Server

from posthog.mcp import PostHogMCP, get_more_tools_result, instrument
from posthog.mcp.types import MCPAnalyticsOptions


class FakeClient:
    """Records capture() calls in ``events`` instead of sending them."""

    def __init__(self):
        self.events = []

    def capture(
        self,
        event,
        distinct_id=None,
        properties=None,
        timestamp=None,
        uuid=None,
        **kwargs,
    ):
        self.events.append(
            {"event": event, "distinct_id": distinct_id, "properties": properties or {}}
        )
        return None


async def _flush():
    """Yield to the loop and await the SDK's pending background tasks."""
    import posthog.mcp.instrumentation as instr

    for _ in range(10):
        await asyncio.sleep(0)
        pending = [t for t in list(instr._BACKGROUND_TASKS) if not t.done()]
        if pending:
            await asyncio.gather(*pending, return_exceptions=True)
    await asyncio.sleep(0)


def _events(client, name):
    """Recorded events whose event name equals ``name``."""
    return [e for e in client.events if e["event"] == name]


def make_fastmcp():
    """Build a FastMCP server exposing a single ``add`` tool."""
    server = FastMCP("m4-fastmcp")

    @server.tool()
    def add(a: int, b: int) -> str:
        return f"sum is {a + b}"

    return server


def make_lowlevel():
    """Build a low-level ``Server`` with an ``echo`` tool."""
    server = Server("m4-lowlevel")

    @server.list_tools()
    async def list_tools():
        return [
            mcp_types.Tool(
                name="echo",
                description="Echo",
                inputSchema={
                    "type": "object",
                    "properties": {"msg": {"type": "string"}},
                    "required": ["msg"],
                },
            )
        ]

    @server.call_tool()
    async def call_tool(name, arguments):
        return [mcp_types.TextContent(type="text", text=str(arguments.get("msg")))]

    return server


def _call_request(name, arguments):
    """Build a tools/call request for ``name`` with ``arguments``."""
    return mcp_types.CallToolRequest(
        method="tools/call",
        params=mcp_types.CallToolRequestParams(name=name, arguments=arguments),
    )


# --- get_more_tools / missing capability -------------------------------------


async def test_fastmcp_report_missing_advertises_and_captures():
    """FastMCP: ``get_more_tools`` is listed, answered, and captured as a gap."""
    server = make_fastmcp()
    client = FakeClient()
    instrument(server, client, MCPAnalyticsOptions(report_missing=True))

    list_handler = server._mcp_server.request_handlers[mcp_types.ListToolsRequest]
    result = await list_handler(mcp_types.ListToolsRequest(method="tools/list"))
    tool_names = [t.name for t in result.root.tools]
    assert "get_more_tools" in tool_names

    canned = await server._tool_manager.call_tool(
        "get_more_tools", {"context": "wanted a tool to export results to CSV"}
    )
    await _flush()

    assert "noted your feedback" in canned[0].text
    missing = _events(client, "$mcp_missing_capability")
    assert (
        missing
        and missing[0]["properties"]["$mcp_intent"]
        == "wanted a tool to export results to CSV"
    )
    # a get_more_tools call is NOT a normal tool call
    assert _events(client, "$mcp_tool_call") == []


async def test_lowlevel_report_missing_advertises_and_captures():
    """Low-level server: ``get_more_tools`` is listed, answered, and captured."""
    server = make_lowlevel()
    client = FakeClient()
    instrument(server, client, MCPAnalyticsOptions(report_missing=True))

    list_handler = server.request_handlers[mcp_types.ListToolsRequest]
    result = await list_handler(mcp_types.ListToolsRequest(method="tools/list"))
    assert "get_more_tools" in [t.name for t in result.root.tools]

    call_handler = server.request_handlers[mcp_types.CallToolRequest]
    out = await call_handler(
        _call_request("get_more_tools", {"context": "need a scheduling tool"})
    )
    await _flush()

    assert out.root.isError is False
    assert "noted your feedback" in out.root.content[0].text
    assert _events(client, "$mcp_missing_capability")


# --- conversation_id ---------------------------------------------------------


async def test_fastmcp_conversation_id_captured():
    """FastMCP: a conversation id is stamped on the tool call when enabled."""
    server = make_fastmcp()
    client = FakeClient()
    instrument(server, client, MCPAnalyticsOptions(enable_conversation_id=True))

    await server._tool_manager.call_tool(
        "add",
        {"a": 1, "b": 2, "context": "summing for the report"},
        convert_result=True,
    )
    await _flush()

    calls = _events(client, "$mcp_tool_call")
    assert calls and calls[0]["properties"].get("$mcp_conversation_id")  # minted


async def test_lowlevel_conversation_id_captured_and_prompt_back():
    """Low-level server: the captured id is also echoed in the tool result."""
    server = make_lowlevel()
    client = FakeClient()
    instrument(server, client, MCPAnalyticsOptions(enable_conversation_id=True))

    call_handler = server.request_handlers[mcp_types.CallToolRequest]
    out = await call_handler(_call_request("echo", {"msg": "hi", "context": "echoing"}))
    await _flush()

    calls = _events(client, "$mcp_tool_call")
    conv_id = calls[0]["properties"].get("$mcp_conversation_id")
    assert conv_id
    # prompt-back appended to the result so the agent echoes the id
    texts = [c.text for c in out.root.content if getattr(c, "type", None) == "text"]
    assert any(f"conversation_id={conv_id}" in t for t in texts)


async def test_conversation_id_reused_when_supplied():
    """A caller-supplied ``conversation_id`` is reused and stripped from params."""
    server = make_lowlevel()
    client = FakeClient()
    instrument(server, client, MCPAnalyticsOptions(enable_conversation_id=True))

    call_handler = server.request_handlers[mcp_types.CallToolRequest]
    await call_handler(
        _call_request(
            "echo", {"msg": "hi", "conversation_id": "conv-123", "context": "x"}
        )
    )
    await _flush()

    calls = _events(client, "$mcp_tool_call")
    assert calls[0]["properties"]["$mcp_conversation_id"] == "conv-123"
    # the injected conversation_id is stripped from captured params (surfaces only as $mcp_conversation_id)
    args = calls[0]["properties"]["$mcp_parameters"]["request"]["params"]["arguments"]
    assert "conversation_id" not in args
+ server = Server("conv-err") + + @server.list_tools() + async def _lt(): + return [ + mcp_types.Tool( + name="boom", + description="b", + inputSchema={"type": "object", "properties": {}}, + ) + ] + + @server.call_tool() + async def _ct(name, arguments): + raise ValueError("kaboom") + + client = FakeClient() + instrument(server, client, MCPAnalyticsOptions(enable_conversation_id=True)) + + out = await server.request_handlers[mcp_types.CallToolRequest]( + _call_request("boom", {"context": "trying"}) + ) + await _flush() + + assert out.root.isError is True + calls = _events(client, "$mcp_tool_call") + assert calls and calls[0]["properties"]["$mcp_is_error"] is True + assert "$mcp_conversation_id" not in calls[0]["properties"] + + +async def test_event_properties_applied_to_all_event_types(): + server = make_lowlevel() + client = FakeClient() + instrument( + server, + client, + MCPAnalyticsOptions(event_properties=lambda request, extra: {"tenant": "acme"}), + ) + + await server.request_handlers[mcp_types.ListToolsRequest]( + mcp_types.ListToolsRequest(method="tools/list") + ) + await server.request_handlers[mcp_types.CallToolRequest]( + _call_request("echo", {"msg": "hi", "context": "x"}) + ) + await _flush() + + # event_properties must land on every auto-captured event type, not just $mcp_tool_call + for name in ("$mcp_tools_list", "$mcp_initialize", "$mcp_tool_call"): + evs = _events(client, name) + assert evs, f"no {name} event captured" + assert evs[0]["properties"].get("tenant") == "acme", ( + f"{name} missing event_properties" + ) + + +# --- PostHogMCP -------------------------------------------------------------- + + +def test_posthog_mcp_prepare_tool_list_report_missing(): + client = PostHogMCP("phc_test", host="https://us.i.posthog.com") + tools = [{"name": "search", "inputSchema": {"type": "object", "properties": {}}}] + prepared = client.prepare_tool_list(tools, report_missing=True) + assert any(t["name"] == "get_more_tools" for t in prepared) + + +def 
test_get_more_tools_result_shape(): + result = get_more_tools_result() + assert result["content"][0]["type"] == "text" + assert "noted your feedback" in result["content"][0]["text"] diff --git a/posthog/test/mcp/test_lowlevel.py b/posthog/test/mcp/test_lowlevel.py new file mode 100644 index 00000000..e9b09c72 --- /dev/null +++ b/posthog/test/mcp/test_lowlevel.py @@ -0,0 +1,158 @@ +"""End-to-end tests for the low-level mcp.server.Server adapter (Milestone 3).""" + +import asyncio + +import mcp.types as mcp_types +from mcp.server.lowlevel import Server + +from posthog.mcp import instrument + + +class FakeClient: + def __init__(self): + self.events = [] + + def capture( + self, + event, + distinct_id=None, + properties=None, + timestamp=None, + uuid=None, + **kwargs, + ): + self.events.append( + {"event": event, "distinct_id": distinct_id, "properties": properties or {}} + ) + return None + + +def make_server(): + server = Server("test-lowlevel") + + @server.list_tools() + async def list_tools(): + return [ + mcp_types.Tool( + name="echo", + description="Echo back a message", + inputSchema={ + "type": "object", + "properties": {"msg": {"type": "string"}}, + "required": ["msg"], + }, + ) + ] + + @server.call_tool() + async def call_tool(name, arguments): + if name == "echo": + return [mcp_types.TextContent(type="text", text=str(arguments.get("msg")))] + raise ValueError("boom") + + return server + + +async def _flush(): + import posthog.mcp.instrumentation as instr + + for _ in range(10): + await asyncio.sleep(0) + pending = [t for t in list(instr._BACKGROUND_TASKS) if not t.done()] + if pending: + await asyncio.gather(*pending, return_exceptions=True) + await asyncio.sleep(0) + + +def _events(client, name): + return [e for e in client.events if e["event"] == name] + + +def _call_request(name, arguments): + return mcp_types.CallToolRequest( + method="tools/call", + params=mcp_types.CallToolRequestParams(name=name, arguments=arguments), + ) + + +async def 
test_list_tools_injects_optional_context_and_captures(): + server = make_server() + client = FakeClient() + instrument(server, client) + + handler = server.request_handlers[mcp_types.ListToolsRequest] + result = await handler(mcp_types.ListToolsRequest(method="tools/list")) + await _flush() + + tool = result.root.tools[0] + assert "context" in tool.inputSchema["properties"] + # context is OPTIONAL on the low-level path (schema is also the validation schema) + assert "context" not in tool.inputSchema.get("required", []) + + listed = _events(client, "$mcp_tools_list") + assert listed and listed[0]["properties"]["$mcp_listed_tool_names"] == ["echo"] + + +async def test_tool_call_success_captures_intent(): + server = make_server() + client = FakeClient() + instrument(server, client) + + # populate the tool cache first + await server.request_handlers[mcp_types.ListToolsRequest]( + mcp_types.ListToolsRequest(method="tools/list") + ) + + handler = server.request_handlers[mcp_types.CallToolRequest] + result = await handler( + _call_request( + "echo", {"msg": "hi", "context": "echoing a message for the test"} + ) + ) + await _flush() + + assert result.root.isError is False + calls = _events(client, "$mcp_tool_call") + assert len(calls) == 1 + props = calls[0]["properties"] + assert props["$mcp_tool_name"] == "echo" + assert props["$mcp_intent"] == "echoing a message for the test" + assert props["$mcp_is_error"] is False + # context is stripped from captured parameters + assert "context" not in props["$mcp_parameters"]["request"]["params"]["arguments"] + + +async def test_tool_call_error_captured_from_is_error_result(): + server = make_server() + client = FakeClient() + instrument(server, client) + + handler = server.request_handlers[mcp_types.CallToolRequest] + # "fail" is unlisted -> no validation -> handler raises -> isError result + result = await handler( + _call_request("fail", {"context": "trying a tool that errors"}) + ) + await _flush() + + assert 
result.root.isError is True + calls = _events(client, "$mcp_tool_call") + assert calls and calls[0]["properties"]["$mcp_is_error"] is True + exceptions = _events(client, "$exception") + assert ( + exceptions + and "boom" in exceptions[0]["properties"]["$exception_list"][0]["value"] + ) + + +async def test_initialize_emitted_once(): + server = make_server() + client = FakeClient() + instrument(server, client) + + handler = server.request_handlers[mcp_types.CallToolRequest] + await handler(_call_request("echo", {"msg": "a", "context": "first call"})) + await handler(_call_request("echo", {"msg": "b", "context": "second call"})) + await _flush() + + assert len(_events(client, "$mcp_initialize")) == 1 + assert len(_events(client, "$mcp_tool_call")) == 2 diff --git a/posthog/test/mcp/test_pipeline.py b/posthog/test/mcp/test_pipeline.py new file mode 100644 index 00000000..3ca52058 --- /dev/null +++ b/posthog/test/mcp/test_pipeline.py @@ -0,0 +1,356 @@ +"""Unit tests for the MCP analytics core pipeline (Milestone 1, no server).""" + +from datetime import datetime, timezone + +from posthog.mcp.constants import ( + POSTHOG_MCP_ANALYTICS_SOURCE, + PostHogMCPAnalyticsEvent, + PostHogMCPAnalyticsProperty, +) +from posthog.mcp.event_types import MCPAnalyticsEventType +from posthog.mcp.exceptions import capture_exception +from posthog.mcp.ids import deterministic_prefixed_id, new_prefixed_id +from posthog.mcp.posthog_events import build_posthog_capture_events +from posthog.mcp.sanitization import ( + build_captured_mcp_parameters, + sanitize_captured_value, + sanitize_event, +) +from posthog.mcp.sink import McpCaptureOptions, process_mcp_event +from posthog.mcp.truncation import MAX_EVENT_BYTES, normalize, truncate_event + +# --- ids --------------------------------------------------------------------- + + +def test_new_prefixed_id_shape(): + sid = new_prefixed_id("ses") + assert sid.startswith("ses_") + # uuid7 string form: 8-4-4-4-12 + uuid_part = sid[len("ses_") :] + assert 
len(uuid_part.split("-")) == 5 + assert uuid_part[14] == "7" # version nibble + + +def test_new_prefixed_id_unique_and_time_ordered(): + import time + + first = [new_prefixed_id("evt") for _ in range(25)] + time.sleep(0.005) + second = [new_prefixed_id("evt") for _ in range(25)] + assert len(set(first + second)) == 50 # unique + # uuidv7 is time-ordered across milliseconds: every id minted later sorts after earlier ones + assert max(first) < min(second) + + +def test_deterministic_prefixed_id_is_stable(): + a = deterministic_prefixed_id("ses", "mcp-session-123") + b = deterministic_prefixed_id("ses", "mcp-session-123") + c = deterministic_prefixed_id("ses", "other") + assert a == b + assert a != c + assert a.startswith("ses_") + assert len(a[len("ses_") :]) == 32 # two 16-char fnv1a halves + + +# --- sanitization ------------------------------------------------------------ + + +def test_sanitize_redacts_posthog_token(): + out = sanitize_captured_value("my key is phx_abcdefghijklmnopqrstuvwxyz123 ok") + assert "phx_" not in out + assert "[redacted]" in out + + +def test_sanitize_redacts_sensitive_keys(): + out = sanitize_captured_value( + {"authorization": "Bearer x", "api_key": "k", "safe": "keep"} + ) + assert out["authorization"] == "[redacted]" + assert out["api_key"] == "[redacted]" + assert out["safe"] == "keep" + + +def test_sanitize_redacts_large_base64(): + blob = "A" * 11000 + assert sanitize_captured_value(blob).startswith("[binary data redacted") + + +def test_sanitize_event_replaces_image_and_audio_blocks(): + event = { + "response": { + "content": [ + {"type": "text", "text": "hello"}, + {"type": "image", "data": "base64...", "mimeType": "image/png"}, + {"type": "audio", "data": "base64...", "mimeType": "audio/wav"}, + ] + } + } + out = sanitize_event(event) + blocks = out["response"]["content"] + assert blocks[0] == {"type": "text", "text": "hello"} + assert blocks[1]["type"] == "text" and "image content redacted" in blocks[1]["text"] + assert 
blocks[2]["type"] == "text" and "audio content redacted" in blocks[2]["text"] + + +def test_sanitize_event_redacts_blob_resource(): + event = { + "response": { + "content": [{"type": "resource", "resource": {"blob": "AAAA", "uri": "x"}}] + } + } + out = sanitize_event(event) + assert "binary resource content redacted" in out["response"]["content"][0]["text"] + + +def test_sanitize_does_not_mutate_input(): + event = {"parameters": {"token": "phx_aaaaaaaaaaaaaaaaaaaaaaaa"}} + sanitize_event(event) + assert event["parameters"]["token"] == "phx_aaaaaaaaaaaaaaaaaaaaaaaa" + + +# --- truncation -------------------------------------------------------------- + + +def test_normalize_caps_long_strings(): + out = normalize("x" * 40000) + assert out.endswith("...") + assert len(out) == 32_768 + 3 + + +def test_normalize_detects_cycles(): + a = {} + a["self"] = a + out = normalize(a) + assert out["self"] == "[Circular ~]" + + +def test_normalize_limits_depth(): + deep = {"a": {"b": {"c": {"d": {"e": {"f": "deep"}}}}}} + out = normalize(deep, depth=2) + # at depth 2 the nested object should be collapsed to a marker + assert out["a"]["b"] in ("[Object]", {"c": "[Object]"}) or isinstance( + out["a"]["b"], (dict, str) + ) + + +def test_normalize_handles_nan_and_infinity(): + assert normalize(float("nan")) == "[NaN]" + assert normalize(float("inf")) == "[Infinity]" + assert normalize(float("-inf")) == "[-Infinity]" + + +def test_truncate_event_enforces_byte_budget(): + event = { + "event_type": MCPAnalyticsEventType.MCP_TOOLS_CALL, + "session_id": "ses_x", + "timestamp": datetime.now(timezone.utc), + "parameters": {"big": ["y" * 5000 for _ in range(60)]}, + } + out = truncate_event(event) + import json + + size = len(json.dumps(out, default=str, separators=(",", ":")).encode("utf-8")) + assert size <= MAX_EVENT_BYTES + + +# --- posthog_events ---------------------------------------------------------- + + +def test_build_tool_call_event_properties(): + event = { + "event_type": 
MCPAnalyticsEventType.MCP_TOOLS_CALL, + "session_id": "ses_abc", + "resource_name": "search_events", + "tool_description": "Search events", + "tool_category": "Logs", + "duration": 12.5, + "user_intent": "find churn cohort", + "user_intent_source": "context_parameter", + "is_error": False, + "timestamp": datetime.now(timezone.utc), + } + [capture] = build_posthog_capture_events(event) + props = capture["properties"] + assert capture["event"] == PostHogMCPAnalyticsEvent.TOOL_CALL + assert capture["distinct_id"] == "ses_abc" + assert props[PostHogMCPAnalyticsProperty.SOURCE] == POSTHOG_MCP_ANALYTICS_SOURCE + assert props[PostHogMCPAnalyticsProperty.TOOL_NAME] == "search_events" + assert props[PostHogMCPAnalyticsProperty.TOOL_CATEGORY] == "Logs" + assert props[PostHogMCPAnalyticsProperty.INTENT] == "find churn cohort" + assert props[PostHogMCPAnalyticsProperty.INTENT_SOURCE] == "context_parameter" + assert props[PostHogMCPAnalyticsProperty.SESSION_ID] == "ses_abc" + # anonymous (no identity) => person processing disabled + assert props["$process_person_profile"] is False + + +def test_identity_enables_person_processing_and_set(): + event = { + "event_type": MCPAnalyticsEventType.MCP_TOOLS_CALL, + "session_id": "ses_abc", + "identify_actor_given_id": "user_1", + "identify_actor_data": {"email": "a@b.com"}, + "groups": {"organization": "org_1"}, + "timestamp": datetime.now(timezone.utc), + } + [capture] = build_posthog_capture_events(event) + props = capture["properties"] + assert capture["distinct_id"] == "user_1" + assert "$process_person_profile" not in props + assert props["$set"] == {"email": "a@b.com"} + assert props["$groups"] == {"organization": "org_1"} + + +def test_listed_tool_names_only_on_tools_list(): + event = { + "event_type": MCPAnalyticsEventType.MCP_TOOLS_LIST, + "session_id": "ses_abc", + "listed_tool_names": ["a", "b"], + "timestamp": datetime.now(timezone.utc), + } + [capture] = build_posthog_capture_events(event) + assert capture["event"] == 
PostHogMCPAnalyticsEvent.TOOLS_LIST + assert capture["properties"][PostHogMCPAnalyticsProperty.LISTED_TOOL_NAMES] == [ + "a", + "b", + ] + + +def test_custom_event_name_is_verbatim(): + event = { + "event_type": MCPAnalyticsEventType.CUSTOM, + "event_name": "feedback_submitted", + "session_id": "ses_abc", + "properties": {"rating": 5}, + "timestamp": datetime.now(timezone.utc), + } + [capture] = build_posthog_capture_events(event) + assert capture["event"] == "feedback_submitted" + assert capture["properties"]["rating"] == 5 + + +def test_exception_fan_out(): + event = { + "event_type": MCPAnalyticsEventType.MCP_TOOLS_CALL, + "session_id": "ses_abc", + "resource_name": "broken_tool", + "is_error": True, + "error": capture_exception(ValueError("boom")), + "timestamp": datetime.now(timezone.utc), + } + captures = build_posthog_capture_events(event) + assert len(captures) == 2 + main, exc = captures + assert main["event"] == PostHogMCPAnalyticsEvent.TOOL_CALL + assert exc["event"] == PostHogMCPAnalyticsEvent.EXCEPTION + assert exc["properties"]["$exception_list"][0]["value"] == "boom" + assert exc["properties"][PostHogMCPAnalyticsProperty.TOOL_NAME] == "broken_tool" + + +def test_exception_fan_out_disabled(): + event = { + "event_type": MCPAnalyticsEventType.MCP_TOOLS_CALL, + "session_id": "ses_abc", + "is_error": True, + "error": capture_exception("boom"), + "timestamp": datetime.now(timezone.utc), + } + captures = build_posthog_capture_events(event, enable_exception_autocapture=False) + assert len(captures) == 1 + + +# --- exceptions -------------------------------------------------------------- + + +def test_capture_exception_from_exception_has_stacktrace(): + try: + raise RuntimeError("kaboom") + except RuntimeError as e: + props = capture_exception(e) + assert props["$exception_level"] == "error" + entry = props["$exception_list"][0] + assert entry["value"] == "kaboom" + assert entry["type"] == "RuntimeError" + assert "stacktrace" in entry + + +def 
test_capture_exception_from_call_tool_result_dict(): + result = { + "isError": True, + "content": [{"type": "text", "text": "tool failed badly"}], + } + props = capture_exception(result) + assert props["$exception_list"][0]["value"] == "tool failed badly" + + +def test_capture_exception_from_string(): + props = capture_exception("plain message") + assert props["$exception_list"][0]["value"] == "plain message" + assert props["$exception_list"][0]["type"] == "Error" + + +# --- process_mcp_event (full pipeline) --------------------------------------- + + +def test_build_captured_mcp_parameters_strips_context(): + request = { + "method": "tools/call", + "params": {"name": "search", "arguments": {"q": "x", "context": "intent text"}}, + } + captured = build_captured_mcp_parameters(request) + args = captured["request"]["params"]["arguments"] + assert ( + "context" not in args + ) # the injected analytics param never lands in $mcp_parameters + assert args["q"] == "x" + assert captured["request"]["method"] == "tools/call" + + +async def test_process_mcp_event_basic(): + event = { + "event_type": MCPAnalyticsEventType.MCP_TOOLS_CALL, + "session_id": "ses_abc", + "resource_name": "t", + "parameters": build_captured_mcp_parameters( + {"params": {"arguments": {"q": "x", "context": "intent text"}}} + ), + "timestamp": datetime.now(timezone.utc), + } + result = await process_mcp_event(event, McpCaptureOptions()) + assert result is not None + full_event, captures = result + assert full_event["id"].startswith("evt_") + assert len(captures) == 1 + args = captures[0]["properties"][PostHogMCPAnalyticsProperty.PARAMETERS]["request"][ + "params" + ]["arguments"] + assert "context" not in args + assert args["q"] == "x" + + +async def test_before_send_can_drop_event(): + event = { + "event_type": MCPAnalyticsEventType.MCP_TOOLS_CALL, + "session_id": "ses_abc", + "timestamp": datetime.now(timezone.utc), + } + result = await process_mcp_event( + event, McpCaptureOptions(before_send=lambda 
e: None) + ) + assert result is not None + _, captures = result + assert captures == [] + + +async def test_before_send_can_mutate_event_async(): + async def before_send(e): + e["properties"]["added"] = True + return e + + event = { + "event_type": MCPAnalyticsEventType.MCP_TOOLS_CALL, + "session_id": "ses_abc", + "timestamp": datetime.now(timezone.utc), + } + result = await process_mcp_event(event, McpCaptureOptions(before_send=before_send)) + _, captures = result + assert captures[0]["properties"]["added"] is True diff --git a/posthog/test/mcp/test_posthog_mcp.py b/posthog/test/mcp/test_posthog_mcp.py new file mode 100644 index 00000000..ac777c3f --- /dev/null +++ b/posthog/test/mcp/test_posthog_mcp.py @@ -0,0 +1,128 @@ +"""Tests for the PostHogMCP custom-dispatcher client (Milestone 3).""" + +import asyncio + +from posthog.mcp import PostHogMCP + + +def make_client(): + client = PostHogMCP("phc_test", host="https://us.i.posthog.com") + captured = [] + # Intercept the inherited Client.capture so nothing is sent over the network. 
+ client.capture = lambda event, **kwargs: captured.append({"event": event, **kwargs}) + return client, captured + + +async def _flush(): + import posthog.mcp.instrumentation as instr + + for _ in range(10): + await asyncio.sleep(0) + pending = [t for t in list(instr._BACKGROUND_TASKS) if not t.done()] + if pending: + await asyncio.gather(*pending, return_exceptions=True) + await asyncio.sleep(0) + + +def _events(captured, name): + return [e for e in captured if e["event"] == name] + + +async def test_capture_tool_call_success(): + client, captured = make_client() + client.capture_tool_call( + "search_docs", + intent="finding the install guide", + intent_source="context_parameter", + duration_ms=42, + distinct_id="user_1", + groups={"organization": "org_1"}, + ) + await _flush() + + calls = _events(captured, "$mcp_tool_call") + assert len(calls) == 1 + props = calls[0]["properties"] + assert props["$mcp_tool_name"] == "search_docs" + assert props["$mcp_intent"] == "finding the install guide" + assert props["$mcp_is_error"] is False + assert props["$mcp_duration_ms"] == 42 + assert props["$groups"] == {"organization": "org_1"} + assert calls[0]["distinct_id"] == "user_1" + + +async def test_capture_tool_call_error_fans_out_exception(): + client, captured = make_client() + client.capture_tool_call( + "broken", is_error=True, error=RuntimeError("kaboom"), distinct_id="u" + ) + await _flush() + + assert _events(captured, "$mcp_tool_call")[0]["properties"]["$mcp_is_error"] is True + exc = _events(captured, "$exception") + assert exc and exc[0]["properties"]["$exception_list"][0]["value"] == "kaboom" + + +async def test_capture_initialize_and_tools_list(): + client, captured = make_client() + client.capture_initialize( + client_name="claude-code", client_version="1.2.3", distinct_id="u" + ) + client.capture_tools_list(tool_names=["a", "b"], distinct_id="u") + await _flush() + + init = _events(captured, "$mcp_initialize") + assert init and 
init[0]["properties"]["$mcp_client_name"] == "claude-code" + listed = _events(captured, "$mcp_tools_list") + assert listed and listed[0]["properties"]["$mcp_listed_tool_names"] == ["a", "b"] + + +async def test_capture_missing_capability(): + client, captured = make_client() + client.capture_missing_capability( + context="wanted a tool to export to CSV", distinct_id="u" + ) + await _flush() + + missing = _events(captured, "$mcp_missing_capability") + assert ( + missing + and missing[0]["properties"]["$mcp_intent"] == "wanted a tool to export to CSV" + ) + + +def test_prepare_tool_call_extracts_intent_and_strips_context(): + client, _ = make_client() + prepared = client.prepare_tool_call( + "search", {"q": "x", "context": "looking up the answer"} + ) + assert prepared.intent == "looking up the answer" + assert prepared.intent_source == "context_parameter" + assert prepared.args == {"q": "x"} + assert prepared.is_missing_capability is False + + prepared_missing = client.prepare_tool_call( + "get_more_tools", {"context": "need something else"} + ) + assert prepared_missing.is_missing_capability is True + + +def test_prepare_tool_list_injects_context_into_dicts(): + client, _ = make_client() + tools = [ + { + "name": "search", + "inputSchema": {"type": "object", "properties": {"q": {"type": "string"}}}, + } + ] + prepared = client.prepare_tool_list(tools) + assert "context" in prepared[0]["inputSchema"]["properties"] + # original tool dict is untouched + assert "context" not in tools[0]["inputSchema"]["properties"] + + +def test_prepare_tool_list_can_be_disabled(): + client, _ = make_client() + tools = [{"name": "search", "inputSchema": {"type": "object", "properties": {}}}] + prepared = client.prepare_tool_list(tools, context=False) + assert "context" not in prepared[0]["inputSchema"]["properties"] diff --git a/pyproject.toml b/pyproject.toml index c3359753..afa3ffd3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ Repository = 
"https://github.com/posthog/posthog-python" [project.optional-dependencies] langchain = ["langchain>=0.2.0"] +mcp = ["mcp>=1.26.0"] otel = [ "opentelemetry-sdk>=1.20.0", "opentelemetry-exporter-otlp-proto-http>=1.20.0", @@ -80,6 +81,8 @@ test = [ "pydantic>=2.12.0", "parameterized>=0.8.1", "claude-agent-sdk", + "mcp>=1.26.0", + "fastmcp>=2.0", "opentelemetry-sdk>=1.20.0", "opentelemetry-exporter-otlp-proto-http>=1.20.0", "pytest-bdd>=8.1.0", @@ -96,11 +99,13 @@ packages = [ "posthog.ai.gemini", "posthog.ai.claude_agent_sdk", "posthog.ai.otel", + "posthog.mcp", "posthog.test", "posthog.test.ai", "posthog.test.ai.openai_agents", "posthog.test.ai.claude_agent_sdk", "posthog.test.ai.otel", + "posthog.test.mcp", "posthog.integrations", ] diff --git a/references/public_api_snapshot.txt b/references/public_api_snapshot.txt index d5512f32..f18667c7 100644 --- a/references/public_api_snapshot.txt +++ b/references/public_api_snapshot.txt @@ -309,6 +309,124 @@ alias posthog.inner_set_context_session -> posthog.contexts.set_context_session alias posthog.inner_tag -> posthog.contexts.tag alias posthog.integrations.django.Client -> posthog.client.Client alias posthog.integrations.django.contexts -> posthog.contexts +alias posthog.mcp.CaptureEventData -> posthog.mcp.types.CaptureEventData +alias posthog.mcp.MCPAnalyticsContextOptions -> posthog.mcp.types.MCPAnalyticsContextOptions +alias posthog.mcp.MCPAnalyticsOptions -> posthog.mcp.types.MCPAnalyticsOptions +alias posthog.mcp.POSTHOG_MCP_ANALYTICS_SOURCE -> posthog.mcp.constants.POSTHOG_MCP_ANALYTICS_SOURCE +alias posthog.mcp.PostHogMCP -> posthog.mcp.posthog_mcp.PostHogMCP +alias posthog.mcp.PostHogMCPAnalyticsEvent -> posthog.mcp.constants.PostHogMCPAnalyticsEvent +alias posthog.mcp.PostHogMCPAnalyticsProperty -> posthog.mcp.constants.PostHogMCPAnalyticsProperty +alias posthog.mcp.PreparedToolCall -> posthog.mcp.types.PreparedToolCall +alias posthog.mcp.UserIdentity -> posthog.mcp.types.UserIdentity +alias 
posthog.mcp.__version__ -> posthog.mcp.version.__version__ +alias posthog.mcp.capture.MCPAnalyticsData -> posthog.mcp.internal.MCPAnalyticsData +alias posthog.mcp.capture.MCPAnalyticsEventType -> posthog.mcp.event_types.MCPAnalyticsEventType +alias posthog.mcp.capture.McpCaptureOptions -> posthog.mcp.sink.McpCaptureOptions +alias posthog.mcp.capture.__version__ -> posthog.mcp.version.__version__ +alias posthog.mcp.capture.log -> posthog.mcp.logger.log +alias posthog.mcp.context_parameters.DEFAULT_CONTEXT_PARAMETER_DESCRIPTION -> posthog.mcp.constants.DEFAULT_CONTEXT_PARAMETER_DESCRIPTION +alias posthog.mcp.context_parameters.MCPAnalyticsContextOptions -> posthog.mcp.types.MCPAnalyticsContextOptions +alias posthog.mcp.context_parameters.log -> posthog.mcp.logger.log +alias posthog.mcp.conversation_id.DEFAULT_CONVERSATION_ID_DESCRIPTION -> posthog.mcp.constants.DEFAULT_CONVERSATION_ID_DESCRIPTION +alias posthog.mcp.conversation_id.log -> posthog.mcp.logger.log +alias posthog.mcp.derive_session_id_from_mcp_session -> posthog.mcp.session.derive_session_id_from_mcp_session +alias posthog.mcp.exceptions.ErrorProperties -> posthog.mcp.types.ErrorProperties +alias posthog.mcp.exceptions.exceptions_from_error_tuple -> posthog.exception_utils.exceptions_from_error_tuple +alias posthog.mcp.get_more_tools_result -> posthog.mcp.tools.get_more_tools_result +alias posthog.mcp.instrument_fastmcp.MCPAnalyticsData -> posthog.mcp.internal.MCPAnalyticsData +alias posthog.mcp.instrument_fastmcp.add_context_parameter_to_schema -> posthog.mcp.context_parameters.add_context_parameter_to_schema +alias posthog.mcp.instrument_fastmcp.add_conversation_id_to_schema -> posthog.mcp.conversation_id.add_conversation_id_to_schema +alias posthog.mcp.instrument_fastmcp.append_get_more_tools -> posthog.mcp.instrumentation.append_get_more_tools +alias posthog.mcp.instrument_fastmcp.build_prompt_back -> posthog.mcp.conversation_id.build_prompt_back +alias 
posthog.mcp.instrument_fastmcp.build_tool_call_request -> posthog.mcp.instrumentation.build_tool_call_request +alias posthog.mcp.instrument_fastmcp.extract_tools -> posthog.mcp.instrumentation.extract_tools +alias posthog.mcp.instrument_fastmcp.get_context_description -> posthog.mcp.context_parameters.get_context_description +alias posthog.mcp.instrument_fastmcp.get_more_tools_result_text -> posthog.mcp.tools.get_more_tools_result_text +alias posthog.mcp.instrument_fastmcp.is_context_enabled -> posthog.mcp.context_parameters.is_context_enabled +alias posthog.mcp.instrument_fastmcp.log -> posthog.mcp.logger.log +alias posthog.mcp.instrument_fastmcp.prepare_request -> posthog.mcp.instrumentation.prepare_request +alias posthog.mcp.instrument_fastmcp.read_tool_category -> posthog.mcp.instrumentation.read_tool_category +alias posthog.mcp.instrument_fastmcp.record_missing_capability -> posthog.mcp.instrumentation.record_missing_capability +alias posthog.mcp.instrument_fastmcp.record_tool_call -> posthog.mcp.instrumentation.record_tool_call +alias posthog.mcp.instrument_fastmcp.record_tools_list -> posthog.mcp.instrumentation.record_tools_list +alias posthog.mcp.instrument_fastmcp.request_to_dict -> posthog.mcp.instrumentation.request_to_dict +alias posthog.mcp.instrument_fastmcp.resolve_conversation_id -> posthog.mcp.conversation_id.resolve_conversation_id +alias posthog.mcp.instrument_fastmcp.resolve_missing_capability_tool_name -> posthog.mcp.tools.resolve_missing_capability_tool_name +alias posthog.mcp.instrument_fastmcp.resolve_session_id -> posthog.mcp.session.resolve_session_id +alias posthog.mcp.instrument_lowlevel.MCPAnalyticsData -> posthog.mcp.internal.MCPAnalyticsData +alias posthog.mcp.instrument_lowlevel.add_context_parameter_to_schema -> posthog.mcp.context_parameters.add_context_parameter_to_schema +alias posthog.mcp.instrument_lowlevel.add_conversation_id_to_schema -> posthog.mcp.conversation_id.add_conversation_id_to_schema +alias 
posthog.mcp.instrument_lowlevel.append_get_more_tools -> posthog.mcp.instrumentation.append_get_more_tools +alias posthog.mcp.instrument_lowlevel.build_prompt_back -> posthog.mcp.conversation_id.build_prompt_back +alias posthog.mcp.instrument_lowlevel.build_tool_call_request -> posthog.mcp.instrumentation.build_tool_call_request +alias posthog.mcp.instrument_lowlevel.extract_tools -> posthog.mcp.instrumentation.extract_tools +alias posthog.mcp.instrument_lowlevel.get_context_description -> posthog.mcp.context_parameters.get_context_description +alias posthog.mcp.instrument_lowlevel.get_more_tools_result_text -> posthog.mcp.tools.get_more_tools_result_text +alias posthog.mcp.instrument_lowlevel.is_context_enabled -> posthog.mcp.context_parameters.is_context_enabled +alias posthog.mcp.instrument_lowlevel.log -> posthog.mcp.logger.log +alias posthog.mcp.instrument_lowlevel.prepare_request -> posthog.mcp.instrumentation.prepare_request +alias posthog.mcp.instrument_lowlevel.read_tool_category -> posthog.mcp.instrumentation.read_tool_category +alias posthog.mcp.instrument_lowlevel.record_missing_capability -> posthog.mcp.instrumentation.record_missing_capability +alias posthog.mcp.instrument_lowlevel.record_tool_call -> posthog.mcp.instrumentation.record_tool_call +alias posthog.mcp.instrument_lowlevel.record_tools_list -> posthog.mcp.instrumentation.record_tools_list +alias posthog.mcp.instrument_lowlevel.request_to_dict -> posthog.mcp.instrumentation.request_to_dict +alias posthog.mcp.instrument_lowlevel.resolve_conversation_id -> posthog.mcp.conversation_id.resolve_conversation_id +alias posthog.mcp.instrument_lowlevel.resolve_missing_capability_tool_name -> posthog.mcp.tools.resolve_missing_capability_tool_name +alias posthog.mcp.instrument_lowlevel.resolve_session_id -> posthog.mcp.session.resolve_session_id +alias posthog.mcp.instrumentation.MCPAnalyticsData -> posthog.mcp.internal.MCPAnalyticsData +alias posthog.mcp.instrumentation.MCPAnalyticsEventType -> 
posthog.mcp.event_types.MCPAnalyticsEventType +alias posthog.mcp.instrumentation.build_captured_mcp_parameters -> posthog.mcp.sanitization.build_captured_mcp_parameters +alias posthog.mcp.instrumentation.capture_event -> posthog.mcp.capture.capture_event +alias posthog.mcp.instrumentation.capture_exception -> posthog.mcp.exceptions.capture_exception +alias posthog.mcp.instrumentation.handle_identify -> posthog.mcp.internal.handle_identify +alias posthog.mcp.instrumentation.log -> posthog.mcp.logger.log +alias posthog.mcp.instrumentation.resolve_event_properties -> posthog.mcp.internal.resolve_event_properties +alias posthog.mcp.instrumentation.resolve_session_id -> posthog.mcp.session.resolve_session_id +alias posthog.mcp.instrumentation.resolve_tool_call_intent -> posthog.mcp.intent.resolve_tool_call_intent +alias posthog.mcp.instrumentation.set_event_intent -> posthog.mcp.intent.set_event_intent +alias posthog.mcp.intent.MCPAnalyticsData -> posthog.mcp.internal.MCPAnalyticsData +alias posthog.mcp.intent.is_context_enabled -> posthog.mcp.context_parameters.is_context_enabled +alias posthog.mcp.intent.log -> posthog.mcp.logger.log +alias posthog.mcp.internal.MCPAnalyticsOptions -> posthog.mcp.types.MCPAnalyticsOptions +alias posthog.mcp.internal.McpEventSink -> posthog.mcp.sink.McpEventSink +alias posthog.mcp.internal.UserIdentity -> posthog.mcp.types.UserIdentity +alias posthog.mcp.internal.log -> posthog.mcp.logger.log +alias posthog.mcp.posthog_events.Event -> posthog.mcp.types.Event +alias posthog.mcp.posthog_events.MCPAnalyticsEventType -> posthog.mcp.event_types.MCPAnalyticsEventType +alias posthog.mcp.posthog_events.POSTHOG_MCP_ANALYTICS_SOURCE -> posthog.mcp.constants.POSTHOG_MCP_ANALYTICS_SOURCE +alias posthog.mcp.posthog_events.PostHogCaptureEvent -> posthog.mcp.types.PostHogCaptureEvent +alias posthog.mcp.posthog_events.PostHogMCPAnalyticsEvent -> posthog.mcp.constants.PostHogMCPAnalyticsEvent +alias posthog.mcp.posthog_events.PostHogMCPAnalyticsProperty 
-> posthog.mcp.constants.PostHogMCPAnalyticsProperty +alias posthog.mcp.posthog_mcp.Client -> posthog.client.Client +alias posthog.mcp.posthog_mcp.JsonRecord -> posthog.mcp.types.JsonRecord +alias posthog.mcp.posthog_mcp.MCPAnalyticsContextOptions -> posthog.mcp.types.MCPAnalyticsContextOptions +alias posthog.mcp.posthog_mcp.MCPAnalyticsEventType -> posthog.mcp.event_types.MCPAnalyticsEventType +alias posthog.mcp.posthog_mcp.McpCaptureOptions -> posthog.mcp.sink.McpCaptureOptions +alias posthog.mcp.posthog_mcp.McpEventSink -> posthog.mcp.sink.McpEventSink +alias posthog.mcp.posthog_mcp.PreparedToolCall -> posthog.mcp.types.PreparedToolCall +alias posthog.mcp.posthog_mcp.add_context_parameter_to_schema -> posthog.mcp.context_parameters.add_context_parameter_to_schema +alias posthog.mcp.posthog_mcp.build_report_missing_descriptor -> posthog.mcp.tools.build_report_missing_descriptor +alias posthog.mcp.posthog_mcp.capture_exception -> posthog.mcp.exceptions.capture_exception +alias posthog.mcp.posthog_mcp.fire_and_forget -> posthog.mcp.instrumentation.fire_and_forget +alias posthog.mcp.posthog_mcp.get_context_description -> posthog.mcp.context_parameters.get_context_description +alias posthog.mcp.posthog_mcp.is_context_enabled -> posthog.mcp.context_parameters.is_context_enabled +alias posthog.mcp.session.INACTIVITY_TIMEOUT_IN_MINUTES -> posthog.mcp.constants.INACTIVITY_TIMEOUT_IN_MINUTES +alias posthog.mcp.session.MCPAnalyticsData -> posthog.mcp.internal.MCPAnalyticsData +alias posthog.mcp.session.deterministic_prefixed_id -> posthog.mcp.ids.deterministic_prefixed_id +alias posthog.mcp.session.new_prefixed_id -> posthog.mcp.ids.new_prefixed_id +alias posthog.mcp.set_logger -> posthog.mcp.logger.set_logger +alias posthog.mcp.sink.BeforeSendFn -> posthog.mcp.types.BeforeSendFn +alias posthog.mcp.sink.Client -> posthog.client.Client +alias posthog.mcp.sink.Event -> posthog.mcp.types.Event +alias posthog.mcp.sink.McpEvent -> posthog.mcp.types.McpEvent +alias 
posthog.mcp.sink.PostHogCaptureEvent -> posthog.mcp.posthog_events.PostHogCaptureEvent +alias posthog.mcp.sink.build_posthog_capture_events -> posthog.mcp.posthog_events.build_posthog_capture_events +alias posthog.mcp.sink.log -> posthog.mcp.logger.log +alias posthog.mcp.sink.new_prefixed_id -> posthog.mcp.ids.new_prefixed_id +alias posthog.mcp.sink.sanitize_event -> posthog.mcp.sanitization.sanitize_event +alias posthog.mcp.sink.truncate_event -> posthog.mcp.truncation.truncate_event +alias posthog.mcp.tools.log -> posthog.mcp.logger.log +alias posthog.mcp.types.LoggerFn -> posthog.mcp.logger.LoggerFn alias posthog.request.VERSION -> posthog.version.VERSION alias posthog.request.remove_trailing_slash -> posthog.utils.remove_trailing_slash alias posthog.set_socket_options -> posthog.request.set_socket_options @@ -603,6 +721,117 @@ attribute posthog.integrations.django.PosthogContextMiddleware.sync_capable = Tr attribute posthog.integrations.django.PosthogContextMiddleware.tag_map = cast('Optional[Callable[[Dict[str, Any]], Dict[str, Any]]]', settings.POSTHOG_MW_TAG_MAP) attribute posthog.is_server = True attribute posthog.log_captured_exceptions = False +attribute posthog.mcp.constants.DEFAULT_CONTEXT_PARAMETER_DESCRIPTION = 'Explain why you are calling this tool and how it fits into the user\'s overall goal. This parameter is used for analytics and user intent tracking. YOU MUST provide 15-25 words (count carefully). NEVER use first person (\'I\', \'we\', \'you\') - maintain third-person perspective. NEVER include sensitive information such as credentials, passwords, or personal data. Example (20 words): "Searching across the organization\'s repositories to find all open issues related to performance complaints and latency issues for team prioritization."' +attribute posthog.mcp.constants.DEFAULT_CONVERSATION_ID_DESCRIPTION = "Echo the conversation_id from the server's previous response. 
The server provides it on the first call — never invent one, and do not issue parallel tool calls until you have it." +attribute posthog.mcp.constants.INACTIVITY_TIMEOUT_IN_MINUTES = 30 +attribute posthog.mcp.constants.POSTHOG_MCP_ANALYTICS_SOURCE = 'posthog_mcp_analytics' +attribute posthog.mcp.constants.PostHogMCPAnalyticsEvent.CUSTOM = '$mcp_custom' +attribute posthog.mcp.constants.PostHogMCPAnalyticsEvent.EXCEPTION = '$exception' +attribute posthog.mcp.constants.PostHogMCPAnalyticsEvent.IDENTIFY = '$identify' +attribute posthog.mcp.constants.PostHogMCPAnalyticsEvent.INITIALIZE = '$mcp_initialize' +attribute posthog.mcp.constants.PostHogMCPAnalyticsEvent.MISSING_CAPABILITY = '$mcp_missing_capability' +attribute posthog.mcp.constants.PostHogMCPAnalyticsEvent.PROMPTS_LIST = '$mcp_prompts_list' +attribute posthog.mcp.constants.PostHogMCPAnalyticsEvent.PROMPT_GET = '$mcp_prompt_get' +attribute posthog.mcp.constants.PostHogMCPAnalyticsEvent.RESOURCES_LIST = '$mcp_resources_list' +attribute posthog.mcp.constants.PostHogMCPAnalyticsEvent.RESOURCE_READ = '$mcp_resource_read' +attribute posthog.mcp.constants.PostHogMCPAnalyticsEvent.TOOLS_LIST = '$mcp_tools_list' +attribute posthog.mcp.constants.PostHogMCPAnalyticsEvent.TOOL_CALL = '$mcp_tool_call' +attribute posthog.mcp.constants.PostHogMCPAnalyticsProperty.CLIENT_NAME = '$mcp_client_name' +attribute posthog.mcp.constants.PostHogMCPAnalyticsProperty.CLIENT_VERSION = '$mcp_client_version' +attribute posthog.mcp.constants.PostHogMCPAnalyticsProperty.CONVERSATION_ID = '$mcp_conversation_id' +attribute posthog.mcp.constants.PostHogMCPAnalyticsProperty.DURATION_MS = '$mcp_duration_ms' +attribute posthog.mcp.constants.PostHogMCPAnalyticsProperty.INTENT = '$mcp_intent' +attribute posthog.mcp.constants.PostHogMCPAnalyticsProperty.INTENT_SOURCE = '$mcp_intent_source' +attribute posthog.mcp.constants.PostHogMCPAnalyticsProperty.IS_ERROR = '$mcp_is_error' +attribute 
posthog.mcp.constants.PostHogMCPAnalyticsProperty.LISTED_TOOL_NAMES = '$mcp_listed_tool_names' +attribute posthog.mcp.constants.PostHogMCPAnalyticsProperty.PARAMETERS = '$mcp_parameters' +attribute posthog.mcp.constants.PostHogMCPAnalyticsProperty.RESOURCE_NAME = '$mcp_resource_name' +attribute posthog.mcp.constants.PostHogMCPAnalyticsProperty.RESPONSE = '$mcp_response' +attribute posthog.mcp.constants.PostHogMCPAnalyticsProperty.SERVER_NAME = '$mcp_server_name' +attribute posthog.mcp.constants.PostHogMCPAnalyticsProperty.SERVER_VERSION = '$mcp_server_version' +attribute posthog.mcp.constants.PostHogMCPAnalyticsProperty.SESSION_ID = '$session_id' +attribute posthog.mcp.constants.PostHogMCPAnalyticsProperty.SOURCE = '$mcp_source' +attribute posthog.mcp.constants.PostHogMCPAnalyticsProperty.TOOL_CATEGORY = '$mcp_tool_category' +attribute posthog.mcp.constants.PostHogMCPAnalyticsProperty.TOOL_DESCRIPTION = '$mcp_tool_description' +attribute posthog.mcp.constants.PostHogMCPAnalyticsProperty.TOOL_NAME = '$mcp_tool_name' +attribute posthog.mcp.conversation_id.CONVERSATION_ID_PARAM_NAME = 'conversation_id' +attribute posthog.mcp.event_types.MCPAnalyticsEventType.CUSTOM = 'posthog:custom' +attribute posthog.mcp.event_types.MCPAnalyticsEventType.IDENTIFY = 'posthog:identify' +attribute posthog.mcp.event_types.MCPAnalyticsEventType.MCP_INITIALIZE = 'mcp:initialize' +attribute posthog.mcp.event_types.MCPAnalyticsEventType.MCP_MISSING_CAPABILITY = 'mcp:missing_capability' +attribute posthog.mcp.event_types.MCPAnalyticsEventType.MCP_PROMPTS_GET = 'mcp:prompts/get' +attribute posthog.mcp.event_types.MCPAnalyticsEventType.MCP_PROMPTS_LIST = 'mcp:prompts/list' +attribute posthog.mcp.event_types.MCPAnalyticsEventType.MCP_RESOURCES_LIST = 'mcp:resources/list' +attribute posthog.mcp.event_types.MCPAnalyticsEventType.MCP_RESOURCES_READ = 'mcp:resources/read' +attribute posthog.mcp.event_types.MCPAnalyticsEventType.MCP_TOOLS_CALL = 'mcp:tools/call' +attribute 
posthog.mcp.event_types.MCPAnalyticsEventType.MCP_TOOLS_LIST = 'mcp:tools/list' +attribute posthog.mcp.ids.MCPAnalyticsIDPrefix = Literal['evt', 'ses'] +attribute posthog.mcp.intent.ResolvedIntent = Tuple[str, str] +attribute posthog.mcp.internal.MCPAnalyticsData.identified_sessions: IdentityCache = field(default_factory=IdentityCache) +attribute posthog.mcp.internal.MCPAnalyticsData.initialized_sessions: 'OrderedDict[str, None]' = field(default_factory=OrderedDict) +attribute posthog.mcp.internal.MCPAnalyticsData.last_activity: datetime = field(default_factory=(lambda: datetime.now(timezone.utc))) +attribute posthog.mcp.internal.MCPAnalyticsData.last_mcp_session_id: Optional[str] = None +attribute posthog.mcp.internal.MCPAnalyticsData.options: MCPAnalyticsOptions +attribute posthog.mcp.internal.MCPAnalyticsData.server_name: Optional[str] = None +attribute posthog.mcp.internal.MCPAnalyticsData.server_version: Optional[str] = None +attribute posthog.mcp.internal.MCPAnalyticsData.session_id: str = '' +attribute posthog.mcp.internal.MCPAnalyticsData.session_lock: asyncio.Lock = field(default_factory=(asyncio.Lock)) +attribute posthog.mcp.internal.MCPAnalyticsData.session_source: str = 'generated' +attribute posthog.mcp.internal.MCPAnalyticsData.sink: Optional[McpEventSink] = None +attribute posthog.mcp.internal.MCPAnalyticsData.tool_categories: Dict[str, str] = field(default_factory=dict) +attribute posthog.mcp.internal.MCPAnalyticsData.tool_descriptions: Dict[str, str] = field(default_factory=dict) +attribute posthog.mcp.logger.LoggerFn = Callable[[str], None] +attribute posthog.mcp.sink.McpCaptureOptions.before_send: Optional[BeforeSendFn] = None +attribute posthog.mcp.sink.McpCaptureOptions.enable_exception_autocapture: bool = True +attribute posthog.mcp.tools.GET_MORE_TOOLS_NAME = 'get_more_tools' +attribute posthog.mcp.truncation.MAX_BREADTH = 100 +attribute posthog.mcp.truncation.MAX_DEPTH = 10 +attribute posthog.mcp.truncation.MAX_EVENT_BYTES = 102400 
+attribute posthog.mcp.truncation.MAX_STRING_LENGTH = 32768 +attribute posthog.mcp.types.BeforeSendFn = Callable[[PostHogCaptureEvent], Union[Optional[PostHogCaptureEvent], Awaitable[Optional[PostHogCaptureEvent]]]] +attribute posthog.mcp.types.CaptureEventData.event: str +attribute posthog.mcp.types.CaptureEventData.properties: Optional[JsonRecord] = None +attribute posthog.mcp.types.ErrorProperties = Dict[str, Any] +attribute posthog.mcp.types.Event = Dict[str, Any] +attribute posthog.mcp.types.EventPropertiesFn = Callable[..., Any] +attribute posthog.mcp.types.IdentifyFn = Callable[..., Any] +attribute posthog.mcp.types.IntentFallbackFn = Callable[..., Any] +attribute posthog.mcp.types.JsonRecord = Dict[str, Any] +attribute posthog.mcp.types.MCPAnalyticsContextOptions.description: Optional[str] = None +attribute posthog.mcp.types.MCPAnalyticsIntentSource = str +attribute posthog.mcp.types.MCPAnalyticsOptions.before_send: Optional[BeforeSendFn] = None +attribute posthog.mcp.types.MCPAnalyticsOptions.context: Union[bool, MCPAnalyticsContextOptions] = True +attribute posthog.mcp.types.MCPAnalyticsOptions.enable_conversation_id: bool = False +attribute posthog.mcp.types.MCPAnalyticsOptions.enable_exception_autocapture: bool = True +attribute posthog.mcp.types.MCPAnalyticsOptions.event_properties: Optional[EventPropertiesFn] = None +attribute posthog.mcp.types.MCPAnalyticsOptions.identify: Optional[Union[IdentifyFn, UserIdentity]] = None +attribute posthog.mcp.types.MCPAnalyticsOptions.intent_fallback: Optional[IntentFallbackFn] = None +attribute posthog.mcp.types.MCPAnalyticsOptions.logger: Optional[LoggerFn] = None +attribute posthog.mcp.types.MCPAnalyticsOptions.missing_capability_tool_name: Optional[str] = None +attribute posthog.mcp.types.MCPAnalyticsOptions.report_missing: bool = False +attribute posthog.mcp.types.McpEvent = Dict[str, Any] +attribute posthog.mcp.types.PostHogCaptureEvent.distinct_id: str +attribute posthog.mcp.types.PostHogCaptureEvent.event: 
str +attribute posthog.mcp.types.PostHogCaptureEvent.properties: Dict[str, Any] +attribute posthog.mcp.types.PostHogCaptureEvent.timestamp: datetime +attribute posthog.mcp.types.PreparedToolCall.args: Optional[JsonRecord] = None +attribute posthog.mcp.types.PreparedToolCall.intent: Optional[str] = None +attribute posthog.mcp.types.PreparedToolCall.intent_source: Optional[str] = None +attribute posthog.mcp.types.PreparedToolCall.is_missing_capability: bool = False +attribute posthog.mcp.types.SessionInfo.client_name: Optional[str] = None +attribute posthog.mcp.types.SessionInfo.client_version: Optional[str] = None +attribute posthog.mcp.types.SessionInfo.identify_actor_data: JsonRecord = field(default_factory=dict) +attribute posthog.mcp.types.SessionInfo.identify_actor_given_id: Optional[str] = None +attribute posthog.mcp.types.SessionInfo.identify_actor_groups: Optional[Dict[str, str]] = None +attribute posthog.mcp.types.SessionInfo.ip_address: Optional[str] = None +attribute posthog.mcp.types.SessionInfo.sdk_language: str = 'Python' +attribute posthog.mcp.types.SessionInfo.sdk_version: Optional[str] = None +attribute posthog.mcp.types.SessionInfo.server_name: Optional[str] = None +attribute posthog.mcp.types.SessionInfo.server_version: Optional[str] = None +attribute posthog.mcp.types.UserIdentity.distinct_id: str +attribute posthog.mcp.types.UserIdentity.groups: Optional[Dict[str, str]] = None +attribute posthog.mcp.types.UserIdentity.properties: Optional[JsonRecord] = None +attribute posthog.mcp.version.__version__ = '0.1.0' attribute posthog.on_error = None attribute posthog.personal_api_key = None attribute posthog.poll_interval = 30 @@ -768,6 +997,22 @@ class posthog.flag_definition_cache.FlagDefinitionCacheData class posthog.flag_definition_cache.FlagDefinitionCacheProvider class posthog.integrations.celery.PosthogCeleryIntegration(client: Optional[Client] = None, capture_exceptions: bool = True, capture_task_lifecycle_events: bool = True, 
propagate_context: bool = True, task_filter: Optional[Callable[[Optional[str], dict[str, Any]], bool]] = None) class posthog.integrations.django.PosthogContextMiddleware(get_response) +class posthog.mcp.McpAnalytics(key: Any) +class posthog.mcp.constants.PostHogMCPAnalyticsEvent +class posthog.mcp.constants.PostHogMCPAnalyticsProperty +class posthog.mcp.event_types.MCPAnalyticsEventType +class posthog.mcp.internal.IdentityCache(max_size: int = 1000) +class posthog.mcp.internal.MCPAnalyticsData(options: MCPAnalyticsOptions, sink: Optional[McpEventSink] = None, session_id: str = '', session_source: str = 'generated', last_mcp_session_id: Optional[str] = None, last_activity: datetime = (lambda: datetime.now(timezone.utc))(), identified_sessions: IdentityCache = IdentityCache(), tool_categories: Dict[str, str] = dict(), tool_descriptions: Dict[str, str] = dict(), initialized_sessions: 'OrderedDict[str, None]' = OrderedDict(), server_name: Optional[str] = None, server_version: Optional[str] = None, session_lock: asyncio.Lock = asyncio.Lock()) +class posthog.mcp.posthog_mcp.PostHogMCP(api_key: str, missing_capability_tool_name: Optional[str] = None, **kwargs: Any) +class posthog.mcp.sink.McpCaptureOptions(enable_exception_autocapture: bool = True, before_send: Optional[BeforeSendFn] = None) +class posthog.mcp.sink.McpEventSink(posthog: Client) +class posthog.mcp.types.CaptureEventData(event: str, properties: Optional[JsonRecord] = None) +class posthog.mcp.types.MCPAnalyticsContextOptions(description: Optional[str] = None) +class posthog.mcp.types.MCPAnalyticsOptions(logger: Optional[LoggerFn] = None, report_missing: bool = False, missing_capability_tool_name: Optional[str] = None, enable_conversation_id: bool = False, enable_exception_autocapture: bool = True, context: Union[bool, MCPAnalyticsContextOptions] = True, identify: Optional[Union[IdentifyFn, UserIdentity]] = None, intent_fallback: Optional[IntentFallbackFn] = None, before_send: Optional[BeforeSendFn] = None, 
event_properties: Optional[EventPropertiesFn] = None) +class posthog.mcp.types.PostHogCaptureEvent +class posthog.mcp.types.PreparedToolCall(args: Optional[JsonRecord] = None, intent: Optional[str] = None, intent_source: Optional[str] = None, is_missing_capability: bool = False) +class posthog.mcp.types.SessionInfo(client_name: Optional[str] = None, client_version: Optional[str] = None, server_name: Optional[str] = None, server_version: Optional[str] = None, sdk_language: str = 'Python', sdk_version: Optional[str] = None, ip_address: Optional[str] = None, identify_actor_given_id: Optional[str] = None, identify_actor_data: JsonRecord = dict(), identify_actor_groups: Optional[Dict[str, str]] = None) +class posthog.mcp.types.UserIdentity(distinct_id: str, properties: Optional[JsonRecord] = None, groups: Optional[Dict[str, str]] = None) class posthog.poller.Poller(interval, execute, *args, **kwargs) class posthog.request.APIError(status: Union[int, str], message: str, retry_after: Optional[float] = None) class posthog.request.DatetimeSerializer @@ -951,6 +1196,64 @@ function posthog.identify_context(distinct_id: str) function posthog.integrations.django.markcoroutinefunction(func) function posthog.join() -> None function posthog.load_feature_flags() +function posthog.mcp.capture.capture_event(data: MCPAnalyticsData, event_input: Dict[str, Any]) -> Optional[Coroutine[Any, Any, None]] +function posthog.mcp.capture.log_capture_skipped() -> None +function posthog.mcp.compatibility.is_fastmcp(server: Any) -> bool +function posthog.mcp.compatibility.is_fastmcp_v2(server: Any) -> bool +function posthog.mcp.compatibility.is_low_level_server(server: Any) -> bool +function posthog.mcp.context_parameters.add_context_parameter_to_schema(input_schema: Optional[Dict[str, Any]], tool_name: str = 'unknown', description_override: Optional[str] = None, required: bool = True) -> Optional[Dict[str, Any]] +function posthog.mcp.context_parameters.get_context_description(context: Union[bool, 
MCPAnalyticsContextOptions, None]) -> Optional[str] +function posthog.mcp.context_parameters.is_context_enabled(context: Union[bool, MCPAnalyticsContextOptions, None]) -> bool +function posthog.mcp.conversation_id.add_conversation_id_to_schema(input_schema: Optional[Dict[str, Any]], tool_name: str = 'unknown') -> Optional[Dict[str, Any]] +function posthog.mcp.conversation_id.build_prompt_back(conversation_id: str) -> Dict[str, Any] +function posthog.mcp.conversation_id.can_inject_prompt_back(result: Any) -> bool +function posthog.mcp.conversation_id.extract_conversation_id(args: Any) -> Optional[str] +function posthog.mcp.conversation_id.inject_prompt_back(result: Any, conversation_id: str) -> Any +function posthog.mcp.conversation_id.resolve_conversation_id(enabled: bool, args: Any, tool_name: Optional[str], missing_capability_tool_name: str) -> Tuple[Optional[str], bool] +function posthog.mcp.exceptions.capture_exception(error: Any) -> ErrorProperties +function posthog.mcp.ids.deterministic_prefixed_id(prefix: MCPAnalyticsIDPrefix, value: str) -> str +function posthog.mcp.ids.new_prefixed_id(prefix: MCPAnalyticsIDPrefix) -> str +function posthog.mcp.instrument(server: Any, posthog_client: Optional[Client] = None, options: Optional[MCPAnalyticsOptions] = None) -> McpAnalytics +function posthog.mcp.instrument_fastmcp.instrument_fastmcp(server: Any, data: MCPAnalyticsData) -> None +function posthog.mcp.instrument_lowlevel.instrument_fastmcp_v2(server: Any, data: MCPAnalyticsData) -> None +function posthog.mcp.instrument_lowlevel.instrument_low_level(server: Any, data: MCPAnalyticsData) -> None +function posthog.mcp.instrumentation.append_get_more_tools(result: Any, name: str) -> None +function posthog.mcp.instrumentation.build_tool_call_request(name: str, arguments: Optional[Dict[str, Any]]) -> Dict[str, Any] +function posthog.mcp.instrumentation.drain_pending() -> None +function posthog.mcp.instrumentation.extract_tools(result: Any) -> list +function 
posthog.mcp.instrumentation.fire_and_forget(coro: Optional[Any]) -> None +function posthog.mcp.instrumentation.is_tool_result_error(result: Any) -> bool +function posthog.mcp.instrumentation.prepare_request(data: MCPAnalyticsData, *, mcp_session_id: Optional[str], client_name: Optional[str], client_version: Optional[str], request: Dict[str, Any], extra: Optional[Dict[str, Any]]) -> str +function posthog.mcp.instrumentation.read_tool_category(tool: Any) -> Optional[str] +function posthog.mcp.instrumentation.record_missing_capability(data: MCPAnalyticsData, session_id: str, *, tool_name: str, context: Optional[str], arguments: Optional[Dict[str, Any]], client_name: Optional[str] = None, client_version: Optional[str] = None, extra: Optional[Dict[str, Any]] = None) -> None +function posthog.mcp.instrumentation.record_tool_call(data: MCPAnalyticsData, session_id: str, *, name: str, arguments: Optional[Dict[str, Any]], result: Any = None, error: Any = None, duration_ms: Optional[float] = None, client_name: Optional[str] = None, client_version: Optional[str] = None, conversation_id: Optional[str] = None, extra: Optional[Dict[str, Any]] = None) -> None +function posthog.mcp.instrumentation.record_tools_list(data: MCPAnalyticsData, session_id: str, *, names: List[str], request: Dict[str, Any], client_name: Optional[str] = None, client_version: Optional[str] = None, extra: Optional[Dict[str, Any]] = None) -> None +function posthog.mcp.instrumentation.request_to_dict(req: Any) -> Dict[str, Any] +function posthog.mcp.intent.resolve_tool_call_intent(data: MCPAnalyticsData, request: Dict[str, Any], extra: Optional[Dict[str, Any]] = None) -> Optional[ResolvedIntent] +function posthog.mcp.intent.set_event_intent(event: Dict[str, Any], resolved: Optional[ResolvedIntent]) -> None +function posthog.mcp.internal.are_identities_equal(a: UserIdentity, b: UserIdentity) -> bool +function posthog.mcp.internal.get_server_tracking_data(server: Any) -> Optional[MCPAnalyticsData] +function 
posthog.mcp.internal.handle_identify(data: MCPAnalyticsData, session_id: str, request: Dict[str, Any], extra: Optional[Dict[str, Any]] = None) -> Optional[Dict[str, Any]] +function posthog.mcp.internal.merge_identities(previous: Optional[UserIdentity], nxt: UserIdentity) -> UserIdentity +function posthog.mcp.internal.resolve_event_properties(data: MCPAnalyticsData, request: Dict[str, Any], extra: Optional[Dict[str, Any]] = None) -> Optional[Dict[str, Any]] +function posthog.mcp.internal.set_server_tracking_data(server: Any, data: MCPAnalyticsData) -> None +function posthog.mcp.logger.log(message: str) -> None +function posthog.mcp.logger.set_logger(logger: Optional[LoggerFn]) -> None +function posthog.mcp.posthog_events.build_posthog_capture_events(event: Event, enable_exception_autocapture: bool = True) -> List[PostHogCaptureEvent] +function posthog.mcp.sanitization.build_captured_mcp_parameters(request: Any) -> Dict[str, Any] +function posthog.mcp.sanitization.sanitize_captured_value(value: Any) -> Any +function posthog.mcp.sanitization.sanitize_event(event: Dict[str, Any]) -> Dict[str, Any] +function posthog.mcp.session.derive_session_id_from_mcp_session(mcp_session_id: str) -> str +function posthog.mcp.session.new_session_id() -> str +function posthog.mcp.session.resolve_session_id(data: MCPAnalyticsData, mcp_session_id: Optional[str]) -> str +function posthog.mcp.sink.process_mcp_event(event: McpEvent, options: McpCaptureOptions) -> Optional[Tuple[Event, List[PostHogCaptureEvent]]] +function posthog.mcp.tools.build_report_missing_descriptor(name: str = GET_MORE_TOOLS_NAME) -> Dict[str, Any] +function posthog.mcp.tools.get_more_tools_result() -> Dict[str, Any] +function posthog.mcp.tools.get_more_tools_result_text() -> str +function posthog.mcp.tools.handle_report_missing(context: Optional[str]) -> Dict[str, Any] +function posthog.mcp.tools.resolve_missing_capability_tool_name(options: Any = None) -> str +function posthog.mcp.truncation.normalize(value: Any, 
depth: int = MAX_DEPTH, max_breadth: int = MAX_BREADTH, max_string_length: int = MAX_STRING_LENGTH) -> Any +function posthog.mcp.truncation.truncate_event(event: Dict[str, Any]) -> Dict[str, Any] function posthog.new_context(fresh: bool = False, capture_exceptions: Optional[bool] = None, client: Optional[Client] = None) function posthog.request.batch_post(api_key: str, host: Optional[str] = None, gzip: bool = False, timeout: int = 15, path: str = EVENTS_ENDPOINT, **kwargs) -> requests.Response function posthog.request.determine_server_host(host: Optional[str]) -> str @@ -1135,6 +1438,20 @@ method posthog.integrations.django.PosthogContextMiddleware.aextract_tags(reques method posthog.integrations.django.PosthogContextMiddleware.extract_request_user(request) method posthog.integrations.django.PosthogContextMiddleware.extract_tags(request) method posthog.integrations.django.PosthogContextMiddleware.process_exception(request, exception) +method posthog.mcp.McpAnalytics.capture(event: str, properties: Optional[dict] = None) -> None +method posthog.mcp.McpAnalytics.flush() -> None +method posthog.mcp.internal.IdentityCache.get(session_id: str) -> Optional[UserIdentity] +method posthog.mcp.internal.IdentityCache.has(session_id: str) -> bool +method posthog.mcp.internal.IdentityCache.set(session_id: str, identity: UserIdentity) -> None +method posthog.mcp.internal.IdentityCache.size() -> int +method posthog.mcp.internal.MCPAnalyticsData.mark_session_initialized(session_id: str) -> None +method posthog.mcp.posthog_mcp.PostHogMCP.capture_initialize(*, client_name: Optional[str] = None, client_version: Optional[str] = None, parameters: Any = None, response: Any = None, duration_ms: Optional[float] = None, distinct_id: Optional[str] = None, session_id: Optional[str] = None, set_properties: Optional[JsonRecord] = None, groups: Optional[Dict[str, str]] = None, properties: Optional[JsonRecord] = None, timestamp: Optional[datetime] = None) -> None +method 
posthog.mcp.posthog_mcp.PostHogMCP.capture_missing_capability(*, context: Optional[str] = None, parameters: Any = None, distinct_id: Optional[str] = None, session_id: Optional[str] = None, set_properties: Optional[JsonRecord] = None, groups: Optional[Dict[str, str]] = None, properties: Optional[JsonRecord] = None, timestamp: Optional[datetime] = None) -> None +method posthog.mcp.posthog_mcp.PostHogMCP.capture_tool_call(tool_name: str, *, intent: Optional[str] = None, intent_source: Optional[str] = None, parameters: Any = None, response: Any = None, duration_ms: Optional[float] = None, is_error: bool = False, error: Any = None, category: Optional[str] = None, tool_description: Optional[str] = None, distinct_id: Optional[str] = None, session_id: Optional[str] = None, set_properties: Optional[JsonRecord] = None, groups: Optional[Dict[str, str]] = None, properties: Optional[JsonRecord] = None, timestamp: Optional[datetime] = None) -> None +method posthog.mcp.posthog_mcp.PostHogMCP.capture_tools_list(*, tool_names: Optional[List[str]] = None, parameters: Any = None, response: Any = None, duration_ms: Optional[float] = None, is_error: bool = False, error: Any = None, distinct_id: Optional[str] = None, session_id: Optional[str] = None, set_properties: Optional[JsonRecord] = None, groups: Optional[Dict[str, str]] = None, properties: Optional[JsonRecord] = None, timestamp: Optional[datetime] = None) -> None +method posthog.mcp.posthog_mcp.PostHogMCP.prepare_tool_call(name: str, args: Optional[JsonRecord] = None) -> PreparedToolCall +method posthog.mcp.posthog_mcp.PostHogMCP.prepare_tool_list(tools: List[Any], context: Union[bool, MCPAnalyticsContextOptions] = True, report_missing: bool = False) -> List[Any] +method posthog.mcp.sink.McpEventSink.capture(event: McpEvent, options: McpCaptureOptions) -> None method posthog.poller.Poller.run() method posthog.poller.Poller.stop() method posthog.request.DatetimeSerializer.default(obj: Any) @@ -1207,6 +1524,30 @@ module 
posthog.flag_definition_cache module posthog.integrations module posthog.integrations.celery module posthog.integrations.django +module posthog.mcp +module posthog.mcp.capture +module posthog.mcp.compatibility +module posthog.mcp.constants +module posthog.mcp.context_parameters +module posthog.mcp.conversation_id +module posthog.mcp.event_types +module posthog.mcp.exceptions +module posthog.mcp.ids +module posthog.mcp.instrument_fastmcp +module posthog.mcp.instrument_lowlevel +module posthog.mcp.instrumentation +module posthog.mcp.intent +module posthog.mcp.internal +module posthog.mcp.logger +module posthog.mcp.posthog_events +module posthog.mcp.posthog_mcp +module posthog.mcp.sanitization +module posthog.mcp.session +module posthog.mcp.sink +module posthog.mcp.tools +module posthog.mcp.truncation +module posthog.mcp.types +module posthog.mcp.version module posthog.poller module posthog.request module posthog.types