diff --git a/.changeset/green-ravens-validate.md b/.changeset/green-ravens-validate.md new file mode 100644 index 00000000..1115db2b --- /dev/null +++ b/.changeset/green-ravens-validate.md @@ -0,0 +1,5 @@ +--- +'pypi/posthog': patch +--- + +Validate custom event UUIDs before sending events. diff --git a/posthog/__init__.py b/posthog/__init__.py index 9f0b4893..bf8c540e 100644 --- a/posthog/__init__.py +++ b/posthog/__init__.py @@ -370,7 +370,9 @@ def capture(event: str, **kwargs: Unpack[OptionalCaptureArgs]) -> Optional[str]: properties: Dict of event properties timestamp: When the event occurred uuid: Unique identifier for this event. If omitted, one is generated - and returned. + and returned. If provided, it must be a valid UUID string or + uuid.UUID instance; invalid values are ignored and replaced with + a newly generated UUID. groups: Dict of group types and IDs flags: A FeatureFlagEvaluations snapshot from evaluate_flags(). The exact values from the snapshot are attached with no extra /flags @@ -441,7 +443,9 @@ def set(**kwargs: Unpack[OptionalSetArgs]) -> Optional[str]: properties: Dict of person properties to set. timestamp: When the properties were set. uuid: Unique identifier for this operation. If omitted, one is - generated and returned. + generated and returned. If provided, it must be a valid UUID + string or uuid.UUID instance; invalid values are ignored and + replaced with a newly generated UUID. disable_geoip: Whether to disable GeoIP lookup. Details: @@ -471,7 +475,9 @@ def set_once(**kwargs: Unpack[OptionalSetArgs]) -> Optional[str]: properties: Dict of person properties to set only once. timestamp: When the properties were set. uuid: Unique identifier for this operation. If omitted, one is - generated and returned. + generated and returned. If provided, it must be a valid UUID + string or uuid.UUID instance; invalid values are ignored and + replaced with a newly generated UUID. disable_geoip: Whether to disable GeoIP lookup. Details: diff --git a/posthog/args.py b/posthog/args.py index 7f69214b..e871ea4b 100644 --- a/posthog/args.py +++ b/posthog/args.py @@ -24,7 +24,8 @@ class OptionalCaptureArgs(TypedDict): timestamp: When the event occurred (defaults to current time) uuid: Unique identifier for this specific event. If not provided, one is generated. The event UUID is returned, so you can correlate it with actions in your app (like showing users an - error ID if you capture an exception). + error ID if you capture an exception). If provided, it must be a valid UUID string or + uuid.UUID instance; invalid values are ignored and replaced with a newly generated UUID. groups: Group identifiers to associate with this event (format: {group_type: group_key}) flags: A ``FeatureFlagEvaluations`` snapshot from ``evaluate_flags()``. The exact flag values from the snapshot are attached to the event with no additional network call — @@ -40,7 +41,7 @@ class OptionalCaptureArgs(TypedDict): distinct_id: NotRequired[Optional[ID_TYPES]] properties: NotRequired[Optional[Dict[str, Any]]] timestamp: NotRequired[Optional[Union[datetime, str]]] - uuid: NotRequired[Optional[str]] + uuid: NotRequired[Optional[Union[str, UUID]]] groups: NotRequired[Optional[Dict[str, str]]] flags: NotRequired[Optional["FeatureFlagEvaluations"]] send_feature_flags: NotRequired[ @@ -61,14 +62,16 @@ class OptionalSetArgs(TypedDict): properties: Dictionary of properties to set on the person timestamp: When the properties were set (defaults to current time) uuid: Unique identifier for this operation. If not provided, one is generated. This - UUID is returned, so you can correlate it with actions in your app. + UUID is returned, so you can correlate it with actions in your app. If provided, + it must be a valid UUID string or uuid.UUID instance; invalid values are ignored + and replaced with a newly generated UUID. disable_geoip: Whether to disable GeoIP lookup for this operation. Defaults to False. """ distinct_id: NotRequired[Optional[ID_TYPES]] properties: NotRequired[Optional[Dict[str, Any]]] timestamp: NotRequired[Optional[Union[datetime, str]]] - uuid: NotRequired[Optional[str]] + uuid: NotRequired[Optional[Union[str, UUID]]] disable_geoip: NotRequired[Optional[bool]] diff --git a/posthog/client.py b/posthog/client.py index 800cc577..19fe15c6 100644 --- a/posthog/client.py +++ b/posthog/client.py @@ -9,7 +9,7 @@ import weakref from datetime import datetime, timedelta, timezone from typing import Any, Dict, List, Optional, Union -from uuid import uuid4 +from uuid import UUID, uuid4 from typing_extensions import Unpack @@ -113,6 +113,26 @@ def get_identity_state(passed) -> tuple[str, bool]: return (str(uuid4()), True) +def _stringify_event_uuid(value) -> str: + if isinstance(value, UUID): + return str(value) + + stringified = stringify_id(value) + if not stringified: + raise ValueError( + f"Invalid event uuid {value!r}. Expected a valid UUID string or uuid.UUID instance." + ) + + try: + UUID(stringified) + except ValueError: + raise ValueError( + f"Invalid event uuid {value!r}. Expected a valid UUID string or uuid.UUID instance." + ) from None + + return stringified + + def add_context_tags(properties): properties = properties or {} current_context = _get_current_context() @@ -728,7 +748,9 @@ def capture( distinct_id: The distinct ID of the user. properties: A dictionary of properties to include with the event. timestamp: The timestamp of the event. - uuid: A unique identifier for the event. + uuid: A unique identifier for the event. If provided, it must be a + valid UUID string or uuid.UUID instance; invalid values are + ignored and replaced with a newly generated UUID. groups: A dictionary of group information. flags: A FeatureFlagEvaluations snapshot from evaluate_flags(). The exact values from the snapshot are attached with no extra /flags @@ -941,7 +963,9 @@ def set(self, **kwargs: Unpack[OptionalSetArgs]) -> Optional[str]: distinct_id: The distinct ID of the user. properties: A dictionary of properties to set. timestamp: The timestamp of the event. - uuid: A unique identifier for the event. + uuid: A unique identifier for the event. If provided, it must be a + valid UUID string or uuid.UUID instance; invalid values are + ignored and replaced with a newly generated UUID. disable_geoip: Whether to disable GeoIP for this event. Examples: @@ -989,7 +1013,9 @@ def set_once(self, **kwargs: Unpack[OptionalSetArgs]) -> Optional[str]: distinct_id: The distinct ID of the user. properties: A dictionary of properties to set once. timestamp: The timestamp of the event. - uuid: A unique identifier for the event. + uuid: A unique identifier for the event. If provided, it must be a + valid UUID string or uuid.UUID instance; invalid values are + ignored and replaced with a newly generated UUID. disable_geoip: Whether to disable GeoIP for this event. Examples: @@ -1033,7 +1059,7 @@ def group_identify( group_key: str, properties: Optional[Dict[str, Any]] = None, timestamp: Optional[Union[datetime, str]] = None, - uuid: Optional[str] = None, + uuid: Optional[Union[str, UUID]] = None, disable_geoip: Optional[bool] = None, distinct_id: Optional[ID_TYPES] = None, ) -> Optional[str]: @@ -1045,7 +1071,9 @@ def group_identify( group_key: The unique identifier for the group. properties: A dictionary of properties to set on the group. timestamp: The timestamp of the event. - uuid: A unique identifier for the event. + uuid: A unique identifier for the event. If provided, it must be a + valid UUID string or uuid.UUID instance; invalid values are + ignored and replaced with a newly generated UUID. disable_geoip: Whether to disable GeoIP for this event. distinct_id: The distinct ID of the user performing the action. @@ -1101,7 +1129,9 @@ def alias( previous_id: The previous distinct ID. distinct_id: The new distinct ID to alias to. timestamp: The timestamp of the event. - uuid: A unique identifier for the event. + uuid: A unique identifier for the event. If provided, it must be a + valid UUID string or uuid.UUID instance; invalid values are + ignored and replaced with a newly generated UUID. disable_geoip: Whether to disable GeoIP for this event. Examples: @@ -1348,8 +1378,11 @@ def _enqueue(self, msg, disable_geoip): if "uuid" in msg: uuid = msg.pop("uuid") - if uuid: - msg["uuid"] = stringify_id(uuid) + if uuid is not None: + try: + msg["uuid"] = _stringify_event_uuid(uuid) + except ValueError as e: + self.log.error("%s Falling back to a generated UUID.", e) if "uuid" not in msg: # Always send a uuid, so we can always return one diff --git a/posthog/test/test_client.py b/posthog/test/test_client.py index e9d7054c..cb8d4b5d 100644 --- a/posthog/test/test_client.py +++ b/posthog/test/test_client.py @@ -1,7 +1,7 @@ import time import unittest from datetime import datetime -from uuid import uuid4 +from uuid import UUID, uuid4 from unittest import mock from parameterized import parameterized @@ -217,6 +217,73 @@ def test_basic_capture_with_uuid(self): self.assertEqual(msg["properties"]["$lib"], "posthog-python") self.assertEqual(msg["properties"]["$lib_version"], VERSION) + def test_basic_capture_with_uuid_object(self): + with mock.patch("posthog.client.batch_post") as mock_post: + client = Client(FAKE_TEST_API_KEY, on_error=self.set_fail, sync_mode=True) + uuid = UUID("00000000-0000-4000-8000-000000000002") + msg_uuid = client.capture( + "python test event", distinct_id="distinct_id", uuid=uuid + ) + self.assertEqual(msg_uuid, str(uuid)) + self.assertFalse(self.failed) + + mock_post.assert_called_once() + msg = mock_post.call_args[1]["batch"][0] + self.assertEqual(msg["uuid"], str(uuid)) + + @parameterized.expand( + [ + ("empty string", ""), + ("invalid string", "not-a-uuid"), + ("short string", "1234"), + ("integer", 123), + ] + ) + def test_capture_with_invalid_uuid_logs_and_falls_back_to_generated_uuid( + self, _name, invalid_uuid + ): + with mock.patch("posthog.client.batch_post") as mock_post: + client = Client(FAKE_TEST_API_KEY, on_error=self.set_fail, sync_mode=True) + with self.assertLogs("posthog", level="ERROR") as logs: + msg_uuid = client.capture( + "python test event", distinct_id="distinct_id", uuid=invalid_uuid + ) + + self.assertIsNotNone(msg_uuid) + UUID(msg_uuid) + mock_post.assert_called_once() + msg = mock_post.call_args[1]["batch"][0] + self.assertEqual(msg["uuid"], msg_uuid) + self.assertNotEqual(msg["uuid"], str(invalid_uuid)) + self.assertTrue( + any( + f"Invalid event uuid {invalid_uuid!r}" in message + and "Expected a valid UUID string or uuid.UUID instance" in message + and "Falling back to a generated UUID" in message + for message in logs.output + ) + ) + + @parameterized.expand( + [ + ("empty string", ""), + ("invalid string", "not-a-uuid"), + ("short string", "1234"), + ("integer", 123), + ] + ) + def test_capture_with_invalid_uuid_falls_back_in_debug(self, _name, invalid_uuid): + with mock.patch("posthog.client.batch_post") as mock_post: + client = Client(FAKE_TEST_API_KEY, debug=True, sync_mode=True) + with self.assertLogs("posthog", level="ERROR"): + msg_uuid = client.capture( + "python test event", distinct_id="distinct_id", uuid=invalid_uuid + ) + + self.assertIsNotNone(msg_uuid) + UUID(msg_uuid) + mock_post.assert_called_once() + def test_basic_capture_with_project_api_key(self): with mock.patch("posthog.client.batch_post") as mock_post: client = Client( @@ -1271,10 +1338,10 @@ def test_advanced_capture(self): distinct_id="distinct_id", properties={"property": "value"}, timestamp=datetime(2014, 9, 3), - uuid="new-uuid", + uuid="00000000-0000-4000-8000-000000000001", ) - self.assertEqual(msg_uuid, "new-uuid") + self.assertEqual(msg_uuid, "00000000-0000-4000-8000-000000000001") # Get the enqueued message from the mock mock_post.assert_called_once() @@ -1286,7 +1353,7 @@ def test_advanced_capture(self): self.assertEqual(msg["event"], "python test event") self.assertEqual(msg["properties"]["$lib"], "posthog-python") self.assertEqual(msg["properties"]["$lib_version"], VERSION) - self.assertEqual(msg["uuid"], "new-uuid") + self.assertEqual(msg["uuid"], "00000000-0000-4000-8000-000000000001") self.assertEqual(msg["distinct_id"], "distinct_id") self.assertTrue("$groups" not in msg["properties"]) @@ -1337,10 +1404,10 @@ def test_advanced_set(self): distinct_id="distinct_id", properties={"trait": "value"}, timestamp=datetime(2014, 9, 3), - uuid="new-uuid", + uuid="00000000-0000-4000-8000-000000000001", ) - self.assertEqual(msg_uuid, "new-uuid") + self.assertEqual(msg_uuid, "00000000-0000-4000-8000-000000000001") # Get the enqueued message from the mock mock_post.assert_called_once() @@ -1352,7 +1419,7 @@ def test_advanced_set(self): self.assertEqual(msg["properties"]["$lib"], "posthog-python") self.assertEqual(msg["properties"]["$lib_version"], VERSION) self.assertTrue(isinstance(msg["timestamp"], str)) - self.assertEqual(msg["uuid"], "new-uuid") + self.assertEqual(msg["uuid"], "00000000-0000-4000-8000-000000000001") self.assertEqual(msg["distinct_id"], "distinct_id") def test_basic_set_once(self): @@ -1381,10 +1448,10 @@ def test_advanced_set_once(self): distinct_id="distinct_id", properties={"trait": "value"}, timestamp=datetime(2014, 9, 3), - uuid="new-uuid", + uuid="00000000-0000-4000-8000-000000000001", ) - self.assertEqual(msg_uuid, "new-uuid") + self.assertEqual(msg_uuid, "00000000-0000-4000-8000-000000000001") # Get the enqueued message from the mock mock_post.assert_called_once() @@ -1396,7 +1463,7 @@ def test_advanced_set_once(self): self.assertEqual(msg["properties"]["$lib"], "posthog-python") self.assertEqual(msg["properties"]["$lib_version"], VERSION) self.assertTrue(isinstance(msg["timestamp"], str)) - self.assertEqual(msg["uuid"], "new-uuid") + self.assertEqual(msg["uuid"], "00000000-0000-4000-8000-000000000001") self.assertEqual(msg["distinct_id"], "distinct_id") def test_basic_group_identify(self): @@ -1465,10 +1532,10 @@ def test_advanced_group_identify(self): "id:5", {"trait": "value"}, timestamp=datetime(2014, 9, 3), - uuid="new-uuid", + uuid="00000000-0000-4000-8000-000000000001", ) - self.assertEqual(msg_uuid, "new-uuid") + self.assertEqual(msg_uuid, "00000000-0000-4000-8000-000000000001") # Get the enqueued message from the mock mock_post.assert_called_once() @@ -1498,11 +1565,11 @@ def test_advanced_group_identify_with_distinct_id(self): "id:5", {"trait": "value"}, timestamp=datetime(2014, 9, 3), - uuid="new-uuid", + uuid="00000000-0000-4000-8000-000000000001", distinct_id="distinct_id", ) - self.assertEqual(msg_uuid, "new-uuid") + self.assertEqual(msg_uuid, "00000000-0000-4000-8000-000000000001") # Get the enqueued message from the mock mock_post.assert_called_once() diff --git a/references/public_api_snapshot.txt b/references/public_api_snapshot.txt index 2ba8a253..2657f385 100644 --- a/references/public_api_snapshot.txt +++ b/references/public_api_snapshot.txt @@ -437,12 +437,12 @@ attribute posthog.args.OptionalCaptureArgs.groups: NotRequired[Optional[Dict[str attribute posthog.args.OptionalCaptureArgs.properties: NotRequired[Optional[Dict[str, Any]]] attribute posthog.args.OptionalCaptureArgs.send_feature_flags: NotRequired[Optional[Union[bool, SendFeatureFlagsOptions]]] attribute posthog.args.OptionalCaptureArgs.timestamp: NotRequired[Optional[Union[datetime, str]]] -attribute posthog.args.OptionalCaptureArgs.uuid: NotRequired[Optional[str]] +attribute posthog.args.OptionalCaptureArgs.uuid: NotRequired[Optional[Union[str, UUID]]] attribute posthog.args.OptionalSetArgs.disable_geoip: NotRequired[Optional[bool]] attribute posthog.args.OptionalSetArgs.distinct_id: NotRequired[Optional[ID_TYPES]] attribute posthog.args.OptionalSetArgs.properties: NotRequired[Optional[Dict[str, Any]]] attribute posthog.args.OptionalSetArgs.timestamp: NotRequired[Optional[Union[datetime, str]]] -attribute posthog.args.OptionalSetArgs.uuid: NotRequired[Optional[str]] +attribute posthog.args.OptionalSetArgs.uuid: NotRequired[Optional[Union[str, UUID]]] attribute posthog.before_send = None attribute posthog.bucketed_rate_limiter.Number = Union[int, float] attribute posthog.bucketed_rate_limiter.ONE_DAY_IN_SECONDS = 86400.0 @@ -1059,7 +1059,7 @@ method posthog.client.Client.get_feature_payloads(distinct_id, groups=None, pers method posthog.client.Client.get_feature_variants(distinct_id, groups=None, person_properties=None, group_properties=None, disable_geoip=None, flag_keys_to_evaluate: Optional[list[str]] = None, device_id: Optional[str] = None) -> dict[str, Union[bool, str]] method posthog.client.Client.get_flags_decision(distinct_id: Optional[ID_TYPES] = None, groups: Optional[dict] = None, person_properties=None, group_properties=None, disable_geoip=None, flag_keys_to_evaluate: Optional[list[str]] = None, device_id: Optional[str] = None) -> FlagsResponse method posthog.client.Client.get_remote_config_payload(key: str) -method posthog.client.Client.group_identify(group_type: str, group_key: str, properties: Optional[Dict[str, Any]] = None, timestamp: Optional[Union[datetime, str]] = None, uuid: Optional[str] = None, disable_geoip: Optional[bool] = None, distinct_id: Optional[ID_TYPES] = None) -> Optional[str] +method posthog.client.Client.group_identify(group_type: str, group_key: str, properties: Optional[Dict[str, Any]] = None, timestamp: Optional[Union[datetime, str]] = None, uuid: Optional[Union[str, UUID]] = None, disable_geoip: Optional[bool] = None, distinct_id: Optional[ID_TYPES] = None) -> Optional[str] method posthog.client.Client.join() -> None method posthog.client.Client.load_feature_flags() method posthog.client.Client.new_context(fresh=False, capture_exceptions=True)