Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/green-ravens-validate.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'pypi/posthog': patch
---

Validate custom event UUIDs before sending events.
12 changes: 9 additions & 3 deletions posthog/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,9 @@ def capture(event: str, **kwargs: Unpack[OptionalCaptureArgs]) -> Optional[str]:
properties: Dict of event properties
timestamp: When the event occurred
uuid: Unique identifier for this event. If omitted, one is generated
and returned.
and returned. If provided, it must be a valid UUID string or
uuid.UUID instance; invalid values are ignored and replaced with
a newly generated UUID.
groups: Dict of group types and IDs
flags: A FeatureFlagEvaluations snapshot from evaluate_flags(). The
exact values from the snapshot are attached with no extra /flags
Expand Down Expand Up @@ -441,7 +443,9 @@ def set(**kwargs: Unpack[OptionalSetArgs]) -> Optional[str]:
properties: Dict of person properties to set.
timestamp: When the properties were set.
uuid: Unique identifier for this operation. If omitted, one is
generated and returned.
generated and returned. If provided, it must be a valid UUID
string or uuid.UUID instance; invalid values are ignored and
replaced with a newly generated UUID.
disable_geoip: Whether to disable GeoIP lookup.

Details:
Expand Down Expand Up @@ -471,7 +475,9 @@ def set_once(**kwargs: Unpack[OptionalSetArgs]) -> Optional[str]:
properties: Dict of person properties to set only once.
timestamp: When the properties were set.
uuid: Unique identifier for this operation. If omitted, one is
generated and returned.
generated and returned. If provided, it must be a valid UUID
string or uuid.UUID instance; invalid values are ignored and
replaced with a newly generated UUID.
disable_geoip: Whether to disable GeoIP lookup.

Details:
Expand Down
11 changes: 7 additions & 4 deletions posthog/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ class OptionalCaptureArgs(TypedDict):
timestamp: When the event occurred (defaults to current time)
uuid: Unique identifier for this specific event. If not provided, one is generated. The event
UUID is returned, so you can correlate it with actions in your app (like showing users an
error ID if you capture an exception).
error ID if you capture an exception). If provided, it must be a valid UUID string or
uuid.UUID instance; invalid values are ignored and replaced with a newly generated UUID.
groups: Group identifiers to associate with this event (format: {group_type: group_key})
flags: A ``FeatureFlagEvaluations`` snapshot from ``evaluate_flags()``. The exact flag
values from the snapshot are attached to the event with no additional network call —
Expand All @@ -40,7 +41,7 @@ class OptionalCaptureArgs(TypedDict):
distinct_id: NotRequired[Optional[ID_TYPES]]
properties: NotRequired[Optional[Dict[str, Any]]]
timestamp: NotRequired[Optional[Union[datetime, str]]]
uuid: NotRequired[Optional[str]]
uuid: NotRequired[Optional[Union[str, UUID]]]
groups: NotRequired[Optional[Dict[str, str]]]
flags: NotRequired[Optional["FeatureFlagEvaluations"]]
send_feature_flags: NotRequired[
Expand All @@ -61,14 +62,16 @@ class OptionalSetArgs(TypedDict):
properties: Dictionary of properties to set on the person
timestamp: When the properties were set (defaults to current time)
uuid: Unique identifier for this operation. If not provided, one is generated. This
UUID is returned, so you can correlate it with actions in your app.
UUID is returned, so you can correlate it with actions in your app. If provided,
it must be a valid UUID string or uuid.UUID instance; invalid values are ignored
and replaced with a newly generated UUID.
disable_geoip: Whether to disable GeoIP lookup for this operation. Defaults to False.
"""

distinct_id: NotRequired[Optional[ID_TYPES]]
properties: NotRequired[Optional[Dict[str, Any]]]
timestamp: NotRequired[Optional[Union[datetime, str]]]
uuid: NotRequired[Optional[str]]
uuid: NotRequired[Optional[Union[str, UUID]]]
disable_geoip: NotRequired[Optional[bool]]


Expand Down
51 changes: 42 additions & 9 deletions posthog/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import weakref
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Optional, Union
from uuid import uuid4
from uuid import UUID, uuid4

from typing_extensions import Unpack

Expand Down Expand Up @@ -113,6 +113,26 @@ def get_identity_state(passed) -> tuple[str, bool]:
return (str(uuid4()), True)


def _stringify_event_uuid(value) -> str:
    """Validate a caller-supplied event UUID and return its canonical string.

    Args:
        value: A ``uuid.UUID`` instance, or any value whose ``stringify_id``
            result can be parsed by ``uuid.UUID``.

    Returns:
        The UUID in canonical form (lowercase hex, hyphenated), as produced
        by ``str(uuid.UUID(...))``.

    Raises:
        ValueError: If ``value`` stringifies to something falsy (``None`` or
            an empty string) or to text that ``uuid.UUID`` cannot parse.
    """
    if isinstance(value, UUID):
        return str(value)

    # Built once so both failure paths report exactly the same text.
    message = (
        f"Invalid event uuid {value!r}. "
        "Expected a valid UUID string or uuid.UUID instance."
    )

    stringified = stringify_id(value)
    if not stringified:
        raise ValueError(message)

    try:
        parsed = UUID(stringified)
    except ValueError:
        # Hide the parser's own message; the one above names the offending value.
        raise ValueError(message) from None

    # uuid.UUID also accepts braces, a "urn:uuid:" prefix, missing hyphens and
    # uppercase hex. Return the parsed value's string form rather than echoing
    # the input so the uuid that is sent is always in canonical form.
    return str(parsed)


def add_context_tags(properties):
properties = properties or {}
current_context = _get_current_context()
Expand Down Expand Up @@ -728,7 +748,9 @@ def capture(
distinct_id: The distinct ID of the user.
properties: A dictionary of properties to include with the event.
timestamp: The timestamp of the event.
uuid: A unique identifier for the event.
uuid: A unique identifier for the event. If provided, it must be a
valid UUID string or uuid.UUID instance; invalid values are
ignored and replaced with a newly generated UUID.
groups: A dictionary of group information.
flags: A FeatureFlagEvaluations snapshot from evaluate_flags(). The
exact values from the snapshot are attached with no extra /flags
Expand Down Expand Up @@ -941,7 +963,9 @@ def set(self, **kwargs: Unpack[OptionalSetArgs]) -> Optional[str]:
distinct_id: The distinct ID of the user.
properties: A dictionary of properties to set.
timestamp: The timestamp of the event.
uuid: A unique identifier for the event.
uuid: A unique identifier for the event. If provided, it must be a
valid UUID string or uuid.UUID instance; invalid values are
ignored and replaced with a newly generated UUID.
disable_geoip: Whether to disable GeoIP for this event.

Examples:
Expand Down Expand Up @@ -989,7 +1013,9 @@ def set_once(self, **kwargs: Unpack[OptionalSetArgs]) -> Optional[str]:
distinct_id: The distinct ID of the user.
properties: A dictionary of properties to set once.
timestamp: The timestamp of the event.
uuid: A unique identifier for the event.
uuid: A unique identifier for the event. If provided, it must be a
valid UUID string or uuid.UUID instance; invalid values are
ignored and replaced with a newly generated UUID.
disable_geoip: Whether to disable GeoIP for this event.

Examples:
Expand Down Expand Up @@ -1033,7 +1059,7 @@ def group_identify(
group_key: str,
properties: Optional[Dict[str, Any]] = None,
timestamp: Optional[Union[datetime, str]] = None,
uuid: Optional[str] = None,
uuid: Optional[Union[str, UUID]] = None,
disable_geoip: Optional[bool] = None,
distinct_id: Optional[ID_TYPES] = None,
) -> Optional[str]:
Expand All @@ -1045,7 +1071,9 @@ def group_identify(
group_key: The unique identifier for the group.
properties: A dictionary of properties to set on the group.
timestamp: The timestamp of the event.
uuid: A unique identifier for the event.
uuid: A unique identifier for the event. If provided, it must be a
valid UUID string or uuid.UUID instance; invalid values are
ignored and replaced with a newly generated UUID.
disable_geoip: Whether to disable GeoIP for this event.
distinct_id: The distinct ID of the user performing the action.

Expand Down Expand Up @@ -1101,7 +1129,9 @@ def alias(
previous_id: The previous distinct ID.
distinct_id: The new distinct ID to alias to.
timestamp: The timestamp of the event.
uuid: A unique identifier for the event.
uuid: A unique identifier for the event. If provided, it must be a
valid UUID string or uuid.UUID instance; invalid values are
ignored and replaced with a newly generated UUID.
disable_geoip: Whether to disable GeoIP for this event.

Examples:
Expand Down Expand Up @@ -1348,8 +1378,11 @@ def _enqueue(self, msg, disable_geoip):

if "uuid" in msg:
uuid = msg.pop("uuid")
if uuid:
msg["uuid"] = stringify_id(uuid)
if uuid is not None:
try:
msg["uuid"] = _stringify_event_uuid(uuid)
except ValueError as e:
self.log.error("%s Falling back to a generated UUID.", e)

if "uuid" not in msg:
# Always send a uuid, so we can always return one
Expand Down
95 changes: 81 additions & 14 deletions posthog/test/test_client.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import time
import unittest
from datetime import datetime
from uuid import uuid4
from uuid import UUID, uuid4

from unittest import mock
from parameterized import parameterized
Expand Down Expand Up @@ -217,6 +217,73 @@ def test_basic_capture_with_uuid(self):
self.assertEqual(msg["properties"]["$lib"], "posthog-python")
self.assertEqual(msg["properties"]["$lib_version"], VERSION)

def test_basic_capture_with_uuid_object(self):
    """A ``uuid.UUID`` passed to ``capture`` is returned and sent as its string form."""
    event_uuid = UUID("00000000-0000-4000-8000-000000000002")
    expected = str(event_uuid)

    with mock.patch("posthog.client.batch_post") as mock_post:
        client = Client(FAKE_TEST_API_KEY, on_error=self.set_fail, sync_mode=True)
        returned_uuid = client.capture(
            "python test event", distinct_id="distinct_id", uuid=event_uuid
        )
        self.assertEqual(returned_uuid, expected)
        self.assertFalse(self.failed)

        # Inspect the single message handed to the mocked batch_post.
        mock_post.assert_called_once()
        sent = mock_post.call_args[1]["batch"][0]
        self.assertEqual(sent["uuid"], expected)

@parameterized.expand(
    [
        ("empty string", ""),
        ("invalid string", "not-a-uuid"),
        ("short string", "1234"),
        ("integer", 123),
    ]
)
def test_capture_with_invalid_uuid_logs_and_falls_back_to_generated_uuid(
    self, _name, invalid_uuid
):
    """An invalid ``uuid`` is logged on the "posthog" logger and replaced.

    The returned uuid must parse as a UUID, match what was sent, and differ
    from the stringified invalid input.
    """
    # Every fragment must appear in one and the same log line.
    expected_fragments = (
        f"Invalid event uuid {invalid_uuid!r}",
        "Expected a valid UUID string or uuid.UUID instance",
        "Falling back to a generated UUID",
    )

    with mock.patch("posthog.client.batch_post") as mock_post:
        client = Client(FAKE_TEST_API_KEY, on_error=self.set_fail, sync_mode=True)
        with self.assertLogs("posthog", level="ERROR") as captured:
            returned_uuid = client.capture(
                "python test event", distinct_id="distinct_id", uuid=invalid_uuid
            )

        self.assertIsNotNone(returned_uuid)
        # Raises ValueError unless the returned value parses as a UUID.
        UUID(returned_uuid)
        mock_post.assert_called_once()
        sent = mock_post.call_args[1]["batch"][0]
        self.assertEqual(sent["uuid"], returned_uuid)
        self.assertNotEqual(sent["uuid"], str(invalid_uuid))
        self.assertTrue(
            any(
                all(fragment in line for fragment in expected_fragments)
                for line in captured.output
            )
        )

@parameterized.expand(
    [
        ("empty string", ""),
        ("invalid string", "not-a-uuid"),
        ("short string", "1234"),
        ("integer", 123),
    ]
)
def test_capture_with_invalid_uuid_falls_back_in_debug(self, _name, invalid_uuid):
    """With ``debug=True`` an invalid ``uuid`` still yields a generated UUID and one send."""
    with mock.patch("posthog.client.batch_post") as mock_post:
        debug_client = Client(FAKE_TEST_API_KEY, debug=True, sync_mode=True)
        # assertLogs fails the test if nothing is logged at ERROR or above.
        with self.assertLogs("posthog", level="ERROR"):
            returned_uuid = debug_client.capture(
                "python test event", distinct_id="distinct_id", uuid=invalid_uuid
            )

        self.assertIsNotNone(returned_uuid)
        # Raises ValueError unless the returned value parses as a UUID.
        UUID(returned_uuid)
        mock_post.assert_called_once()

def test_basic_capture_with_project_api_key(self):
with mock.patch("posthog.client.batch_post") as mock_post:
client = Client(
Expand Down Expand Up @@ -1271,10 +1338,10 @@ def test_advanced_capture(self):
distinct_id="distinct_id",
properties={"property": "value"},
timestamp=datetime(2014, 9, 3),
uuid="new-uuid",
uuid="00000000-0000-4000-8000-000000000001",
)

self.assertEqual(msg_uuid, "new-uuid")
self.assertEqual(msg_uuid, "00000000-0000-4000-8000-000000000001")

# Get the enqueued message from the mock
mock_post.assert_called_once()
Expand All @@ -1286,7 +1353,7 @@ def test_advanced_capture(self):
self.assertEqual(msg["event"], "python test event")
self.assertEqual(msg["properties"]["$lib"], "posthog-python")
self.assertEqual(msg["properties"]["$lib_version"], VERSION)
self.assertEqual(msg["uuid"], "new-uuid")
self.assertEqual(msg["uuid"], "00000000-0000-4000-8000-000000000001")
self.assertEqual(msg["distinct_id"], "distinct_id")
self.assertTrue("$groups" not in msg["properties"])

Expand Down Expand Up @@ -1337,10 +1404,10 @@ def test_advanced_set(self):
distinct_id="distinct_id",
properties={"trait": "value"},
timestamp=datetime(2014, 9, 3),
uuid="new-uuid",
uuid="00000000-0000-4000-8000-000000000001",
)

self.assertEqual(msg_uuid, "new-uuid")
self.assertEqual(msg_uuid, "00000000-0000-4000-8000-000000000001")

# Get the enqueued message from the mock
mock_post.assert_called_once()
Expand All @@ -1352,7 +1419,7 @@ def test_advanced_set(self):
self.assertEqual(msg["properties"]["$lib"], "posthog-python")
self.assertEqual(msg["properties"]["$lib_version"], VERSION)
self.assertTrue(isinstance(msg["timestamp"], str))
self.assertEqual(msg["uuid"], "new-uuid")
self.assertEqual(msg["uuid"], "00000000-0000-4000-8000-000000000001")
self.assertEqual(msg["distinct_id"], "distinct_id")

def test_basic_set_once(self):
Expand Down Expand Up @@ -1381,10 +1448,10 @@ def test_advanced_set_once(self):
distinct_id="distinct_id",
properties={"trait": "value"},
timestamp=datetime(2014, 9, 3),
uuid="new-uuid",
uuid="00000000-0000-4000-8000-000000000001",
)

self.assertEqual(msg_uuid, "new-uuid")
self.assertEqual(msg_uuid, "00000000-0000-4000-8000-000000000001")

# Get the enqueued message from the mock
mock_post.assert_called_once()
Expand All @@ -1396,7 +1463,7 @@ def test_advanced_set_once(self):
self.assertEqual(msg["properties"]["$lib"], "posthog-python")
self.assertEqual(msg["properties"]["$lib_version"], VERSION)
self.assertTrue(isinstance(msg["timestamp"], str))
self.assertEqual(msg["uuid"], "new-uuid")
self.assertEqual(msg["uuid"], "00000000-0000-4000-8000-000000000001")
self.assertEqual(msg["distinct_id"], "distinct_id")

def test_basic_group_identify(self):
Expand Down Expand Up @@ -1465,10 +1532,10 @@ def test_advanced_group_identify(self):
"id:5",
{"trait": "value"},
timestamp=datetime(2014, 9, 3),
uuid="new-uuid",
uuid="00000000-0000-4000-8000-000000000001",
)

self.assertEqual(msg_uuid, "new-uuid")
self.assertEqual(msg_uuid, "00000000-0000-4000-8000-000000000001")

# Get the enqueued message from the mock
mock_post.assert_called_once()
Expand Down Expand Up @@ -1498,11 +1565,11 @@ def test_advanced_group_identify_with_distinct_id(self):
"id:5",
{"trait": "value"},
timestamp=datetime(2014, 9, 3),
uuid="new-uuid",
uuid="00000000-0000-4000-8000-000000000001",
distinct_id="distinct_id",
)

self.assertEqual(msg_uuid, "new-uuid")
self.assertEqual(msg_uuid, "00000000-0000-4000-8000-000000000001")

# Get the enqueued message from the mock
mock_post.assert_called_once()
Expand Down
6 changes: 3 additions & 3 deletions references/public_api_snapshot.txt
Original file line number Diff line number Diff line change
Expand Up @@ -437,12 +437,12 @@ attribute posthog.args.OptionalCaptureArgs.groups: NotRequired[Optional[Dict[str
attribute posthog.args.OptionalCaptureArgs.properties: NotRequired[Optional[Dict[str, Any]]]
attribute posthog.args.OptionalCaptureArgs.send_feature_flags: NotRequired[Optional[Union[bool, SendFeatureFlagsOptions]]]
attribute posthog.args.OptionalCaptureArgs.timestamp: NotRequired[Optional[Union[datetime, str]]]
attribute posthog.args.OptionalCaptureArgs.uuid: NotRequired[Optional[str]]
attribute posthog.args.OptionalCaptureArgs.uuid: NotRequired[Optional[Union[str, UUID]]]
attribute posthog.args.OptionalSetArgs.disable_geoip: NotRequired[Optional[bool]]
attribute posthog.args.OptionalSetArgs.distinct_id: NotRequired[Optional[ID_TYPES]]
attribute posthog.args.OptionalSetArgs.properties: NotRequired[Optional[Dict[str, Any]]]
attribute posthog.args.OptionalSetArgs.timestamp: NotRequired[Optional[Union[datetime, str]]]
attribute posthog.args.OptionalSetArgs.uuid: NotRequired[Optional[str]]
attribute posthog.args.OptionalSetArgs.uuid: NotRequired[Optional[Union[str, UUID]]]
attribute posthog.before_send = None
attribute posthog.bucketed_rate_limiter.Number = Union[int, float]
attribute posthog.bucketed_rate_limiter.ONE_DAY_IN_SECONDS = 86400.0
Expand Down Expand Up @@ -1059,7 +1059,7 @@ method posthog.client.Client.get_feature_payloads(distinct_id, groups=None, pers
method posthog.client.Client.get_feature_variants(distinct_id, groups=None, person_properties=None, group_properties=None, disable_geoip=None, flag_keys_to_evaluate: Optional[list[str]] = None, device_id: Optional[str] = None) -> dict[str, Union[bool, str]]
method posthog.client.Client.get_flags_decision(distinct_id: Optional[ID_TYPES] = None, groups: Optional[dict] = None, person_properties=None, group_properties=None, disable_geoip=None, flag_keys_to_evaluate: Optional[list[str]] = None, device_id: Optional[str] = None) -> FlagsResponse
method posthog.client.Client.get_remote_config_payload(key: str)
method posthog.client.Client.group_identify(group_type: str, group_key: str, properties: Optional[Dict[str, Any]] = None, timestamp: Optional[Union[datetime, str]] = None, uuid: Optional[str] = None, disable_geoip: Optional[bool] = None, distinct_id: Optional[ID_TYPES] = None) -> Optional[str]
method posthog.client.Client.group_identify(group_type: str, group_key: str, properties: Optional[Dict[str, Any]] = None, timestamp: Optional[Union[datetime, str]] = None, uuid: Optional[Union[str, UUID]] = None, disable_geoip: Optional[bool] = None, distinct_id: Optional[ID_TYPES] = None) -> Optional[str]
method posthog.client.Client.join() -> None
method posthog.client.Client.load_feature_flags()
method posthog.client.Client.new_context(fresh=False, capture_exceptions=True)
Expand Down
Loading