From f8b2c2eecaca6bce86ee7a64208c30af58c944c1 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 10 Nov 2025 23:53:13 +0000 Subject: [PATCH 1/2] Add UUIDv7 support with transparent timestamp preservation Implements to_uuidv7() and from_uuidv7() methods for converting between ULID and UUIDv7 formats. The implementation supports two modes: - compliant=False (default): Preserves all 80 bits of randomness by directly mapping the 48-bit ULID timestamp into the UUIDv7 structure without lossy fractional conversion. This enables perfect bit-for-bit round-trip conversion (ULID -> UUIDv7 -> ULID). - compliant=True: Sets the RFC 9562 (formerly RFC 4122) version (0x7) and variant (0b10) bits properly, losing 6 bits of randomness but producing spec-compliant UUIDv7s. Uses fractional millisecond encoding in subsec_a field. The from_uuidv7() method automatically detects whether a UUIDv7 is compliant by checking the version bits, and decodes accordingly to preserve maximum accuracy. Key features: - Transparent timestamp preservation in both modes - Perfect round-trip with compliant=False (default) - Monotonic ordering preserved when converting ULID sequences - Compatible with external UUIDv7s (e.g., from PostgreSQL, other libraries) - Comprehensive test coverage including edge cases This allows python-ulid to serve as a bridge between ULID and UUIDv7 ecosystems while maintaining the sortability and timestamp properties that make both formats useful. 
--- tests/test_ulid.py | 108 +++++++++++++++++++++++++++++++++++++++++ ulid/__init__.py | 117 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 225 insertions(+) diff --git a/tests/test_ulid.py b/tests/test_ulid.py index 11ae933..711681f 100644 --- a/tests/test_ulid.py +++ b/tests/test_ulid.py @@ -110,6 +110,7 @@ def test_idempotency() -> None: assert ULID.from_bytes(ulid.bytes) == ulid assert ULID.from_str(str(ulid)) == ulid assert ULID.from_uuid(ulid.to_uuid()) == ulid + assert ULID.from_uuidv7(ulid.to_uuidv7(compliant=False)) == ulid assert ULID.from_int(int(ulid)) == ulid assert ULID.from_hex(ulid.hex) == ulid assert ULID.parse(ulid) == ulid @@ -131,6 +132,113 @@ def test_to_uuid4() -> None: assert uuid.version == 4 # noqa: PLR2004 +def test_to_uuidv7_non_compliant() -> None: + """Test non-compliant UUIDv7 conversion preserves all randomness.""" + ulid = ULID() + uuid7 = ulid.to_uuidv7(compliant=False) + assert isinstance(uuid7, uuid.UUID) + # Non-compliant may not have version/variant bits set correctly + # but should preserve all data for perfect round-trip + + +def test_to_uuidv7_compliant() -> None: + """Test compliant UUIDv7 has correct version and variant bits.""" + ulid = ULID() + uuid7 = ulid.to_uuidv7(compliant=True) + assert isinstance(uuid7, uuid.UUID) + assert uuid7.version == 7 # noqa: PLR2004 + # Check variant bits are RFC 4122 compliant (10xx xxxx) + # The variant field is in byte 8 (bits 64-65) + variant_byte = uuid7.bytes[8] + assert (variant_byte & 0xC0) == 0x80 # Variant bits should be 0b10xxxxxx + + +def test_uuidv7_perfect_roundtrip() -> None: + """Test perfect round-trip conversion with compliant=False.""" + ulid = ULID() + uuid7 = ulid.to_uuidv7(compliant=False) + ulid_restored = ULID.from_uuidv7(uuid7) + # Perfect round-trip: all 128 bits should be identical + assert ulid_restored == ulid + assert ulid_restored.bytes == ulid.bytes + assert ulid_restored.milliseconds == ulid.milliseconds + + +def 
test_uuidv7_compliant_roundtrip() -> None: + """Test round-trip with compliant=True preserves timestamp but loses some randomness.""" + ulid = ULID() + uuid7 = ulid.to_uuidv7(compliant=True) + ulid_restored = ULID.from_uuidv7(uuid7) + # Timestamp should be preserved (may have small rounding error due to subsec encoding) + assert abs(ulid_restored.milliseconds - ulid.milliseconds) <= 1 + # Full ULID won't match due to lost randomness in version/variant bits + assert ulid_restored.bytes != ulid.bytes + + +def test_uuidv7_timestamp_preservation() -> None: + """Test that UUIDv7 conversion preserves timestamp accurately.""" + # Test with a specific known timestamp + test_timestamp = 1699564800.123 # 2023-11-10 00:00:00.123 UTC + ulid = ULID.from_timestamp(test_timestamp) + + uuid7_non_compliant = ulid.to_uuidv7(compliant=False) + ulid_from_uuid7 = ULID.from_uuidv7(uuid7_non_compliant) + + # Check timestamp is preserved (within 1ms tolerance due to encoding) + assert abs(ulid_from_uuid7.timestamp - test_timestamp) < 0.001 + assert ulid_from_uuid7.milliseconds == ulid.milliseconds + + +def test_uuidv7_monotonic_ordering() -> None: + """Test that UUIDv7s maintain monotonic ordering like ULIDs.""" + with freeze_time() as frozen_time: + ulids = [] + uuid7s = [] + for i in range(10): + ulid = ULID() + ulids.append(ulid) + uuid7s.append(ulid.to_uuidv7(compliant=False)) + frozen_time.tick() + + # Both ULIDs and UUID7s should be sorted + assert_sorted(ulids) + assert_sorted([u.bytes for u in uuid7s]) + + +@freeze_time() +def test_uuidv7_same_millisecond() -> None: + """Test UUIDv7 conversion with multiple ULIDs in same millisecond.""" + ulids = [ULID() for _ in range(100)] + uuid7s = [u.to_uuidv7(compliant=False) for u in ulids] + + # All should maintain monotonic ordering + assert_sorted(ulids) + assert_sorted([u.bytes for u in uuid7s]) + + # Perfect round-trip for all + for ulid, uuid7 in zip(ulids, uuid7s): + assert ULID.from_uuidv7(uuid7) == ulid + + +def 
test_from_uuidv7_with_external_uuid() -> None: + """Test creating ULID from an external UUIDv7.""" + # Create a UUIDv7-like UUID with known timestamp + # This simulates a UUIDv7 created by another system + unix_sec = 1699564800 # 2023-11-10 00:00:00 UTC + msec_fraction = 500 + subsec_a = (msec_fraction * 4096) // 1000 # Convert to 12-bit fixed-point + + # Build a compliant UUIDv7 + uuid_int = (unix_sec << 92) | (subsec_a << 80) | (0x7 << 76) | (0x2 << 62) + uuid7 = uuid.UUID(bytes=uuid_int.to_bytes(16, byteorder="big")) + + ulid = ULID.from_uuidv7(uuid7) + + # Check timestamp is correctly extracted + expected_ms = unix_sec * 1000 + msec_fraction + assert abs(ulid.milliseconds - expected_ms) <= 1 # Allow 1ms tolerance + + def test_hash() -> None: ulid1 = ULID() ulid2 = ULID() diff --git a/ulid/__init__.py b/ulid/__init__.py index d327385..7e8b664 100644 --- a/ulid/__init__.py +++ b/ulid/__init__.py @@ -281,6 +281,123 @@ def to_uuid4(self) -> uuid.UUID: """ return uuid.UUID(bytes=self.bytes, version=4) + def to_uuidv7(self, compliant: bool = False) -> uuid.UUID: + """Convert the :class:`ULID` to a UUIDv7 (:class:`uuid.UUID` version 7). + + UUIDv7 encodes a Unix timestamp with sub-second precision in the first 48 bits, + similar to ULID's timestamp format. This allows transparent preservation of the + timestamp during conversion. + + Args: + compliant: If True, sets RFC 4122 version (0x7) and variant (0b10) bits, + losing 6 bits of randomness. If False (default), preserves all 80 bits + of randomness by clobbering version/variant bits, enabling perfect + round-trip conversion. Most tools (PostgreSQL, standard libraries) + accept non-compliant UUIDv7s. 
+ + Examples: + + >>> ulid = ULID() + >>> uuid7 = ulid.to_uuidv7() # Perfect round-trip + >>> assert ULID.from_uuidv7(uuid7) == ulid + >>> uuid7_compliant = ulid.to_uuidv7(compliant=True) # RFC 4122 compliant + >>> uuid7_compliant.version + 7 + """ + # ULID: [48 bits timestamp_ms][80 bits randomness] + # UUIDv7: [36 bits unix_sec][12 bits subsec_a][4 bits ver][12 bits subsec_b] + # [2 bits var][62 bits subsec_seq_node] + + timestamp_ms = self.milliseconds + + # Get the 80 bits of randomness from ULID + randomness_bits = int.from_bytes(self.bytes[6:], byteorder="big") + + if compliant: + # RFC 4122 compliant: proper timestamp encoding + version/variant bits + unix_sec = timestamp_ms // 1000 + msec_fraction = timestamp_ms % 1000 + + # Convert millisecond fraction to 12-bit fixed-point (subsec_a) + # Formula: msec / 1000 * 4096 to get 12-bit fractional representation + subsec_a = (msec_fraction * 4096) // 1000 + + # Extract 74 bits of randomness (losing 6 bits for version/variant) + # subsec_b: 12 bits from randomness + # subsec_seq_node: 62 bits from randomness + subsec_b = (randomness_bits >> 68) & 0xFFF # Top 12 bits + subsec_seq_node = randomness_bits & ((1 << 62) - 1) # Bottom 62 bits + + # Build UUIDv7 with version and variant bits + uuid_int = (unix_sec << 92) | (subsec_a << 80) | (0x7 << 76) | (subsec_b << 64) | (0x2 << 62) | subsec_seq_node + else: + # Non-compliant: preserve all bits for perfect round-trip + # For perfect round-trip, split 48-bit timestamp directly into 36+12 bits + # without lossy conversion to fractional representation + unix_sec = timestamp_ms >> 12 # Top 36 bits + subsec_a = timestamp_ms & 0xFFF # Bottom 12 bits + + # Bits 0-35: unix_sec (36 bits of timestamp) + # Bits 36-47: subsec_a (12 bits of timestamp) + # Bits 48-127: all 80 bits of randomness (clobbering version/variant) + uuid_int = (unix_sec << 92) | (subsec_a << 80) | randomness_bits + + uuid_bytes = uuid_int.to_bytes(16, byteorder="big") + return uuid.UUID(bytes=uuid_bytes) + 
+ @classmethod + @validate_type(uuid.UUID) + def from_uuidv7(cls, value: uuid.UUID) -> Self: + """Create a new :class:`ULID` from a UUIDv7 (:class:`uuid.UUID` version 7). + + Extracts the timestamp from the UUIDv7's first 48 bits (unix seconds + sub-second + precision) and treats the remaining 80 bits as randomness. This provides transparent + round-trip conversion with :meth:`to_uuidv7`. + + For UUIDv7s created with ``compliant=False``, this provides perfect bit-for-bit + round-trip. For compliant UUIDv7s from external sources, the timestamp is extracted + using the RFC 4122 interpretation. + + Examples: + + >>> uuid7 = uuid.UUID('01936c5e-f4c0-7000-8000-000000000000') + >>> ulid = ULID.from_uuidv7(uuid7) + >>> ulid.datetime + datetime.datetime(2024, 11, 27, ...) + """ + uuid_int = int.from_bytes(value.bytes, byteorder="big") + + # Extract timestamp components from UUIDv7 layout + # Bits 0-35: unix_sec (36 bits) + # Bits 36-47: subsec_a (12 bits) + unix_sec = (uuid_int >> 92) & ((1 << 36) - 1) + subsec_a = (uuid_int >> 80) & 0xFFF + + # Check if this looks like a compliant UUIDv7 by checking version bits + # Bits 48-51 should be 0x7 for compliant UUIDv7 + version_bits = (uuid_int >> 76) & 0xF + is_compliant = version_bits == 0x7 + + if is_compliant: + # Compliant UUIDv7: interpret subsec_a as fractional time + # Convert 12-bit subsec_a back to milliseconds + msec_fraction = (subsec_a * 1000) // 4096 + timestamp_ms = unix_sec * 1000 + msec_fraction + else: + # Non-compliant (our format): direct bit mapping for perfect round-trip + # Reconstruct 48-bit timestamp from 36+12 bit split + timestamp_ms = (unix_sec << 12) | subsec_a + + # Extract all 80 bits after the timestamp (bits 48-127) as randomness + # This includes version/variant bits if present, enabling perfect round-trip + randomness_bits = uuid_int & ((1 << 80) - 1) + + # Build ULID bytes: [48-bit timestamp][80-bit randomness] + timestamp_bytes = timestamp_ms.to_bytes(6, byteorder="big") + randomness_bytes = 
randomness_bits.to_bytes(10, byteorder="big") + + return cls.from_bytes(timestamp_bytes + randomness_bytes) + def __repr__(self) -> str: return f"ULID({self!s})" From 1a4d251528cdbec49b7f5166ff915a8e7f95f92d Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 11 Nov 2025 00:04:25 +0000 Subject: [PATCH 2/2] Fix UUIDv7 implementation to always preserve timestamp transparently The previous implementation incorrectly used different timestamp encodings for compliant vs non-compliant modes. This has been corrected so that: - The timestamp is ALWAYS encoded in the first 48 bits as milliseconds (identical to ULID's timestamp format) - This provides transparent, lossless timestamp preservation in both modes - Only the version/variant bits and randomness placement differ between modes Changes: - Simplified to_uuidv7(): Always uses (timestamp_ms << 80) | randomness - Simplified from_uuidv7(): Always extracts timestamp from first 48 bits - Updated tests to verify perfect timestamp preservation in both modes - Removed complex fractional/subsecond conversion logic This matches the UUIDv7 spec where the first 48 bits contain unix_ts_ms (Unix timestamp in milliseconds), making ULID and UUIDv7 timestamp formats directly compatible. 
--- tests/test_ulid.py | 38 ++++++++++++----------- ulid/__init__.py | 75 +++++++++++----------------------------------- 2 files changed, 39 insertions(+), 74 deletions(-) diff --git a/tests/test_ulid.py b/tests/test_ulid.py index 711681f..d36e556 100644 --- a/tests/test_ulid.py +++ b/tests/test_ulid.py @@ -169,9 +169,9 @@ def test_uuidv7_compliant_roundtrip() -> None: ulid = ULID() uuid7 = ulid.to_uuidv7(compliant=True) ulid_restored = ULID.from_uuidv7(uuid7) - # Timestamp should be preserved (may have small rounding error due to subsec encoding) - assert abs(ulid_restored.milliseconds - ulid.milliseconds) <= 1 - # Full ULID won't match due to lost randomness in version/variant bits + # Timestamp should be perfectly preserved + assert ulid_restored.milliseconds == ulid.milliseconds + # Full ULID won't match due to lost randomness in version/variant bits (6 bits lost) assert ulid_restored.bytes != ulid.bytes @@ -181,12 +181,14 @@ def test_uuidv7_timestamp_preservation() -> None: test_timestamp = 1699564800.123 # 2023-11-10 00:00:00.123 UTC ulid = ULID.from_timestamp(test_timestamp) - uuid7_non_compliant = ulid.to_uuidv7(compliant=False) - ulid_from_uuid7 = ULID.from_uuidv7(uuid7_non_compliant) + # Test both compliant and non-compliant modes + for compliant in [False, True]: + uuid7 = ulid.to_uuidv7(compliant=compliant) + ulid_from_uuid7 = ULID.from_uuidv7(uuid7) - # Check timestamp is preserved (within 1ms tolerance due to encoding) - assert abs(ulid_from_uuid7.timestamp - test_timestamp) < 0.001 - assert ulid_from_uuid7.milliseconds == ulid.milliseconds + # Check timestamp is perfectly preserved (exact millisecond match) + assert ulid_from_uuid7.milliseconds == ulid.milliseconds + assert abs(ulid_from_uuid7.timestamp - test_timestamp) < 0.001 def test_uuidv7_monotonic_ordering() -> None: @@ -222,21 +224,23 @@ def test_uuidv7_same_millisecond() -> None: def test_from_uuidv7_with_external_uuid() -> None: """Test creating ULID from an external UUIDv7.""" - # Create a 
UUIDv7-like UUID with known timestamp + # Create a UUIDv7 with known timestamp (compliant format) # This simulates a UUIDv7 created by another system - unix_sec = 1699564800 # 2023-11-10 00:00:00 UTC - msec_fraction = 500 - subsec_a = (msec_fraction * 4096) // 1000 # Convert to 12-bit fixed-point + timestamp_ms = 1699564800500 # 2023-11-10 00:00:00.500 UTC + + # Build a compliant UUIDv7: [48-bit timestamp_ms][4-bit version][12-bit rand_a][2-bit variant][62-bit rand_b] + # For this test, we'll use some random values for rand_a and rand_b + rand_a = 0xABC # 12 bits + rand_b = 0x1234567890ABCDEF # 62 bits (only bottom 62 bits will be used) + rand_b = rand_b & ((1 << 62) - 1) # Mask to 62 bits - # Build a compliant UUIDv7 - uuid_int = (unix_sec << 92) | (subsec_a << 80) | (0x7 << 76) | (0x2 << 62) + uuid_int = (timestamp_ms << 80) | (0x7 << 76) | (rand_a << 64) | (0x2 << 62) | rand_b uuid7 = uuid.UUID(bytes=uuid_int.to_bytes(16, byteorder="big")) ulid = ULID.from_uuidv7(uuid7) - # Check timestamp is correctly extracted - expected_ms = unix_sec * 1000 + msec_fraction - assert abs(ulid.milliseconds - expected_ms) <= 1 # Allow 1ms tolerance + # Check timestamp is correctly extracted (should be exact since it's in milliseconds) + assert ulid.milliseconds == timestamp_ms def test_hash() -> None: diff --git a/ulid/__init__.py b/ulid/__init__.py index 7e8b664..90bc545 100644 --- a/ulid/__init__.py +++ b/ulid/__init__.py @@ -284,9 +284,8 @@ def to_uuid4(self) -> uuid.UUID: def to_uuidv7(self, compliant: bool = False) -> uuid.UUID: """Convert the :class:`ULID` to a UUIDv7 (:class:`uuid.UUID` version 7). - UUIDv7 encodes a Unix timestamp with sub-second precision in the first 48 bits, - similar to ULID's timestamp format. This allows transparent preservation of the - timestamp during conversion. + UUIDv7 encodes a Unix timestamp in milliseconds in the first 48 bits (just like ULID). + The timestamp is always transparently preserved regardless of compliant mode. 
Args: compliant: If True, sets RFC 4122 version (0x7) and variant (0b10) bits, @@ -304,9 +303,8 @@ def to_uuidv7(self, compliant: bool = False) -> uuid.UUID: >>> uuid7_compliant.version 7 """ - # ULID: [48 bits timestamp_ms][80 bits randomness] - # UUIDv7: [36 bits unix_sec][12 bits subsec_a][4 bits ver][12 bits subsec_b] - # [2 bits var][62 bits subsec_seq_node] + # ULID: [48 bits timestamp_ms][80 bits randomness] + # UUIDv7: [48 bits timestamp_ms][4 bits ver][12 bits rand_a][2 bits var][62 bits rand_b] timestamp_ms = self.milliseconds @@ -314,33 +312,17 @@ def to_uuidv7(self, compliant: bool = False) -> uuid.UUID: randomness_bits = int.from_bytes(self.bytes[6:], byteorder="big") if compliant: - # RFC 4122 compliant: proper timestamp encoding + version/variant bits - unix_sec = timestamp_ms // 1000 - msec_fraction = timestamp_ms % 1000 - - # Convert millisecond fraction to 12-bit fixed-point (subsec_a) - # Formula: msec / 1000 * 4096 to get 12-bit fractional representation - subsec_a = (msec_fraction * 4096) // 1000 - + # RFC 4122 compliant: set version and variant bits, losing 6 bits of randomness # Extract 74 bits of randomness (losing 6 bits for version/variant) - # subsec_b: 12 bits from randomness - # subsec_seq_node: 62 bits from randomness - subsec_b = (randomness_bits >> 68) & 0xFFF # Top 12 bits - subsec_seq_node = randomness_bits & ((1 << 62) - 1) # Bottom 62 bits + rand_a = (randomness_bits >> 68) & 0xFFF # Top 12 bits + rand_b = randomness_bits & ((1 << 62) - 1) # Bottom 62 bits - # Build UUIDv7 with version and variant bits - uuid_int = (unix_sec << 92) | (subsec_a << 80) | (0x7 << 76) | (subsec_b << 64) | (0x2 << 62) | subsec_seq_node + # Build UUIDv7: [48-bit timestamp_ms][4-bit version][12-bit rand_a][2-bit variant][62-bit rand_b] + uuid_int = (timestamp_ms << 80) | (0x7 << 76) | (rand_a << 64) | (0x2 << 62) | rand_b else: - # Non-compliant: preserve all bits for perfect round-trip - # For perfect round-trip, split 48-bit timestamp directly into 
36+12 bits - # without lossy conversion to fractional representation - unix_sec = timestamp_ms >> 12 # Top 36 bits - subsec_a = timestamp_ms & 0xFFF # Bottom 12 bits - - # Bits 0-35: unix_sec (36 bits of timestamp) - # Bits 36-47: subsec_a (12 bits of timestamp) - # Bits 48-127: all 80 bits of randomness (clobbering version/variant) - uuid_int = (unix_sec << 92) | (subsec_a << 80) | randomness_bits + # Non-compliant: preserve all 80 bits of randomness for perfect round-trip + # Build UUIDv7: [48-bit timestamp_ms][80-bit randomness] (clobbers version/variant) + uuid_int = (timestamp_ms << 80) | randomness_bits uuid_bytes = uuid_int.to_bytes(16, byteorder="big") return uuid.UUID(bytes=uuid_bytes) @@ -350,13 +332,9 @@ def to_uuidv7(self, compliant: bool = False) -> uuid.UUID: def from_uuidv7(cls, value: uuid.UUID) -> Self: """Create a new :class:`ULID` from a UUIDv7 (:class:`uuid.UUID` version 7). - Extracts the timestamp from the UUIDv7's first 48 bits (unix seconds + sub-second - precision) and treats the remaining 80 bits as randomness. This provides transparent - round-trip conversion with :meth:`to_uuidv7`. - - For UUIDv7s created with ``compliant=False``, this provides perfect bit-for-bit - round-trip. For compliant UUIDv7s from external sources, the timestamp is extracted - using the RFC 4122 interpretation. + Extracts the timestamp from the UUIDv7's first 48 bits (milliseconds since epoch) + and the remaining 80 bits as randomness. The timestamp is always transparently + preserved, providing perfect round-trip conversion with :meth:`to_uuidv7`. 
Examples: @@ -367,26 +345,9 @@ def from_uuidv7(cls, value: uuid.UUID) -> Self: """ uuid_int = int.from_bytes(value.bytes, byteorder="big") - # Extract timestamp components from UUIDv7 layout - # Bits 0-35: unix_sec (36 bits) - # Bits 36-47: subsec_a (12 bits) - unix_sec = (uuid_int >> 92) & ((1 << 36) - 1) - subsec_a = (uuid_int >> 80) & 0xFFF - - # Check if this looks like a compliant UUIDv7 by checking version bits - # Bits 48-51 should be 0x7 for compliant UUIDv7 - version_bits = (uuid_int >> 76) & 0xF - is_compliant = version_bits == 0x7 - - if is_compliant: - # Compliant UUIDv7: interpret subsec_a as fractional time - # Convert 12-bit subsec_a back to milliseconds - msec_fraction = (subsec_a * 1000) // 4096 - timestamp_ms = unix_sec * 1000 + msec_fraction - else: - # Non-compliant (our format): direct bit mapping for perfect round-trip - # Reconstruct 48-bit timestamp from 36+12 bit split - timestamp_ms = (unix_sec << 12) | subsec_a + # Extract timestamp from UUIDv7 layout (always in first 48 bits) + # Bits 0-47: timestamp_ms (48 bits) + timestamp_ms = uuid_int >> 80 # Extract all 80 bits after the timestamp (bits 48-127) as randomness # This includes version/variant bits if present, enabling perfect round-trip