From 6a9b25fb701315766c48151ac68f263a2b00a58f Mon Sep 17 00:00:00 2001 From: Sam Hames Date: Wed, 17 Sep 2025 12:26:05 +1000 Subject: [PATCH 1/3] Initial tests + interface changes to work with memoryviews as well as bytes objects when deserialising - this enables deserialising from mmap'ed files without copying --- pyroaring/abstract_bitmap.pxi | 20 ++++++++++++++------ test.py | 27 +++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/pyroaring/abstract_bitmap.pxi b/pyroaring/abstract_bitmap.pxi index 9608703..39b9e4e 100644 --- a/pyroaring/abstract_bitmap.pxi +++ b/pyroaring/abstract_bitmap.pxi @@ -12,11 +12,16 @@ try: except NameError: # python 3 pass -cdef croaring.roaring_bitmap_t *deserialize_ptr(bytes buff): + +cdef croaring.roaring_bitmap_t *deserialize_ptr(const unsigned char[:] buff): cdef croaring.roaring_bitmap_t *ptr cdef const char *reason_failure = NULL + + cdef char* buffer_ptr = &buff[0] + buff_size = len(buff) - ptr = croaring.roaring_bitmap_portable_deserialize_safe(buff, buff_size) + ptr = croaring.roaring_bitmap_portable_deserialize_safe(buffer_ptr, buff_size) + if ptr == NULL: raise ValueError("Could not deserialize bitmap") # Validate the bitmap @@ -26,11 +31,14 @@ cdef croaring.roaring_bitmap_t *deserialize_ptr(bytes buff): raise ValueError(f"Invalid bitmap after deserialization: {reason_failure.decode('utf-8')}") return ptr -cdef croaring.roaring64_bitmap_t *deserialize64_ptr(bytes buff): +cdef croaring.roaring64_bitmap_t *deserialize64_ptr(const unsigned char[:] buff): cdef croaring.roaring64_bitmap_t *ptr cdef const char *reason_failure = NULL + + cdef char* buffer_ptr = &buff[0] + buff_size = len(buff) - ptr = croaring.roaring64_bitmap_portable_deserialize_safe(buff, buff_size) + ptr = croaring.roaring64_bitmap_portable_deserialize_safe(buffer_ptr, buff_size) if ptr == NULL: raise ValueError("Could not deserialize bitmap") # Validate the bitmap @@ -760,7 +768,7 @@ cdef class AbstractBitMap: @classmethod - def deserialize(cls, bytes buff): + def deserialize(cls, const unsigned char[:] buff): """ Generate a bitmap from the given serialization. See AbstractBitMap.serialize for the reverse operation. @@ -1221,7 +1229,7 @@ cdef class AbstractBitMap64: @classmethod - def deserialize(cls, bytes buff): + def deserialize(cls, const unsigned char[:] buff): """ Generate a bitmap from the given serialization. See AbstractBitMap64.serialize for the reverse operation. diff --git a/test.py b/test.py index e57369d..ec53851 100755 --- a/test.py +++ b/test.py @@ -874,6 +874,33 @@ def test_serialization( assert isinstance(new_bm, cls2) self.assert_is_not(old_bm, new_bm) + @given(bitmap_cls, bitmap_cls, hyp_many_collections) + def test_deserialization_from_memoryview( + self, + cls1: type[EitherBitMap], + cls2: type[EitherBitMap], + values: list[HypCollection] + ) -> None: + old_bms = [cls1(vals) for vals in values] + + # Create a memoryview with all of the items concatenated into a single bytes + # object. + serialized = [bm.serialize() for bm in old_bms] + sizes = [len(ser) for ser in serialized] + starts = [0] + for s in sizes: + starts.append(s + starts[-1]) + + combined = b''.join(serialized) + mv = memoryview(combined) + + new_bms = [cls2.deserialize(mv[start: start + size])for start, size in zip(starts, sizes)] + + for old_bm, new_bm in zip(old_bms, new_bms): + assert old_bm == new_bm + assert isinstance(new_bm, cls2) + self.assert_is_not(old_bm, new_bm) + @given(bitmap_cls, hyp_collection, st.integers(min_value=2, max_value=pickle.HIGHEST_PROTOCOL)) def test_pickle_protocol( self, From 47603b08b0e14a1c12e94ce2c0bc6a53b04d33a9 Mon Sep 17 00:00:00 2001 From: Sam Hames Date: Wed, 17 Sep 2025 13:21:50 +1000 Subject: [PATCH 2/3] Test readable and writable memoryviews (bytes and bytearray respectively) --- test.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test.py b/test.py index ec53851..abd4802 100755 --- a/test.py +++ b/test.py @@ -892,14 +892,14 @@ def test_deserialization_from_memoryview( starts.append(s + starts[-1]) combined = b''.join(serialized) - mv = memoryview(combined) - - new_bms = [cls2.deserialize(mv[start: start + size])for start, size in zip(starts, sizes)] - - for old_bm, new_bm in zip(old_bms, new_bms): - assert old_bm == new_bm - assert isinstance(new_bm, cls2) - self.assert_is_not(old_bm, new_bm) + mutable_combined = bytearray(combined) + + for source in (combined, mutable_combined): + with memoryview(source) as mv: + new_bms = [cls2.deserialize(mv[start: start + size])for start, size in zip(starts, sizes)] + for old_bm, new_bm in zip(old_bms, new_bms): + assert old_bm == new_bm + assert isinstance(new_bm, cls2) @given(bitmap_cls, hyp_collection, st.integers(min_value=2, max_value=pickle.HIGHEST_PROTOCOL)) def test_pickle_protocol( From 7db1318dfd1e2a5ec1e201e8a1071f5b5496e57f Mon Sep 17 00:00:00 2001 From: Sam Hames Date: Wed, 17 Sep 2025 14:18:27 +1000 Subject: [PATCH 3/3] Update assert_is_not to restore original state so bitmaps can be reused --- test.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/test.py b/test.py index abd4802..be51da7 100755 --- a/test.py +++ b/test.py @@ -143,21 +143,37 @@ def bitmap_sample(bitmap: AbstractBitMap, size: int) -> list[int]: return [bitmap[i] for i in indices] def assert_is_not(self, bitmap1: AbstractBitMap, bitmap2: AbstractBitMap) -> None: + add1 = remove1 = add2 = remove2 = -1 if isinstance(bitmap1, BitMap): if bitmap1: - bitmap1.remove(bitmap1[0]) + remove1 = bitmap1[0] + bitmap1.remove(remove1) else: - bitmap1.add(27) + add1 = 27 + bitmap1.add(add1) elif isinstance(bitmap2, BitMap): if bitmap2: - bitmap2.remove(bitmap1[0]) + remove2 = bitmap2[0] + bitmap2.remove(remove2) else: - bitmap2.add(27) + add2 = 27 + bitmap2.add(add2) else: # The two are non-mutable, cannot do anything... return if bitmap1 == bitmap2: pytest.fail( 'The two bitmaps are identical (modifying one also modifies the other).') + # Restore the bitmaps to their original point + else: + if add1 >= 0: + bitmap1.remove(add1) + if remove1 >= 0: + bitmap1.add(remove1) + if add2 >= 0: + bitmap2.remove(add2) + if remove2 >= 0: + bitmap2.add(remove2) + class TestBasic(Util): @@ -900,6 +916,7 @@ def test_deserialization_from_memoryview( for old_bm, new_bm in zip(old_bms, new_bms): assert old_bm == new_bm assert isinstance(new_bm, cls2) + self.assert_is_not(old_bm, new_bm) @given(bitmap_cls, hyp_collection, st.integers(min_value=2, max_value=pickle.HIGHEST_PROTOCOL)) def test_pickle_protocol(