diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 34f32ab1..fc33a58f 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -139,7 +139,7 @@ checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" [[package]] name = "roaring" -version = "0.11.1" +version = "0.11.2" dependencies = [ "bytemuck", "byteorder", diff --git a/fuzz/fuzz_targets/arbitrary_ops/mod.rs b/fuzz/fuzz_targets/arbitrary_ops/mod.rs index 423b1b6e..b53731d7 100644 --- a/fuzz/fuzz_targets/arbitrary_ops/mod.rs +++ b/fuzz/fuzz_targets/arbitrary_ops/mod.rs @@ -103,6 +103,8 @@ pub enum BitmapIteratorOperation { AdvanceBackTo(Num), Nth(Num), NthBack(Num), + NextRange, + NextRangeBack, } impl ReadBitmapOperation { @@ -466,6 +468,69 @@ impl<'a> CRoaringIterRange<'a> { } self.next_back() } + + fn next_range(&mut self) -> Option> { + if self.empty { + return None; + } + self.cursor.reset_at_or_after(self.start); + let range_start = self.cursor.current()?; + if range_start > self.end_inclusive { + self.empty = true; + return None; + } + let mut range_end_inclusive = range_start; + while range_end_inclusive < self.end_inclusive { + if let Some(next) = self.cursor.next() { + if next == range_end_inclusive + 1 { + range_end_inclusive = next; + continue; + } + } + break; + } + + if range_end_inclusive == self.end_inclusive { + self.empty = true; + } else { + self.start = range_end_inclusive + 1; + } + + Some(range_start..=range_end_inclusive) + } + + fn next_range_back(&mut self) -> Option> { + if self.empty { + return None; + } + self.cursor.reset_at_or_after(self.end_inclusive); + if self.cursor.current().is_none_or(|n| n > self.end_inclusive) { + self.cursor.move_prev(); + } + let range_end_inclusive = self.cursor.current()?; + if range_end_inclusive < self.start { + self.empty = true; + return None; + } + let mut range_start = range_end_inclusive; + while range_start > self.start { + if let Some(prev) = self.cursor.prev() { + if prev == range_start - 1 { + range_start = prev; + continue; 
+ } + } + break; + } + + if range_start == self.start { + self.empty = true; + } else { + self.end_inclusive = range_start - 1; + } + + Some(range_start..=range_end_inclusive) + } } impl BitmapIteratorOperation { @@ -491,6 +556,12 @@ impl BitmapIteratorOperation { BitmapIteratorOperation::NthBack(n) => { assert_eq!(x.nth_back(n.0), y.nth_back(n.0 as usize)); } + BitmapIteratorOperation::NextRange => { + assert_eq!(x.next_range(), y.next_range()); + } + BitmapIteratorOperation::NextRangeBack => { + assert_eq!(x.next_range_back(), y.next_range_back()); + } } } } diff --git a/roaring/src/bitmap/container.rs b/roaring/src/bitmap/container.rs index 4ed11cdd..49a6564b 100644 --- a/roaring/src/bitmap/container.rs +++ b/roaring/src/bitmap/container.rs @@ -438,6 +438,14 @@ impl DoubleEndedIterator for Iter<'_> { impl ExactSizeIterator for Iter<'_> {} impl Iter<'_> { + pub(crate) fn peek(&self) -> Option { + self.inner.peek().map(|i| util::join(self.key, i)) + } + + pub(crate) fn peek_back(&self) -> Option { + self.inner.peek_back().map(|i| util::join(self.key, i)) + } + pub(crate) fn advance_to(&mut self, index: u16) { self.inner.advance_to(index); } @@ -445,6 +453,26 @@ impl Iter<'_> { pub(crate) fn advance_back_to(&mut self, index: u16) { self.inner.advance_back_to(index); } + + /// Returns the range of consecutive set bits from the current position to the end of the current run + /// + /// After this call, the iterator will be positioned at the first item after the returned range. + /// Returns `None` if the iterator is exhausted. + pub(crate) fn next_range(&mut self) -> Option> { + self.inner + .next_range() + .map(|r| util::join(self.key, *r.start())..=util::join(self.key, *r.end())) + } + + /// Returns the range of consecutive set bits from the start of the current run to the current back position + /// + /// After this call, the back of the iterator will be positioned at the last item before the returned range. + /// Returns `None` if the iterator is exhausted. 
+ pub(crate) fn next_range_back(&mut self) -> Option> { + self.inner + .next_range_back() + .map(|r| util::join(self.key, *r.start())..=util::join(self.key, *r.end())) + } } impl fmt::Debug for Container { diff --git a/roaring/src/bitmap/iter.rs b/roaring/src/bitmap/iter.rs index 1c4b09c3..49a5cbc5 100644 --- a/roaring/src/bitmap/iter.rs +++ b/roaring/src/bitmap/iter.rs @@ -92,6 +92,100 @@ fn advance_to_impl<'a, It>( } } +fn next_range_impl<'a, It>( + front_iter: &mut Option>, + containers: &mut It, + back_iter: &mut Option>, +) -> Option> +where + It: Iterator + Clone, + It: AsRef<[Container]>, + It::Item: IntoIterator>, +{ + let range = loop { + if let Some(r) = and_then_or_clear(front_iter, container::Iter::next_range) { + break r; + } + *front_iter = match containers.next() { + Some(inner) => Some(inner.into_iter()), + None => return and_then_or_clear(back_iter, container::Iter::next_range), + } + }; + let (range_start, mut range_end) = (*range.start(), *range.end()); + while range_end & 0xFFFF == 0xFFFF { + let Some(after_end) = range_end.checked_add(1) else { + return Some(range_start..=range_end); + }; + let (next_key, _) = util::split(after_end); + + if containers.as_ref().first().is_some_and(|c| c.key == next_key && c.contains(0)) { + let mut iter = containers.next().unwrap().into_iter(); + let next_range = iter.next_range().unwrap(); + *front_iter = Some(iter); + debug_assert_eq!(*next_range.start(), after_end); + range_end = *next_range.end(); + } else { + if let Some(iter) = back_iter { + if iter.peek() == Some(after_end) { + let next_range = iter.next_range().unwrap(); + debug_assert_eq!(*next_range.start(), after_end); + range_end = *next_range.end(); + } + } + break; + } + } + + Some(range_start..=range_end) +} + +fn next_range_back_impl<'a, It>( + front_iter: &mut Option>, + containers: &mut It, + back_iter: &mut Option>, +) -> Option> +where + It: DoubleEndedIterator, + It: AsRef<[Container]>, + It::Item: IntoIterator>, +{ + let range = loop { + if 
let Some(r) = and_then_or_clear(back_iter, container::Iter::next_range_back) { + break r; + } + *back_iter = match containers.next_back() { + Some(inner) => Some(inner.into_iter()), + None => return and_then_or_clear(front_iter, container::Iter::next_range_back), + } + }; + let (mut range_start, range_end) = (*range.start(), *range.end()); + while range_start & 0xFFFF == 0 { + let Some(before_start) = range_start.checked_sub(1) else { + return Some(range_start..=range_end); + }; + let (prev_key, _) = util::split(before_start); + + if containers.as_ref().last().is_some_and(|c| c.key == prev_key && c.contains(u16::MAX)) { + let mut iter = containers.next_back().unwrap().into_iter(); + let next_range = iter.next_range_back().unwrap(); + *back_iter = Some(iter); + debug_assert_eq!(*next_range.end(), before_start); + range_start = *next_range.start(); + } else { + if let Some(iter) = front_iter { + if iter.key == prev_key && iter.peek_back() == Some(before_start) { + let next_range = iter.next_range_back().unwrap(); + debug_assert_eq!(*next_range.end(), before_start); + range_start = *next_range.start(); + } + } + break; + } + } + + Some(range_start..=range_end) +} + fn advance_back_to_impl<'a, It>( n: u32, front_iter: &mut Option>, @@ -197,6 +291,46 @@ impl Iter<'_> { pub fn advance_back_to(&mut self, n: u32) { advance_back_to_impl(n, &mut self.front, &mut self.containers, &mut self.back); } + + /// Returns the range of consecutive set bits from the current position to the end of the current run + /// + /// After this call, the iterator will be positioned at the first item after the returned range. + /// Returns `None` if the iterator is exhausted. 
+ /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringBitmap; + /// + /// let bm = RoaringBitmap::from([1, 2, 4, 5]); + /// let mut iter = bm.iter(); + /// assert_eq!(iter.next_range(), Some(1..=2)); + /// assert_eq!(iter.next(), Some(4)); + /// assert_eq!(iter.next_range(), Some(5..=5)); + /// ``` + pub fn next_range(&mut self) -> Option> { + next_range_impl(&mut self.front, &mut self.containers, &mut self.back) + } + + /// Returns the range of consecutive set bits from the start of the current run to the current back position + /// + /// After this call, the back of the iterator will be positioned at the last item before the returned range. + /// Returns `None` if the iterator is exhausted. + /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringBitmap; + /// + /// let bm = RoaringBitmap::from([1, 2, 4, 5]); + /// let mut iter = bm.iter(); + /// assert_eq!(iter.next_range_back(), Some(4..=5)); + /// assert_eq!(iter.next_back(), Some(2)); + /// assert_eq!(iter.next_range_back(), Some(1..=1)); + /// ``` + pub fn next_range_back(&mut self) -> Option> { + next_range_back_impl(&mut self.front, &mut self.containers, &mut self.back) + } } impl IntoIter { @@ -245,6 +379,46 @@ impl IntoIter { pub fn advance_back_to(&mut self, n: u32) { advance_back_to_impl(n, &mut self.front, &mut self.containers, &mut self.back); } + + /// Returns the range of consecutive set bits from the current position to the end of the current run + /// + /// After this call, the iterator will be positioned at the first item after the returned range. + /// Returns `None` if the iterator is exhausted. 
+ /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringBitmap; + /// + /// let bm = RoaringBitmap::from([1, 2, 4, 5]); + /// let mut iter = bm.into_iter(); + /// assert_eq!(iter.next_range(), Some(1..=2)); + /// assert_eq!(iter.next(), Some(4)); + /// assert_eq!(iter.next_range(), Some(5..=5)); + /// ``` + pub fn next_range(&mut self) -> Option> { + next_range_impl(&mut self.front, &mut self.containers, &mut self.back) + } + + /// Returns the range of consecutive set bits from the start of the current run to the current back position + /// + /// After this call, the back of the iterator will be positioned at the last item before the returned range. + /// Returns `None` if the iterator is exhausted. + /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringBitmap; + /// + /// let bm = RoaringBitmap::from([1, 2, 4, 5]); + /// let mut iter = bm.into_iter(); + /// assert_eq!(iter.next_range_back(), Some(4..=5)); + /// assert_eq!(iter.next_back(), Some(2)); + /// assert_eq!(iter.next_range_back(), Some(1..=1)); + /// ``` + pub fn next_range_back(&mut self) -> Option> { + next_range_back_impl(&mut self.front, &mut self.containers, &mut self.back) + } } fn size_hint_impl( diff --git a/roaring/src/bitmap/store/array_store/mod.rs b/roaring/src/bitmap/store/array_store/mod.rs index 716df636..173b7def 100644 --- a/roaring/src/bitmap/store/array_store/mod.rs +++ b/roaring/src/bitmap/store/array_store/mod.rs @@ -25,6 +25,38 @@ pub(crate) struct ArrayStore { vec: Vec, } +/// Return the first contiguous range of elements in a sorted slice. +pub(crate) fn first_contiguous_range_len(slice: &[u16]) -> usize { + let [first, rest @ ..] = slice else { + // Explicitly empty range + return 0; + }; + let len = rest.partition_point(|item| { + let item_ptr = core::ptr::addr_of!(*item); + // SAFETY: `item` is guaranteed to be in bounds of `slice`. 
+ let elem_distance = usize::try_from(unsafe { item_ptr.offset_from(first) }).unwrap(); + let value_distance = item.checked_sub(*first).expect("array must be sorted"); + elem_distance == usize::from(value_distance) + }); + len + 1 // +1 for the first element +} + +/// Return the last contiguous range of elements in a sorted slice. +pub(crate) fn last_contiguous_range_len(slice: &[u16]) -> usize { + let [rest @ .., last] = slice else { + // Explicitly empty range + return 0; + }; + let last_ptr = core::ptr::addr_of!(*last); + let len_from_start = rest.partition_point(|item| { + // SAFETY: `item` is guaranteed to be in bounds of `slice`. + let elem_distance = usize::try_from(unsafe { last_ptr.offset_from(item) }).unwrap(); + let value_distance = last.checked_sub(*item).expect("array must be sorted"); + elem_distance != usize::from(value_distance) + }); + slice.len() - len_from_start +} + impl ArrayStore { pub fn new() -> ArrayStore { ArrayStore { vec: vec![] } diff --git a/roaring/src/bitmap/store/bitmap_store.rs b/roaring/src/bitmap/store/bitmap_store.rs index c1a4acde..98eed58f 100644 --- a/roaring/src/bitmap/store/bitmap_store.rs +++ b/roaring/src/bitmap/store/bitmap_store.rs @@ -10,6 +10,7 @@ use super::{ArrayStore, Interval}; use alloc::boxed::Box; #[cfg(not(feature = "std"))] use alloc::vec::Vec; +use core::mem; pub const BITMAP_LENGTH: usize = 1024; pub const BITMAP_BYTES: usize = BITMAP_LENGTH * 8; @@ -565,33 +566,212 @@ impl> BitmapIter { self.key_back = new_key; *dst = value & low_bits; } -} - -impl> Iterator for BitmapIter { - type Item = u16; - fn next(&mut self) -> Option { - if self.value == 0 { - 'get_val: { - if self.key >= self.key_back { - return None; + pub(crate) fn next_range(&mut self) -> Option> { + let value = *advance_to_next_nonzero_word( + &mut self.key, + &mut self.value, + self.bits.borrow(), + &mut self.key_back, + &mut self.value_back, + )?; + let offset = value.trailing_zeros() as u16; + let start = self.key * 64 + offset; + let value 
= value >> offset; + let num_set = value.trailing_ones() as u16; + let mut end_inclusive = start + (num_set - 1); + if num_set + offset != 64 { + self.value &= !0 << (num_set + offset); + return Some(start..=end_inclusive); + } + self.value = 0; + if self.key == self.key_back { + return Some(start..=end_inclusive); + } + loop { + debug_assert!(self.key < self.key_back); + self.key += 1; + self.value = if self.key == self.key_back { + mem::replace(&mut self.value_back, 0) + } else { + // Safety: + // - self.key and self.key_back are always kept in bounds + unsafe { *self.bits.borrow().get_unchecked(self.key as usize) } + }; + let set_bits = self.value.trailing_ones() as u16; + end_inclusive += set_bits; + if set_bits != 64 || self.key == self.key_back { + if set_bits != 64 { + self.value &= !0 << set_bits; + } else { + self.value = 0; } - for key in self.key + 1..self.key_back { - self.value = unsafe { *self.bits.borrow().get_unchecked(key as usize) }; - if self.value != 0 { - self.key = key; - break 'get_val; - } + return Some(start..=end_inclusive); + } + } + } + + pub(crate) fn next_range_back(&mut self) -> Option> { + let value_dst = advance_back_to_next_nonzero_word( + &mut self.key, + &mut self.value, + self.bits.borrow(), + &mut self.key_back, + &mut self.value_back, + )?; + + let end_offset = value_dst.leading_zeros() as u16; + let end_inclusive = self.key_back * 64 + (63 - end_offset); + let value = *value_dst << end_offset; + let num_set = value.leading_ones() as u16; + let mut start = end_inclusive - (num_set - 1); + if num_set + end_offset != 64 { + *value_dst &= !0 >> (num_set + end_offset); + return Some(start..=end_inclusive); + } + *value_dst = 0; + if self.key == self.key_back { + return Some(start..=end_inclusive); + } + loop { + debug_assert!(self.key_back > self.key); + self.key_back -= 1; + let value_dst = if self.key_back == self.key { + &mut self.value + } else { + // Safety: + // - self.key and self.key_back are always kept in bounds + let 
value = unsafe { *self.bits.borrow().get_unchecked(self.key_back as usize) }; + self.value_back = value; + &mut self.value_back + }; + let set_bits = value_dst.leading_ones() as u16; + start -= set_bits; + if set_bits != 64 || self.key_back == self.key { + if set_bits != 64 { + *value_dst &= !0 >> set_bits; + } else { + *value_dst = 0; } - self.key = self.key_back; - self.value = self.value_back; - if self.value == 0 { + return Some(start..=end_inclusive); + } + } + } + + pub(crate) fn peek(&self) -> Option { + let mut key = self.key; + let mut value = self.value; + let mut key_back = self.key_back; + let mut value_back = self.value_back; + let value = advance_to_next_nonzero_word( + &mut key, + &mut value, + self.bits.borrow(), + &mut key_back, + &mut value_back, + )?; + + let index = value.trailing_zeros() as u16; + Some(64 * key + index) + } + + pub(crate) fn peek_back(&self) -> Option { + let mut key = self.key; + let mut key_back = self.key_back; + let mut value = self.value; + let mut value_back = self.value_back; + let value = advance_back_to_next_nonzero_word( + &mut key, + &mut value, + self.bits.borrow(), + &mut key_back, + &mut value_back, + )?; + let index_from_left = value.leading_zeros() as u16; + let index = 63 - index_from_left; + Some(64 * key_back + index) + } +} + +fn advance_to_next_nonzero_word<'a>( + key: &mut u16, + value: &'a mut u64, + bits: &[u64; BITMAP_LENGTH], + key_back: &mut u16, + value_back: &'a mut u64, +) -> Option<&'a mut u64> { + if *value == 0 { + if *key >= *key_back { + return None; + } + loop { + debug_assert!(*key < *key_back); + *key += 1; + if *key == *key_back { + *value = mem::replace(value_back, 0); + if *value == 0 { return None; } + break; + } + // Safety: + // - self.key and self.key_back are always kept in bounds + *value = unsafe { *bits.get_unchecked(*key as usize) }; + if *value != 0 { + break; } } - let index = self.value.trailing_zeros() as u16; - self.value &= self.value - 1; + } + debug_assert!(*value != 0); 
+ Some(value) +} + +fn advance_back_to_next_nonzero_word<'a>( + key: &mut u16, + value: &'a mut u64, + bits: &[u64; BITMAP_LENGTH], + key_back: &mut u16, + value_back: &'a mut u64, +) -> Option<&'a mut u64> { + if *key_back > *key { + if *value_back != 0 { + return Some(value_back); + } + loop { + debug_assert!(key_back > key); + *key_back -= 1; + if *key_back == *key { + break; + } + // Safety: + // - self.key and self.key_back are always kept in bounds + *value_back = unsafe { *bits.get_unchecked(*key_back as usize) }; + if *value_back != 0 { + return Some(value_back); + } + } + } + debug_assert!(*key_back == *key); + if *value != 0 { + Some(value) + } else { + None + } +} + +impl> Iterator for BitmapIter { + type Item = u16; + + fn next(&mut self) -> Option { + let value = advance_to_next_nonzero_word( + &mut self.key, + &mut self.value, + self.bits.borrow(), + &mut self.key_back, + &mut self.value_back, + )?; + let index = value.trailing_zeros() as u16; + *value &= *value - 1; Some(64 * self.key + index) } @@ -616,23 +796,17 @@ impl> Iterator for BitmapIter { impl> DoubleEndedIterator for BitmapIter { fn next_back(&mut self) -> Option { - loop { - let value = - if self.key_back <= self.key { &mut self.value } else { &mut self.value_back }; - if *value == 0 { - if self.key_back <= self.key { - return None; - } - self.key_back -= 1; - self.value_back = - unsafe { *self.bits.borrow().get_unchecked(self.key_back as usize) }; - continue; - } - let index_from_left = value.leading_zeros() as u16; - let index = 63 - index_from_left; - *value &= !(1 << index); - return Some(64 * self.key_back + index); - } + let value_dst = advance_back_to_next_nonzero_word( + &mut self.key, + &mut self.value, + self.bits.borrow(), + &mut self.key_back, + &mut self.value_back, + )?; + let index_from_left = value_dst.leading_zeros() as u16; + let index = 63 - index_from_left; + *value_dst &= !(1 << index); + Some(64 * self.key_back + index) } } diff --git 
a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 185a719e..86b20223 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -798,6 +798,42 @@ impl> RunIter { } } } + + pub(crate) fn next_range(&mut self) -> Option> { + let interval = self.intervals.as_slice().first()?; + let end_offset = + if self.intervals.as_slice().len() == 1 { self.backward_offset } else { 0 }; + let result = interval.start + self.forward_offset..=interval.end - end_offset; + _ = self.intervals.next(); + self.forward_offset = 0; + if self.intervals.as_slice().is_empty() { + self.backward_offset = 0; + } + Some(result) + } + + pub(crate) fn next_range_back(&mut self) -> Option> { + let interval = self.intervals.as_slice().last()?; + let start_offset = + if self.intervals.as_slice().len() == 1 { self.forward_offset } else { 0 }; + let result = interval.start + start_offset..=interval.end - self.backward_offset; + _ = self.intervals.next_back(); + self.backward_offset = 0; + if self.intervals.as_slice().is_empty() { + self.forward_offset = 0; + } + Some(result) + } + + pub(crate) fn peek(&self) -> Option { + let result = self.intervals.as_slice().first()?.start + self.forward_offset; + Some(result) + } + + pub(crate) fn peek_back(&self) -> Option { + let result = self.intervals.as_slice().last()?.end - self.backward_offset; + Some(result) + } } impl> Iterator for RunIter { diff --git a/roaring/src/bitmap/store/mod.rs b/roaring/src/bitmap/store/mod.rs index d07980a8..c57d3afe 100644 --- a/roaring/src/bitmap/store/mod.rs +++ b/roaring/src/bitmap/store/mod.rs @@ -898,6 +898,89 @@ impl Iter<'_> { Iter::RunBorrowed(inner) => inner.advance_back_to(n), } } + + pub(crate) fn next_range(&mut self) -> Option> { + match self { + Iter::Array(inner) => { + let slice = inner.as_slice(); + let len = array_store::first_contiguous_range_len(slice); + let mut range = None; + if len != 0 { + let first = slice[0]; + 
inner.nth(len - 1); + range = Some(first..=first + (len - 1) as u16); + } + range + } + Iter::Vec(inner) => { + let slice = inner.as_slice(); + let len = array_store::first_contiguous_range_len(slice); + let mut range = None; + if len != 0 { + let first = slice[0]; + inner.nth(len - 1); + range = Some(first..=first + (len - 1) as u16); + } + range + } + Iter::BitmapBorrowed(inner) => inner.next_range(), + Iter::BitmapOwned(inner) => inner.next_range(), + Iter::RunBorrowed(inner) => inner.next_range(), + Iter::RunOwned(inner) => inner.next_range(), + } + } + + pub(crate) fn next_range_back(&mut self) -> Option> { + match self { + Iter::Array(inner) => { + let slice = inner.as_slice(); + let len = array_store::last_contiguous_range_len(slice); + let mut range = None; + if len != 0 { + let last = slice[slice.len() - 1]; + inner.nth_back(len - 1); + range = Some(last - (len - 1) as u16..=last); + } + range + } + Iter::Vec(inner) => { + let slice = inner.as_slice(); + let len = array_store::last_contiguous_range_len(slice); + let mut range = None; + if len != 0 { + let last = slice[slice.len() - 1]; + inner.nth_back(len - 1); + range = Some(last - (len - 1) as u16..=last); + } + range + } + Iter::BitmapBorrowed(inner) => inner.next_range_back(), + Iter::BitmapOwned(inner) => inner.next_range_back(), + Iter::RunBorrowed(inner) => inner.next_range_back(), + Iter::RunOwned(inner) => inner.next_range_back(), + } + } + + pub(crate) fn peek(&self) -> Option { + match self { + Iter::Array(inner) => inner.as_slice().first().copied(), + Iter::Vec(inner) => inner.as_slice().first().copied(), + Iter::BitmapBorrowed(inner) => inner.peek(), + Iter::BitmapOwned(inner) => inner.peek(), + Iter::RunBorrowed(inner) => inner.peek(), + Iter::RunOwned(inner) => inner.peek(), + } + } + pub(crate) fn peek_back(&self) -> Option { + match self { + Iter::Array(inner) => inner.as_slice().last().copied(), + Iter::Vec(inner) => inner.as_slice().last().copied(), + Iter::BitmapBorrowed(inner) => 
inner.peek_back(), + Iter::BitmapOwned(inner) => inner.peek_back(), + Iter::RunBorrowed(inner) => inner.peek_back(), + Iter::RunOwned(inner) => inner.peek_back(), + } + } } impl Iterator for Iter<'_> { diff --git a/roaring/tests/iter_next_range.rs b/roaring/tests/iter_next_range.rs new file mode 100644 index 00000000..5800c454 --- /dev/null +++ b/roaring/tests/iter_next_range.rs @@ -0,0 +1,558 @@ +use roaring::RoaringBitmap; + +#[test] +fn next_range_basic() { + let bm = RoaringBitmap::from([1, 2, 4, 5]); + let mut iter = bm.iter(); + + // First consecutive range: 1..=2 + assert_eq!(iter.next_range(), Some(1..=2)); + + // Iterator should now point at 4 + assert_eq!(iter.next(), Some(4)); + + // Second consecutive range: 5..=5 (single element) + assert_eq!(iter.next_range(), Some(5..=5)); + + // Iterator should now be exhausted + assert_eq!(iter.next(), None); + assert_eq!(iter.next_range(), None); +} + +#[test] +fn next_range_back_basic() { + let bm = RoaringBitmap::from([1, 2, 4, 5]); + let mut iter = bm.iter(); + + // Last consecutive range from back: 4..=5 + assert_eq!(iter.next_range_back(), Some(4..=5)); + + // Iterator back should now point at 2 + assert_eq!(iter.next_back(), Some(2)); + + // Previous consecutive range from back: 1..=1 (single element) + assert_eq!(iter.next_range_back(), Some(1..=1)); + + // Iterator should now be exhausted from back + assert_eq!(iter.next_back(), None); + assert_eq!(iter.next_range_back(), None); +} + +#[test] +fn next_range_single_elements() { + // All single-element ranges + let bm = RoaringBitmap::from([1, 3, 5, 7]); + let mut iter = bm.iter(); + + assert_eq!(iter.next_range(), Some(1..=1)); + assert_eq!(iter.next(), Some(3)); + + assert_eq!(iter.next_range(), Some(5..=5)); + assert_eq!(iter.next(), Some(7)); + + assert_eq!(iter.next_range(), None); + assert_eq!(iter.next(), None); +} + +#[test] +fn next_range_long_consecutive() { + // Long consecutive sequence + let bm = RoaringBitmap::from([1, 2, 3, 4, 5, 6, 7, 8, 9, 
10]); + let mut iter = bm.iter(); + + // Should get the entire range + assert_eq!(iter.next_range(), Some(1..=10)); + + // Iterator should be exhausted after consuming the range + assert_eq!(iter.next(), None); + assert_eq!(iter.next_range(), None); +} + +#[test] +fn next_range_partial_consumption() { + let bm = RoaringBitmap::from([1, 2, 3, 4, 5, 10, 11, 12]); + let mut iter = bm.iter(); + + // Consume some elements first + assert_eq!(iter.next(), Some(1)); + assert_eq!(iter.next(), Some(2)); + + // Should get remaining range from current position + assert_eq!(iter.next_range(), Some(3..=5)); + + // Continue with next range + assert_eq!(iter.next(), Some(10)); + assert_eq!(iter.next_range(), Some(11..=12)); +} + +#[test] +fn next_range_back_partial_consumption() { + let bm = RoaringBitmap::from([1, 2, 3, 10, 11, 12]); + let mut iter = bm.iter(); + + // Consume some elements from back first + assert_eq!(iter.next_back(), Some(12)); + assert_eq!(iter.next_back(), Some(11)); + + // Should get remaining range from back position + assert_eq!(iter.next_range_back(), Some(10..=10)); + + // Continue with previous range + assert_eq!(iter.next_back(), Some(3)); + assert_eq!(iter.next_range_back(), Some(1..=2)); +} + +#[test] +fn next_range_empty_bitmap() { + let bm = RoaringBitmap::new(); + let mut iter = bm.iter(); + + assert_eq!(iter.next_range(), None); + assert_eq!(iter.next_range_back(), None); +} + +#[test] +fn next_range_single_element_bitmap() { + let bm = RoaringBitmap::from([42]); + let mut iter = bm.iter(); + + assert_eq!(iter.next_range(), Some(42..=42)); + assert_eq!(iter.next(), None); + + // Reset for back test + let mut iter = bm.iter(); + assert_eq!(iter.next_range_back(), Some(42..=42)); + assert_eq!(iter.next_back(), None); +} + +#[test] +fn next_range_mixed_operations() { + let bm = RoaringBitmap::from([1, 2, 3, 10, 11, 12, 20]); + let mut iter = bm.iter(); + + // Mix forward and backward operations + assert_eq!(iter.next(), Some(1)); + 
assert_eq!(iter.next_back(), Some(20)); + + // Get remaining range from front (from current position 2) + assert_eq!(iter.next_range(), Some(2..=3)); + + // Get remaining range from back (should be 10..=12) + assert_eq!(iter.next_range_back(), Some(10..=12)); + + // Both ranges consumed, iterator should be empty + assert_eq!(iter.next(), None); + assert_eq!(iter.next_back(), None); +} + +#[test] +fn next_range_multi_container() { + // Test across container boundaries + let bm = RoaringBitmap::from([1, 2, 0x1_0000, 0x1_0001, 0x1_0002]); + let mut iter = bm.iter(); + + // First container range + assert_eq!(iter.next_range(), Some(1..=2)); + + // Second container range + assert_eq!(iter.next(), Some(0x1_0000)); + assert_eq!(iter.next_range(), Some(0x1_0001..=0x1_0002)); + + assert_eq!(iter.next(), None); +} + +#[test] +fn next_range_u32_max_boundary() { + // Test behavior at u32::MAX boundary + let bm = RoaringBitmap::from([u32::MAX - 2, u32::MAX - 1, u32::MAX]); + let mut iter = bm.iter(); + + // Should handle u32::MAX correctly with RangeInclusive + assert_eq!(iter.next_range(), Some((u32::MAX - 2)..=u32::MAX)); + + assert_eq!(iter.next(), None); +} + +#[test] +fn next_range_advance_to_integration() { + let bm = RoaringBitmap::from([1, 2, 3, 4, 5, 10, 11, 12, 13]); + let mut iter = bm.iter(); + + // Advance to middle of a consecutive range + iter.advance_to(3); + + // Should get remaining part of the range + assert_eq!(iter.next_range(), Some(3..=5)); + + // Continue with next range + assert_eq!(iter.next(), Some(10)); + assert_eq!(iter.next_range(), Some(11..=13)); +} + +#[test] +fn next_range_advance_back_to_integration() { + let bm = RoaringBitmap::from([1, 2, 3, 4, 5, 10, 11, 12, 13]); + let mut iter = bm.iter(); + + // Advance back to middle of a consecutive range + iter.advance_back_to(12); + + // Should get range from start to current back position + assert_eq!(iter.next_range_back(), Some(10..=12)); + + // Continue with previous range + 
assert_eq!(iter.next_back(), Some(5)); + assert_eq!(iter.next_range_back(), Some(1..=4)); +} + +// Test IntoIter variants +#[test] +fn into_iter_next_range_basic() { + let bm = RoaringBitmap::from([1, 2, 4, 5]); + let mut iter = bm.into_iter(); + + assert_eq!(iter.next_range(), Some(1..=2)); + assert_eq!(iter.next(), Some(4)); + assert_eq!(iter.next_range(), Some(5..=5)); +} + +#[test] +fn into_iter_next_range_back_basic() { + let bm = RoaringBitmap::from([1, 2, 4, 5]); + let mut iter = bm.into_iter(); + + assert_eq!(iter.next_range_back(), Some(4..=5)); + assert_eq!(iter.next_back(), Some(2)); + assert_eq!(iter.next_range_back(), Some(1..=1)); +} + +#[test] +fn next_range_exhausted_iterator() { + let bm = RoaringBitmap::from([1, 2, 3]); + let mut iter = bm.iter(); + + // Consume all elements + iter.next(); + iter.next(); + iter.next(); + + // Iterator should be exhausted + assert_eq!(iter.next_range(), None); + assert_eq!(iter.next_range_back(), None); +} + +#[test] +fn next_range_overlapping_calls() { + let bm = RoaringBitmap::from([1, 2, 3, 10, 11]); + let mut iter = bm.iter(); + + // Get first range + assert_eq!(iter.next_range(), Some(1..=3)); + + // Iterator advanced past first range, get second range + assert_eq!(iter.next_range(), Some(10..=11)); + + // No more ranges + assert_eq!(iter.next_range(), None); +} + +#[test] +fn next_range_very_sparse() { + // Very sparse bitmap + let bm = RoaringBitmap::from([0, 1000, 2000, 3000]); + let mut iter = bm.iter(); + + // Each element should be its own range + assert_eq!(iter.next_range(), Some(0..=0)); + assert_eq!(iter.next(), Some(1000)); + + assert_eq!(iter.next_range(), Some(2000..=2000)); + assert_eq!(iter.next(), Some(3000)); + + assert_eq!(iter.next_range(), None); +} + +#[test] +fn next_range_dense_bitmap() { + // Dense bitmap with large consecutive ranges + let mut bm = RoaringBitmap::new(); + // Add ranges: 0-99, 200-299, 500-599 + for i in 0..100 { + bm.insert(i); + } + for i in 200..300 { + bm.insert(i); 
+ } + for i in 500..600 { + bm.insert(i); + } + + let mut iter = bm.iter(); + + assert_eq!(iter.next_range(), Some(0..=99)); + assert_eq!(iter.next(), Some(200)); + + assert_eq!(iter.next_range(), Some(201..=299)); + assert_eq!(iter.next(), Some(500)); + + assert_eq!(iter.next_range(), Some(501..=599)); + assert_eq!(iter.next(), None); +} + +#[test] +fn next_range_multi_container_range() { + // Single element bitmap + let mut bm = RoaringBitmap::new(); + bm.insert_range(0..=0x4_0000); + let mut iter = bm.iter(); + + assert_eq!(iter.next(), Some(0)); + assert_eq!(iter.next(), Some(1)); + assert_eq!(iter.next_range(), Some(2..=0x4_0000)); + + assert_eq!(iter.next_range(), None); + assert_eq!(iter.next(), None); +} + +// Tests for bitmap store - these should trigger the todo!() implementations +#[test] +fn next_range_bitmap_store_forced() { + // Create a sparse pattern that exceeds ARRAY_LIMIT but is inefficient as runs + let mut bm = RoaringBitmap::new(); + + // Add alternating ranges to create many gaps - inefficient as runs + for i in (0..20000).step_by(4) { + bm.insert(i); // bit at i + bm.insert(i + 1); // bit at i+1 + // gaps at i+2, i+3 + } + + // Force removal of run compression to ensure bitmap store + bm.remove_run_compression(); + + let mut iter = bm.iter(); + + // First consecutive range should be 0..=1 + assert_eq!(iter.next_range(), Some(0..=1)); + + // Iterator should now point at 4 + assert_eq!(iter.next(), Some(4)); + + // Second consecutive range: 5..=5 (single element) + assert_eq!(iter.next_range(), Some(5..=5)); +} + +#[test] +fn next_range_back_bitmap_store_forced() { + // Create a sparse pattern that exceeds ARRAY_LIMIT but is inefficient as runs + let mut bm = RoaringBitmap::new(); + + // Add alternating ranges to create many gaps + for i in (0..20000).step_by(4) { + bm.insert(i); + bm.insert(i + 1); + } + + // Force removal of run compression + bm.remove_run_compression(); + + let mut iter = bm.iter(); + + // Last consecutive range from back 
should be the last pair + // The last elements should be 19996, 19997 + assert_eq!(iter.next_range_back(), Some(19996..=19997)); +} + +#[test] +fn next_range_bitmap_store_dense_with_gaps() { + // Create a dense bitmap with strategic gaps to force bitmap store + let mut bm = RoaringBitmap::new(); + + // Add most elements but with regular gaps to make runs inefficient + for i in 0..10000 { + if i % 3 != 0 { + // Keep i only when it is not a multiple of 3 + bm.insert(i); + } + } + + // Force bitmap representation + bm.remove_run_compression(); + + let mut iter = bm.iter(); + + // First consecutive range should be 1..=2 + assert_eq!(iter.next_range(), Some(1..=2)); + + // Next element should be 4 + assert_eq!(iter.next(), Some(4)); + + // 5 is followed by the gap at 6, so the next range is 5..=5 + assert_eq!(iter.next_range(), Some(5..=5)); +} + +#[test] +fn next_range_bitmap_store_partial_consumption() { + // Create bitmap that forces bitmap store + let mut bm = RoaringBitmap::new(); + + // Add pairs (i, i+1) every 3 values, leaving i+2 unset + for i in (1000..8000).step_by(3) { + bm.insert(i); + bm.insert(i + 1); + } + + bm.remove_run_compression(); + + let mut iter = bm.iter(); + + // Consume the first pair (1000, 1001) + assert_eq!(iter.next(), Some(1000)); + assert_eq!(iter.next(), Some(1001)); + + // Should get next range starting at 1003 + assert_eq!(iter.next_range(), Some(1003..=1004)); +} + +#[test] +fn next_range_bitmap_store_mixed_operations() { + let mut bm = RoaringBitmap::new(); + + // Create pattern that forces bitmap store + for i in (0..10000).step_by(3) { + bm.insert(i); + bm.insert(i + 1); + } + + bm.remove_run_compression(); + + // The pattern will be: 0,1 gap 3,4 gap 6,7 gap ... 
9996,9997 gap 9999 + // Last iteration: i=9999, so we insert 9999 and 10000 + // Read the actual last element back from the bitmap rather than hard-coding it + let last_element = bm.iter().next_back().unwrap(); + + let mut iter = bm.iter(); + + // Mix forward and backward operations + assert_eq!(iter.next(), Some(0)); + assert_eq!(iter.next_back(), Some(last_element)); + + // Rest of the first pair after 0 was consumed + assert_eq!(iter.next_range(), Some(1..=1)); + + // Continue to next range + assert_eq!(iter.next(), Some(3)); + assert_eq!(iter.next_range(), Some(4..=4)); +} + +#[test] +fn next_range_bitmap_store_single_elements() { + // Create very sparse bitmap intended to force bitmap store (NOTE(review): only 4000 values are inserted - confirm that exceeds ARRAY_LIMIT) + let mut bm = RoaringBitmap::new(); + + // Add individual elements spaced 5 apart + for i in (0..20000).step_by(5) { + bm.insert(i); + } + + bm.remove_run_compression(); + + let mut iter = bm.iter(); + + // Each element should be its own single-element range + assert_eq!(iter.next_range(), Some(0..=0)); + assert_eq!(iter.next(), Some(5)); + assert_eq!(iter.next_range(), Some(10..=10)); + assert_eq!(iter.next(), Some(15)); + assert_eq!(iter.next_range(), Some(20..=20)); +} + +#[test] +fn next_range_bitmap_store_alternating_pattern() { + // Create alternating pattern that's inefficient for run encoding + let mut bm = RoaringBitmap::new(); + + // Every other bit set in a large range + for i in (0..10000).step_by(2) { + bm.insert(i); + } + + bm.remove_run_compression(); + + let mut iter = bm.iter(); + + // Each bit should be its own range due to alternating pattern + assert_eq!(iter.next_range(), Some(0..=0)); + assert_eq!(iter.next(), Some(2)); + assert_eq!(iter.next_range(), Some(4..=4)); + assert_eq!(iter.next(), Some(6)); + assert_eq!(iter.next_range(), Some(8..=8)); +} + +#[test] +fn next_range_bitmap_store_with_small_clusters() { + // Create small clusters of bits separated by gaps + let mut bm = RoaringBitmap::new(); + + // Add clusters of 3 bits separated by gaps of 5 + for base 
in (0..15000).step_by(8) { + bm.insert(base); + bm.insert(base + 1); + bm.insert(base + 2); + // gap of 5 (base+3, base+4, base+5, base+6, base+7) + } + + bm.remove_run_compression(); + + let mut iter = bm.iter(); + + // First cluster: 0..=2 + assert_eq!(iter.next_range(), Some(0..=2)); + + // Next cluster starts at 8; consuming it leaves 9..=10 + assert_eq!(iter.next(), Some(8)); + assert_eq!(iter.next_range(), Some(9..=10)); + + // Next cluster starts at 16; consuming it leaves 17..=18 + assert_eq!(iter.next(), Some(16)); + assert_eq!(iter.next_range(), Some(17..=18)); +} + +#[test] +fn range_partial_consume() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert_range(0..=0x3FFF); + let mut iter = bitmap.iter(); + iter.next(); + assert_eq!(iter.next_range_back(), Some(1..=0x3FFF)); +} + +#[test] +fn range_with_initial_next() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert_range(69311..=180090); + let mut iter = bitmap.iter(); + assert_eq!(iter.next(), Some(69311)); + assert_eq!(iter.next_range_back(), Some(69312..=180090)); +} + +#[test] +fn range_with_gap() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert_range(0x2_0000..=0x2_FFFF); + bitmap.remove(0x2_1000); + bitmap.remove_run_compression(); + let mut iter = bitmap.iter(); + assert_eq!(iter.next_range(), Some(0x2_0000..=0x2_0FFF)); + assert_eq!(iter.next(), Some(0x2_1001)); +} + +#[test] +fn range_back_after_next() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert_range(0..=0x3_FFFF); + bitmap.remove(0x0_3000); + let mut iter = bitmap.iter(); + assert_eq!(iter.next(), Some(0)); + assert_eq!(iter.next_range_back(), Some(0x0_3001..=0x3_FFFF)); +}