Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion theta/include/theta_sketch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -609,9 +609,11 @@ class wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator {
uint32_t index_;
uint64_t previous_;
bool is_block_mode_;
uint8_t buf_i_;
uint8_t offset_;
uint64_t buffer_[8];

inline void unpack1();
inline void unpack8();
};

} /* namespace datasketches */
Expand Down
58 changes: 28 additions & 30 deletions theta/include/theta_sketch_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -817,23 +817,15 @@ num_entries_(num_entries),
index_(index),
previous_(0),
is_block_mode_(num_entries_ >= 8),
buf_i_(0),
offset_(0)
{
if (entry_bits == 64) { // no compression
ptr_ = reinterpret_cast<const uint64_t*>(ptr) + index;
} else if (index < num_entries) {
if (is_block_mode_) {
unpack_bits_block8(buffer_, reinterpret_cast<const uint8_t*>(ptr_), entry_bits_);
ptr_ = reinterpret_cast<const uint8_t*>(ptr_) + entry_bits_;
for (int i = 0; i < 8; ++i) {
buffer_[i] += previous_;
previous_ = buffer_[i];
}
unpack8();
} else {
offset_ = unpack_bits(buffer_[0], entry_bits_, reinterpret_cast<const uint8_t*&>(ptr_), offset_);
buffer_[0] += previous_;
previous_ = buffer_[0];
unpack1();
}
}
}
Expand All @@ -844,35 +836,41 @@ auto wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator++()
ptr_ = reinterpret_cast<const uint64_t*>(ptr_) + 1;
return *this;
}
++index_;
if (index_ < num_entries_) {
if (++index_ < num_entries_) {
if (is_block_mode_) {
++buf_i_;
if (buf_i_ == 8) {
buf_i_ = 0;
if (index_ + 8 < num_entries_) {
unpack_bits_block8(buffer_, reinterpret_cast<const uint8_t*>(ptr_), entry_bits_);
ptr_ = reinterpret_cast<const uint8_t*>(ptr_) + entry_bits_;
for (int i = 0; i < 8; ++i) {
buffer_[i] += previous_;
previous_ = buffer_[i];
}
if ((index_ & 7) == 0) {
if (num_entries_ - index_ >= 8) {
unpack8();
} else {
is_block_mode_ = false;
offset_ = unpack_bits(buffer_[0], entry_bits_, reinterpret_cast<const uint8_t*&>(ptr_), offset_);
buffer_[0] += previous_;
previous_ = buffer_[0];
unpack1();
}
}
} else {
offset_ = unpack_bits(buffer_[0], entry_bits_, reinterpret_cast<const uint8_t*&>(ptr_), offset_);
buffer_[0] += previous_;
previous_ = buffer_[0];
unpack1();
}
}
return *this;
}

// Decodes one packed entry into the buffer slot that matches the current
// index (index_ modulo 8), then turns the decoded delta into an absolute value
// by adding the previously produced entry. ptr_ and offset_ are advanced by
// unpack_bits() so the next call continues where this one stopped.
template<typename Allocator>
void wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::unpack1() {
  uint64_t& slot = buffer_[index_ & 7];
  offset_ = unpack_bits(slot, entry_bits_, reinterpret_cast<const uint8_t*&>(ptr_), offset_);
  slot += previous_;   // stored value is a delta from the preceding entry
  previous_ = slot;    // remember the absolute value for the next delta
}

// Decodes a full block of 8 packed entries into buffer_ and advances ptr_ past
// the block. Eight entries of entry_bits_ bits each occupy exactly entry_bits_
// bytes. The decoded deltas are then accumulated in order, so every slot ends
// up holding an absolute value and previous_ holds the last one.
template<typename Allocator>
void wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::unpack8() {
  const uint8_t* packed = reinterpret_cast<const uint8_t*>(ptr_);
  unpack_bits_block8(buffer_, packed, entry_bits_);
  ptr_ = packed + entry_bits_;
  for (uint64_t& entry : buffer_) {
    entry += previous_;
    previous_ = entry;
  }
}

template<typename Allocator>
auto wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator++(int) -> const_iterator {
const_iterator tmp(*this);
Expand All @@ -895,13 +893,13 @@ bool wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator==(c
// Dereference: returns the entry at the iterator's current position.
// With 64-bit entries (no compression) ptr_ addresses the entry directly.
// Otherwise the entry has already been decoded into buffer_, in the slot
// selected by the low three bits of index_ -- the same slot unpack1() writes
// and the position within the block filled by unpack8().
template<typename Allocator>
auto wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator*() const -> reference {
  if (entry_bits_ == 64) return *reinterpret_cast<const uint64_t*>(ptr_);
  return buffer_[index_ & 7];
}

// Member access: returns a pointer to the entry at the current position.
// With 64-bit entries (no compression) that is ptr_ itself; otherwise it is
// the address of the decoded value inside buffer_, in the slot selected by the
// low three bits of index_ (the slot unpack1() / unpack8() populate).
template<typename Allocator>
auto wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator->() const -> pointer {
  if (entry_bits_ == 64) return reinterpret_cast<const uint64_t*>(ptr_);
  return buffer_ + (index_ & 7);
}

} /* namespace datasketches */
Expand Down
50 changes: 50 additions & 0 deletions theta/test/bit_packing_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,4 +80,54 @@ TEST_CASE("pack unpack blocks") {
}
}

// Cross-check of the two code paths: eight values written one at a time with
// pack_bits() must come back unchanged from the block decoder
// unpack_bits_block8(), for every entry width from 1 to 63 bits.
TEST_CASE("pack bits unpack blocks") {
  uint64_t seed = 0; // arbitrary starting value
  for (int round = 0; round < 10000; ++round) {
    for (uint8_t width = 1; width <= 63; ++width) {
      const uint64_t low_bits_mask = (1ULL << width) - 1;

      // eight values truncated to the current width
      std::vector<uint64_t> input(8, 0);
      for (int i = 0; i < 8; ++i) {
        input[i] = seed & low_bits_mask;
        seed += IGOLDEN64;
      }

      // 8 values of `width` bits each fill exactly `width` bytes
      std::vector<uint8_t> packed(width, 0);
      uint8_t* write_pos = packed.data();
      uint8_t bit_offset = 0;
      for (int i = 0; i < 8; ++i) {
        bit_offset = pack_bits(input[i], width, write_pos, bit_offset);
      }

      std::vector<uint64_t> output(8, 0);
      unpack_bits_block8(output.data(), packed.data(), width);

      for (int i = 0; i < 8; ++i) {
        REQUIRE(input[i] == output[i]);
      }
    }
  }
}

// Cross-check in the opposite direction: eight values written as one block
// with pack_bits_block8() must come back unchanged when read one at a time
// with unpack_bits(), for every entry width from 1 to 63 bits.
TEST_CASE("pack blocks unpack bits") {
  uint64_t seed = 111; // arbitrary starting value
  for (int round = 0; round < 10000; ++round) {
    for (uint8_t width = 1; width <= 63; ++width) {
      const uint64_t low_bits_mask = (1ULL << width) - 1;

      // eight values truncated to the current width
      std::vector<uint64_t> input(8, 0);
      for (int i = 0; i < 8; ++i) {
        input[i] = seed & low_bits_mask;
        seed += IGOLDEN64;
      }

      // 8 values of `width` bits each fill exactly `width` bytes
      std::vector<uint8_t> packed(width, 0);
      pack_bits_block8(input.data(), packed.data(), width);

      std::vector<uint64_t> output(8, 0);
      const uint8_t* read_pos = packed.data();
      uint8_t bit_offset = 0;
      for (int i = 0; i < 8; ++i) {
        bit_offset = unpack_bits(output[i], width, read_pos, bit_offset);
      }

      for (int i = 0; i < 8; ++i) {
        REQUIRE(input[i] == output[i]);
      }
    }
  }
}

} /* namespace datasketches */
Loading