Skip to content

Commit d37e607

Browse files
committed
Diagnostics and readme
1 parent 2fe7037 commit d37e607

7 files changed

Lines changed: 141 additions & 28 deletions

File tree

CMakeLists.txt

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,32 @@ endif()
2727
# ---------------------------------------------------------------------------
2828
option(PIXIE_TESTS "Build unit tests" ON)
2929
option(PIXIE_BENCHMARKS "Build benchmarks (includes comparison benchmarks against third-party libraries)" OFF)
30+
option(PIXIE_DIAGNOSTICS "Include diagnostic logs" OFF)
3031
option(PIXIE_DOCS "Build Doxygen documentation" OFF)
3132

33+
# Diagnostics build: add the PIXIE_DIAGNOSTICS preprocessor definition and
# record the spdlog target name in PIXIE_DIAGNOSTICS_LIBS. The executables
# below append ${PIXIE_DIAGNOSTICS_LIBS} to their link lists; when the
# variable is not set it expands to nothing.
if(PIXIE_DIAGNOSTICS)
34+
add_compile_definitions(PIXIE_DIAGNOSTICS)
35+
set(PIXIE_DIAGNOSTICS_LIBS spdlog::spdlog_header_only)
36+
endif()
37+
3238
# ---------------------------------------------------------------------------
3339
# Dependencies (fetched only when needed)
3440
# ---------------------------------------------------------------------------
3541
include(FetchContent)
3642

43+
# spdlog is fetched only for diagnostics builds, pinned to tag v1.14.1.
# Its shared-library build, examples, tests and install rules are forced OFF
# in the cache before the dependency is made available.
if(PIXIE_DIAGNOSTICS)
44+
set(SPDLOG_BUILD_SHARED OFF CACHE BOOL "" FORCE)
45+
set(SPDLOG_BUILD_EXAMPLE OFF CACHE BOOL "" FORCE)
46+
set(SPDLOG_BUILD_TESTING OFF CACHE BOOL "" FORCE)
47+
set(SPDLOG_INSTALL OFF CACHE BOOL "" FORCE)
48+
FetchContent_Declare(
49+
spdlog
50+
GIT_REPOSITORY https://github.com/gabime/spdlog.git
51+
GIT_TAG v1.14.1
52+
)
53+
FetchContent_MakeAvailable(spdlog)
54+
endif()
55+
3756
if(PIXIE_BENCHMARKS)
3857
FetchContent_Declare(
3958
googlebenchmark
@@ -91,15 +110,17 @@ if(PIXIE_TESTS)
91110
PUBLIC include)
92111
target_link_libraries(unittests
93112
gtest
94-
gtest_main)
113+
gtest_main
114+
${PIXIE_DIAGNOSTICS_LIBS})
95115

96116
add_executable(benchmark_tests
97117
src/tests/benchmark_tests.cpp)
98118
target_include_directories(benchmark_tests
99119
PUBLIC include)
100120
target_link_libraries(benchmark_tests
101121
gtest
102-
gtest_main)
122+
gtest_main
123+
${PIXIE_DIAGNOSTICS_LIBS})
103124

104125
add_executable(test_rmm
105126
src/tests/test_rmm.cpp)
@@ -108,15 +129,17 @@ if(PIXIE_TESTS)
108129
PUBLIC include/misc)
109130
target_link_libraries(test_rmm
110131
gtest
111-
gtest_main)
132+
gtest_main
133+
${PIXIE_DIAGNOSTICS_LIBS})
112134

113135
add_executable(louds_tree_tests
114136
src/tests/louds_tree_tests.cpp)
115137
target_include_directories(louds_tree_tests
116138
PUBLIC include)
117139
target_link_libraries(louds_tree_tests
118140
gtest
119-
gtest_main)
141+
gtest_main
142+
${PIXIE_DIAGNOSTICS_LIBS})
120143
endif()
121144

122145
# ---------------------------------------------------------------------------
@@ -130,15 +153,17 @@ if(PIXIE_BENCHMARKS)
130153
target_link_libraries(benchmarks
131154
benchmark
132155
benchmark_main
133-
pasta_bit_vector)
156+
pasta_bit_vector
157+
${PIXIE_DIAGNOSTICS_LIBS})
134158
target_compile_definitions(benchmarks PRIVATE PIXIE_THIRD_PARTY_BENCHMARKS)
135159

136160
add_executable(bench_rmm
137161
src/benchmarks/bench_rmm.cpp)
138162
target_include_directories(bench_rmm
139163
PUBLIC include)
140164
target_link_libraries(bench_rmm
141-
benchmark)
165+
benchmark
166+
${PIXIE_DIAGNOSTICS_LIBS})
142167

143168
add_executable(bench_rmm_sdsl
144169
src/benchmarks/bench_rmm_sdsl.cpp)
@@ -147,23 +172,26 @@ if(PIXIE_BENCHMARKS)
147172
PRIVATE ${sdsl_lite_SOURCE_DIR}/include)
148173
target_link_libraries(bench_rmm_sdsl
149174
PRIVATE
150-
benchmark)
175+
benchmark
176+
${PIXIE_DIAGNOSTICS_LIBS})
151177

152178
add_executable(louds_tree_benchmarks
153179
src/benchmarks/louds_tree_benchmarks.cpp)
154180
target_include_directories(louds_tree_benchmarks
155181
PUBLIC include)
156182
target_link_libraries(louds_tree_benchmarks
157183
benchmark
158-
benchmark_main)
184+
benchmark_main
185+
${PIXIE_DIAGNOSTICS_LIBS})
159186

160187
add_executable(alignment_comparison
161188
src/benchmarks/alignment_comparison.cpp)
162189
target_include_directories(alignment_comparison
163190
PUBLIC include)
164191
target_link_libraries(alignment_comparison
165192
benchmark
166-
benchmark_main)
193+
benchmark_main
194+
${PIXIE_DIAGNOSTICS_LIBS})
167195
endif()
168196

169197
# ---------------------------------------------------------------------------

CMakePresets.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
"cacheVariables": {
3939
"BENCHMARK_ENABLE_LIBPFM": "ON",
4040
"CMAKE_BUILD_TYPE": "RelWithDebInfo",
41+
"PIXIE_DIAGNOSTICS": "ON",
4142
"PIXIE_BENCHMARKS": "ON"
4243
}
4344
},

README.md

Lines changed: 46 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,31 @@
1-
# pixie
1+
# Pixie
2+
3+
<img src="src/docs/images/logo.png" alt="Pixie logo" width="256" align="left" style="float: left; margin-right: 16px; margin-bottom: 8px;" />
24

35
`pixie` is a **succinct data structures library**.
6+
7+
<br clear="left" />
8+
49
---
510

611
## Features
712

813
* **BitVector**
9-
* Data structure with 3.61% overhead supporting rank and select for 1 bits. Select support for 0 bits require additional 0.39%, currently not implemented
14+
* Data structure with 3.61% overhead supporting rank and select for 1 bits.
1015
* Supports:
1116
* `rank(i)`: number of set bits (`1`s) up to position `i`.
1217
* `select(k)`: position of the `k`-th set bit.
13-
* Implementation mainly follows [1] with SIMD optimizations similar to [2]
14-
* AVX-512 support is mandatory for now and thus will not compile without it.
18+
* Analogous `rank0`/`select0` operations for `0` bits.
19+
* Implementation mainly follows [1] with SIMD optimizations similar to [2]
20+
* Optimized with AVX-512/AVX2; for large binary sequences, performance is I/O-bound.
21+
* **RmMTree**
22+
* Implementation of a range min-max tree. It supports `rank`, `select`, and `excess`-related operations, allowing fast navigation in DFUDS/BP trees.
1523
---
1624

1725
## Requirements
1826

1927
* C++20
20-
* Compiler with AVX-512 support recommended for best performance.
21-
* [CMake](https://cmake.org/) ≥ 3.15.
28+
* [CMake](https://cmake.org/) ≥ 3.18.
2229

2330
---
2431

@@ -27,57 +34,78 @@
2734
```bash
2835
git clone https://github.com/Malkovsky/pixie.git
2936
cd pixie
30-
mkdir build && cd build
31-
cmake ..
32-
make -j
37+
cmake --preset release
38+
cmake --build --preset release
39+
```
40+
41+
Manual alternative:
42+
43+
```bash
44+
mkdir -p build/release
45+
cmake -B build/release -DCMAKE_BUILD_TYPE=Release
46+
cmake --build build/release -j
3347
```
3448

35-
This will build the library along with benchmarks and tests.
49+
Tests are enabled by default (`PIXIE_TESTS=ON`). Benchmarks are opt-in: enable them with `-DPIXIE_BENCHMARKS=ON` or configure with the `benchmarks-all` preset. For performance diagnostics, use the `benchmarks-diagnostic` preset (Release with debug info + performance counters support).
3650

3751
---
3852

3953
## Running Tests
4054

41-
After building:
55+
After building with presets, binaries are located in `build/release`.
4256

4357
### BitVector
4458

4559
```bash
46-
./unittests
60+
./build/release/unittests
4761
```
4862

4963
### RmM Tree
5064

5165
```bash
52-
./test_rmm
66+
./build/release/test_rmm
5367
```
5468

5569
---
5670

5771
## Running Benchmarks
5872

73+
Before running benchmarks, configure with presets:
74+
75+
```bash
76+
cmake --preset benchmarks-all
77+
cmake --build --preset release
78+
```
79+
80+
For a RelWithDebInfo diagnostic build, use:
81+
82+
```bash
83+
cmake --preset benchmarks-diagnostic
84+
cmake --build --preset release
85+
```
86+
5987
### BitVector
6088

6189
Benchmarks are random 50/50 0-1 bitvectors up to $2^{34}$ bits.
6290

6391
```bash
64-
./benchmarks
92+
./build/release/benchmarks
6593
```
6694

6795
### RmM Tree
6896

6997
```bash
70-
./bench_rmm
98+
./build/release/bench_rmm
7199
```
72100

73-
For comparison with range min-max tree implementation from [sdsl-lite](https://github.com/simongog/sdsl-lite) (Release build required: `cmake .. -DCMAKE_BUILD_TYPE=Release`):
101+
For comparison with range min-max tree implementation from [sdsl-lite](https://github.com/simongog/sdsl-lite) (Release build required; use the release preset or `-DCMAKE_BUILD_TYPE=Release`):
74102

75103
```bash
76104
sudo cpupower frequency-set --governor performance
77-
./bench_rmm_sdsl --benchmark_out=rmm_bench_sdsl.json
105+
./build/release/bench_rmm_sdsl --benchmark_out=rmm_bench_sdsl.json
78106
```
79107

80-
For visualization, write the JSON output to a file using `--benchmark_out=<file>` (e.g. `./bench_rmm --benchmark_out=rmm_bench.json`) and plot it with `misc/plot_rmm.py` (add `--sdsl-json rmm_bench_sdsl.json` for comparison).
108+
For visualization, write the JSON output to a file using `--benchmark_out=<file>` (e.g. `./build/release/bench_rmm --benchmark_out=rmm_bench.json`) and plot it with `scripts/plot_rmm.py` (add `--sdsl-json rmm_bench_sdsl.json` for comparison).
81109

82110
---
83111

include/pixie/bitvector.h

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
#include <string>
1111
#include <vector>
1212

13+
#ifdef PIXIE_DIAGNOSTICS
14+
#include <spdlog/spdlog.h>
15+
#endif
16+
1317
namespace pixie {
1418

1519
/**
@@ -381,6 +385,55 @@ class BitVector {
381385
}
382386

383387
public:
388+
#ifdef PIXIE_DIAGNOSTICS
389+
/// Per-component byte counts, as filled in by diagnostics_bytes().
struct DiagnosticsBytes {
390+
// Size of the source bit sequence: num_bits_ rounded up to whole bytes.
size_t source_bitvector_bytes = 0;
391+
// Byte size of super_block_rank_.
size_t super_block_rank_bytes = 0;
392+
// Byte size of basic_block_rank_.
size_t basic_block_rank_bytes = 0;
393+
// Byte size of select1_samples_.
size_t select1_samples_bytes = 0;
394+
// Byte size of select0_samples_.
size_t select0_samples_bytes = 0;
395+
// Sum of the four auxiliary sizes above; does NOT include
// source_bitvector_bytes.
size_t total_bytes = 0;
396+
};
397+
398+
/**
399+
* @brief Returns the number of bytes used by each internal component.
*
* The source size is computed as num_bits_ rounded up to whole bytes; the
* other sizes are the byte lengths of super_block_rank_, basic_block_rank_,
* select1_samples_ and select0_samples_. total_bytes sums only those four
* auxiliary structures and does not include the source bit vector.
*
* @return A DiagnosticsBytes value with every field populated.
400+
*/
401+
DiagnosticsBytes diagnostics_bytes() const {
402+
DiagnosticsBytes result;
403+
// Round the bit count up to a whole number of bytes.
result.source_bitvector_bytes = (num_bits_ + 7) / 8;
404+
result.super_block_rank_bytes = super_block_rank_.AsConstBytes().size();
405+
result.basic_block_rank_bytes = basic_block_rank_.AsConstBytes().size();
406+
result.select1_samples_bytes = select1_samples_.AsConstBytes().size();
407+
result.select0_samples_bytes = select0_samples_.AsConstBytes().size();
408+
// Auxiliary structures only: the source bytes are deliberately left out of
// this sum, so "total" is the overhead on top of the source data.
result.total_bytes = result.super_block_rank_bytes +
409+
result.basic_block_rank_bytes +
410+
result.select1_samples_bytes +
411+
result.select0_samples_bytes;
412+
return result;
413+
}
414+
415+
/**
416+
* @brief Log memory usage of internal components.
*
* Emits one spdlog info-level line per field of diagnostics_bytes(), each
* showing the byte count and its size as a percentage of the source bit
* vector's byte size. Because total_bytes excludes the source, the "total"
* line reports the auxiliary overhead relative to the source.
417+
*/
418+
void memory_report() const {
419+
const auto diagnostics = diagnostics_bytes();
420+
const double source_bytes =
421+
static_cast<double>(diagnostics.source_bitvector_bytes);
422+
// NOTE(review): std::string_view is used here, but only <string> and
// <vector> are visible among the includes in this view -- confirm that
// <string_view> is included directly.
const auto log_bytes = [&](std::string_view label, size_t bytes) {
423+
// Report 0% instead of dividing by zero when the source is empty.
const double percentage =
424+
source_bytes > 0.0 ? 100.0 * static_cast<double>(bytes) / source_bytes
425+
: 0.0;
426+
spdlog::info("BitVector {}: {} bytes ({:.2f}% of source)", label, bytes,
427+
percentage);
428+
};
429+
log_bytes("source_bitvector", diagnostics.source_bitvector_bytes);
430+
log_bytes("super_block_rank", diagnostics.super_block_rank_bytes);
431+
log_bytes("basic_block_rank", diagnostics.basic_block_rank_bytes);
432+
log_bytes("select1_samples", diagnostics.select1_samples_bytes);
433+
log_bytes("select0_samples", diagnostics.select0_samples_bytes);
434+
log_bytes("total", diagnostics.total_bytes);
435+
}
436+
#endif
384437
/**
385438
* @brief Construct from an external array of 64-bit words.
386439
* @param

src/benchmarks/benchmarks.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@ static void BM_RankNonInterleaved(benchmark::State& state) {
8989
auto bits_as_words = bits.As64BitInts();
9090
PrepareRandomBits50pFill(bits_as_words);
9191
pixie::BitVector bv(bits_as_words, n);
92+
#ifdef PIXIE_DIAGNOSTICS
93+
bv.memory_report();
94+
#endif
9295

9396
std::mt19937_64 rng(42);
9497
for (auto _ : state) {

src/docs/Doxyfile.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ PROJECT_BRIEF =
6161
# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy
6262
# the logo to the output directory.
6363

64-
PROJECT_LOGO =
64+
PROJECT_LOGO = images/logo.png
6565

6666
# With the PROJECT_ICON tag one can specify an icon that is included in the tabs
6767
# when the HTML document is shown. Doxygen will copy the logo to the output

src/docs/images/logo.png

1020 KB
Loading

0 commit comments

Comments
 (0)