diff --git a/Cargo.lock b/Cargo.lock index 16ea17404..bd81677d2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2871,6 +2871,18 @@ dependencies = [ "unicode-width 0.1.14", ] +[[package]] +name = "miden-large-forest-backend-rocksdb" +version = "0.14.0-alpha.5" +dependencies = [ + "miden-crypto", + "miden-node-rocksdb-cxx-linkage-fix", + "miden-protocol", + "rayon", + "rocksdb", + "winter-utils", +] + [[package]] name = "miden-large-smt-backend-rocksdb" version = "0.14.0-alpha.5" @@ -3164,6 +3176,7 @@ dependencies = [ "miden-agglayer", "miden-block-prover", "miden-crypto", + "miden-large-forest-backend-rocksdb", "miden-large-smt-backend-rocksdb", "miden-node-db", "miden-node-proto", diff --git a/Cargo.toml b/Cargo.toml index 2156ddf8f..7d34fe3dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ members = [ "crates/block-producer", "crates/db", "crates/grpc-error-macro", + "crates/large-forest-backend-rocksdb", "crates/large-smt-backend-rocksdb", "crates/ntx-builder", "crates/proto", @@ -46,7 +47,8 @@ debug = true [workspace.dependencies] # Workspace crates. 
-miden-large-smt-backend-rocksdb = { path = "crates/large-smt-backend-rocksdb", version = "=0.14.0-alpha.5" } +miden-large-smt-backend-rocksdb = { path = "crates/large-smt-backend-rocksdb", version = "=0.14.0-alpha.5" } +miden-large-forest-backend-rocksdb = { path = "crates/large-forest-backend-rocksdb", version = "=0.14.0-alpha.5" } miden-node-block-producer = { path = "crates/block-producer", version = "=0.14.0-alpha.5" } miden-node-db = { path = "crates/db", version = "=0.14.0-alpha.5" } miden-node-grpc-error-macro = { path = "crates/grpc-error-macro", version = "=0.14.0-alpha.5" } diff --git a/bin/node/src/main.rs b/bin/node/src/main.rs index 2a7ce7335..72582382b 100644 --- a/bin/node/src/main.rs +++ b/bin/node/src/main.rs @@ -6,6 +6,8 @@ use clap::{Parser, Subcommand}; use miden_node_utils::logging::OpenTelemetry; mod commands; +#[cfg(test)] +mod tests; // COMMANDS // ================================================================================================ diff --git a/bin/node/src/tests.rs b/bin/node/src/tests.rs new file mode 100644 index 000000000..556a2be7f --- /dev/null +++ b/bin/node/src/tests.rs @@ -0,0 +1,46 @@ +use clap::Parser; + +use crate::Cli; + +fn parse(args: &[&str]) -> Result<Cli, clap::Error> { + Cli::try_parse_from(std::iter::once("miden-node").chain(args.iter().copied())) +} + +#[test] +fn store_bootstrap_parses() { + let _ = parse(&["store", "bootstrap"]); +} + +#[test] +fn block_producer_start_parses() { + let _ = parse(&[ + "block-producer", + "start", + ]); +} + +#[test] +fn validator_bootstrap_parses() { + let _ = parse(&[ + "validator", + "bootstrap", + ]); +} + +#[test] +fn validator_start_parses() { + let _ = parse(&["validator", "start"]); +} + +#[test] +fn bundled_bootstrap_parses() { + let _ = parse(&[ + "bundled", + "bootstrap", + ]); +} + +#[test] +fn bundled_start_parses() { + let _ = parse(&["bundled", "start"]); +} \ No newline at end of file diff --git a/crates/large-forest-backend-rocksdb/Cargo.toml
b/crates/large-forest-backend-rocksdb/Cargo.toml new file mode 100644 index 000000000..b9ba73c5f --- /dev/null +++ b/crates/large-forest-backend-rocksdb/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "miden-large-forest-backend-rocksdb" +description = "Large-scale Sparse Merkle Tree Forest backed by pluggable storage - RocksDB backend" +authors.workspace = true +edition.workspace = true +exclude.workspace = true +homepage.workspace = true +license.workspace = true +readme.workspace = true +repository.workspace = true +rust-version.workspace = true +version.workspace = true + +[lints] +workspace = true + +[dependencies] +miden-crypto = { features = ["concurrent", "std"], workspace = true } +miden-protocol = { features = ["std"], workspace = true } +rayon = { version = "1.10" } +rocksdb = { default-features = false, features = ["bindgen-runtime", "lz4"], version = "0.24" } +winter-utils = { version = "0.13" } +miden-serde-utils = { workspace = true } + +[build-dependencies] +miden-node-rocksdb-cxx-linkage-fix = { workspace = true } diff --git a/crates/large-forest-backend-rocksdb/build.rs b/crates/large-forest-backend-rocksdb/build.rs new file mode 100644 index 000000000..ed4038d06 --- /dev/null +++ b/crates/large-forest-backend-rocksdb/build.rs @@ -0,0 +1,3 @@ +fn main() { + miden_node_rocksdb_cxx_linkage_fix::configure(); +} diff --git a/crates/large-forest-backend-rocksdb/src/config.rs b/crates/large-forest-backend-rocksdb/src/config.rs new file mode 100644 index 000000000..dc8b32563 --- /dev/null +++ b/crates/large-forest-backend-rocksdb/src/config.rs @@ -0,0 +1,271 @@ +//! Contains the configuration for the persistent backend. + +use std::{ffi::c_double, fs, path::PathBuf}; + +use super::Result; +use crate::merkle::smt::BackendError; + +// CONSTANTS +// ================================================================================================ + +/// The default size for the database cache in bytes (2 GiB). 
+const DEFAULT_CACHE_SIZE_BYTES: usize = 2 << 30; + +/// The default maximum number of files that the database engine can have open at one time. +const DEFAULT_MAX_OPEN_FILES: usize = 1 << 9; + +/// The default maximum size of the write-ahead log in bytes (1 GiB). +const DEFAULT_MAX_TOTAL_WAL_SIZE_BYTES: u64 = 1 << 30; + +/// The default number of bits in the RocksDB bloom filter to use per key. +const DEFAULT_BLOOM_FILTER_BITS_PER_KEY: c_double = 10.0; + +/// The default target file size for the files that make up the database (512 MiB). +const DEFAULT_TARGET_FILE_SIZE: u64 = 512 << 20; + +// CONFIG TYPE +// ================================================================================================ + +/// The basic configuration for the persistent backend. +#[derive(Clone, Debug, PartialEq)] +pub struct Config { + /// The path at which the database can be found. + /// + /// This should be a directory path that the application has read/write permissions for. The + /// database will create multiple files in this directory as part of its operation. + pub(super) path: PathBuf, + + /// The maximum size of the backend's block cache in bytes. + /// + /// This cache stores blocks that the database accesses frequently in memory to improve read + /// performance. Larger cache sizes improve read performance but consume more memory. + /// + /// Defaults to [`DEFAULT_CACHE_SIZE_BYTES`]. + pub(super) cache_size_bytes: usize, + + /// The maximum number of file handles that the database engine can keep open at one time. + /// + /// This setting affects both memory usage and the number of FDs used by the process. Higher + /// values can improve performance for large databases, but can increase resource usage. + /// + /// Defaults to [`DEFAULT_MAX_OPEN_FILES`]. + pub(super) max_open_files: usize, + + /// The maximum size of the write-ahead log in bytes. 
+ /// + /// This setting affects the amount of data that can be buffered in the WAL before it has to be + /// flushed to disk, and hence the maximum amount of memory it can use. + /// + /// Defaults to [`DEFAULT_MAX_TOTAL_WAL_SIZE_BYTES`]. + pub(super) max_wal_size: u64, + + /// The number of bits in the RocksDB bloom filter to use per key. + /// + /// This setting balances between the amount of work to look up a key and the size of the bloom + /// filter. If the bloom filter gets too large, it gets too complex to query and hence slows + /// things down. If the bits per key are insufficiently distinguishing, then the performance + /// improvement from the bloom filter can become negligible. + /// + /// Defaults to [`DEFAULT_BLOOM_FILTER_BITS_PER_KEY`]. + pub(super) bloom_filter_bits: c_double, + + /// The target size for the files that make up the database. + /// + /// This directly determines how large any individual file can get on disk as part of the + /// Database, and hence has an impact on file handle churn as the database performs operations. + /// + /// Defaults to [`DEFAULT_TARGET_FILE_SIZE`]. + pub(super) target_file_size: u64, +} + +impl Config { + /// Constructs a new configuration object with the provided database `path` and default + /// settings. + /// + /// The provided `path` must be in a location writable by the backend, and will be created by + /// the database if it does not exist. + /// + /// The defaults are as follows: + /// + /// - `cache_size_bytes`: 2 GiB + /// - `max_open_files`: 512 + /// - `max_wal_size`: 1 GiB + /// + /// # Errors + /// + /// - [`BackendError::Internal`] if the provided `path` is not accessible to the backend, or is + /// not a directory. + pub fn new(path: impl Into<PathBuf>) -> Result<Self> { + let path = path.into(); + + if fs::exists(&path)? { + // The provided path must be a directory or a symlink to one, and it must be + // RW-accessible by us if it does exist.
+ let path_data = fs::metadata(&path)?; + if !path_data.is_dir() { + return Err(BackendError::internal_from_message(format!( + "The path {} exists and is not a folder", + path.to_string_lossy() + ))); + } + if path_data.permissions().readonly() { + return Err(BackendError::internal_from_message(format!( + "The path {} is not writable", + path.to_string_lossy() + ))); + } + } + + Ok(Self { + path, + cache_size_bytes: DEFAULT_CACHE_SIZE_BYTES, + max_open_files: DEFAULT_MAX_OPEN_FILES, + max_wal_size: DEFAULT_MAX_TOTAL_WAL_SIZE_BYTES, + bloom_filter_bits: DEFAULT_BLOOM_FILTER_BITS_PER_KEY, + target_file_size: DEFAULT_TARGET_FILE_SIZE, + }) + } +} + +// BUILDER FUNCTIONS +// ================================================================================================ + +/// This block contains the functions for building an appropriate configuration for the backend. +impl Config { + /// Sets the cache size in bytes for the database cache. + /// + /// The block cache stores frequently-accessed data block in memory to improve read performance. + /// Larger cache sizes generally improve read performance but consume more memory. + /// + /// Defaults to `2 * 1024 * 1024 * 1024` bytes, or 2 GiB. + pub fn with_cache_size_bytes(mut self, cache_size_bytes: usize) -> Self { + self.cache_size_bytes = cache_size_bytes; + self + } + + /// Sets the maximum number of files that the backend can have open simultaneously. + /// + /// This affects both memory usage of the backend and the number of file descriptors used by the + /// process. Higher values improve performances for large databases, but increase resource + /// usage. + /// + /// Defaults to 512 files. + pub fn with_max_open_files(mut self, max_open_files: usize) -> Self { + self.max_open_files = max_open_files; + self + } + + /// Sets the maximum size of the write-ahead log in the backend. 
+ /// + /// This setting affects the amount of data that can be buffered in the WAL before it has to be + /// flushed to disk, and hence the maximum amount of memory it can use. + /// + /// Defaults to 1 GiB. + pub fn with_max_wal_size(mut self, max_wal_size: u64) -> Self { + self.max_wal_size = max_wal_size; + self + } + + /// The number of bits in the RocksDB bloom filter to use per key. + /// + /// This setting balances between the amount of work to look up a key and the size of the bloom + /// filter. If the bloom filter gets too large, it gets too complex to query and hence slows + /// things down. If the bits per key are insufficiently distinguishing, then the performance + /// improvement from the bloom filter can become negligible. + /// + /// Defaults to 10.0. + pub fn with_bloom_filter_bits(mut self, bloom_filter_bits: f64) -> Self { + self.bloom_filter_bits = c_double::from(bloom_filter_bits); + self + } + + /// Sets the target size for the files that make up the database. + /// + /// This directly determines how large any individual file can get on disk as part of the + /// Database, and hence has an impact on file handle churn as the database performs operations. + /// + /// Defaults to 512 MiB. 
+ pub fn with_target_file_size(mut self, target_file_size: u64) -> Self { + self.target_file_size = target_file_size; + self + } +} + +// TESTS +// ================================================================================================ + +#[cfg(test)] +mod test { + use tempfile::tempdir; + + use super::*; + + #[test] + fn new() -> Result<()> { + let tempdir = tempdir()?; + let config = Config::new(tempdir.path())?; + + assert_eq!(config.cache_size_bytes, DEFAULT_CACHE_SIZE_BYTES); + assert_eq!(config.max_open_files, DEFAULT_MAX_OPEN_FILES); + assert_eq!(config.max_wal_size, DEFAULT_MAX_TOTAL_WAL_SIZE_BYTES); + assert_eq!(config.bloom_filter_bits, DEFAULT_BLOOM_FILTER_BITS_PER_KEY); + assert_eq!(config.target_file_size, DEFAULT_TARGET_FILE_SIZE); + + Ok(()) + } + + #[test] + fn with_cache_size_bytes() -> Result<()> { + let tempdir = tempdir()?; + let config = Config::new(tempdir.path())?; + let config = config.with_cache_size_bytes(1024); + + assert_eq!(config.cache_size_bytes, 1024); + + Ok(()) + } + + #[test] + fn with_max_open_files() -> Result<()> { + let tempdir = tempdir()?; + let config = Config::new(tempdir.path())?; + let config = config.with_max_open_files(63); + + assert_eq!(config.max_open_files, 63); + + Ok(()) + } + + #[test] + fn with_max_wal_size() -> Result<()> { + let tempdir = tempdir()?; + let config = Config::new(tempdir.path())?; + let config = config.with_max_wal_size(2 << 30); + + assert_eq!(config.max_wal_size, 2 << 30); + + Ok(()) + } + + #[test] + fn with_bloom_filter_bits() -> Result<()> { + let tempdir = tempdir()?; + let config = Config::new(tempdir.path())?; + let config = config.with_bloom_filter_bits(21.0); + + assert_eq!(config.bloom_filter_bits, 21.0); + + Ok(()) + } + + #[test] + fn with_target_file_size() -> Result<()> { + let tempdir = tempdir()?; + let config = Config::new(tempdir.path())?; + let config = config.with_target_file_size(256 << 20); + + assert_eq!(config.target_file_size, 256 << 20); + + Ok(()) + } +} diff 
--git a/crates/large-forest-backend-rocksdb/src/helpers.rs b/crates/large-forest-backend-rocksdb/src/helpers.rs new file mode 100644 index 000000000..40588c393 --- /dev/null +++ b/crates/large-forest-backend-rocksdb/src/helpers.rs @@ -0,0 +1,109 @@ +use miden_crypto::merkle::smt::{MAX_LEAF_ENTRIES, SmtLeaf, SmtLeafError}; +use miden_crypto::word::LexicographicWord; +use rocksdb::Error as RocksDbError; + +use crate::{StorageError, Word}; + +pub(crate) fn map_rocksdb_err(err: RocksDbError) -> StorageError { + StorageError::Backend(Box::new(err)) +} + +pub(crate) fn get_leaf_value(leaf: &SmtLeaf, key: Word) -> Result, StorageError> { + match leaf { + SmtLeaf::Empty(_) => Ok(None), + SmtLeaf::Single((k, v)) => { + if *k == key { + Ok(Some(*v)) + } else { + Ok(None) + } + }, + SmtLeaf::Multiple(pairs) => match pairs.binary_search_by(|(k, _v)| { + LexicographicWord::from(*k).cmp(&LexicographicWord::from(key)) + }) { + Ok(pos) => Ok(Some(pairs[pos].1)), + Err(pos) => { + if pairs.len() >= MAX_LEAF_ENTRIES { + return Err(StorageError::Leaf(SmtLeafError::TooManyLeafEntries { + actual: pairs.len() + 1, + })); + } + Ok(None) + }, + }, + } +} + +pub(crate) fn insert_into_leaf( + leaf: &mut SmtLeaf, + key: Word, + value: Word, +) -> Result, StorageError> { + match leaf { + SmtLeaf::Empty(_) => { + *leaf = SmtLeaf::new_single(key, value); + Ok(None) + }, + SmtLeaf::Single(kv_pair) => { + if kv_pair.0 == key { + let old_value = kv_pair.1; + kv_pair.1 = value; + Ok(Some(old_value)) + } else { + let mut pairs = vec![*kv_pair, (key, value)]; + pairs.sort_by(|(key_1, _), (key_2, _)| { + LexicographicWord::from(*key_1).cmp(&LexicographicWord::from(*key_2)) + }); + *leaf = SmtLeaf::Multiple(pairs); + Ok(None) + } + }, + SmtLeaf::Multiple(kv_pairs) => match kv_pairs.binary_search_by(|kv_pair| { + LexicographicWord::from(kv_pair.0).cmp(&LexicographicWord::from(key)) + }) { + Ok(pos) => { + let old_value = kv_pairs[pos].1; + kv_pairs[pos].1 = value; + Ok(Some(old_value)) + }, + Err(pos) 
=> { + if kv_pairs.len() >= MAX_LEAF_ENTRIES { + return Err(StorageError::Leaf(SmtLeafError::TooManyLeafEntries { + actual: kv_pairs.len() + 1, + })); + } + kv_pairs.insert(pos, (key, value)); + Ok(None) + }, + }, + } +} + +pub(crate) fn remove_from_leaf(leaf: &mut SmtLeaf, key: Word) -> (Option, bool) { + match leaf { + SmtLeaf::Empty(_) => (None, false), + SmtLeaf::Single((key_at_leaf, value_at_leaf)) => { + if *key_at_leaf == key { + let old_value = *value_at_leaf; + *leaf = SmtLeaf::new_empty(key.into()); + (Some(old_value), true) + } else { + (None, false) + } + }, + SmtLeaf::Multiple(kv_pairs) => match kv_pairs.binary_search_by(|kv_pair| { + LexicographicWord::from(kv_pair.0).cmp(&LexicographicWord::from(key)) + }) { + Ok(pos) => { + let old_value = kv_pairs[pos].1; + kv_pairs.remove(pos); + debug_assert!(!kv_pairs.is_empty()); + if kv_pairs.len() == 1 { + *leaf = SmtLeaf::Single(kv_pairs[0]); + } + (Some(old_value), false) + }, + Err(_) => (None, false), + }, + } +} diff --git a/crates/large-forest-backend-rocksdb/src/internal.rs b/crates/large-forest-backend-rocksdb/src/internal.rs new file mode 100644 index 000000000..f125ae2d8 --- /dev/null +++ b/crates/large-forest-backend-rocksdb/src/internal.rs @@ -0,0 +1,85 @@ +//! Contains internal functionality for interacting with RocksDB that is not exposed in the RocksDB +//! crate or the RocksDB C wrapper. + +use crate::merkle::smt::large_forest::backend::persistent::WriteBatch; + +/// Merges the provided [`WriteBatch`]es into a single batch using efficient raw-memory operations. +/// +/// This is intended to be equivalent to `WriteBatchInternal::Append` in the C++ codebase, but that +/// function is not exposed in the C API or the Rust wrapper for RocksDB. 
+/// +/// **This function must be revisited every time the RocksDB crate is updated to ensure that it +/// operates correctly.** +/// +/// # `WriteBatch` Layout +/// +/// The basic format of a `WriteBatch` is as follows: +/// +/// ```text +/// [sequence: u64le][count: u32le][record0][record1]... +/// ``` +/// +/// Each record contains: +/// +/// - A **type tag** (e.g. `kTypeColumnFamilyValue` or `kTypeColumnFamilyDeletion`) which determines +/// how to read the rest of the underlying data. +/// - A **column family ID** `varint32_cf_id` which identifies the column family into which the +/// record will be written. +/// - A **key** `&[u8]` that is the key in the column. +/// - A **value** `&[u8]` that is the value for the given key in the column. +/// +/// # Alternatives +/// +/// An alternative to this would be to use a transaction-based RocksDB instance instead, and perform +/// the merging using `Transaction::rebuild_from_writebatch`. This is semantically equivalent, but +/// requires involving the whole transaction machinery. It ends up being significantly more +/// heavyweight than this solution, almost doubling the amount of time the batch forest update takes +/// to run. +/// +/// # Panics +/// +/// - If the data for either the left or right batch is corrupt, as this indicates a bug in the +/// underlying RocksDB implementation and should not be continued with. +pub fn merge_batches(left: WriteBatch, right: &WriteBatch) -> WriteBatch { + const SEQUENCE_SIZE: usize = size_of::<u64>(); + const COUNT_SIZE: usize = size_of::<u32>(); + const HEADER_SIZE: usize = SEQUENCE_SIZE + COUNT_SIZE; + + let left_data = left.data(); + let right_data = right.data(); + + // We initially check that we have enough data to make this at all sane, returning a corruption + // error if not.
+ if right_data.len() < HEADER_SIZE { + panic!("Right write batch contained insufficient data of {} bytes", right_data.len()); + } + if left_data.len() < HEADER_SIZE { + panic!("Left write batch contained insufficient data of {} bytes", left_data.len()); + } + + // We then parse out the counts, returning an error if they are not valid u32 values. + let left_count = u32::from_le_bytes( + left_data[8..12] + .try_into() + .unwrap_or_else(|e| panic!("Left's count was not a valid u32: {e}")), + ); + let right_count = u32::from_le_bytes( + right_data[8..12] + .try_into() + .unwrap_or_else(|e| panic!("Right's count was not a valid u32: {e}")), + ); + + // If that is good, we take `left` as our starting point, update the counts, and then append the + // records from right. + let mut new_data = left_data.to_vec(); + new_data[SEQUENCE_SIZE..HEADER_SIZE].copy_from_slice( + left_count + .checked_add(right_count) + .expect("Overflow cannot occur") + .to_le_bytes() + .as_ref(), + ); + new_data.extend_from_slice(&right_data[HEADER_SIZE..]); + + WriteBatch::from_data(&new_data) +} diff --git a/crates/large-forest-backend-rocksdb/src/iterator.rs b/crates/large-forest-backend-rocksdb/src/iterator.rs new file mode 100644 index 000000000..64b2fe423 --- /dev/null +++ b/crates/large-forest-backend-rocksdb/src/iterator.rs @@ -0,0 +1,120 @@ +//! This module contains the iterator needed by the persistent backend of the forest. + +use alloc::boxed::Box; + +use miden_serde_utils::Deserializable; +use rocksdb as db; + +use crate::{ + Word, + merkle::smt::{ + LineageId, SmtLeaf, TreeEntry, large_forest::backend::persistent::keys::LeafKey, + }, +}; + +// TYPE ALIASES +// ================================================================================================ + +/// The type of the underlying iterator over the database. +/// +/// The type of items that it yields is not statically known, as it just provides bytes for +/// key-value pairs. 
Decoding these into the correct types is up to the client of the type. +pub type DBIterator<'db> = db::DBIteratorWithThreadMode<'db, super::DB>; + +// ENTRIES ITERATOR +// ================================================================================================ + +/// An iterator over the entries for a given tree in the backend. +pub struct PersistentBackendEntriesIterator<'db> { + /// The lineage whose leaves are being iterated over. + pub lineage: LineageId, + + /// The iterator over all leaves in the database. + iterator: DBIterator<'db>, + + /// State-machine tracking. + state: PersistentBackendEntriesIteratorState, +} +impl<'db> PersistentBackendEntriesIterator<'db> { + /// Constructs a new such iterator in the starting state. + /// + /// The provided `iterator` must yield items where the key decodes to a `LeafKey` and the value + /// decodes to an `SmtLeaf`. If this is not the case, iteration will panic. + /// + /// For performance, this iterator should be passed a prefix iterator over the database with the + /// correct prefix (corresponding to the provided `lineage`) set, but it will still function + /// properly if this is not the case. + pub fn new(lineage: LineageId, iterator: DBIterator<'db>) -> Self { + let state = PersistentBackendEntriesIteratorState::NotInLeaf; + Self { lineage, iterator, state } + } +} + +/// The internal state machine for the iterator. +enum PersistentBackendEntriesIteratorState { + /// The iterator is not currently iterating over a particular leaf. + NotInLeaf, + + /// The iterator is iterating over a particular leaf, with `leaf_entries` remaining to be + /// yielded in that leaf. + InLeaf { + /// The iterator over the current leaf's entries. + leaf_entries: Box>, + }, +} + +impl<'db> Iterator for PersistentBackendEntriesIterator<'db> { + type Item = TreeEntry; + + /// Advances the iterator and returns the next item if present. + /// + /// # Panics + /// + /// - If unable to read from the underlying database. 
+ fn next(&mut self) -> Option { + loop { + match &mut self.state { + PersistentBackendEntriesIteratorState::NotInLeaf => { + // Here we are not in a leaf of the targeted tree, so we have to see if we _can_ + // be. + if let Some(entry) = self.iterator.next() { + let (key_bytes, value_bytes) = entry.expect("Able to read from the DB"); + let key = LeafKey::read_from_bytes(&key_bytes) + .expect("Leaf key data read from disk is not corrupt"); + + // If the key isn't for the correct lineage (which can happen even with the + // bloom filter), we need to advance by returning to the loop. + if key.lineage != self.lineage { + continue; + } + + // If the key is valid, we need to read out the leaf itself and then start + // iterating over that. + let leaf = SmtLeaf::read_from_bytes(&value_bytes) + .expect("Leaf data read from disk is not corrupt"); + let mut leaf_entries = leaf.into_entries(); + leaf_entries.sort_by_key(|(k, _)| *k); + + // We change state to being in the leaf, and then recurse to return a value. + self.state = PersistentBackendEntriesIteratorState::InLeaf { + leaf_entries: Box::new(leaf_entries.into_iter()), + }; + } else { + return None; + } + }, + PersistentBackendEntriesIteratorState::InLeaf { leaf_entries } => { + if let Some((key, value)) = leaf_entries.next() { + // Here we have an entry in the leaf, so we simply need to return it. + return Some(TreeEntry { key, value }); + } else { + // If we've run out of entries in the leaf itself, we need to see if there + // is another valid leaf. We do this by changing state and looping to use + // the existing logic. + self.state = PersistentBackendEntriesIteratorState::NotInLeaf; + } + }, + } + } + } +} diff --git a/crates/large-forest-backend-rocksdb/src/keys.rs b/crates/large-forest-backend-rocksdb/src/keys.rs new file mode 100644 index 000000000..64aef7807 --- /dev/null +++ b/crates/large-forest-backend-rocksdb/src/keys.rs @@ -0,0 +1,75 @@ +//! 
This module contains the definition of the various key types necessary for querying the backing +//! persistent DB. + +use miden_serde_utils::{ + ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable, +}; +use serde::{Deserialize, Serialize}; + +use crate::merkle::{NodeIndex, smt::LineageId}; + +// LEAF KEY +// ================================================================================================ + +/// A key that uniquely identifies a leaf in the database. +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] +pub struct LeafKey { + /// The lineage (and hence tree) to which the leaf belongs. + pub lineage: LineageId, + + /// The logical index of the leaf within its parent tree. + pub index: u64, +} + +impl Serializable for LeafKey { + fn write_into(&self, target: &mut W) { + target.write(self.lineage); + target.write(self.index); + } + + fn get_size_hint(&self) -> usize { + size_of::() + size_of::() + } +} + +impl Deserializable for LeafKey { + fn read_from(source: &mut R) -> Result { + let lineage = LineageId::read_from(source)?; + let index = source.read_u64()?; + + Ok(Self { lineage, index }) + } +} + +// SUBTREE KEY +// ================================================================================================ + +/// A key that uniquely identifies a subtree in the database. +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] +pub struct SubtreeKey { + /// The lineage (and hence tree) to which the subtree belongs. + pub lineage: LineageId, + + /// The node index of the root of the subtree. 
+ pub index: NodeIndex, +} + +impl Serializable for SubtreeKey { + fn write_into(&self, target: &mut W) { + target.write(self.lineage); + target.write(self.index); + } + + fn get_size_hint(&self) -> usize { + size_of::() + size_of::() + } +} + +impl Deserializable for SubtreeKey { + fn read_from(source: &mut R) -> Result { + let lineage = LineageId::read_from(source)?; + let index = NodeIndex::read_from(source)?; + + Ok(Self { lineage, index }) + } +} diff --git a/crates/large-forest-backend-rocksdb/src/lib.rs b/crates/large-forest-backend-rocksdb/src/lib.rs new file mode 100644 index 000000000..e252f6e7b --- /dev/null +++ b/crates/large-forest-backend-rocksdb/src/lib.rs @@ -0,0 +1,1482 @@ +//! A persistent backend for the SMT forest built with inspiration from LargeSMT's existing +//! persistent backend. +//! +//! # Performance Considerations +//! +//! Most operations in this backend need to perform disk I/O to the backing database. The +//! implementation does its best to mask this latency through parallelism, but some methods (e.g. +//! [`PersistentBackend::open`]) don't have work that can be parallelized in this way. +//! +//! To take advantage of data locality on disk, batches built using keys that share high-order bits +//! (e.g. from within the same lineage) are going to exhibit better read and write performance. Such +//! locality will also result in reductions to peak memory residency. +//! +//! ## Memory Residency +//! +//! As this backend does not store much data permanently in memory, the memory usage behavior is +//! quite spiky. Peak memory usage will be seen during a query or update operation, but all the +//! memory of that peak is released back to the system by the time the query has completed. +//! +//! Peak memory usage is proportional to: +//! +//! - The number of mutated leaves in a batch. +//! - The number of unique subtrees across which these leaves fall. +//! - The number of distinct subtrees altered by these mutations. +//! +//! 
This means that extremely large batches, or batches that are scattered across a significant +//! number of subtrees, will see higher peak residency than batches with good locality +//! characteristics. + +pub mod config; +mod helpers; +mod internal; +mod iterator; +mod keys; +mod property_tests; +mod tests; +mod tree_metadata; + +// FIXME TODO +// use alloc::string::ToString; +// use alloc::sync::Arc; +// use alloc::vec::Vec; +use core::ffi::c_int; +use std::collections::HashMap; +use std::iter::once; +use std::mem; + +use miden_crypto::merkle::smt::{ + Backend, + InnerNode, + LeafIndex, + LineageId, + MutatedSubtreeLeaves, + MutationSet, + NodeMutation, + SMT_DEPTH, + SUBTREE_DEPTH, + SmtForestUpdateBatch, + SmtLeaf, + SmtLeafError, + SmtProof, + SmtUpdateBatch, + StorageError, + StorageUpdateParts, + StorageUpdates, + Subtree, + SubtreeError, + SubtreeLeaf, + SubtreeLeavesIter, + SubtreeUpdate, + TreeEntry, + TreeWithRoot, + VersionId, + fetch_sibling_pair, + process_sorted_pairs_to_leaves, +}; +use miden_serde_utils::{Deserializable, DeserializationError, Serializable}; +use num::Integer; +use rayon::prelude::*; +use rocksdb as db; + +use super::{BackendError, Result}; +use crate::config::Config; +use crate::internal::merge_batches; +use crate::iterator::PersistentBackendEntriesIterator; +use crate::keys::{LeafKey, SubtreeKey}; +use crate::merkle::{EmptySubtreeRoots, MerkleError, NodeIndex, SparseMerklePath}; +use crate::tree_metadata::TreeMetadata; +use crate::{EMPTY_WORD, Map, Word}; + +// TYPE ALIASES +// ================================================================================================ + +/// The type of the underlying RocksDB database in use by this backend. +type DB = db::DB; + +/// The type of a write batch in the database associated with a transaction. 
+type WriteBatch = db::WriteBatch; + +// CONSTANTS / COLUMN FAMILY NAMES +// ================================================================================================ + +const LEAVES_CF: &str = "v1/leaves"; +const METADATA_CF: &str = "v1/metadata"; + +const SUBTREE_00_CF: &str = "v1/st00"; +const SUBTREE_08_CF: &str = "v1/st08"; +const SUBTREE_16_CF: &str = "v1/st16"; +const SUBTREE_24_CF: &str = "v1/st24"; +const SUBTREE_32_CF: &str = "v1/st32"; +const SUBTREE_40_CF: &str = "v1/st40"; +const SUBTREE_48_CF: &str = "v1/st48"; +const SUBTREE_56_CF: &str = "v1/st56"; + +const SUBTREE_CFS: [&str; 8] = [ + SUBTREE_00_CF, + SUBTREE_08_CF, + SUBTREE_16_CF, + SUBTREE_24_CF, + SUBTREE_32_CF, + SUBTREE_40_CF, + SUBTREE_48_CF, + SUBTREE_56_CF, +]; + +// CONSTANTS / DATABASE CONFIGURATION +// ================================================================================================ + +/// The maximum size of the write buffer for the metadata column family (currently 8 MiB). +const MAX_METADATA_CF_WRITE_BUFFER_SIZE_BYTES: usize = 8 << 20; + +/// The maximum size of the write buffer for the leaves column family (currently 128 MiB). +const MAX_LEAVES_CF_WRITE_BUFFER_SIZE_BYTES: usize = 128 << 20; + +/// The maximum size of the write buffer for the subtree column families (currently 128 MiB). +const MAX_SUBTREE_CF_WRITE_BUFFER_SIZE_BYTES: usize = 128 << 20; + +/// The maximum number of write buffers to maintain per column family. +const MAX_WRITE_BUFFER_COUNT: c_int = 3; + +/// The minimum number of write buffers to merge when flushing. +const MIN_WRITE_BUFFERS_TO_MERGE: c_int = 1; + +/// The maximum number of write buffers to retain in memory when flushing. +const MAX_WRITE_BUFFERS_TO_RETAIN: i64 = 0; + +/// The compression mode to be used for all column families where compression is enabled. +/// +/// This is chosen as it has fast decompression performance, and also does not require the +/// introduction of any additional dependencies into this project. 
+const COMPRESSION_MODE: db::DBCompressionType = db::DBCompressionType::Lz4; + +/// Trigger compaction of L0 files when there are this many or more. +const L0_FILE_COMPACTION_TRIGGER: c_int = 8; + +/// The minimum number of lineages in a batch where it is worth spawning additional threads to +/// combine their batches in parallel. +const MIN_LINEAGES_IN_BATCH_TO_PARALLELIZE: usize = 5; + +// PERSISTENT BACKEND +// ================================================================================================ + +/// The persistent backend for the SMT forest, providing durable storage for the latest tree in each +/// lineage in the forest. +#[derive(Debug)] +pub struct PersistentBackend { + /// The underlying database. + /// + /// # Layout + /// + /// The data on each tree is stored across a series of RocksDB column families, along with + /// additional metadata. The layout is fixed (for the moment), and has the following column + /// families. + /// + /// - [`LEAVES_CF`]: Stores the [`SmtLeaf`] data, keyed by a [`LeafKey`] instance. + /// - [`METADATA_CF`]: Stores a [`TreeMetadata`] instance for each tree, keyed by + /// [`LineageId`]. This acts like a mirror of the in-memory `lineages` data, which exists to + /// speed up common queries. + /// - `SUBTREE_XX_CF`: Stores the [`Subtree`]s with their root at level `XX` in the backend, + /// keyed on the [`SubtreeKey`]. + db: Arc, + + /// An in-memory cache of the tree metadata enabling the more rapid servicing of certain kinds + /// of queries. + /// + /// Care must be taken that this is _always_ kept in sync with the on-disk copy in the + /// [`METADATA_CF`] column. + lineages: HashMap, +} + +// CONSTRUCTION +// ================================================================================================ + +/// This block contains functions for the construction of the persistent backend. 
+impl PersistentBackend { + /// Constructs an instance of the persistent backend, either opening or creating the data store + /// at the location specified in the `config`. + /// + /// # Errors + /// + /// - [`BackendError::CorruptedData`] if data corruption is encountered when loading the forest + /// from disk. + /// - [`BackendError::Internal`] if the backend cannot be started up properly. + pub fn load(config: Config) -> Result { + let db = Arc::new(Self::build_db_with_options(&config)?); + let lineages = Self::read_all_metadata(db.clone())?; + + Ok(Self { db, lineages }) + } +} + +// BACKEND TRAIT +// ================================================================================================ + +impl Backend for PersistentBackend { + /// Returns an opening for the specified `key` in the SMT with the specified `lineage`. + /// + /// # Errors + /// + /// - [`BackendError::UnknownLineage`] if the provided `lineage` is not known by the backend. + /// - [`BackendError::Internal`] if the backing database cannot be accessed for some reason. + fn open(&self, lineage: LineageId, key: Word) -> Result { + // We fail early if we don't know about the lineage in question, as querying further could + // cause very strange behavior. + if !self.lineages.contains_key(&lineage) { + return Err(BackendError::UnknownLineage(lineage)); + } + + // We get our leaf first. + let leaf = self + .load_leaf_for(lineage, key)? + .unwrap_or_else(|| SmtLeaf::new_empty(LeafIndex::from(key))); + + // We then have to load both the corresponding leaf, and the siblings for its path out of + // storage. + let mut leaf_index: NodeIndex = LeafIndex::from(key).into(); + + // We calculate the roots of the subtrees in order to know their keys for loading. As an + // opening only ever needs to retrieve 8 subtrees we just do this sequentially. 
+ let subtree_roots = (0..SMT_DEPTH / SUBTREE_DEPTH) + .scan(leaf_index.parent(), |cursor, _| { + let subtree_root = Subtree::find_subtree_root(*cursor); + *cursor = subtree_root.parent(); + Some(subtree_root) + }) + .collect::>(); + + // Doing this as a separate step exhibits better performance than loading these subtrees + // inline in the path creation. This appears to be due to better pipelining and + // branch-predictor behavior. + let mut subtree_cache = HashMap::::new(); + for root in subtree_roots { + let maybe_tree = self.load_subtree(SubtreeKey { lineage, index: root })?; + subtree_cache.insert(root, maybe_tree.unwrap_or_else(|| Subtree::new(root))); + } + + // Now we can read the necessary path from the cached subtree roots. + let mut path = Vec::with_capacity(SMT_DEPTH as usize); + + while leaf_index.depth() > 0 { + let is_right = leaf_index.is_position_odd(); + leaf_index = leaf_index.parent(); + + let root = Subtree::find_subtree_root(leaf_index); + let subtree = &subtree_cache[&root]; // Known to exist by construction. + let InnerNode { left, right } = + subtree.get_inner_node(leaf_index).unwrap_or_else(|| { + EmptySubtreeRoots::get_inner_node(SMT_DEPTH, leaf_index.depth()) + }); + + path.push(if is_right { left } else { right }); + } + + let merkle_path = + SparseMerklePath::from_sized_iter(path).expect("Always succeeds by construction"); + + // This is safe to do unchecked as we ensure that the path is valid by construction. + Ok(SmtProof::new_unchecked(merkle_path, leaf)) + } + + /// Returns the value associated with the provided `key` in the specified `lineage`, or [`None`] + /// if no such value exists. + /// + /// # Errors + /// + /// - [`BackendError::UnknownLineage`] if the provided `lineage` is not known by the backend. + /// - [`BackendError::Internal`] if the backing database cannot be accessed for some reason. 
+ fn get(&self, lineage: LineageId, key: Word) -> Result> { + // We fail early if we don't know about the lineage in question, as querying further could + // cause very strange behavior. + if !self.lineages.contains_key(&lineage) { + return Err(BackendError::UnknownLineage(lineage)); + } + + // We cannot read individual key-value pairs out of storage, so we have to read the leaf + // that contains the key we care about. + let leaf = self.load_leaf_for(lineage, key)?; + Ok(leaf.and_then(|l| { + let val = helpers::get_leaf_value(&l, &key); + val.and_then(|e| if e.is_empty() { None } else { Some(e) }) + })) + } + + /// Returns the version of the tree with the specified `lineage`. + /// + /// # Errors + /// + /// - [`BackendError::UnknownLineage`] If the provided `lineage` is one not known by this + /// backend. + fn version(&self, lineage: LineageId) -> Result { + let metadata = self.lineages.get(&lineage).ok_or(BackendError::UnknownLineage(lineage))?; + Ok(metadata.version) + } + + /// Returns an iterator over all the lineages that the backend knows about. + /// + /// # Errors + /// + /// - This implementation does not return any errors. + fn lineages(&self) -> Result> { + Ok(self.lineages.keys().copied()) + } + + /// Returns an iterator over all the trees that the backend knows about. + /// + /// The iteration order is unspecified. + /// + /// # Errors + /// + /// - This implementation does not return any errors. + fn trees(&self) -> Result> { + Ok(self + .lineages + .iter() + .map(|(l, m)| TreeWithRoot::new(*l, m.version, m.root_value))) + } + + /// Returns the number of entries in the tree with the provided `lineage`. + /// + /// # Errors + /// + /// - [`BackendError::UnknownLineage`] if the provided `lineage` is not known by the backend. 
+ fn entry_count(&self, lineage: LineageId) -> Result { + let metadata = self.lineages.get(&lineage).ok_or(BackendError::UnknownLineage(lineage))?; + Ok(metadata.entry_count.try_into().expect("Count of entries should fit into usize")) + } + + /// Returns an iterator that yields the populated (key-value) entries for the specified + /// `lineage`. + /// + /// This iterator yields entries in an arbitrary order, and never yields entries for which the + /// value is the empty word. + /// + /// # Errors + /// + /// - [`BackendError::UnknownLineage`] if the provided `lineage` is not one known by this + /// backend. + fn entries(&self, lineage: LineageId) -> Result> { + if !self.lineages.contains_key(&lineage) { + return Err(BackendError::UnknownLineage(lineage)); + } + + let lineage_bytes = lineage.to_bytes(); + + // In order to improve iteration performance significantly, we iterate with a prefix. As + // leaves are keyed on `LeafKey`, which begins with the bytes of the lineage, we can use the + // lineage as our prefix. That means that the iterator should only yield values whose key + // begins with the prefix with a high likelihood. + let pfx_iterator = self.db.prefix_iterator_cf(self.cf(LEAVES_CF)?, lineage_bytes); + + // Data ownership concerns mean we cannot use this iterator directly even if we could change + // its type, so we delegate to our custom entries iterator impl. + Ok(PersistentBackendEntriesIterator::new(lineage, pfx_iterator)) + } + + /// Adds the provided `lineage` to the forest with the provided `version` and sets the + /// associated tree to have the value created by applying `updates` to the empty tree, returning + /// the root of this new tree. + /// + /// # Errors + /// + /// - [`BackendError::DuplicateLineage`] if the provided `lineage` already exists in the + /// backend. + /// - [`BackendError::Internal`] if the database cannot be accessed at any point. + /// - [`BackendError::Merkle`] if an error occurs with the merkle tree semantics. 
+ fn add_lineage( + &mut self, + lineage: LineageId, + version: VersionId, + updates: SmtUpdateBatch, + ) -> Result { + // We start by checking if the lineage already exists, as we are expected by contract to + // error if it is a duplicate. + if self.lineages.contains_key(&lineage) { + return Err(BackendError::DuplicateLineage(lineage)); + } + + // We now build our new tree metadata, which begins as a fully-empty tree with the default + // root, no leaves, and no entries. + let new_lineage_meta = TreeMetadata { + version, + root_value: *EmptySubtreeRoots::entry(SMT_DEPTH, 0), + entry_count: 0, + }; + + // We add and update the tree all in one go in the backend, using a single batch to ensure + // internal consistency. + let batch = WriteBatch::default(); + + // We perform the update. If this fails due to an error, the batch will get dropped at the + // end of the scope, and any staged mutations will be forgotten without being applied. + let (batch, reversion_set, tree_metadata) = self.update_tree_in_write_batch( + batch, + lineage, + new_lineage_meta, + version, + updates.into(), + )?; + + // Upon returning successfully, we need to update the metadata on disk as part of this + // operation. + let batch = self.write_metadata(batch, lineage, &tree_metadata)?; + + // Only when the batch has been successfully written to disk do we write to the in-memory + // metadata, ensuring that the state remains consistent. + let new_root = tree_metadata.root_value; + self.finalize_update(batch, once((lineage, tree_metadata, reversion_set)))?; + + // Finally we just return the necessary metadata. + Ok(TreeWithRoot::new(lineage, version, new_root)) + } + + /// Performs the provided `updates` on the tree with the specified `lineage`, returning the + /// mutation set that will revert the changes made to the tree. + /// + /// At most one new root is added to the backend for the entire batch. 
+ /// + /// # Errors + /// + /// - [`BackendError::Internal`] if the database cannot be accessed at any point. + /// - [`BackendError::Merkle`] if an error occurs with the merkle tree semantics. + /// - [`BackendError::UnknownLineage`] if the provided `lineage` is not known by this backend. + fn update_tree( + &mut self, + lineage: LineageId, + new_version: VersionId, + updates: SmtUpdateBatch, + ) -> Result { + // We check our lineage existence at the start just as a sanity check. + let tree_metadata = self + .lineages + .get(&lineage) + .ok_or(BackendError::UnknownLineage(lineage))? + .clone(); + + // All the work needs to happen atomically, so we construct a new write batch in which we + // stage our operations. + let batch = WriteBatch::default(); + + // We perform the update. If this fails with an error, the write batch will be dropped + // during error unwind, and any staged mutations will be forgotten about without being + // applied. + let (batch, reversion_set, tree_metadata) = self.update_tree_in_write_batch( + batch, + lineage, + tree_metadata, + new_version, + updates.into(), + )?; + + // At this point the batch contains the updates to the tree, but we still need to handle + // updating the metadata. + let batch = self.write_metadata(batch, lineage, &tree_metadata)?; + + // Writing the batch may fail, so we only write to the in-memory metadata once it is + // successful to ensure the in-memory state cache remains consistent with the database. + let mut res = self.finalize_update(batch, once((lineage, tree_metadata, reversion_set)))?; + let Some((_, reversion_set)) = res.pop() else { + unreachable!("finalize_update did not return the same number of output elements") + }; + + // We then just return the reversion set for the operations in question. + Ok(reversion_set) + } + + /// Performs the provided `updates` on the entire forest, returning the mutation sets that would + /// reverse the changes to each lineage in the forest. 
+ /// + /// The order of application of these mutations is unspecified, but is guaranteed to produce no + /// more than one new root for each operated-upon lineage. All operations are performed as part + /// of one atomic update, leaving the data on disk in a consistent state even if failures occur. + /// + /// # Errors + /// + /// - [`BackendError::Internal`] if the database cannot be accessed at any point. + /// - [`BackendError::Merkle`] if an error occurs with the merkle tree semantics. + /// - [`BackendError::UnknownLineage`] if the provided `lineage` is not known by this backend. + fn update_forest( + &mut self, + new_version: VersionId, + updates: SmtForestUpdateBatch, + ) -> Result> { + // We first have to check our precondition that all lineages are valid, returning an error + // as required by our contract if any lineage is unknown to the backend. + let updates = updates + .into_iter() + .map(|(lineage, ops)| { + if !self.lineages.contains_key(&lineage) { + return Err(BackendError::UnknownLineage(lineage)); + } + + Ok((lineage, ops)) + }) + .collect::>>()?; + let lineage_count = updates.len(); + let updates = updates + .into_iter() + .map(|(lineage, ops)| { + (lineage, ops, self.lineages.get(&lineage).expect("Known to exist").clone()) + }) + .collect::>(); + + // We want to update all trees as part of an atomic update to the backing database, but we + // also want to do this in parallel. As we cannot share a transaction directly, we instead + // share a write-batch per tree. + let updates_with_batch = updates + .into_iter() + .map(|(lineage, ops, tree_data)| { + let batch = WriteBatch::default(); + (lineage, ops, tree_data, batch) + }) + .collect::>(); + + // Now we can simply issue the work in parallel. 
+ let lineage_data = updates_with_batch
+ .into_par_iter()
+ .map(|(lineage, ops, tree_data, batch)| {
+ let ops = ops.into_iter().map(|op| op.into()).collect();
+ let (batch, reversion, tree_data) =
+ self.update_tree_in_write_batch(batch, lineage, tree_data, new_version, ops)?;
+ let batch = self.write_metadata(batch, lineage, &tree_data)?;
+
+ Ok((batch, (lineage, tree_data, reversion)))
+ })
+ .collect::>>()?;
+ let (batches, mutation_sets): (Vec<_>, Vec<_>) = lineage_data.into_iter().unzip();
+
+ // We construct our final WriteBatch in parallel if we have enough of them, otherwise we
+ // just do it in serial.
+ let final_batch = if lineage_count > MIN_LINEAGES_IN_BATCH_TO_PARALLELIZE {
+ batches
+ .into_par_iter()
+ .fold(WriteBatch::new, |l, r| merge_batches(l, &r))
+ .reduce(WriteBatch::new, |l, r| merge_batches(l, &r))
+ } else {
+ batches.into_iter().fold(WriteBatch::new(), |l, r| merge_batches(l, &r))
+ };
+
+ // Only at this point do we write to the in-memory metadata, ensuring that the state remains
+ // consistent.
+ let result = self.finalize_update(final_batch, mutation_sets.into_iter())?;
+
+ Ok(result)
+ }
+}
+
+// INTERNAL / UTILITY
+// ================================================================================================
+
+/// This block contains methods for internal use only that provide useful functionality for the
+/// implementation of the backend.
+impl PersistentBackend {
+ /// Performs `updates` on the tree in the specified lineage, assigning the new tree the
+ /// provided `new_version`.
+ ///
+ /// All operations in this method take place within the context of the provided `batch`. This
+ /// method will only stage operations using the transaction associated with that batch, and is
+ /// guaranteed to not commit those changes.
+ ///
+ /// # Errors
+ ///
+ /// - [`BackendError::Internal`] if the backend fails to read from or write to storage.
+ /// - [`BackendError::Merkle`] if an error occurs with the merkle tree semantics in the backend. + fn update_tree_in_write_batch( + &self, + batch: WriteBatch, + lineage: LineageId, + mut tree_metadata: TreeMetadata, + new_version: VersionId, + mut updates: Vec<(Word, Word)>, + ) -> Result<(WriteBatch, MutationSet, TreeMetadata)> { + // We start by ensuring that our updates are sorted, as this is necessary for the efficiency + // of various other operations. + updates.sort_by_key(|(k, _)| LeafIndex::from(*k).position()); + + // We then have to load the leaves that correspond to these pairs from storage. + let leaf_map = self + .get_leaves_for_keys(lineage, &updates.iter().map(|(k, _)| *k).collect::>())?; + + // We then process the leaves in parallel to determine the mutations that we need to apply + // to the full tree. + let LeafMutations { + mut leaves, + leaf_updates, + leaf_count_delta, + entry_count_delta, + reversion_pairs, + } = self.sorted_pairs_to_mutated_leaves(updates, &leaf_map)?; + + // If we have no mutations to perform, we return early for performance and to satisfy the + // contract required of `add_lineage`. + if leaves.is_empty() { + // As a result, our mutation set is empty. + return Ok(( + batch, + MutationSet { + old_root: tree_metadata.root_value, + node_mutations: NodeMutations::default(), + new_pairs: Map::default(), + new_root: tree_metadata.root_value, + }, + tree_metadata, + )); + } + + // We can then preallocate capacity for the subtree updates. + let mut subtree_updates: Vec = Vec::with_capacity(leaves.len()); + let mut global_node_reversions = Map::new(); + + // We process each depth level in reverse, stepping by the subtree depth. This is due to the + // dependency order of updates. 
+ for subtree_root_depth in + (0..=SMT_DEPTH - SUBTREE_DEPTH).step_by(SUBTREE_DEPTH as usize).rev() + { + let subtree_count = leaves.len(); + + let (mut subtree_roots, modified_subtrees, node_reversions) = leaves + .into_par_iter() + .map(|subtree_leaves| { + self.process_subtree_for_depth(lineage, subtree_leaves, subtree_root_depth) + }) + .fold( + || { + Ok(( + Vec::with_capacity(subtree_count), + Vec::with_capacity(subtree_count), + Map::new(), + )) + }, + |result, processed_tree| match (result, processed_tree) { + (Ok((mut roots, mut subtrees, mut node_reversions)), Ok(tree)) => { + roots.push(tree.subtree_root); + node_reversions.extend(tree.reversion_nodes); + if let Some(action) = tree.storage_action { + subtrees.push(action); + } + + Ok((roots, subtrees, node_reversions)) + }, + (Err(e), _) | (_, Err(e)) => Err(e), + }, + ) + .reduce( + || Ok((Vec::new(), Vec::new(), Map::new())), + |data1, data2| match (data1, data2) { + ( + Ok((mut roots1, mut trees1, mut reversions1)), + Ok((roots2, trees2, reversions2)), + ) => { + roots1.extend(roots2); + trees1.extend(trees2); + reversions1.extend(reversions2); + Ok((roots1, trees1, reversions1)) + }, + (Err(e), _) | (_, Err(e)) => Err(e), + }, + )?; + + subtree_updates.extend(modified_subtrees); + global_node_reversions.extend(node_reversions); + leaves = SubtreeLeavesIter::from_leaves(&mut subtree_roots).collect(); + + debug_assert!(!leaves.is_empty()); + } + + // Next we have to build the storage updates. + let mut leaf_update_map = leaf_map; + + for (idx, mutated_leaf) in leaf_updates { + let leaf_opt = match mutated_leaf { + SmtLeaf::Empty(_) => None, // Delete from storage + _ => Some(mutated_leaf), + }; + leaf_update_map.insert(idx, leaf_opt); + } + + let updates = StorageUpdates::from_parts( + leaf_update_map, + subtree_updates, + leaf_count_delta, + entry_count_delta, + ); + + // And we apply the updates to the lineage in the storage under the current transaction. 
+ let batch = self.apply_updates_to_lineage(batch, lineage, updates)?; + + // And then compute the new root. + let root_after_modification = leaves[0][0].hash; + + // We then write the node metadata into a copy + let root_before_modification = tree_metadata.root_value; + tree_metadata.entry_count = tree_metadata.entry_count.saturating_add_signed( + entry_count_delta.try_into().expect("Delta should always fit into i64"), + ); + tree_metadata.root_value = root_after_modification; + tree_metadata.version = new_version; + + let mutation_set = MutationSet { + old_root: root_after_modification, + node_mutations: global_node_reversions, + new_pairs: reversion_pairs.into_iter().collect(), + new_root: root_before_modification, + }; + + Ok((batch, mutation_set, tree_metadata)) + } + + /// Applies the `updates` to the specified `lineage` in the context of the provided `batch`. + /// + /// It will stage operations into the provided `batch` before returning it. + /// + /// # Errors + /// + /// - [`BackendError::Internal`] if the backend cannot be written to. + fn apply_updates_to_lineage( + &self, + mut batch: WriteBatch, + lineage: LineageId, + updates: StorageUpdates, + ) -> Result { + let leaves_cf = self.cf(LEAVES_CF)?; + + let StorageUpdateParts { leaf_updates, subtree_updates, .. } = updates.into_parts(); + + // These are simple enough that it does not make sense to do it in parallel. + for (k, v) in leaf_updates { + let key_bytes = LeafKey { lineage, index: k }.to_bytes(); + match v { + Some(leaf) => batch.put_cf(leaves_cf, key_bytes, leaf.to_bytes()), + None => batch.delete_cf(leaves_cf, key_bytes), + } + } + + // These do more work, so we issue all of them in parallel. 
+ let update_data = subtree_updates + .into_par_iter() + .map(|update| { + let (index, maybe_bytes) = match update { + SubtreeUpdate::Store { index, subtree } => { + let bytes = subtree.to_vec(); + (index, Some(bytes)) + }, + SubtreeUpdate::Delete { index } => (index, None), + }; + + let key = SubtreeKey { lineage, index }; + let key_bytes = key.to_bytes(); + let cf = self.subtree_cf(index)?; + Ok((cf, key_bytes, maybe_bytes)) + }) + .collect::>>()?; + + // We then add all the changes to the transaction in serial for now. + for (cf, k, mv) in update_data { + match mv { + None => batch.delete_cf(cf, k), + Some(bytes) => batch.put_cf(cf, k, bytes), + } + } + + Ok(batch) + } + + /// Processes the provided set of `subtree_leaves` in the subtree at depth `subtree_root_depth` + /// and returns the updated subtree data. + /// + /// # Panics + /// + /// - If loading the subtree from disk fails. + /// - If the load succeeds but no subtrees have actually been loaded despite being requested. + /// - If the function cannot retrieve an inner node that is scheduled for removal from the + /// subtree. + fn process_subtree_for_depth( + &self, + lineage: LineageId, + subtree_leaves: Vec, + subtree_root_depth: u8, + ) -> Result { + debug_assert!(subtree_leaves.is_sorted(), "Subtree leaves were not sorted"); + debug_assert!(!subtree_leaves.is_empty(), "Subtree leaves were empty"); + + let subtree_root_index = + NodeIndex::new_unchecked(subtree_root_depth, subtree_leaves[0].col >> SUBTREE_DEPTH); + + // We now unconditionally load the subtree from storage as all subtrees are stored on disk. + let mut subtree = self + .load_subtree(SubtreeKey { lineage, index: subtree_root_index })? + .unwrap_or_else(|| Subtree::new(subtree_root_index)); + + // We then build the mutations for the subtree. 
+ let (mutations, root) =
+ self.build_subtree_mutations(subtree_leaves, subtree_root_depth, &subtree)?;
+
+ // We are always acting on from-storage subtrees, so we can next apply the mutations to the
+ // subtree in question and determine what we do to the storage. We also gather our reversion
+ // operations at the same time.
+ let mut node_reversion_mutations = NodeMutations::new();
+ let modified = !mutations.is_empty();
+
+ for (index, mutation) in mutations {
+ match mutation {
+ NodeMutation::Removal => {
+ // If we are removing something we know structurally that it had to exist
+ // before. The reversion is simply then to add it back.
+ node_reversion_mutations.insert(
+ index,
+ NodeMutation::Addition(subtree.get_inner_node(index).expect(
+ "Removals imply the existence of a value at that index being removed",
+ )),
+ );
+ subtree.remove_inner_node(index);
+ },
+ NodeMutation::Addition(node) => {
+ // For an addition, we can either be adding something anew or overwriting an
+ // existing value. If there was no previous value, then our reversion is to
+ // remove the node entirely, while if there was we have to add it back.
+ node_reversion_mutations.insert(
+ index,
+ subtree
+ .get_inner_node(index)
+ .map(NodeMutation::Addition)
+ .unwrap_or_else(|| NodeMutation::Removal),
+ );
+
+ subtree.insert_inner_node(index, node);
+ },
+ }
+ }
+
+ let update = if !modified {
+ None
+ } else if !subtree.is_empty() {
+ Some(SubtreeUpdate::Store { index: subtree_root_index, subtree })
+ } else {
+ Some(SubtreeUpdate::Delete { index: subtree_root_index })
+ };
+
+ Ok(ProcessedSubtree {
+ subtree_root: root,
+ storage_action: update,
+ reversion_nodes: node_reversion_mutations,
+ })
+ }
+
+ /// Builds the set of subtree mutations based on the provided `leaves` and `root_depth` in the
+ /// specified `subtree`.
+ fn build_subtree_mutations( + &self, + mut leaves: Vec, + root_depth: u8, + subtree: &Subtree, + ) -> Result<(NodeMutations, SubtreeLeaf)> { + let bottom_depth = root_depth + SUBTREE_DEPTH; + + debug_assert!(bottom_depth <= SMT_DEPTH); + debug_assert!(Integer::is_multiple_of(&bottom_depth, &SUBTREE_DEPTH)); + debug_assert!(leaves.len() <= usize::pow(2, SUBTREE_DEPTH as u32)); + + let mut node_mutations: NodeMutations = Default::default(); + let mut next_leaves: Vec = Vec::with_capacity(leaves.len() / 2); + + for current_depth in (root_depth..bottom_depth).rev() { + debug_assert!(current_depth <= bottom_depth); + + let next_depth = current_depth + 1; + let mut iter = leaves.drain(..).peekable(); + + while let Some(first_leaf) = iter.next() { + // This constructs a valid index because next_depth will never exceed the depth of + // the tree. + let parent_index = NodeIndex::new_unchecked(next_depth, first_leaf.col).parent(); + let parent_node = subtree.get_inner_node(parent_index).unwrap_or_else(|| { + EmptySubtreeRoots::get_inner_node(SMT_DEPTH, parent_index.depth()) + }); + + let combined_node = fetch_sibling_pair(&mut iter, first_leaf, parent_node); + let combined_hash = combined_node.hash(); + + let &empty_hash = EmptySubtreeRoots::entry(SMT_DEPTH, current_depth); + + // Add the parent node even if it is empty for proper upward updates + next_leaves.push(SubtreeLeaf { + col: parent_index.position(), + hash: combined_hash, + }); + + node_mutations.insert( + parent_index, + if combined_hash != empty_hash { + NodeMutation::Addition(combined_node) + } else { + NodeMutation::Removal + }, + ); + } + drop(iter); + leaves = mem::take(&mut next_leaves); + } + + debug_assert_eq!(leaves.len(), 1); + let root_leaf = leaves.pop().unwrap(); + Ok((node_mutations, root_leaf)) + } + + /// Loads the subtree specified by `tree_key`, returning it if present in the backing DB or + /// returning [`None`] if it is not. 
+ /// + /// # Errors + /// + /// - [`BackendError::Internal`] if the underlying database cannot be accessed. + fn load_subtree(&self, tree_key: SubtreeKey) -> Result> { + let cf = self.subtree_cf(tree_key.index)?; + let key_bytes = tree_key.to_bytes(); + let result = match self.db.get_cf(cf, key_bytes) { + Ok(Some(bytes)) => Some(Subtree::from_vec(tree_key.index, &bytes)?), + Ok(None) => None, + Err(e) => return Err(e.into()), + }; + + Ok(result) + } + + /// Converts the provided key-value `pairs` and current leaf values into the necessary updates + /// to be performed on the stored tree. + /// + /// The provided `pairs` must be sorted, else undefined behavior may result. + /// + /// # Errors + /// + /// - [`BackendError::Merkle`] if something goes wrong with the merkle tree semantics. + /// - [`BackendError::Internal`] if construction would cause any given SMT leaf to exceed its + /// maximum number of entries. + /// + /// # Panics + /// + /// - If a leaf was changed during the processing, but is empty when constructing the leaf and + /// entry deltas. + /// - If the provided `pairs` vector is not sorted, but only with debug assertions enabled. + fn sorted_pairs_to_mutated_leaves( + &self, + pairs: Vec<(Word, Word)>, + leaf_map: &HashMap>, + ) -> Result { + debug_assert!( + pairs.is_sorted_by_key(|(key, _)| LeafIndex::from(*key).position()), + "The provided pairs vector is not sorted but is required to be" + ); + + let mut reversion_pairs = HashMap::new(); + let mut leaf_count_delta = 0isize; + let mut entry_count_delta = 0isize; + + // We rely on existing functionality here, but pass in our own closure to provide the + // forest-specific logic. 
+ let accumulator = process_sorted_pairs_to_leaves(pairs, |leaf_pairs| { + let leaf_index = LeafIndex::from(leaf_pairs[0].0); + + let maybe_old_leaf = leaf_map.get(&leaf_index.position()).and_then(Option::as_ref); + let old_entry_count = maybe_old_leaf.map(|leaf| leaf.num_entries()).unwrap_or_default(); + + // Whenever we change a value in the current leaf, we have to store the _old_ version of + // that value in our reversion pairs. + let mut new_leaf = maybe_old_leaf + .cloned() + .unwrap_or_else(|| SmtLeaf::new_empty(leaf_pairs[0].0.into())); + + let mut leaf_changed = false; + + for (key, value) in leaf_pairs { + // The old value for the key comes from the old corresponding leaf, or if no such + // leaf existed all of its values were implicitly zero. + let old_value_for_key = maybe_old_leaf + .and_then(|old_leaf| helpers::get_leaf_value(&old_leaf, &key)) + .unwrap_or(Word::empty()); + + if value != old_value_for_key { + new_leaf = self + .construct_prospective_leaf(new_leaf, &key, &value) + .map_err(|e| MerkleError::InternalError(e.to_string()))?; + reversion_pairs.insert(key, old_value_for_key); + leaf_changed = true; + } + } + + if leaf_changed { + let new_entry_count = new_leaf.entries().len(); + match (&new_leaf, maybe_old_leaf) { + (SmtLeaf::Empty(_), Some(_)) => { + leaf_count_delta -= 1; + entry_count_delta -= old_entry_count as isize; + }, + (SmtLeaf::Empty(_), None) => { + unreachable!("Leaf was empty but leaf_changed=true"); + }, + (_, None) => { + leaf_count_delta += 1; + entry_count_delta += new_entry_count as isize; + }, + (_, Some(_)) => { + entry_count_delta += new_entry_count as isize - old_entry_count as isize; + }, + } + + Ok(Some(new_leaf)) + } else { + Ok(None) + } + })?; + + Ok(LeafMutations { + leaves: accumulator.leaves, + leaf_updates: accumulator.nodes, + leaf_count_delta, + entry_count_delta, + reversion_pairs, + }) + } + + /// Updates a prospective `leaf` by modifying it based on the provided `key` and `value`. 
+ ///
+ /// # Errors
+ ///
+ /// - [`SmtLeafError::TooManyLeafEntries`] if an attempt is made to insert `key` and `value`
+ /// into the `leaf` but the insertion would cause the leaf to exceed the maximum number of leaf
+ /// entries.
+ fn construct_prospective_leaf(
+ &self,
+ mut leaf: SmtLeaf,
+ key: &Word,
+ value: &Word,
+ ) -> core::result::Result {
+ debug_assert_eq!(leaf.index(), LeafIndex::from(*key));
+
+ match leaf {
+ SmtLeaf::Empty(_) => Ok(SmtLeaf::new_single(*key, *value)),
+ _ => {
+ if *value != EMPTY_WORD {
+ helpers::insert_into_leaf(&mut leaf, *key, *value)?;
+ } else {
+ helpers::remove_from_leaf(&mut leaf, *key)?;
+ }
+
+ Ok(leaf)
+ },
+ }
+ }
+
+ /// Gets the leaves from disk in the provided `lineage` that contain all the provided `keys`.
+ ///
+ /// # Errors
+ ///
+ /// - [`BackendError::Internal`] if it is unable to load the necessary data from the disk.
+ fn get_leaves_for_keys(
+ &self,
+ lineage: LineageId,
+ keys: &[Word],
+ ) -> Result>> {
+ // We have to get all the leaf indices, accounting for the fact that multiple keys may map
+ // to any given leaf.
+ let mut leaf_indices =
+ keys.iter().map(|k| LeafIndex::from(*k).position()).collect::>();
+ leaf_indices.par_sort_unstable();
+ leaf_indices.dedup();
+
+ let leaves = self.load_leaves(lineage, &leaf_indices)?;
+
+ Ok(leaf_indices.into_iter().zip(leaves).collect())
+ }
+
+ /// Loads the concrete leaves from disk corresponding to the provided `indices` in the provided
+ /// `lineage`.
+ ///
+ /// # Errors
+ ///
+ /// - [`BackendError::Internal`] if the data cannot be loaded from the database.
+ fn load_leaves(&self, lineage: LineageId, indices: &[u64]) -> Result>> { + let col = self.cf(LEAVES_CF)?; + let keys = indices + .iter() + .map(|index| LeafKey { lineage, index: *index }.to_bytes()) + .collect::>>(); + let leaves = self.db.multi_get_cf(keys.iter().map(|k| (col, k.as_slice()))); + + leaves + .into_iter() + .map(|result| match result { + Ok(Some(bytes)) => { + Ok(Some(SmtLeaf::read_from_bytes_with_budget(&bytes, bytes.len())?)) + }, + Ok(None) => Ok(None), + Err(e) => Err(e.into()), + }) + .collect() + } + + /// Gets the leaf from disk in the provided `lineage` that would contain `key`. + fn load_leaf_for(&self, lineage: LineageId, key: Word) -> Result> { + let col = self.cf(LEAVES_CF)?; + let key_bytes = LeafKey { + lineage, + index: LeafIndex::from(key).position(), + } + .to_bytes(); + let leaf_bytes = self.db.get_cf(col, key_bytes)?; + let leaf = match leaf_bytes { + Some(bytes) => Some(SmtLeaf::read_from_bytes_with_budget(&bytes, bytes.len())?), + None => None, + }; + + Ok(leaf) + } + + /// Gets the column family corresponding to the subtree with root index `index`. + /// + /// # Errors + /// + /// - [`BackendError::Internal`] if the database cannot be accessed to get the column family. + #[inline(always)] + fn subtree_cf(&self, index: NodeIndex) -> Result<&db::ColumnFamily> { + let cf_name = subtree_cf_name(index.depth()); + self.cf(cf_name) + } + + /// Gets the column family with the specified name. + /// + /// # Errors + /// + /// - [`BackendError::Internal`] if the database cannot be accessed to get the column family. + #[inline(always)] + fn cf(&self, name: &str) -> Result<&db::ColumnFamily> { + self.db.cf_handle(name).ok_or_else(|| { + BackendError::internal_from_message(format!("Could not load column with name {name}")) + }) + } + + /// Writes the provided `batch` into the database as part of one atomic operation. + /// + /// # Errors + /// + /// - [`BackendError::Internal`] if writing to the database fails for any reason. 
+ fn write(&self, batch: WriteBatch) -> Result<()> { + let mut write_opts = db::WriteOptions::default(); + write_opts.set_sync(false); + self.db.write_opt(batch, &write_opts)?; + + Ok(()) + } + + /// Forces the underlying database to perform a sync to disk, and thus ensure that all data is + /// persisted. + /// + /// # Errors + /// + /// - [`BackendError::Internal`] if the flush to disk fails for any reason. + fn sync(&self) -> Result<()> { + let mut flush_opts = db::FlushOptions::default(); + flush_opts.set_wait(true); + + // Flush all the subtree column families. + for name in SUBTREE_CFS { + self.db.flush_cf_opt(self.cf(name)?, &flush_opts)?; + } + + // Flush the leaves and metadata column families. + for name in [LEAVES_CF, METADATA_CF] { + self.db.flush_cf_opt(self.cf(name)?, &flush_opts)?; + } + + // Flush the WAL. + self.db.flush_wal(true)?; + + Ok(()) + } +} + +// INTERNAL / STARTUP +// ================================================================================================ + +/// This impl block contains internal functionality to do with starting up the backend and +/// performing its initialization work. +impl PersistentBackend { + /// Sets up the basic configuration for the underlying RocksDB database. + fn build_db_with_options(config: &Config) -> Result { + let mut db_opts = db::Options::default(); + + // We start by initially setting up the base options for the whole database. + db_opts.create_if_missing(true); + db_opts.create_missing_column_families(true); + db_opts.increase_parallelism(rayon::current_num_threads() as _); + db_opts.set_max_open_files(config.max_open_files as _); + db_opts.set_max_background_jobs(rayon::current_num_threads() as _); + db_opts.set_max_total_wal_size(config.max_wal_size); + + // We want to share a block cache across all column families. + let cache = db::Cache::new_lru_cache(config.cache_size_bytes); + + // Now we set up our basic options for all column families. 
+ let mut cf_opts = db::BlockBasedOptions::default(); + cf_opts.set_block_cache(&cache); + cf_opts.set_bloom_filter(config.bloom_filter_bits, false); + cf_opts.set_whole_key_filtering(true); // Better for point lookups. + cf_opts.set_pin_l0_filter_and_index_blocks_in_cache(true); // Improves performance. + + // From this, we can set up the configuration for each of our column families. We start with + // the one for metadata. + let metadata_cf_opts = Self::build_cf_opts( + config, + &cf_opts, + MAX_METADATA_CF_WRITE_BUFFER_SIZE_BYTES, + db::DBCompressionType::None, + ); + + // We can also create the configuration for our leaves column family. + let leaves_cf_opts = Self::build_cf_opts( + config, + &cf_opts, + MAX_LEAVES_CF_WRITE_BUFFER_SIZE_BYTES, + COMPRESSION_MODE, + ); + + // Finally we create them for each of our subtree CFs. + let subtree_cfs = SUBTREE_CFS.into_iter().map(|name| { + db::ColumnFamilyDescriptor::new( + name, + Self::build_cf_opts( + config, + &cf_opts, + MAX_SUBTREE_CF_WRITE_BUFFER_SIZE_BYTES, + COMPRESSION_MODE, + ), + ) + }); + + // With the column-specific configuration made, we can then simply create our database + // options + let mut columns = vec![ + db::ColumnFamilyDescriptor::new(METADATA_CF, metadata_cf_opts), + db::ColumnFamilyDescriptor::new(LEAVES_CF, leaves_cf_opts), + ]; + columns.extend(subtree_cfs); + + Ok(DB::open_cf_descriptors(&db_opts, config.path.clone(), columns)?) + } + + /// Unifies the building of options for column families where most parameters are shared, + /// customizing only the `max_write_buffer_size` and `compression_mode`. 
+ fn build_cf_opts( + config: &Config, + base: &db::BlockBasedOptions, + max_write_buffer_size: usize, + compression_mode: db::DBCompressionType, + ) -> db::Options { + let mut cf_opts = db::Options::default(); + cf_opts.set_block_based_table_factory(base); + cf_opts.set_write_buffer_size(max_write_buffer_size); + cf_opts.set_max_write_buffer_number(MAX_WRITE_BUFFER_COUNT); + cf_opts.set_min_write_buffer_number_to_merge(MIN_WRITE_BUFFERS_TO_MERGE); + cf_opts.set_max_write_buffer_size_to_maintain(MAX_WRITE_BUFFERS_TO_RETAIN); + cf_opts.set_compaction_style(db::DBCompactionStyle::Level); + cf_opts.set_target_file_size_base(config.target_file_size); + cf_opts.set_compression_type(compression_mode); + cf_opts.set_level_zero_file_num_compaction_trigger(L0_FILE_COMPACTION_TRIGGER); + + cf_opts + } + + /// Stages the provided `metadata` to be written to the provided `lineage` on disk within the + /// provided `batch`, staging the changes encapsulated by `batch` in the underlying DB. + /// + /// It stages its write operations into the provided `batch` before returning it. + /// + /// # Errors + /// + /// - [`BackendError::Internal`] if the underlying database cannot be accessed for reading or + /// staging. + fn write_metadata( + &self, + mut batch: WriteBatch, + lineage: LineageId, + tree_metadata: &TreeMetadata, + ) -> Result { + let metadata = self.cf(METADATA_CF)?; + let metadata_key = lineage.to_bytes(); + let metadata_value = tree_metadata.to_bytes(); + batch.put_cf(metadata, &metadata_key, &metadata_value); + Ok(batch) + } + + /// Finalizes the update by committing the provided `batch` to disk and updating the in-memory + /// cache with the provided `metadata`. + /// + /// # Errors + /// + /// - [`BackendError::Internal`] if the underlying database cannot be written to. + fn finalize_update( + &mut self, + batch: WriteBatch, + metadata: impl Iterator, + ) -> Result> { + // We first write the full atomic update to disk. If it errors, we bail. 
+ self.write(batch)?; + + // If it hasn't errored, we can now safely update the in-memory metadata cache. + Ok(metadata + .map(|(l, d, r)| { + self.lineages.insert(l, d); + (l, r) + }) + .collect()) + } + + /// Reads all the lineages and their corresponding metadata out of the on-disk storage as part + /// of the startup work. + /// + /// # Errors + /// + /// - [`BackendError::CorruptedData`] if data corruption is discovered. + /// - [`BackendError::Internal`] if the metadata cannot be read from disk. + fn read_all_metadata(db: Arc) -> Result> { + let cf = db.cf_handle(METADATA_CF).ok_or_else(|| { + BackendError::CorruptedData(format!("{METADATA_CF} column not found")) + })?; + let db_iter = db.iterator_cf(&cf, db::IteratorMode::Start); + + db_iter + .map(|bytes| { + let (key_bytes, value_bytes) = bytes?; + let lineage = LineageId::read_from_bytes(&key_bytes)?; + let metadata = TreeMetadata::read_from_bytes(&value_bytes)?; + + Ok((lineage, metadata)) + }) + .collect::>() + } +} + +// TRAIT IMPLEMENTATIONS +// ================================================================================================ + +/// We implement drop in order to force a sync to disk as the program shuts down, ensuring that all +/// data is correctly persisted. +impl Drop for PersistentBackend { + /// Forces the database to be synced to disk on drop. + /// + /// # Panics + /// + /// - If the sync cannot be performed, indicating that data on disk may be in a corrupt state. + fn drop(&mut self) { + if let Err(e) = self.sync() { + if std::thread::panicking() { + std::eprintln!("Failed to flush database on shutdown during panic: {e}") + } else { + panic!("Failed to flush database on shutdown: {e}") + } + } + } +} + +// PROCESSED_SUBTREE +// ================================================================================================ + +/// The results of processing a subtree in a full tree. 
+#[derive(Clone, Debug)] +struct ProcessedSubtree { + /// The computed root of the subtree as a leaf of its containing subtree. + pub subtree_root: SubtreeLeaf, + + /// The storage update instruction for the subtree. + pub storage_action: Option, + + /// The operations that need to be performed to revert the changes to the subtree. + pub reversion_nodes: HashMap, +} + +// LEAF MUTATIONS +// ================================================================================================ + +/// A container for the data necessary to perform mutations on the in-memory tree, computed based on +/// a list of key-value pairs representing the changes to the leaves. +#[derive(Clone, Debug, Eq, PartialEq)] +struct LeafMutations { + /// The leaves, organized into groups to allow building subtrees in parallel. + pub leaves: MutatedSubtreeLeaves, + + /// The leaves, mapping leaf index to the corresponding _new_ node, organized for performing + /// storage updates. + pub leaf_updates: HashMap, + + /// The change in the number of leaves in the tree. + pub leaf_count_delta: isize, + + /// The change in the number of entries in the tree. + pub entry_count_delta: isize, + + /// A key-value mapping of leaves that would need to be inserted in order to **reverse** the + /// changes specified by `self`. + pub reversion_pairs: HashMap, +} + +// ERRORS +// ================================================================================================ + +/// We forward all errors in deserialization as data corruption errors. +impl From for BackendError { + fn from(e: DeserializationError) -> Self { + Self::CorruptedData(e.to_string()) + } +} + +/// We generically forward all errors to do with the DB implementation out of the interface of the +/// [`Backend`] as internal errors. 
+impl From for BackendError { + fn from(e: db::Error) -> Self { + BackendError::internal_from(e) + } +} + +/// We generically forward IO errors as fatal errors out of the interface of the [`Backend`] as +/// internal errors. +impl From for BackendError { + fn from(e: std::io::Error) -> Self { + BackendError::internal_from(e) + } +} + +/// We generically forward storage backend errors out of the interface for the [`Backend`] as +/// data corruption errors. +impl From for BackendError { + fn from(e: StorageError) -> Self { + BackendError::CorruptedData(e.to_string()) + } +} + +/// All errors to do with subtrees are fatal. +impl From for BackendError { + fn from(e: SubtreeError) -> Self { + Self::internal_from(e) + } +} + +// HELPERS +// ================================================================================================ + +/// Gets the subtree column family name corresponding to the provided depth. +/// +/// # Panics +/// +/// - If `depth` is not a valid subtree depth in this backend. +#[inline(always)] +fn subtree_cf_name(depth: u8) -> &'static str { + match depth { + 0 => SUBTREE_00_CF, + 8 => SUBTREE_08_CF, + 16 => SUBTREE_16_CF, + 24 => SUBTREE_24_CF, + 32 => SUBTREE_32_CF, + 40 => SUBTREE_40_CF, + 48 => SUBTREE_48_CF, + 56 => SUBTREE_56_CF, + _ => panic!("Unsupported subtree depth {depth}"), + } +} diff --git a/crates/large-forest-backend-rocksdb/src/property_tests.rs b/crates/large-forest-backend-rocksdb/src/property_tests.rs new file mode 100644 index 000000000..1831665b0 --- /dev/null +++ b/crates/large-forest-backend-rocksdb/src/property_tests.rs @@ -0,0 +1,354 @@ +#![cfg(test)] +//! This module contains the property tests for the persistent backend of the Large SMT forest. 
+ +use alloc::vec::Vec; + +use itertools::Itertools; +use proptest::prelude::*; + +use super::tests::default_backend; +use crate::{ + EMPTY_WORD, + merkle::smt::{ + Backend, Smt, SmtForestUpdateBatch, SmtUpdateBatch, TreeWithRoot, + large_forest::test_utils::{ + arbitrary_batch, arbitrary_lineage, arbitrary_version, arbitrary_word, + }, + }, +}; + +// TESTS +// ================================================================================================ + +proptest! { + #![proptest_config(ProptestConfig::with_cases(10))] + + #[test] + fn open_correct( + lineage in arbitrary_lineage(), + version in arbitrary_version(), + entries in arbitrary_batch(), + random_key in arbitrary_word(), + ) { + let (_file, mut backend) = default_backend()?; + + // We can add the lineage to the backend. + backend.add_lineage(lineage, version, entries.clone())?; + + // And construct a normal tree to compare against. + let mut tree = Smt::new(); + let tree_mutations = tree.compute_mutations(Vec::from(entries.clone()).into_iter())?; + tree.apply_mutations(tree_mutations)?; + + // We should get the same opening from the backend and from the tree, for a random key. + let backend_opening = backend.open(lineage, random_key)?; + let tree_opening = tree.open(&random_key); + prop_assert_eq!(backend_opening, tree_opening); + + // And this should be true for all keys. + for op in entries.into_iter() { + let key = op.key(); + let backend_opening = backend.open(lineage, key)?; + let tree_opening = tree.open(&key); + prop_assert_eq!(backend_opening, tree_opening); + } + } + + #[test] + fn get_correct( + lineage in arbitrary_lineage(), + version in arbitrary_version(), + entries in arbitrary_batch(), + random_key in arbitrary_word(), + ) { + let (_file, mut backend) = default_backend()?; + + // We can add the lineage to the backend. + backend.add_lineage(lineage, version, entries.clone())?; + + // And construct a normal tree to compare against. 
+ let mut tree = Smt::new(); + let tree_mutations = tree.compute_mutations(Vec::from(entries.clone()).into_iter())?; + tree.apply_mutations(tree_mutations)?; + + // We should get the same opening from the backend and from the tree, for a random key. + let backend_opening = + backend.get(lineage, random_key)?.unwrap_or(EMPTY_WORD); + let tree_opening = tree.get_value(&random_key); + prop_assert_eq!(backend_opening, tree_opening); + + // And this should be true for all keys. + for op in entries.into_iter() { + let (key, value) = op.into(); + let backend_opening = backend.get(lineage, key)?; + prop_assert_eq!(backend_opening, if value == EMPTY_WORD { None } else { Some(value) }); + let tree_opening = tree.get_value(&key); + prop_assert_eq!(backend_opening.unwrap_or(EMPTY_WORD), tree_opening); + } + } + + #[test] + fn version_correct( + lineage in arbitrary_lineage(), + version in arbitrary_version(), + entries_v1 in arbitrary_batch(), + entries_v2 in arbitrary_batch(), + ) { + let (_file, mut backend) = default_backend()?; + + // We can add the lineage to the backend, and we should always get the provided version back. + backend.add_lineage(lineage, version, entries_v1.clone())?; + prop_assert_eq!(backend.version(lineage)?, version); + + // We're going to need an auxiliary tree to check the behavior. + let mut tree = Smt::new(); + let muts_1 =tree.compute_mutations(Vec::from(entries_v1.clone()).into_iter())?; + tree.apply_mutations(muts_1)?; + let muts_2 =tree.compute_mutations(Vec::from(entries_v2.clone()).into_iter())?; + + // If we then update that lineage, we should still get the new version back. 
+ backend.update_tree(lineage, version + 1, entries_v2)?; + + if muts_2.is_empty() { + prop_assert_eq!(backend.version(lineage)?, version); + } else { + prop_assert_eq!(backend.version(lineage)?, version + 1); + } + } + + #[test] + fn lineages_correct( + lineages in prop::collection::vec(arbitrary_lineage(), 0..30), + version in arbitrary_version(), + ) { + let (_file, mut backend) = default_backend()?; + + // We should be able to add all the lineages to the backend as long as they are unique. + let lineages = lineages.into_iter().unique().sorted().collect_vec(); + + for lineage in &lineages { + backend.add_lineage(*lineage, version, SmtUpdateBatch::empty())?; + } + + // And we should always be able to get the same lineages back. + let returned_lineages = backend.lineages()?.sorted().collect_vec(); + prop_assert_eq!(returned_lineages, lineages); + } + + #[test] + fn trees_correct( + lineages in prop::collection::vec(arbitrary_lineage(), 10), + version in arbitrary_version(), + entries in prop::collection::vec(arbitrary_batch(), 10), + ) { + let (_file, mut backend) = default_backend()?; + + // We should be able to add all the lineages to the backend as long as they are unique. + let lineages = lineages.into_iter().unique().sorted().collect_vec(); + let pairs = lineages.into_iter().zip(entries).collect_vec(); + let count = pairs.len(); + + for (lineage, entries) in &pairs { + backend.add_lineage(*lineage, version, entries.clone())?; + } + + // We should be able to get the correct root data for each tree. 
+ for (lineage, entries) in pairs { + let mut tree = Smt::new(); + let tree_muts = tree.compute_mutations(Vec::from(entries).into_iter())?; + tree.apply_mutations(tree_muts)?; + + prop_assert_eq!(backend.trees()?.count(), count); + prop_assert!( + backend.trees()?.contains(&TreeWithRoot::new(lineage, version, tree.root())) + ); + } + } + + #[test] + fn entry_count_correct( + lineages in prop::collection::vec(arbitrary_lineage(), 10), + version in arbitrary_version(), + entries in prop::collection::vec(arbitrary_batch(), 10) + ) { + let (_file, mut backend) = default_backend()?; + + let pairs = lineages.into_iter().unique().sorted().zip(entries).collect_vec(); + let (target_lineage, target_entries) = pairs[0].clone(); + + // Each lineage should be able to be added to the backend. + for (lineage, entries) in pairs { + backend.add_lineage(lineage, version, entries.clone())?; + } + + // And then construct an auxiliary tree to mirror it. + let mut tree = Smt::new(); + let tree_mutations = tree.compute_mutations(Vec::from(target_entries).into_iter())?; + tree.apply_mutations(tree_mutations)?; + + // And we should have the same number of entries in each. + let backend_entries = backend.entry_count(target_lineage)?; + let tree_entries = tree.num_entries(); + prop_assert_eq!(backend_entries, tree_entries); + } + + #[test] + fn entries_correct( + lineages in prop::collection::vec(arbitrary_lineage(), 10), + version in arbitrary_version(), + entries in prop::collection::vec(arbitrary_batch(), 10) + ) { + let (_file, mut backend) = default_backend()?; + + let pairs = lineages.into_iter().unique().sorted().zip(entries).collect_vec(); + let (target_lineage, target_entries) = pairs[0].clone(); + + // Each lineage should be able to be added to the backend. + for (lineage, entries) in pairs { + backend.add_lineage(lineage, version, entries.clone())?; + } + + // And then construct an auxiliary tree to mirror it. 
+ let mut tree = Smt::new(); + let tree_mutations = + tree.compute_mutations(Vec::from(target_entries).into_iter())?; + tree.apply_mutations(tree_mutations)?; + + // And we should have the same number of entries in each. + let backend_entries = backend + .entries(target_lineage)? + .map(|e| (e.key, e.value)) + .sorted() + .collect_vec(); + let tree_entries = tree.entries().copied().sorted().collect_vec(); + prop_assert_eq!(backend_entries, tree_entries); + } + + #[test] + fn add_lineage_correct( + lineage in arbitrary_lineage(), + version in arbitrary_version(), + entries in arbitrary_batch() + ) { + let (_file, mut backend) = default_backend()?; + + // We can add the lineage to the backend. + let root = backend.add_lineage(lineage, version, entries.clone())?; + + // And create a normal tree to compare against. + let mut tree = Smt::new(); + let tree_mutations =tree.compute_mutations(Vec::from(entries.clone()).into_iter())?; + tree.apply_mutations(tree_mutations)?; + + // The root should return the same results as that. + prop_assert_eq!(root.root(), tree.root()); + prop_assert_eq!(root.version(), version); + prop_assert_eq!(root.lineage(), lineage); + + // And we should only see that one lineage. + prop_assert_eq!(backend.lineages()?.count(), 1); + prop_assert!(backend.lineages()?.contains(&lineage)); + } + + #[test] + fn update_lineage_correct( + lineage_1 in arbitrary_lineage(), + lineage_2 in arbitrary_lineage(), + version in arbitrary_version(), + entries_1_1 in arbitrary_batch(), + entries_1_2 in arbitrary_batch(), + entries_2_1 in arbitrary_batch() + ) { + let (_file, mut backend) = default_backend()?; + + // We add two lineages with initial values. + let root_1 = backend.add_lineage(lineage_1, version, entries_1_1.clone())?; + let root_2 = backend.add_lineage(lineage_2, version, entries_2_1)?; + + // We should see these roots in the trees iterator. 
+ prop_assert!(backend.trees()?.contains(&root_1)); + prop_assert!(backend.trees()?.contains(&root_2)); + prop_assert_eq!(backend.trees()?.count(), 2); + + // And create an auxiliary tree to check things are correct. + let mut tree = Smt::new(); + let tree_mutations = + tree.compute_mutations(Vec::from(entries_1_1).into_iter())?; + tree.apply_mutations(tree_mutations)?; + prop_assert_eq!(root_1.root(), tree.root()); + + // We then update lineage 1. + let backend_reversion =backend.update_tree(lineage_1, version + 1, entries_1_2.clone())?; + + // And our auxiliary tree to check. + let tree_mutations =tree.compute_mutations(Vec::from(entries_1_2).into_iter())?; + let is_empty = tree_mutations.is_empty(); + let tree_reversion = tree.apply_mutations_with_reversion(tree_mutations)?; + + // Our reversions should be the same, and we should no longer see the previous root. + prop_assert_eq!(&backend_reversion, &tree_reversion); + prop_assert_eq!(backend.trees()?.count(), 2); + prop_assert!(backend.trees()?.contains(&root_2)); + + if is_empty { + prop_assert!(backend.trees()?.contains(&root_1)); + } else { + prop_assert!(backend.trees()?.contains( + &TreeWithRoot::new(lineage_1, version + 1, backend_reversion.old_root)) + ); + prop_assert!(!backend.trees()?.contains(&root_1)); + } + } + + #[test] + fn update_forest_correct( + lineages in prop::collection::vec(arbitrary_lineage(), 0..30), + version in arbitrary_version(), + entries_v1 in prop::collection::vec(arbitrary_batch(), 0..30), + entries_v2 in prop::collection::vec(arbitrary_batch(), 0..30), + ) { + let (_file, mut backend) = default_backend()?; + + let triples = lineages.into_iter().unique().zip(entries_v1).zip(entries_v2).collect_vec(); + + // We should be able to add every lineage initially. 
+ let updates = triples.into_iter().map(|((lineage, entries_v1), entries_v2)| { + let root = backend.add_lineage(lineage, version, entries_v1.clone())?; + + let mut tree = Smt::new(); + let tree_mutations = tree.compute_mutations(Vec::from(entries_v1).into_iter())?; + tree.apply_mutations(tree_mutations)?; + + prop_assert_eq!(root.root(), tree.root()); + + Ok((lineage, entries_v2, tree)) + }).collect::, _>>()?; + + // Let's pull our data apart. + let (inputs, tree_reversions) = updates + .into_iter() + .map(|(lineage, entries, mut tree)| { + let mutations = tree.compute_mutations(Vec::from(entries.clone()).into_iter()).unwrap(); + let reversion = tree.apply_mutations_with_reversion(mutations).unwrap(); + ((lineage, entries), (lineage, reversion)) + }) + .unzip::<_, _, Vec<_>, Vec<_>>(); + + // And then we should be able to correctly update all of them in one go. + let mut update_batch = SmtForestUpdateBatch::empty(); + inputs.into_iter().for_each(|(lineage, updates)| { + *update_batch.operations(lineage) = updates; + }); + + let backend_reversions = backend.update_forest(version + 1, update_batch)?; + + for (lineage, reversion) in tree_reversions { + prop_assert_eq!( + backend_reversions + .iter() + .find_map(|(l, r)| if *l == lineage { Some(r) } else { None }), + Some(&reversion) + ); + } + } +} diff --git a/crates/large-forest-backend-rocksdb/src/tests.rs b/crates/large-forest-backend-rocksdb/src/tests.rs new file mode 100644 index 000000000..8d74c4138 --- /dev/null +++ b/crates/large-forest-backend-rocksdb/src/tests.rs @@ -0,0 +1,699 @@ +#![cfg(test)] +//! This module contains the basic functional tests for the persistent backend for the SMT forest. +//! +//! Rather than hard-code specific values for the trees, these tests rely on the correctness of the +//! existing [`Smt`] implementation, comparing the results of the persistent backend against it +//! wherever relevant. 
+ +use assert_matches::assert_matches; +use itertools::Itertools; +use tempfile::{TempDir, tempdir}; + +use super::{PersistentBackend, Result}; +use crate::{ + EMPTY_WORD, Word, + merkle::smt::{ + Backend, BackendError, LineageId, Smt, SmtForestUpdateBatch, SmtUpdateBatch, TreeEntry, + TreeWithRoot, VersionId, large_forest::backend::persistent::config::Config, + }, + rand::test_utils::ContinuousRng, +}; + +// UTILITIES +// ================================================================================================ + +/// Builds an empty persistent backend that contains no data at a random temporary file path that +/// will be subject to clean up. +/// +/// It returns the temporary directory handle as well so that it can be ensured to live for the +/// lifetime of the caller's scope at a minimum. +/// +/// It is necessary to explicitly hold onto the returned directory until the end of the scope in +/// order to ensure correct cleanup behavior. +pub fn default_backend() -> Result<(TempDir, PersistentBackend)> { + let temp_path = tempdir()?; + let config = Config::new(temp_path.path())?; + let backend = PersistentBackend::load(config)?; + + Ok((temp_path, backend)) +} + +// CONSTRUCTION +// ================================================================================================ + +#[test] +fn load_empty() -> Result<()> { + // Construction of a backend passing a new path in the config will initialize the backend with + // no data. + let temp_path = tempdir()?; + let config = Config::new(temp_path.path())?; + let backend = PersistentBackend::load(config)?; + + // This means that it should have no lineages, and no trees. + assert!(backend.lineages.is_empty()); + assert_eq!(backend.trees()?.count(), 0); + + Ok(()) +} + +#[test] +fn load_extant() -> Result<()> { + // We start by creating an empty backend and populating it with some lineages. 
+ let (path, mut backend) = default_backend()?; + let mut rng = ContinuousRng::new([0x42; 32]); + let version: VersionId = rng.value(); + + let lineage_1: LineageId = rng.value(); + let l1_k1: Word = rng.value(); + let l1_v1: Word = rng.value(); + let l1_k2: Word = rng.value(); + let l1_v2: Word = rng.value(); + let l1_batch = SmtUpdateBatch::from([(l1_k1, l1_v1), (l1_k2, l1_v2)].into_iter()); + + let lineage_2: LineageId = rng.value(); + let l2_k1: Word = rng.value(); + let l2_v1: Word = rng.value(); + let l2_k2: Word = rng.value(); + let l2_v2: Word = rng.value(); + let l2_batch = SmtUpdateBatch::from([(l2_k1, l2_v1), (l2_k2, l2_v2)].into_iter()); + + let root_1 = backend.add_lineage(lineage_1, version, l1_batch)?; + let root_2 = backend.add_lineage(lineage_2, version, l2_batch)?; + + // We check the values against reference SMTs. + let tree_1 = Smt::with_entries([(l1_k1, l1_v1), (l1_k2, l1_v2)].into_iter())?; + let tree_2 = Smt::with_entries([(l2_k1, l2_v1), (l2_k2, l2_v2)].into_iter())?; + + assert_eq!(root_1.root(), tree_1.root()); + assert_eq!(root_2.root(), tree_2.root()); + + // We can check that certain things are true now. + assert_eq!(backend.lineages()?.count(), 2); + assert_eq!(backend.trees()?.count(), 2); + + // Next we force it to close, which will leave our data on disk with a forced sync. + drop(backend); + + // We should then be able to re-open it again at the same path. + let backend = PersistentBackend::load(Config::new(path.path())?)?; + + // And more importantly it should have the same data. + assert_eq!(backend.lineages()?.count(), 2); + assert_eq!(backend.trees()?.count(), 2); + assert!(backend.lineages()?.contains(&lineage_1)); + assert!(backend.lineages()?.contains(&lineage_2)); + assert_eq!(backend.version(lineage_1)?, version); + assert_eq!(backend.version(lineage_2)?, version); + assert!(backend.trees()?.contains(&root_1)); + assert!(backend.trees()?.contains(&root_2)); + + // And we should be able to perform openings on it... 
+ let l1_opening = backend.open(lineage_1, l1_k1)?; + let t1_opening = tree_1.open(&l1_k1); + assert_eq!(l1_opening, t1_opening); + + let l2_opening = backend.open(lineage_2, l2_k1)?; + let t2_opening = tree_2.open(&l2_k1); + assert_eq!(l2_opening, t2_opening); + + // ...as well as get... + let l1_value = backend.get(lineage_1, l1_k2)?; + let t1_value = tree_1.get_value(&l1_k2); + assert_eq!(l1_value, Some(t1_value)); + + let l2_value = backend.get(lineage_2, l2_k1)?; + let t2_value = tree_2.get_value(&l2_k1); + assert_eq!(l2_value, Some(t2_value)); + + // ...and entries. + let l1_entries = backend.entries(lineage_1)?.sorted().collect_vec(); + let t1_entries = tree_1 + .entries() + .sorted() + .map(|(k, v)| TreeEntry { key: *k, value: *v }) + .collect_vec(); + assert_eq!(l1_entries, t1_entries); + + let l2_entries = backend.entries(lineage_2)?.sorted().collect_vec(); + let t2_entries = tree_2 + .entries() + .sorted() + .map(|(k, v)| TreeEntry { key: *k, value: *v }) + .collect_vec(); + assert_eq!(l2_entries, t2_entries); + + Ok(()) +} + +// BACKEND TRAIT +// ================================================================================================ + +#[test] +fn open() -> Result<()> { + let (_file, mut backend) = default_backend()?; + let mut rng = ContinuousRng::new([0xab; 32]); + + // When we `open` for a lineage that has never been added to the backend, it should yield an + // error. + let ne_lineage: LineageId = rng.value(); + let random_key: Word = rng.value(); + let result = backend.open(ne_lineage, random_key); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::UnknownLineage(l) if l == ne_lineage); + + // Let's now add a tree with a few items in it to the forest. 
+ let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let key_1: Word = rng.value(); + let value_1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2: Word = rng.value(); + + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1, value_1); + operations.add_insert(key_2, value_2); + + backend.add_lineage(lineage_1, version_1, operations)?; + + // We also want to match this against a reference merkle tree to check correctness, so let's + // create that now. + let mut tree = Smt::new(); + tree.insert(key_1, value_1)?; + tree.insert(key_2, value_2)?; + + // Let's first get the backend's opening for a key that hasn't been inserted. This should still + // return properly, and should match the opening provided by the reference tree. + let backend_result = backend.open(lineage_1, random_key)?; + let smt_result = tree.open(&random_key); + assert_eq!(backend_result, smt_result); + + // It should also generate correct openings for both of the inserted values. + assert_eq!(backend.open(lineage_1, key_1)?, tree.open(&key_1)); + assert_eq!(backend.open(lineage_1, key_2)?, tree.open(&key_2)); + + Ok(()) +} + +#[test] +fn get() -> Result<()> { + let (_file, mut backend) = default_backend()?; + let mut rng = ContinuousRng::new([0x67; 32]); + + // When we `get` for a lineage that has never been added to the backend, it should yield an + // error. + let ne_lineage: LineageId = rng.value(); + let random_key: Word = rng.value(); + let result = backend.get(ne_lineage, random_key); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::UnknownLineage(l) if l == ne_lineage); + + // Let's now add a tree with a few items in it to the forest. 
+ let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let key_1: Word = rng.value(); + let value_1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2: Word = rng.value(); + + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1, value_1); + operations.add_insert(key_2, value_2); + + backend.add_lineage(lineage_1, version_1, operations)?; + + // We also want to match this against a reference merkle tree to check correctness, so let's + // create that now. + let mut tree = Smt::new(); + tree.insert(key_1, value_1)?; + tree.insert(key_2, value_2)?; + + // Let's first get the backend's result for a key that hasn't been inserted. This should return + // `None` in our case. + assert!(backend.get(lineage_1, random_key)?.is_none()); + + // It should also provide correct values for both of the inserted values. + assert_eq!(backend.get(lineage_1, key_1)?.unwrap(), tree.get_value(&key_1)); + assert_eq!(backend.get(lineage_1, key_2)?.unwrap(), tree.get_value(&key_2)); + + Ok(()) +} + +#[test] +fn version() -> Result<()> { + let (_file, mut backend) = default_backend()?; + let mut rng = ContinuousRng::new([0x52; 32]); + + // Getting the version for a lineage that the backend doesn't know about should yield an error. + let ne_lineage: LineageId = rng.value(); + let result = backend.version(ne_lineage); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::UnknownLineage(l) if l == ne_lineage); + + // Let's now shove a tree into the backend. 
+ let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let key_1: Word = rng.value(); + let value_1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2: Word = rng.value(); + + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1, value_1); + operations.add_insert(key_2, value_2); + + backend.add_lineage(lineage_1, version_1, operations)?; + + // The forest should return the correct version if asked for the version of the lineage. + assert_eq!(backend.version(lineage_1)?, version_1); + + Ok(()) +} + +#[test] +fn lineages() -> Result<()> { + let (_file, mut backend) = default_backend()?; + let mut rng = ContinuousRng::new([0x96; 32]); + + // Initially there should be no lineages. + assert_eq!(backend.lineages()?.count(), 0); + + // We'll use the same data for each tree here to simplify the test. + let key_1: Word = rng.value(); + let value_1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1, value_1); + operations.add_insert(key_2, value_2); + + let version: VersionId = rng.value(); + + // Let's start by adding one lineage and checking that the iterator contains it. 
+ let lineage_1: LineageId = rng.value(); + backend.add_lineage(lineage_1, version, operations.clone())?; + assert_eq!(backend.lineages()?.count(), 1); + assert!(backend.lineages()?.contains(&lineage_1)); + + // We add another + let lineage_2: LineageId = rng.value(); + backend.add_lineage(lineage_2, version, operations.clone())?; + assert_eq!(backend.lineages()?.count(), 2); + assert!(backend.lineages()?.contains(&lineage_1)); + assert!(backend.lineages()?.contains(&lineage_2)); + + // And yet another + let lineage_3: LineageId = rng.value(); + backend.add_lineage(lineage_3, version, operations.clone())?; + assert_eq!(backend.lineages()?.count(), 3); + assert!(backend.lineages()?.contains(&lineage_1)); + assert!(backend.lineages()?.contains(&lineage_2)); + assert!(backend.lineages()?.contains(&lineage_3)); + + Ok(()) +} + +#[test] +fn trees() -> Result<()> { + let (_file, mut backend) = default_backend()?; + let mut rng = ContinuousRng::new([0x4a; 32]); + + // Initially there should be no lineages. + assert_eq!(backend.lineages()?.count(), 0); + + // We need individual trees and versions here to check on the roots, so let's add our first + // tree. + let key_1_1: Word = rng.value(); + let value_1_1: Word = rng.value(); + let key_1_2: Word = rng.value(); + let value_1_2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1_1, value_1_1); + operations.add_insert(key_1_2, value_1_2); + + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + + backend.add_lineage(lineage_1, version_1, operations)?; + + let mut tree_1 = Smt::new(); + tree_1.insert(key_1_1, value_1_1)?; + tree_1.insert(key_1_2, value_1_2)?; + + // With one tree added we should only see one root. + assert_eq!(backend.trees()?.count(), 1); + assert!( + backend + .trees()? + .contains(&TreeWithRoot::new(lineage_1, version_1, tree_1.root())) + ); + + // Let's add another tree. 
+ let key_2_1: Word = rng.value(); + let value_2_1: Word = rng.value(); + let key_2_2: Word = rng.value(); + let value_2_2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_2_1, value_2_1); + operations.add_insert(key_2_2, value_2_2); + + let lineage_2: LineageId = rng.value(); + let version_2: VersionId = rng.value(); + + backend.add_lineage(lineage_2, version_2, operations)?; + + let mut tree_2 = Smt::new(); + tree_2.insert(key_2_1, value_2_1)?; + tree_2.insert(key_2_2, value_2_2)?; + + // With two added we should see two roots. + assert_eq!(backend.trees()?.count(), 2); + assert!( + backend + .trees()? + .contains(&TreeWithRoot::new(lineage_1, version_1, tree_1.root())) + ); + assert!( + backend + .trees()? + .contains(&TreeWithRoot::new(lineage_2, version_2, tree_2.root())) + ); + + // Let's add one more, just as a sanity check. + let key_3_1: Word = rng.value(); + let value_3_1: Word = rng.value(); + let key_3_2: Word = rng.value(); + let value_3_2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_3_1, value_3_1); + operations.add_insert(key_3_2, value_3_2); + + let lineage_3: LineageId = rng.value(); + let version_3: VersionId = rng.value(); + + backend.add_lineage(lineage_3, version_3, operations)?; + + let mut tree_3 = Smt::new(); + tree_3.insert(key_3_1, value_3_1)?; + tree_3.insert(key_3_2, value_3_2)?; + + // With that added, we should see three. + assert_eq!(backend.trees()?.count(), 3); + assert!( + backend + .trees()? + .contains(&TreeWithRoot::new(lineage_1, version_1, tree_1.root())) + ); + assert!( + backend + .trees()? + .contains(&TreeWithRoot::new(lineage_2, version_2, tree_2.root())) + ); + assert!( + backend + .trees()? 
+ .contains(&TreeWithRoot::new(lineage_3, version_3, tree_3.root())) + ); + + Ok(()) +} + +#[test] +fn entry_count() -> Result<()> { + let (_file, mut backend) = default_backend()?; + let mut rng = ContinuousRng::new([0x01; 32]); + + // It should yield an error for a lineage that doesn't exist. + let ne_lineage: LineageId = rng.value(); + let result = backend.entry_count(ne_lineage); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::UnknownLineage(l) if l == ne_lineage); + + let version: VersionId = rng.value(); + + // Let's start by adding a new lineage with an entirely empty tree. + let lineage_1: LineageId = rng.value(); + backend.add_lineage(lineage_1, version, SmtUpdateBatch::default())?; + + // When queried, this should yield zero entries. + assert_eq!(backend.entry_count(lineage_1)?, 0); + + // Now let's modify that tree to add entries. + let key_1_1: Word = rng.value(); + let value_1_1: Word = rng.value(); + let key_1_2: Word = rng.value(); + let value_1_2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1_1, value_1_1); + operations.add_insert(key_1_2, value_1_2); + + backend.update_tree(lineage_1, version, operations)?; + + // Now if we query we should get two entries. + assert_eq!(backend.entry_count(lineage_1)?, 2); + + Ok(()) +} + +#[test] +fn entries() -> Result<()> { + let (_file, mut backend) = default_backend()?; + let mut rng = ContinuousRng::new([0xa0; 32]); + + // It should yield an error for a lineage that doesn't exist. + let ne_lineage: LineageId = rng.value(); + let result = backend.entries(ne_lineage); + assert!(result.is_err()); + match result { + Err(BackendError::UnknownLineage(l)) => { + assert_eq!(l, ne_lineage); + }, + _ => panic!("Incorrect result encountered"), + } + drop(result); // Forget the borrow. + + let version: VersionId = rng.value(); + + // If we add an empty lineage, the iterator should yield no items. 
+ let lineage_1: LineageId = rng.value(); + backend.add_lineage(lineage_1, version, SmtUpdateBatch::default())?; + assert_eq!(backend.entries(lineage_1)?.count(), 0); + + // So let's add some entries. + let key_1_1: Word = rng.value(); + let value_1_1: Word = rng.value(); + let key_1_2: Word = rng.value(); + let value_1_2: Word = rng.value(); + let mut key_1_3: Word = rng.value(); + key_1_3[3] = key_1_1[3]; + let value_1_3: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1_1, value_1_1); + operations.add_insert(key_1_2, value_1_2); + operations.add_insert(key_1_3, value_1_3); + backend.update_tree(lineage_1, version, operations)?; + + // Now, the iterator should yield the expected three items. + assert_eq!(backend.entries(lineage_1)?.count(), 3); + assert!( + backend + .entries(lineage_1)? + .contains(&TreeEntry { key: key_1_1, value: value_1_1 }), + ); + assert!( + backend + .entries(lineage_1)? + .contains(&TreeEntry { key: key_1_2, value: value_1_2 }), + ); + assert!( + backend + .entries(lineage_1)? + .contains(&TreeEntry { key: key_1_3, value: value_1_3 }), + ); + + Ok(()) +} + +#[test] +fn add_lineage() -> Result<()> { + let (_file, mut backend) = default_backend()?; + let mut rng = ContinuousRng::new([0x49; 32]); + let version: VersionId = rng.value(); + + // We should be able to add a lineage without actually changing the empty tree. + let lineage_1: LineageId = rng.value(); + backend.add_lineage(lineage_1, version, SmtUpdateBatch::default())?; + assert_eq!(backend.entry_count(lineage_1)?, 0); + + // Adding a lineage with a duplicate lineage identifier should yield an error. + let result = backend.add_lineage(lineage_1, version, SmtUpdateBatch::default()); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::DuplicateLineage(l) if l == lineage_1); + + // But we should also be able to add lineages that _contain data_ from the get-go. 
+ let key_2_1: Word = rng.value(); + let value_2_1: Word = rng.value(); + let key_2_2: Word = rng.value(); + let value_2_2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_2_1, value_2_1); + operations.add_insert(key_2_2, value_2_2); + + let lineage_2: LineageId = rng.value(); + let root = backend.add_lineage(lineage_2, version, operations)?; + assert_eq!(backend.entry_count(lineage_2)?, 2); + + // Let's build an auxiliary tree to check this. + let mut tree = Smt::new(); + tree.insert(key_2_1, value_2_1)?; + tree.insert(key_2_2, value_2_2)?; + + // Check we get the right values for the root. + assert_eq!(root.root(), tree.root()); + + Ok(()) +} + +#[test] +fn update_tree() -> Result<()> { + let (_file, mut backend) = default_backend()?; + let mut rng = ContinuousRng::new([0x49; 32]); + + // Updating a lineage that does not exist should result in an error. + let ne_lineage: LineageId = rng.value(); + let result = backend.update_tree(ne_lineage, rng.value(), SmtUpdateBatch::default()); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::UnknownLineage(l) if l == ne_lineage); + + // So let's add an actual lineage. + let key_1_1: Word = rng.value(); + let value_1_1: Word = rng.value(); + let key_1_2: Word = rng.value(); + let value_1_2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1_1, value_1_1); + operations.add_insert(key_1_2, value_1_2); + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + + backend.add_lineage(lineage_1, version_1, operations)?; + + // And check that it agrees with a standard tree. + let mut tree_1 = Smt::new(); + tree_1.insert(key_1_1, value_1_1)?; + tree_1.insert(key_1_2, value_1_2)?; + + assert_eq!(backend.trees()?.count(), 1); + assert!(backend.trees()?.any(|e| e.root() == tree_1.root())); + + // Now let's add another node to the tree! 
Note that the specific version values used here do not matter;
+    // version consistency is enforced by the FOREST and not the backend.
+    let version_2 = version_1 + 1;
+    let key_1_3: Word = rng.value();
+    let value_1_3: Word = rng.value();
+    let mut operations = SmtUpdateBatch::default();
+    operations.add_insert(key_1_3, value_1_3);
+    let backend_revs_1 = backend.update_tree(lineage_1, version_2, operations)?;
+    assert_eq!(backend.trees()?.count(), 1);
+
+    // And we can check against our other tree for consistency again.
+    let mutations = tree_1.compute_mutations([(key_1_3, value_1_3)].into_iter())?;
+    let tree_revs_1 = tree_1.apply_mutations_with_reversion(mutations)?;
+
+    assert!(backend.trees()?.any(|e| e.root() == tree_1.root()));
+    assert_eq!(backend_revs_1, tree_revs_1);
+
+    // Now let's try a remove operation.
+    let version_3 = version_2 + 1;
+    let mut operations = SmtUpdateBatch::default();
+    operations.add_remove(key_1_2);
+    let backend_revs_2 = backend.update_tree(lineage_1, version_3, operations)?;
+
+    // And check it against our other tree for consistency.
+    let mutations = tree_1.compute_mutations([(key_1_2, EMPTY_WORD)])?;
+    let tree_revs_2 = tree_1.apply_mutations_with_reversion(mutations)?;
+    assert_eq!(backend.trees()?.count(), 1);
+    assert!(backend.trees()?.any(|e| e.root() == tree_1.root()));
+    assert_eq!(backend_revs_2, tree_revs_2);
+
+    Ok(())
+}
+
+#[test]
+fn update_forest() -> Result<()> {
+    let (_file, mut backend) = default_backend()?;
+    let mut rng = ContinuousRng::new([0x51; 32]);
+    let version: VersionId = rng.value();
+
+    // Let's start by adding two trees to the forest.
+ let lineage_1: LineageId = rng.value(); + let key_1_1: Word = rng.value(); + let value_1_1: Word = rng.value(); + let key_1_2: Word = rng.value(); + let value_1_2: Word = rng.value(); + let mut operations_1 = SmtUpdateBatch::default(); + operations_1.add_insert(key_1_1, value_1_1); + operations_1.add_insert(key_1_2, value_1_2); + + let lineage_2: LineageId = rng.value(); + let key_2_1: Word = rng.value(); + let value_2_1: Word = rng.value(); + let mut operations_2 = SmtUpdateBatch::default(); + operations_2.add_insert(key_2_1, value_2_1); + + backend.add_lineage(lineage_1, version, operations_1)?; + backend.add_lineage(lineage_2, version, operations_2)?; + + // Let's replicate them with SMTs to check correctness. + let mut tree_1 = Smt::new(); + tree_1.insert(key_1_1, value_1_1)?; + tree_1.insert(key_1_2, value_1_2)?; + + let mut tree_2 = Smt::new(); + tree_2.insert(key_2_1, value_2_1)?; + + // At this point we should have two trees in the forest, and their roots should match the trees + // we're checking against. + assert_eq!(backend.trees()?.count(), 2); + assert!(backend.trees()?.any(|e| e.root() == tree_1.root())); + assert!(backend.trees()?.any(|e| e.root() == tree_2.root())); + + // Let's do a batch modification to start with, doing an insert into both trees. + let key_1_3: Word = rng.value(); + let value_1_3: Word = rng.value(); + let key_2_2: Word = rng.value(); + let value_2_2: Word = rng.value(); + + let mut forest_ops = SmtForestUpdateBatch::empty(); + forest_ops.operations(lineage_1).add_insert(key_1_3, value_1_3); + forest_ops.operations(lineage_2).add_insert(key_2_2, value_2_2); + + backend.update_forest(version, forest_ops)?; + + // We can check these results against our trees. 
+ tree_1.insert(key_1_3, value_1_3)?; + tree_2.insert(key_2_2, value_2_2)?; + + assert_eq!(backend.trees()?.count(), 2); + assert!(backend.trees()?.any(|e| e.root() == tree_1.root())); + assert!(backend.trees()?.any(|e| e.root() == tree_2.root())); + + // We should see an error when performing operations on a lineage that does not exist... + let ne_lineage: LineageId = rng.value(); + let key_1_4: Word = rng.value(); + let value_1_4: Word = rng.value(); + + let mut forest_ops = SmtForestUpdateBatch::empty(); + forest_ops.operations(lineage_1).add_insert(key_1_4, value_1_4); + forest_ops.operations(ne_lineage).add_insert(key_1_4, value_1_4); + + let result = backend.update_forest(version, forest_ops); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::UnknownLineage(l) if l == ne_lineage); + + // ... but it should also leave the existing data unchanged. + assert_eq!(backend.trees()?.count(), 2); + assert!(backend.trees()?.any(|e| e.root() == tree_1.root())); + assert!(backend.trees()?.any(|e| e.root() == tree_2.root())); + + Ok(()) +} diff --git a/crates/large-forest-backend-rocksdb/src/tree_metadata.rs b/crates/large-forest-backend-rocksdb/src/tree_metadata.rs new file mode 100644 index 000000000..012da94a5 --- /dev/null +++ b/crates/large-forest-backend-rocksdb/src/tree_metadata.rs @@ -0,0 +1,43 @@ +//! Contains the metadata definition for each persisted tree in the forest. + +use miden_serde_utils::{ + ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable, +}; +use serde::{Deserialize, Serialize}; + +use crate::{Word, merkle::smt::VersionId}; + +/// The basic metadata stored for each tree in the forest. +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] +pub struct TreeMetadata { + /// The version to which the tree belongs. + pub version: VersionId, + + /// The current value of the tree's root. + pub root_value: Word, + + /// The number of entries that are populated on disk. 
+    pub entry_count: u64,
+}
+
+impl Serializable for TreeMetadata {
+    fn write_into<W: ByteWriter>(&self, target: &mut W) {
+        target.write(self.version);
+        target.write(self.root_value);
+        target.write(self.entry_count);
+    }
+
+    fn get_size_hint(&self) -> usize {
+        size_of::<VersionId>() + size_of::<Word>() + size_of::<u64>()
+    }
+}
+
+impl Deserializable for TreeMetadata {
+    fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
+        let version = source.read::<VersionId>()?;
+        let root_value = source.read::<Word>()?;
+        let entry_count = source.read_u64()?;
+
+        Ok(Self { version, root_value, entry_count })
+    }
+}
diff --git a/crates/store/Cargo.toml b/crates/store/Cargo.toml
index 7c8135d97..73a6a679f 100644
--- a/crates/store/Cargo.toml
+++ b/crates/store/Cargo.toml
@@ -28,6 +28,7 @@ libsqlite3-sys = { workspace = true }
 miden-block-prover = { workspace = true }
 miden-crypto = { features = ["concurrent", "hashmaps"], workspace = true }
 miden-large-smt-backend-rocksdb = { optional = true, workspace = true }
+miden-large-forest-backend-rocksdb = { optional = true, workspace = true }
 miden-node-db = { workspace = true }
 miden-node-proto = { workspace = true }
 miden-node-proto-build = { features = ["internal"], workspace = true }
@@ -72,7 +73,7 @@ termtree = "1.0"
 
 [features]
 default = ["rocksdb"]
-rocksdb = ["dep:miden-large-smt-backend-rocksdb", "miden-node-utils/rocksdb"]
+rocksdb = ["dep:miden-large-smt-backend-rocksdb", "dep:miden-large-forest-backend-rocksdb", "miden-node-utils/rocksdb"]
 
 [[bench]]
 harness = false
diff --git a/crates/store/src/account_state_forest/mod.rs b/crates/store/src/account_state_forest/mod.rs
index 58026cfe2..b522fa7a9 100644
--- a/crates/store/src/account_state_forest/mod.rs
+++ b/crates/store/src/account_state_forest/mod.rs
@@ -1,4 +1,6 @@
 use std::collections::BTreeSet;
+#[cfg(feature = "rocksdb")]
+use std::path::Path;
 
 use miden_crypto::hash::rpo::Rpo256;
 use miden_crypto::merkle::smt::ForestInMemoryBackend;
@@ -60,22 +62,34 @@ pub enum WitnessError {
     AssetError(#[from] AssetError),
 }
 
+#[cfg(feature = 
"rocksdb")]
+type LargeForest = LargeSmtForest<miden_large_forest_backend_rocksdb::PersistentBackend>;
+#[cfg(not(feature = "rocksdb"))]
+type LargeForest = LargeSmtForest<ForestInMemoryBackend>;
+
 // ACCOUNT STATE FOREST
 // ================================================================================================
 
 /// Container for forest-related state that needs to be updated atomically.
-pub(crate) struct AccountStateForest {
+pub(crate) struct AccountStateForest {
     /// `LargeSmtForest` for efficient account storage reconstruction.
     /// Populated during block import with storage and vault SMTs.
-    forest: LargeSmtForest,
+    forest: LargeForest,
 }
 
 impl AccountStateForest {
-    pub(crate) fn new() -> Self {
-        Self { forest: Self::create_forest() }
+    pub(crate) fn new(path: &std::path::Path) -> Self {
+        Self { forest: Self::create_forest(path) }
+    }
+
+    #[cfg(feature = "rocksdb")]
+    fn create_forest(path: &Path) -> LargeForest {
+        let backend = miden_large_forest_backend_rocksdb::PersistentBackend::load(path);
+        LargeSmtForest::new(backend).expect("rocksdb-backed forest should initialize")
     }
 
-    fn create_forest() -> LargeSmtForest {
+    #[cfg(not(feature = "rocksdb"))]
+    fn create_forest(_path: &std::path::Path) -> LargeForest {
         let backend = ForestInMemoryBackend::new();
         LargeSmtForest::new(backend).expect("in-memory backend should initialize")
     }
diff --git a/crates/store/src/state/loader.rs b/crates/store/src/state/loader.rs
index 20ef9cdc0..d670e40fc 100644
--- a/crates/store/src/state/loader.rs
+++ b/crates/store/src/state/loader.rs
@@ -363,10 +363,11 @@ pub async fn load_mmr(db: &mut Db) -> Result
 Result {
     use miden_protocol::account::delta::AccountDelta;
 
-    let mut forest = AccountStateForest::new();
+    let mut forest = AccountStateForest::new(xxx); // FIXME(review): `xxx` is a placeholder — pass the configured forest storage path here.
     let mut cursor = None;
 
     loop {
diff --git a/crates/utils/src/clap.rs b/crates/utils/src/clap.rs
index 079a619d3..c5ca3e0f3 100644
--- a/crates/utils/src/clap.rs
+++ b/crates/utils/src/clap.rs
@@ -149,6 +149,9 @@ pub struct StorageOptions {
     #[cfg(feature = "rocksdb")]
     #[clap(flatten)]
     pub nullifier_tree: NullifierTreeRocksDbOptions,
+
#[cfg(feature = "rocksdb")] + #[clap(flatten)] + pub account_state_forest: AccountStateForestRocksDbOptions, } impl StorageOptions { @@ -166,7 +169,12 @@ impl StorageOptions { max_open_fds: BENCH_ROCKSDB_MAX_OPEN_FDS, cache_size_in_bytes: DEFAULT_ROCKSDB_CACHE_SIZE, }; - Self { account_tree, nullifier_tree } + let account_state_forest = AccountStateForestRocksDbOptions::default(); + Self { + account_tree, + nullifier_tree, + account_state_forest, + } } #[cfg(not(feature = "rocksdb"))] Self::default() diff --git a/crates/utils/src/clap/rocksdb.rs b/crates/utils/src/clap/rocksdb.rs index 9b752205d..b4602e4d0 100644 --- a/crates/utils/src/clap/rocksdb.rs +++ b/crates/utils/src/clap/rocksdb.rs @@ -12,12 +12,14 @@ pub(crate) const BENCH_ROCKSDB_MAX_OPEN_FDS: i32 = 512; #[derive(clap::Args, Clone, Debug, PartialEq, Eq)] pub struct NullifierTreeRocksDbOptions { #[arg( + id = "nullifier_tree_rocksdb_max_open_fds", long = "nullifier_tree.rocksdb.max_open_fds", default_value_t = DEFAULT_ROCKSDB_MAX_OPEN_FDS, value_name = "NULLIFIER_TREE__ROCKSDB__MAX_OPEN_FDS" )] pub max_open_fds: i32, #[arg( + id = "nullifier_tree_rocksdb_max_cache_size", long = "nullifier_tree.rocksdb.max_cache_size", default_value_t = DEFAULT_ROCKSDB_CACHE_SIZE, value_name = "NULLIFIER_TREE__ROCKSDB__CACHE_SIZE" @@ -35,12 +37,14 @@ impl Default for NullifierTreeRocksDbOptions { #[derive(clap::Args, Clone, Debug, PartialEq, Eq)] pub struct AccountTreeRocksDbOptions { #[arg( + id = "account_tree_rocksdb_max_open_fds", long = "account_tree.rocksdb.max_open_fds", default_value_t = DEFAULT_ROCKSDB_MAX_OPEN_FDS, value_name = "ACCOUNT_TREE__ROCKSDB__MAX_OPEN_FDS" )] pub max_open_fds: i32, #[arg( + id = "account_tree_rocksdb_max_cache_size", long = "account_tree.rocksdb.max_cache_size", default_value_t = DEFAULT_ROCKSDB_CACHE_SIZE, value_name = "ACCOUNT_TREE__ROCKSDB__CACHE_SIZE" @@ -105,3 +109,38 @@ impl RocksDbOptions { .with_max_open_files(self.max_open_fds) } } + +// -- FOREST 
----------------------------------------------
+
+/// RocksDB configuration options for the account state forest.
+#[derive(clap::Args, Clone, Debug, PartialEq, Eq)]
+pub struct AccountStateForestRocksDbOptions {
+    #[arg(
+        id = "account_state_forest_rocksdb_max_open_fds",
+        long = "account_state_forest.rocksdb.max_open_fds",
+        default_value_t = DEFAULT_ROCKSDB_MAX_OPEN_FDS,
+        value_name = "ACCOUNT_STATE_FOREST__ROCKSDB__MAX_OPEN_FDS"
+    )]
+    pub max_open_fds: i32,
+    #[arg(
+        id = "account_state_forest_rocksdb_max_cache_size",
+        long = "account_state_forest.rocksdb.max_cache_size",
+        default_value_t = DEFAULT_ROCKSDB_CACHE_SIZE,
+        value_name = "ACCOUNT_STATE_FOREST__ROCKSDB__CACHE_SIZE"
+    )]
+    pub cache_size_in_bytes: usize,
+    // TODO: expose additional RocksDB tuning options here.
+}
+
+impl Default for AccountStateForestRocksDbOptions {
+    fn default() -> Self {
+        RocksDbOptions::default().into()
+    }
+}
+
+impl From<RocksDbOptions> for AccountStateForestRocksDbOptions {
+    fn from(value: RocksDbOptions) -> Self {
+        let RocksDbOptions { max_open_fds, cache_size_in_bytes } = value;
+        Self { max_open_fds, cache_size_in_bytes }
+    }
+}