diff --git a/Cargo.toml b/Cargo.toml
index 53e5182bb..5e6246c9d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,6 +6,7 @@ members = [
     "bin/stress-test",
     "crates/block-producer",
     "crates/grpc-error-macro",
+    "crates/large-smt",
     "crates/ntx-builder",
     "crates/proto",
     "crates/remote-prover-client",
@@ -49,15 +50,16 @@ miden-node-validator = { path = "crates/validator", version = "0.13" }
 miden-remote-prover-client = { path = "crates/remote-prover-client", version = "0.13" }
 
 # miden-base aka protocol dependencies. These should be updated in sync.
-miden-block-prover = { branch = "next", git = "https://github.com/0xMiden/miden-base.git" }
-miden-protocol = { branch = "next", default-features = false, git = "https://github.com/0xMiden/miden-base.git" }
-miden-standards = { branch = "next", git = "https://github.com/0xMiden/miden-base.git" }
-miden-testing = { branch = "next", git = "https://github.com/0xMiden/miden-base.git" }
-miden-tx = { branch = "next", default-features = false, git = "https://github.com/0xMiden/miden-base.git" }
-miden-tx-batch-prover = { branch = "next", git = "https://github.com/0xMiden/miden-base.git" }
+miden-block-prover = { branch = "bernhard-migrate-rocksdb-from-crypto", git = "https://github.com/0xMiden/miden-base.git" }
+miden-protocol = { branch = "bernhard-migrate-rocksdb-from-crypto", default-features = false, git = "https://github.com/0xMiden/miden-base.git" }
+miden-standards = { branch = "bernhard-migrate-rocksdb-from-crypto", git = "https://github.com/0xMiden/miden-base.git" }
+miden-testing = { branch = "bernhard-migrate-rocksdb-from-crypto", git = "https://github.com/0xMiden/miden-base.git" }
+miden-tx = { branch = "bernhard-migrate-rocksdb-from-crypto", default-features = false, git = "https://github.com/0xMiden/miden-base.git" }
+miden-tx-batch-prover = { branch = "bernhard-migrate-rocksdb-from-crypto", git = "https://github.com/0xMiden/miden-base.git" }
 
 # Other miden dependencies. These should align with those expected by miden-base.
-miden-air = { features = ["std", "testing"], version = "0.20" }
+miden-air = { features = ["std", "testing"], version = "0.20" }
+miden-crypto = { branch = "bernhard-migrate-rocksdb-from-crypto", git = "https://github.com/0xMiden/crypto.git" }
 
 # External dependencies
 anyhow = { version = "1.0" }
@@ -112,3 +114,6 @@ must_use_candidate = "allow" # This marks many fn's which isn't helpfu
 needless_for_each = "allow" # Context dependent if that's useful.
 should_panic_without_expect = "allow" # We don't care about the specific panic message.
 # End of pedantic lints.
+
+[patch.crates-io]
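+# Route every crates.io `miden-crypto` dependency in the graph to the fork below, so the
+# workspace and the pinned miden-base branch build against a single miden-crypto.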
+miden-crypto = { branch = "bernhard-migrate-rocksdb-from-crypto", git = "https://github.com/0xMiden/crypto.git" }
diff --git a/crates/large-smt/Cargo.toml b/crates/large-smt/Cargo.toml
new file mode 100644
index 000000000..41f83f4e0
--- /dev/null
+++ b/crates/large-smt/Cargo.toml
@@ -0,0 +1,26 @@
+[package]
+authors.workspace = true
+description = "Large-scale Sparse Merkle Tree backed by pluggable storage (RocksDB, memory)"
+edition.workspace = true
+homepage.workspace = true
+keywords = ["miden", "node", "smt", "merkle"]
+license.workspace = true
+name = "miden-large-smt"
+readme = "README.md"
+repository.workspace = true
+rust-version.workspace = true
+version.workspace = true
+
+[lints]
+workspace = true
+
+[features]
+default = ["concurrent"]
+concurrent = []
+rocksdb = ["dep:rocksdb", "dep:rayon", "dep:winter-utils"]
+
+[dependencies]
+miden-protocol = { features = ["std"], workspace = true }
+rayon = { version = "1.10", optional = true }
+rocksdb = { default-features = false, features = ["bindgen-runtime", "lz4"], optional = true, version = "0.24" }
+winter-utils = { version = "0.13", optional = true }
diff --git a/crates/large-smt/README.md b/crates/large-smt/README.md
new file mode 100644
index 000000000..1d08e1aa4
--- /dev/null
+++ b/crates/large-smt/README.md
@@ -0,0 +1,45 @@
+# miden-large-smt
+
+Large-scale Sparse Merkle Tree backed by pluggable storage (RocksDB, memory).
+
+This crate provides `LargeSmt`, a hybrid SMT implementation that stores the top of the tree
+(depths 0–23) in memory and persists the lower depths (24–64) in storage as fixed-size subtrees.
+This hybrid layout scales beyond RAM while keeping common operations fast.
+
+## Migration Status
+
+This crate is the future home for `LargeSmt` and its storage backends. Currently it re-exports
+types from `miden-protocol` (which re-exports from `miden-crypto`).
+
+The migration will be completed in phases:
+1. ✅ Create this crate as a re-export layer (current state)
+2. Copy the full implementation from miden-crypto to this crate
+3. Update miden-crypto to remove the rocksdb feature
+4. Update dependents to use this crate directly
+
+## Features
+
+- **concurrent**: Enables parallel processing with rayon (enabled by default)
+- **rocksdb**: (Future) Enables the RocksDB storage backend
+
+## Usage
+
+```rust
+use miden_large_smt::{LargeSmt, MemoryStorage};
+
+// Create an empty tree with in-memory storage
+let storage = MemoryStorage::new();
+let smt = LargeSmt::new(storage).unwrap();
+```
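+
+With the `rocksdb` feature enabled (the backend itself is still being migrated; see the
+status above), usage is expected to mirror the crate docs:
+
+```rust
+use miden_large_smt::{LargeSmt, RocksDbConfig, RocksDbStorage};
+
+// Open (or create) a persistent RocksDB-backed tree at the given path.
+let storage = RocksDbStorage::open(RocksDbConfig::new("/path/to/db")).unwrap();
+let smt = LargeSmt::new(storage).unwrap();
+```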
+
+## Re-exported Types
+
+This crate re-exports the following types from `miden-protocol`:
+
+- `LargeSmt` - The large-scale SMT implementation
+- `LargeSmtError` - Error type for LargeSmt operations
+- `MemoryStorage` - In-memory storage backend
+- `SmtStorage` - Storage backend trait
+- `Subtree` - Serializable subtree representation
+- `StorageUpdates` / `StorageUpdateParts` - Batch update types
+- Various SMT types: `Smt`, `SmtLeaf`, `SmtProof`, `LeafIndex`, etc.
diff --git a/crates/large-smt/src/lib.rs b/crates/large-smt/src/lib.rs
new file mode 100644
index 000000000..18e979ed7
--- /dev/null
+++ b/crates/large-smt/src/lib.rs
@@ -0,0 +1,65 @@
+//! Large-scale Sparse Merkle Tree backed by pluggable storage.
+//!
+//! `LargeSmt` stores the top of the tree (depths 0–23) in memory and persists the lower
+//! depths (24–64) in storage as fixed-size subtrees. This hybrid layout scales beyond RAM
+//! while keeping common operations fast.
+//!
+//! # Usage
+//!
+//! ```ignore
+//! use miden_large_smt::{LargeSmt, MemoryStorage};
+//!
+//! // Create an empty tree with in-memory storage
+//! let storage = MemoryStorage::new();
+//! let smt = LargeSmt::new(storage).unwrap();
+//! ```
+//!
+//! With RocksDB (requires the `rocksdb` feature):
+//!
+//! ```ignore
+//! use miden_large_smt::{LargeSmt, RocksDbConfig, RocksDbStorage};
+//!
+//! let storage = RocksDbStorage::open(RocksDbConfig::new("/path/to/db")).unwrap();
+//! let smt = LargeSmt::new(storage).unwrap();
+//! ```
+
+#![cfg_attr(not(feature = "concurrent"), allow(unused_imports))]
+
+extern crate alloc;
+
+#[cfg(feature = "rocksdb")]
+mod rocksdb;
+#[cfg(feature = "rocksdb")]
+pub use rocksdb::{RocksDbConfig, RocksDbStorage};
+
+// Re-export from miden-protocol. `Map` is included because the rocksdb module below
+// imports it via `crate::Map`.
+pub use miden_protocol::crypto::merkle::smt::{
+    InnerNode,
+    LargeSmt,
+    LargeSmtError,
+    LeafIndex,
+    Map,
+    MemoryStorage,
+    SMT_DEPTH,
+    Smt,
+    SmtLeaf,
+    SmtLeafError,
+    SmtProof,
+    SmtStorage,
+    StorageError,
+    StorageUpdateParts,
+    StorageUpdates,
+    Subtree,
+    SubtreeError,
+    SubtreeUpdate,
+};
+
+// Also re-export commonly used types for convenience.
+pub use miden_protocol::{
+    EMPTY_WORD,
+    Felt,
+    Word,
+    crypto::{
+        hash::rpo::Rpo256,
+        merkle::{EmptySubtreeRoots, InnerNodeInfo, MerkleError, NodeIndex, SparseMerklePath},
+    },
+};
diff --git a/crates/large-smt/src/rocksdb.rs b/crates/large-smt/src/rocksdb.rs
new file mode 100644
index 000000000..82cf8167c
--- /dev/null
+++ b/crates/large-smt/src/rocksdb.rs
@@ -0,0 +1,815 @@
+//! RocksDB-backed persistent storage for Sparse Merkle Trees.
+
+use alloc::{boxed::Box, vec::Vec};
+use std::{path::PathBuf, sync::Arc};
+
+use rocksdb::{
+    BlockBasedOptions, Cache, ColumnFamilyDescriptor, DB, DBCompactionStyle, DBCompressionType,
+    DBIteratorWithThreadMode, FlushOptions, IteratorMode, Options, ReadOptions, WriteBatch,
+};
+use winter_utils::{Deserializable, Serializable};
+
+use crate::{
+    EMPTY_WORD, InnerNode, Map, NodeIndex, SmtLeaf, SmtStorage, StorageError, StorageUpdateParts,
+    StorageUpdates, Subtree, SubtreeUpdate, Word,
+};
+
+/// The name of the RocksDB column family used for storing SMT leaves.
+const LEAVES_CF: &str = "leaves";
+/// The names of the RocksDB column families used for storing SMT subtrees (deep nodes).
+const SUBTREE_24_CF: &str = "st24";
+const SUBTREE_32_CF: &str = "st32";
+const SUBTREE_40_CF: &str = "st40";
+const SUBTREE_48_CF: &str = "st48";
+const SUBTREE_56_CF: &str = "st56";
+
+/// The name of the RocksDB column family used for storing metadata (e.g., root, counts).
+const METADATA_CF: &str = "metadata";
+/// The name of the RocksDB column family used for storing depth-24 hashes for fast tree rebuilding.
+const DEPTH_24_CF: &str = "depth24";
+
+/// The key used in the `METADATA_CF` column family to store the SMT's root hash.
+const ROOT_KEY: &[u8] = b"smt_root";
+/// The key used in the `METADATA_CF` column family to store the total count of non-empty leaves.
+const LEAF_COUNT_KEY: &[u8] = b"leaf_count";
+/// The key used in the `METADATA_CF` column family to store the total count of key-value entries.
+const ENTRY_COUNT_KEY: &[u8] = b"entry_count";
+
+/// A RocksDB-backed persistent storage implementation for a Sparse Merkle Tree (SMT).
+///
+/// Implements the `SmtStorage` trait, providing durable storage for SMT components
+/// including leaves, subtrees (for deeper parts of the tree), and metadata such as the SMT root
+/// and counts. It leverages RocksDB column families to organize data:
+/// - `LEAVES_CF` ("leaves"): Stores `SmtLeaf` data, keyed by their logical u64 index.
+/// - `SUBTREE_24_CF` ("st24"): Stores serialized `Subtree` data at depth 24.
+/// - `SUBTREE_32_CF` ("st32"): Stores serialized `Subtree` data at depth 32.
+/// - `SUBTREE_40_CF` ("st40"): Stores serialized `Subtree` data at depth 40.
+/// - `SUBTREE_48_CF` ("st48"): Stores serialized `Subtree` data at depth 48.
+/// - `SUBTREE_56_CF` ("st56"): Stores serialized `Subtree` data at depth 56.
+/// - `METADATA_CF` ("metadata"): Stores overall SMT metadata such as the current root hash, total
+///   leaf count, and total entry count.
+/// - `DEPTH_24_CF` ("depth24"): Caches depth-24 root hashes so the in-memory top of the tree can
+///   be rebuilt quickly.
+#[derive(Debug, Clone)]
+pub struct RocksDbStorage {
+    db: Arc<DB>,
+}
+
+impl RocksDbStorage {
+    /// Opens or creates a RocksDB database at the specified `path` and configures it for SMT
+    /// storage.
+    ///
+    /// This method sets up the necessary column families (leaves, per-depth subtrees, metadata,
+    /// and the depth-24 cache) and applies various RocksDB options for performance, such as
+    /// caching, bloom filters, and compaction strategies tailored for SMT workloads.
+    ///
+    /// # Errors
+    /// Returns `StorageError::Backend` if the database cannot be opened or configured,
+    /// for example, due to path issues, permissions, or RocksDB internal errors.
+    pub fn open(config: RocksDbConfig) -> Result<Self, StorageError> {
+        // Base DB options
+        let mut db_opts = Options::default();
+        db_opts.create_if_missing(true);
+        db_opts.create_missing_column_families(true);
+        db_opts.increase_parallelism(rayon::current_num_threads() as i32);
+        db_opts.set_max_open_files(config.max_open_files);
+        db_opts.set_max_background_jobs(rayon::current_num_threads() as i32);
+        db_opts.set_max_total_wal_size(512 * 1024 * 1024);
+
+        // Shared block cache across all column families
+        let cache = Cache::new_lru_cache(config.cache_size);
+
+        // Common table options for bloom filtering and cache
+        let mut table_opts = BlockBasedOptions::default();
+        table_opts.set_block_cache(&cache);
+        table_opts.set_bloom_filter(10.0, false);
+        table_opts.set_whole_key_filtering(true);
+        table_opts.set_pin_l0_filter_and_index_blocks_in_cache(true);
+
+        // Column family for leaves
+        let mut leaves_opts = Options::default();
+        leaves_opts.set_block_based_table_factory(&table_opts);
+        leaves_opts.set_write_buffer_size(128 << 20);
+        leaves_opts.set_max_write_buffer_number(3);
+        leaves_opts.set_min_write_buffer_number_to_merge(1);
+        leaves_opts.set_max_write_buffer_size_to_maintain(0);
+        leaves_opts.set_compaction_style(DBCompactionStyle::Level);
+        leaves_opts.set_target_file_size_base(512 << 20);
+        leaves_opts.set_target_file_size_multiplier(2);
+        leaves_opts.set_compression_type(DBCompressionType::Lz4);
+        leaves_opts.set_level_zero_file_num_compaction_trigger(8);
+
+        // Helper to build subtree CF options
+        fn subtree_cf(cache: &Cache, bloom_filter_bits: f64) -> Options {
+            let mut tbl = BlockBasedOptions::default();
+            tbl.set_block_cache(cache);
+            tbl.set_bloom_filter(bloom_filter_bits, false);
+            tbl.set_whole_key_filtering(true);
+            tbl.set_pin_l0_filter_and_index_blocks_in_cache(true);
+
+            let mut opts = Options::default();
+            opts.set_block_based_table_factory(&tbl);
+            opts.set_write_buffer_size(128 << 20);
+            opts.set_max_write_buffer_number(3);
+            opts.set_min_write_buffer_number_to_merge(1);
+            opts.set_max_write_buffer_size_to_maintain(0);
+            opts.set_compaction_style(DBCompactionStyle::Level);
+            opts.set_target_file_size_base(512 << 20);
+            opts.set_target_file_size_multiplier(2);
+            opts.set_compression_type(DBCompressionType::Lz4);
+            opts.set_level_zero_file_num_compaction_trigger(8);
+            opts
+        }
+
+        let mut depth24_opts = Options::default();
+        depth24_opts.set_compression_type(DBCompressionType::Lz4);
+        depth24_opts.set_block_based_table_factory(&table_opts);
+
+        let mut metadata_opts = Options::default();
+        metadata_opts.set_compression_type(DBCompressionType::None);
+
+        let cfs = vec![
+            ColumnFamilyDescriptor::new(LEAVES_CF, leaves_opts),
+            ColumnFamilyDescriptor::new(SUBTREE_24_CF, subtree_cf(&cache, 8.0)),
+            ColumnFamilyDescriptor::new(SUBTREE_32_CF, subtree_cf(&cache, 10.0)),
+            ColumnFamilyDescriptor::new(SUBTREE_40_CF, subtree_cf(&cache, 10.0)),
+            ColumnFamilyDescriptor::new(SUBTREE_48_CF, subtree_cf(&cache, 12.0)),
+            ColumnFamilyDescriptor::new(SUBTREE_56_CF, subtree_cf(&cache, 12.0)),
+            ColumnFamilyDescriptor::new(METADATA_CF, metadata_opts),
+            ColumnFamilyDescriptor::new(DEPTH_24_CF, depth24_opts),
+        ];
+
+        let db = DB::open_cf_descriptors(&db_opts, config.path, cfs)?;
+
+        Ok(Self { db: Arc::new(db) })
+    }
+
+    /// Syncs the RocksDB database to disk.
+    fn sync(&self) -> Result<(), StorageError> {
+        let mut fopts = FlushOptions::default();
+        fopts.set_wait(true);
+
+        for name in [
+            LEAVES_CF,
+            SUBTREE_24_CF,
+            SUBTREE_32_CF,
+            SUBTREE_40_CF,
+            SUBTREE_48_CF,
+            SUBTREE_56_CF,
+            METADATA_CF,
+            DEPTH_24_CF,
+        ] {
+            let cf = self.cf_handle(name)?;
+            self.db.flush_cf_opt(cf, &fopts)?;
+        }
+
+        self.db.flush_wal(true)?;
+        Ok(())
+    }
+
+    #[inline(always)]
+    fn index_db_key(index: u64) -> [u8; 8] {
+        index.to_be_bytes()
+    }
+
+    #[inline(always)]
+    fn subtree_db_key(index: NodeIndex) -> KeyBytes {
+        let keep = match index.depth() {
+            24 => 3,
+            32 => 4,
+            40 => 5,
+            48 => 6,
+            56 => 7,
+            d => panic!("unsupported depth {d}"),
+        };
+        KeyBytes::new(index.value(), keep)
+    }
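+
+    // Worked example of the key scheme above (matching `KeyBytes::new` at the bottom of this
+    // file): a node at depth 24 has a value below 2^24, so its big-endian u64 encoding is
+    // [0, 0, 0, 0, 0, b0, b1, b2] and only the last `keep = 3` bytes are stored. Truncating
+    // per depth keeps keys short while preserving big-endian ordering within each subtree
+    // column family.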
+
+    fn cf_handle(&self, name: &str) -> Result<&rocksdb::ColumnFamily, StorageError> {
+        self.db
+            .cf_handle(name)
+            .ok_or_else(|| StorageError::Unsupported(format!("unknown column family `{name}`")))
+    }
+
+    #[inline(always)]
+    fn subtree_cf(&self, index: NodeIndex) -> &rocksdb::ColumnFamily {
+        let name = cf_for_depth(index.depth());
+        self.cf_handle(name).expect("CF handle missing")
+    }
+}
+
+impl SmtStorage for RocksDbStorage {
+    fn get_root(&self) -> Result<Option<Word>, StorageError> {
+        let cf = self.cf_handle(METADATA_CF)?;
+        match self.db.get_cf(cf, ROOT_KEY)? {
+            Some(bytes) => {
+                let digest = Word::read_from_bytes(&bytes)?;
+                Ok(Some(digest))
+            },
+            None => Ok(None),
+        }
+    }
+
+    fn set_root(&self, root: Word) -> Result<(), StorageError> {
+        let cf = self.cf_handle(METADATA_CF)?;
+        self.db.put_cf(cf, ROOT_KEY, root.to_bytes())?;
+        Ok(())
+    }
+
+    fn leaf_count(&self) -> Result<usize, StorageError> {
+        let cf = self.cf_handle(METADATA_CF)?;
+        self.db.get_cf(cf, LEAF_COUNT_KEY)?.map_or(Ok(0), |bytes| {
+            let arr: [u8; 8] =
+                bytes.as_slice().try_into().map_err(|_| StorageError::BadValueLen {
+                    what: "leaf count",
+                    expected: 8,
+                    found: bytes.len(),
+                })?;
+            Ok(usize::from_be_bytes(arr))
+        })
+    }
+
+    fn entry_count(&self) -> Result<usize, StorageError> {
+        let cf = self.cf_handle(METADATA_CF)?;
+        self.db.get_cf(cf, ENTRY_COUNT_KEY)?.map_or(Ok(0), |bytes| {
+            let arr: [u8; 8] =
+                bytes.as_slice().try_into().map_err(|_| StorageError::BadValueLen {
+                    what: "entry count",
+                    expected: 8,
+                    found: bytes.len(),
+                })?;
+            Ok(usize::from_be_bytes(arr))
+        })
+    }
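+
+    // `insert_value` and `remove_value` below are read-modify-write: they load the leaf,
+    // mutate it in memory, and persist the new leaf together with the updated counts in a
+    // single `WriteBatch`.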
+
+    fn insert_value(
+        &self,
+        index: u64,
+        key: Word,
+        value: Word,
+    ) -> Result<Option<Word>, StorageError> {
+        debug_assert_ne!(value, EMPTY_WORD);
+
+        let mut batch = WriteBatch::default();
+
+        let mut current_leaf_count = self.leaf_count()?;
+        let mut current_entry_count = self.entry_count()?;
+
+        let leaves_cf = self.cf_handle(LEAVES_CF)?;
+        let db_key = Self::index_db_key(index);
+
+        let maybe_leaf = self.get_leaf(index)?;
+
+        let (old_value, new_leaf) = match maybe_leaf {
+            Some(mut existing_leaf) => {
+                let old_val = existing_leaf.insert(key, value)?;
+                if old_val.is_none() {
+                    current_entry_count += 1;
+                }
+                (old_val, existing_leaf)
+            },
+            None => {
+                let new_leaf = SmtLeaf::new_single(key, value);
+                current_leaf_count += 1;
+                current_entry_count += 1;
+                (None, new_leaf)
+            },
+        };
+
+        batch.put_cf(leaves_cf, db_key, new_leaf.to_bytes());
+
+        let metadata_cf = self.cf_handle(METADATA_CF)?;
+        batch.put_cf(metadata_cf, LEAF_COUNT_KEY, current_leaf_count.to_be_bytes());
+        batch.put_cf(metadata_cf, ENTRY_COUNT_KEY, current_entry_count.to_be_bytes());
+
+        self.db.write(batch)?;
+
+        Ok(old_value)
+    }
+
+    fn remove_value(&self, index: u64, key: Word) -> Result<Option<Word>, StorageError> {
+        let maybe_leaf = self.get_leaf(index)?;
+
+        let Some(mut existing_leaf) = maybe_leaf else {
+            return Ok(None);
+        };
+
+        let (old_value, is_empty) = existing_leaf.remove(key);
+        if old_value.is_none() {
+            return Ok(None);
+        }
+
+        let mut batch = WriteBatch::default();
+        let leaves_cf = self.cf_handle(LEAVES_CF)?;
+        let db_key = Self::index_db_key(index);
+
+        let mut current_leaf_count = self.leaf_count()?;
+        let mut current_entry_count = self.entry_count()?;
+
+        if is_empty {
+            batch.delete_cf(leaves_cf, db_key);
+            current_leaf_count = current_leaf_count.saturating_sub(1);
+        } else {
+            batch.put_cf(leaves_cf, db_key, existing_leaf.to_bytes());
+        }
+        current_entry_count = current_entry_count.saturating_sub(1);
+
+        let metadata_cf = self.cf_handle(METADATA_CF)?;
+        batch.put_cf(metadata_cf, LEAF_COUNT_KEY, current_leaf_count.to_be_bytes());
+        batch.put_cf(metadata_cf, ENTRY_COUNT_KEY, current_entry_count.to_be_bytes());
+
+        self.db.write(batch)?;
+
+        Ok(old_value)
+    }
+
+    fn get_leaf(&self, index: u64) -> Result<Option<SmtLeaf>, StorageError> {
+        let cf = self.cf_handle(LEAVES_CF)?;
+        let db_key = Self::index_db_key(index);
+
+        match self.db.get_cf(cf, db_key)? {
+            Some(bytes) => Ok(Some(SmtLeaf::read_from_bytes(&bytes)?)),
+            None => Ok(None),
+        }
+    }
+
+    fn set_leaves(&self, leaves: Map<u64, SmtLeaf>) -> Result<(), StorageError> {
+        let leaves_cf = self.cf_handle(LEAVES_CF)?;
+        let mut batch = WriteBatch::default();
+
+        for (index, leaf) in leaves {
+            let db_key = Self::index_db_key(index);
+            batch.put_cf(leaves_cf, db_key, leaf.to_bytes());
+        }
+
+        self.db.write(batch)?;
+        Ok(())
+    }
+
+    fn remove_leaf(&self, index: u64) -> Result<Option<SmtLeaf>, StorageError> {
+        let cf = self.cf_handle(LEAVES_CF)?;
+        let db_key = Self::index_db_key(index);
+
+        let old_leaf = match self.db.get_cf(cf, &db_key)? {
+            Some(bytes) => Some(SmtLeaf::read_from_bytes(&bytes)?),
+            None => None,
+        };
+
+        if old_leaf.is_some() {
+            self.db.delete_cf(cf, db_key)?;
+        }
+
+        Ok(old_leaf)
+    }
+
+    fn get_leaves(&self, indices: &[u64]) -> Result<Vec<Option<SmtLeaf>>, StorageError> {
+        let cf = self.cf_handle(LEAVES_CF)?;
+        let keys: Vec<[u8; 8]> = indices.iter().map(|idx| Self::index_db_key(*idx)).collect();
+        let key_refs: Vec<(&rocksdb::ColumnFamily, &[u8])> =
+            keys.iter().map(|k| (cf, k.as_slice())).collect();
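+        // multi_get_cf batches all of the point lookups into a single RocksDB call instead
+        // of issuing one get_cf per index.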
+        let results = self.db.multi_get_cf(key_refs);
+        let mut out = Vec::with_capacity(indices.len());
+        for res in results {
+            match res? {
+                Some(bytes) => out.push(Some(SmtLeaf::read_from_bytes(&bytes)?)),
+                None => out.push(None),
+            }
+        }
+        Ok(out)
+    }
+
+    fn has_leaves(&self) -> Result<bool, StorageError> {
+        Ok(self.leaf_count()? > 0)
+    }
+
+    fn get_subtree(&self, index: NodeIndex) -> Result<Option<Subtree>, StorageError> {
+        let cf = self.subtree_cf(index);
+        let key = Self::subtree_db_key(index);
+
+        match self.db.get_cf(cf, key)? {
+            Some(bytes) => Ok(Some(Subtree::from_vec(index, &bytes)?)),
+            None => Ok(None),
+        }
+    }
+
+    fn get_subtrees(&self, indices: &[NodeIndex]) -> Result<Vec<Option<Subtree>>, StorageError> {
+        let keys: Vec<(NodeIndex, KeyBytes)> = indices
+            .iter()
+            .map(|&idx| (idx, Self::subtree_db_key(idx)))
+            .collect();
+
+        let key_refs: Vec<(&rocksdb::ColumnFamily, &[u8])> = keys
+            .iter()
+            .map(|(idx, k)| (self.subtree_cf(*idx), k.as_slice()))
+            .collect();
+
+        let results = self.db.multi_get_cf(key_refs);
+        let mut out = Vec::with_capacity(indices.len());
+
+        for (res, &idx) in results.into_iter().zip(indices.iter()) {
+            match res? {
+                Some(bytes) => out.push(Some(Subtree::from_vec(idx, &bytes)?)),
+                None => out.push(None),
+            }
+        }
+        Ok(out)
+    }
+
+    fn set_subtree(&self, subtree: &Subtree) -> Result<(), StorageError> {
+        let index = subtree.root_index();
+        let cf = self.subtree_cf(index);
+        let key = Self::subtree_db_key(index);
+        let data = subtree.to_vec();
+
+        self.db.put_cf(cf, key, data)?;
+        Ok(())
+    }
+
+    fn set_subtrees(&self, subtrees: Vec<Subtree>) -> Result<(), StorageError> {
+        let mut batch = WriteBatch::default();
+
+        for subtree in subtrees {
+            let index = subtree.root_index();
+            let cf = self.subtree_cf(index);
+            let key = Self::subtree_db_key(index);
+            let data = subtree.to_vec();
+            batch.put_cf(cf, key, data);
+        }
+
+        self.db.write(batch)?;
+        Ok(())
+    }
+
+    fn remove_subtree(&self, index: NodeIndex) -> Result<(), StorageError> {
+        let cf = self.subtree_cf(index);
+        let key = Self::subtree_db_key(index);
+        self.db.delete_cf(cf, key)?;
+        Ok(())
+    }
+
+    fn get_inner_node(&self, index: NodeIndex) -> Result<Option<InnerNode>, StorageError> {
+        let subtree = self.get_subtree(index)?;
+        Ok(subtree.and_then(|s| s.get_inner_node(index)))
+    }
+
+    fn set_inner_node(
+        &self,
+        index: NodeIndex,
+        node: InnerNode,
+    ) -> Result<Option<InnerNode>, StorageError> {
+        let mut subtree = self.get_subtree(index)?.unwrap_or_else(|| Subtree::new(index));
+        let old = subtree.insert_inner_node(index, node);
+        self.set_subtree(&subtree)?;
+        Ok(old)
+    }
+
+    fn remove_inner_node(&self, index: NodeIndex) -> Result<Option<InnerNode>, StorageError> {
+        let Some(mut subtree) = self.get_subtree(index)? else {
+            return Ok(None);
+        };
+        let old = subtree.remove_inner_node(index);
+        if subtree.is_empty() {
+            self.remove_subtree(index)?;
+        } else {
+            self.set_subtree(&subtree)?;
+        }
+        Ok(old)
+    }
+
+    fn apply(&self, updates: StorageUpdates) -> Result<(), StorageError> {
+        let StorageUpdateParts {
+            new_root,
+            leaf_updates,
+            subtree_updates,
+            leaf_count_delta,
+            entry_count_delta,
+        } = updates.into_parts();
+
+        let mut batch = WriteBatch::default();
+
+        let leaves_cf = self.cf_handle(LEAVES_CF)?;
+        let metadata_cf = self.cf_handle(METADATA_CF)?;
+        let depth24_cf = self.cf_handle(DEPTH_24_CF)?;
+
+        // Process leaf updates
+        for (index, maybe_leaf) in leaf_updates {
+            let key = Self::index_db_key(index);
+            match maybe_leaf {
+                Some(leaf) => batch.put_cf(leaves_cf, key, leaf.to_bytes()),
+                None => batch.delete_cf(leaves_cf, key),
+            }
+        }
+
+        // Process subtree updates
+        let subtree_ops: Result<Vec<_>, StorageError> = subtree_updates
+            .into_iter()
+            .map(|update| {
+                let (index, maybe_bytes, depth24_op) = match update {
+                    SubtreeUpdate::Set(subtree) => {
+                        let index = subtree.root_index();
+                        let bytes = subtree.to_vec();
+                        let depth24_op = if index.depth() == 24 {
+                            let hash_key = Self::index_db_key(index.value());
+                            let root_hash = subtree.root_hash();
+                            Some((hash_key, Some(root_hash.to_bytes())))
+                        } else {
+                            None
+                        };
+                        (index, Some(bytes), depth24_op)
+                    },
+                    SubtreeUpdate::Remove(index) => {
+                        let depth24_op = if index.depth() == 24 {
+                            let hash_key = Self::index_db_key(index.value());
+                            Some((hash_key, None))
+                        } else {
+                            None
+                        };
+                        (index, None, depth24_op)
+                    },
+                };
+
+                let key = Self::subtree_db_key(index);
+                let subtrees_cf = self.subtree_cf(index);
+
+                Ok((subtrees_cf, key, maybe_bytes, depth24_op))
+            })
+            .collect();
+
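+        // All changes (leaves, subtrees, the depth-24 cache, counts, and the new root) go
+        // into one `WriteBatch`, so the whole update commits atomically.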
+        for (subtrees_cf, key, maybe_bytes, depth24_op) in subtree_ops? {
+            match maybe_bytes {
+                Some(bytes) => batch.put_cf(subtrees_cf, key, bytes),
+                None => batch.delete_cf(subtrees_cf, key),
+            }
+
+            if let Some((hash_key, maybe_hash_bytes)) = depth24_op {
+                match maybe_hash_bytes {
+                    Some(hash_bytes) => batch.put_cf(depth24_cf, hash_key, hash_bytes),
+                    None => batch.delete_cf(depth24_cf, hash_key),
+                }
+            }
+        }
+
+        if leaf_count_delta != 0 || entry_count_delta != 0 {
+            let current_leaf_count = self.leaf_count()?;
+            let current_entry_count = self.entry_count()?;
+
+            let new_leaf_count = current_leaf_count.saturating_add_signed(leaf_count_delta);
+            let new_entry_count = current_entry_count.saturating_add_signed(entry_count_delta);
+
+            batch.put_cf(metadata_cf, LEAF_COUNT_KEY, new_leaf_count.to_be_bytes());
+            batch.put_cf(metadata_cf, ENTRY_COUNT_KEY, new_entry_count.to_be_bytes());
+        }
+
+        batch.put_cf(metadata_cf, ROOT_KEY, new_root.to_bytes());
+
+        let mut write_opts = rocksdb::WriteOptions::default();
+        write_opts.set_sync(false);
+        self.db.write_opt(batch, &write_opts)?;
+
+        Ok(())
+    }
+
+    fn iter_leaves(&self) -> Result<Box<dyn Iterator<Item = (u64, SmtLeaf)> + '_>, StorageError> {
+        let cf = self.cf_handle(LEAVES_CF)?;
+        let mut read_opts = ReadOptions::default();
+        read_opts.set_total_order_seek(true);
+        let db_iter = self.db.iterator_cf_opt(cf, read_opts, IteratorMode::Start);
+
+        Ok(Box::new(RocksDbDirectLeafIterator { iter: db_iter }))
+    }
+
+    fn iter_subtrees(&self) -> Result<Box<dyn Iterator<Item = Subtree> + '_>, StorageError> {
+        const SUBTREE_CFS: [&str; 5] =
+            [SUBTREE_24_CF, SUBTREE_32_CF, SUBTREE_40_CF, SUBTREE_48_CF, SUBTREE_56_CF];
+
+        let mut cf_handles = Vec::new();
+        for cf_name in SUBTREE_CFS {
+            cf_handles.push(self.cf_handle(cf_name)?);
+        }
+
+        Ok(Box::new(RocksDbSubtreeIterator::new(&self.db, cf_handles)))
+    }
+
+    fn get_depth24(&self) -> Result<Vec<(u64, Word)>, StorageError> {
+        let cf = self.cf_handle(DEPTH_24_CF)?;
+        let iter = self.db.iterator_cf(cf, IteratorMode::Start);
+        let mut hashes = Vec::new();
+
+        for item in iter {
+            let (key_bytes, value_bytes) = item?;
+
+            let index = index_from_key_bytes(&key_bytes)?;
+            let hash = Word::read_from_bytes(&value_bytes)?;
+
+            hashes.push((index, hash));
+        }
+
+        Ok(hashes)
+    }
+}
+
+impl Drop for RocksDbStorage {
+    fn drop(&mut self) {
+        if let Err(e) = self.sync() {
+            panic!("failed to flush RocksDB on drop: {e}");
+        }
+    }
+}
+
+// ITERATORS
+// --------------------------------------------------------------------------------------------
+
+struct RocksDbDirectLeafIterator<'a> {
+    iter: DBIteratorWithThreadMode<'a, DB>,
+}
+
+impl Iterator for RocksDbDirectLeafIterator<'_> {
+    type Item = (u64, SmtLeaf);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.iter.find_map(|result| {
+            let (key_bytes, value_bytes) = result.ok()?;
+            let leaf_idx = index_from_key_bytes(&key_bytes).ok()?;
+            let leaf = SmtLeaf::read_from_bytes(&value_bytes).ok()?;
+            Some((leaf_idx, leaf))
+        })
+    }
+}
+
+struct RocksDbSubtreeIterator<'a> {
+    db: &'a DB,
+    cf_handles: Vec<&'a rocksdb::ColumnFamily>,
+    current_cf_index: usize,
+    current_iter: Option<DBIteratorWithThreadMode<'a, DB>>,
+}
+
+impl<'a> RocksDbSubtreeIterator<'a> {
+    fn new(db: &'a DB, cf_handles: Vec<&'a rocksdb::ColumnFamily>) -> Self {
+        let mut iterator = Self {
+            db,
+            cf_handles,
+            current_cf_index: 0,
+            current_iter: None,
+        };
+        iterator.advance_to_next_cf();
+        iterator
+    }
+
+    fn advance_to_next_cf(&mut self) {
+        if self.current_cf_index < self.cf_handles.len() {
+            let cf = self.cf_handles[self.current_cf_index];
+            let mut read_opts = ReadOptions::default();
+            read_opts.set_total_order_seek(true);
+            self.current_iter = Some(self.db.iterator_cf_opt(cf, read_opts, IteratorMode::Start));
+        } else {
+            self.current_iter = None;
+        }
+    }
+
+    fn try_next_from_iter(
+        iter: &mut DBIteratorWithThreadMode<'_, DB>,
+        cf_index: usize,
+    ) -> Option<Subtree> {
+        iter.find_map(|result| {
+            let (key_bytes, value_bytes) = result.ok()?;
+            let depth = 24 + (cf_index * 8) as u8;
+
+            let node_idx = subtree_root_from_key_bytes(&key_bytes, depth).ok()?;
+            Subtree::from_vec(node_idx, &value_bytes).ok()
+        })
+    }
+}
+
+impl Iterator for RocksDbSubtreeIterator<'_> {
+    type Item = Subtree;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            let iter = self.current_iter.as_mut()?;
+
+            if let Some(subtree) = Self::try_next_from_iter(iter, self.current_cf_index) {
+                return Some(subtree);
+            }
+
+            self.current_cf_index += 1;
+            self.advance_to_next_cf();
+
+            self.current_iter.as_ref()?;
+        }
+    }
+}
+
+// ROCKSDB CONFIGURATION
+// --------------------------------------------------------------------------------------------
+
+/// Configuration for RocksDB storage used by the Sparse Merkle Tree implementation.
+#[derive(Debug, Clone)]
+pub struct RocksDbConfig {
+    pub(crate) path: PathBuf,
+    pub(crate) cache_size: usize,
+    pub(crate) max_open_files: i32,
+}
+
+impl RocksDbConfig {
+    /// Creates a new `RocksDbConfig` with the given database path and default settings
+    /// (a 1 GiB block cache and at most 512 open files).
+    pub fn new<P: Into<PathBuf>>(path: P) -> Self {
+        Self {
+            path: path.into(),
+            cache_size: 1 << 30,
+            max_open_files: 512,
+        }
+    }
+
+    /// Sets the block cache size for RocksDB.
+    pub fn with_cache_size(mut self, size: usize) -> Self {
+        self.cache_size = size;
+        self
+    }
+
+    /// Sets the maximum number of files that RocksDB can have open simultaneously.
+    pub fn with_max_open_files(mut self, count: i32) -> Self {
+        self.max_open_files = count;
+        self
+    }
+}
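+
+// Illustrative configuration (the path is a placeholder, not one the node defines):
+//
+//     let cfg = RocksDbConfig::new("/var/lib/miden/smt")
+//         .with_cache_size(2 << 30)      // 2 GiB block cache
+//         .with_max_open_files(1024);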
+
+// SUBTREE DB KEY
+// --------------------------------------------------------------------------------------------
+
+#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)]
+pub(crate) struct KeyBytes {
+    bytes: [u8; 8],
+    len: u8,
+}
+
+impl KeyBytes {
+    #[inline(always)]
+    pub fn new(value: u64, keep: usize) -> Self {
+        debug_assert!((3..=7).contains(&keep));
+        let bytes = value.to_be_bytes();
+        debug_assert!(bytes[..8 - keep].iter().all(|&b| b == 0));
+        Self { bytes, len: keep as u8 }
+    }
+
+    #[inline(always)]
+    pub fn as_slice(&self) -> &[u8] {
+        &self.bytes[8 - self.len as usize..]
+    }
+}
+
+impl AsRef<[u8]> for KeyBytes {
+    #[inline(always)]
+    fn as_ref(&self) -> &[u8] {
+        self.as_slice()
+    }
+}
+
+// HELPERS
+// --------------------------------------------------------------------------------------------
+
+fn index_from_key_bytes(key_bytes: &[u8]) -> Result<u64, StorageError> {
+    if key_bytes.len() != 8 {
+        return Err(StorageError::BadKeyLen { expected: 8, found: key_bytes.len() });
+    }
+    let mut arr = [0u8; 8];
+    arr.copy_from_slice(key_bytes);
+    Ok(u64::from_be_bytes(arr))
+}
+
+#[inline(always)]
+fn subtree_root_from_key_bytes(key_bytes: &[u8], depth: u8) -> Result<NodeIndex, StorageError> {
+    let expected = match depth {
+        24 => 3,
+        32 => 4,
+        40 => 5,
+        48 => 6,
+        56 => 7,
+        d => return Err(StorageError::Unsupported(format!("unsupported subtree depth {d}"))),
+    };
+
+    if key_bytes.len() != expected {
+        return Err(StorageError::BadSubtreeKeyLen { depth, expected, found: key_bytes.len() });
+    }
+    let mut buf = [0u8; 8];
+    buf[8 - expected..].copy_from_slice(key_bytes);
+    let value = u64::from_be_bytes(buf);
+    Ok(NodeIndex::new_unchecked(depth, value))
+}
+
+#[inline(always)]
+fn cf_for_depth(depth: u8) -> &'static str {
+    match depth {
+        24 => SUBTREE_24_CF,
+        32 => SUBTREE_32_CF,
+        40 => SUBTREE_40_CF,
+        48 => SUBTREE_48_CF,
+        56 => SUBTREE_56_CF,
+        _ => panic!("unsupported subtree depth: {depth}"),
+    }
+}
+
+impl From<rocksdb::Error> for StorageError {
+    fn from(e: rocksdb::Error) -> Self {
+        StorageError::Backend(Box::new(e))
+    }
+}