diff --git a/.github/workflows/msrv.yml b/.github/workflows/msrv.yml index f57ec9616..727f84e4a 100644 --- a/.github/workflows/msrv.yml +++ b/.github/workflows/msrv.yml @@ -25,7 +25,7 @@ jobs: - uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 - name: Install cargo-msrv - run: cargo install cargo-msrv + run: cargo install --locked cargo-msrv - name: Cache rustup toolchains run: rustup update - name: Check MSRV for each workspace member diff --git a/CHANGELOG.md b/CHANGELOG.md index a0a440378..cae0fc3b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.19.4 (unreleased) + +- Backport `LargeSmtForest` with its `InMemory` backend ([#834](https://github.com/0xMiden/crypto/pull/834)). + ## 0.19.3 (2026-01-21) - Fix: don't disable WAL during subtree construction in `LargeSmt`'s RocksDB backend ([#792](https://github.com/0xMiden/crypto/pull/792)). diff --git a/miden-crypto/src/lib.rs b/miden-crypto/src/lib.rs index 8e820de0a..f41062a2e 100644 --- a/miden-crypto/src/lib.rs +++ b/miden-crypto/src/lib.rs @@ -2,7 +2,6 @@ #[macro_use] extern crate alloc; - #[cfg(feature = "std")] extern crate std; @@ -38,6 +37,8 @@ pub type Map = hashbrown::HashMap; #[cfg(feature = "hashmaps")] pub use hashbrown::hash_map::Entry as MapEntry; +#[cfg(feature = "hashmaps")] +pub use hashbrown::hash_map::IntoIter as MapIntoIter; /// An alias for a key-value map. /// @@ -48,6 +49,22 @@ pub type Map = alloc::collections::BTreeMap; #[cfg(not(feature = "hashmaps"))] pub use alloc::collections::btree_map::Entry as MapEntry; +#[cfg(not(feature = "hashmaps"))] +pub use alloc::collections::btree_map::IntoIter as MapIntoIter; + +/// An alias for a simple set. +/// +/// By default, this is an alias for the [`alloc::collections::BTreeSet`]. However, when the +/// `hashmaps` feature is enabled, this becomes an alias for hashbrown's HashSet. +#[cfg(feature = "hashmaps")] +pub type Set = hashbrown::HashSet; + +/// An alias for a simple set. 
+/// +/// By default, this is an alias for the [`alloc::collections::BTreeSet`]. However, when the +/// `hashmaps` feature is enabled, this becomes an alias for hashbrown's HashSet. +#[cfg(not(feature = "hashmaps"))] +pub type Set = alloc::collections::BTreeSet; // CONSTANTS // ================================================================================================ diff --git a/miden-crypto/src/merkle/smt/full/leaf.rs b/miden-crypto/src/merkle/smt/full/leaf.rs index 031ade9a6..3561aa955 100644 --- a/miden-crypto/src/merkle/smt/full/leaf.rs +++ b/miden-crypto/src/merkle/smt/full/leaf.rs @@ -175,6 +175,10 @@ impl SmtLeaf { self.clone().into_elements() } + pub fn to_entries(&self) -> impl Iterator { + self.entries().iter().map(|(k, v)| (k, v)) + } + /// Converts a leaf to a list of field elements pub fn into_elements(self) -> Vec { self.into_entries().into_iter().flat_map(kv_to_elements).collect() diff --git a/miden-crypto/src/merkle/smt/full/mod.rs b/miden-crypto/src/merkle/smt/full/mod.rs index 2f2662842..d9b04e4bb 100644 --- a/miden-crypto/src/merkle/smt/full/mod.rs +++ b/miden-crypto/src/merkle/smt/full/mod.rs @@ -259,6 +259,11 @@ impl Smt { >::get_leaf(self, key) } + /// Returns the leaf corresponding to the provided `index`. + pub fn get_leaf_by_index(&self, index: LeafIndex) -> Option { + self.leaves.get(&index.position()).cloned() + } + /// Returns the value associated with `key` pub fn get_value(&self, key: &Word) -> Word { >::get_value(self, key) diff --git a/miden-crypto/src/merkle/smt/large_forest/backend/memory/mod.rs b/miden-crypto/src/merkle/smt/large_forest/backend/memory/mod.rs new file mode 100644 index 000000000..8e8ca502a --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/backend/memory/mod.rs @@ -0,0 +1,399 @@ +//! This module contains a non-persistent, in-memory [`Backend`] for the SMT forest. It is +//! non-parallel and is not intended to be such, allowing its use on effectively any platform where +//! 
this library can be built. +//! +//! # Performance + +mod tests; + +use alloc::{ + collections::{BTreeMap, BTreeSet}, + vec::Vec, +}; + +use crate::{ + EMPTY_WORD, Map, Word, + merkle::smt::{ + LeafIndex, SMT_DEPTH, Smt, SmtProof, VersionId, + large_forest::{ + Backend, + backend::{BackendError, MutationSet, Result}, + operation::{SmtForestUpdateBatch, SmtUpdateBatch}, + root::{LineageId, TreeEntry, TreeWithRoot}, + }, + }, +}; + +// IN-MEMORY BACKEND +// ================================================================================================ + +/// The in-memory backend itself. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct InMemoryBackend { + /// The storage for the full trees that are stored in this backend, always guaranteed to be the + /// latest tree in the lineage. + trees: Map, +} + +impl InMemoryBackend { + /// Constructs a new instance of the in-memory backend. + pub fn new() -> Self { + let trees = Map::default(); + Self { trees } + } +} + +// BACKEND TRAIT +// ================================================================================================ + +impl Backend for InMemoryBackend { + /// Returns an opening for the specified `key` in the SMT with the specified `lineage`. + /// + /// # Errors + /// + /// - [`BackendError::UnknownLineage`] If the provided `lineage` is one not known by this + /// backend. + fn open(&self, lineage: LineageId, key: Word) -> Result { + let tree = self.trees.get(&lineage).ok_or(BackendError::UnknownLineage(lineage))?; + Ok(tree.tree.open(&key)) + } + + /// Returns the value associated with the provided `key` in the SMT with the specified + /// `lineage`, or [`None`] if no such value exists. + /// + /// # Errors + /// + /// - [`BackendError::UnknownLineage`] If the provided `lineage` is one not known by this + /// backend. 
+    fn get(&self, lineage: LineageId, key: Word) -> Result> {
+        let Some(data) = self.trees.get(&lineage) else {
+            return Err(BackendError::UnknownLineage(lineage));
+        };
+
+        // An `EMPTY_WORD` result from the tree is reported to the caller as `None`.
+        let stored = data.tree.get_value(&key);
+        Ok(Some(stored).filter(|word| *word != EMPTY_WORD))
+    }
+
+    /// Looks up the version currently recorded for the tree in `lineage`.
+    ///
+    /// # Errors
+    ///
+    /// - [`BackendError::UnknownLineage`] if `lineage` has never been added to this backend.
+    fn version(&self, lineage: LineageId) -> Result {
+        self.trees
+            .get(&lineage)
+            .map(|data| data.version)
+            .ok_or(BackendError::UnknownLineage(lineage))
+    }
+
+    /// Produces an iterator over the identifiers of every lineage stored in the backend.
+    fn lineages(&self) -> Result> {
+        let known = self.trees.keys().cloned();
+        Ok(known)
+    }
+
+    /// Produces an iterator describing every stored tree by lineage, version and root.
+    ///
+    /// No particular iteration order is promised.
+    fn trees(&self) -> Result> {
+        let summaries = self
+            .trees
+            .iter()
+            .map(|(id, data)| TreeWithRoot::new(*id, data.version, data.tree.root()));
+        Ok(summaries)
+    }
+
+    /// Counts the (key-value) entries held by the tree with the specified `lineage`.
+    ///
+    /// # Errors
+    ///
+    /// - [`BackendError::UnknownLineage`] if `lineage` has never been added to this backend.
+    fn entry_count(&self, lineage: LineageId) -> Result {
+        match self.trees.get(&lineage) {
+            Some(data) => Ok(data.tree.num_entries()),
+            None => Err(BackendError::UnknownLineage(lineage)),
+        }
+    }
+
+    /// Returns an iterator that yields the populated (key-value) entries for the specified
+    /// `lineage`.
+    ///
+    /// This iterator yields entries in an order such that they are sorted by their leaf index,
+    /// within which entries that share a leaf index are sorted by key.
+    ///
+    /// # Errors
+    ///
+    /// - [`BackendError::UnknownLineage`] If the provided `lineage` is one not known by this
+    ///   backend.
+    fn entries(&self, lineage: LineageId) -> Result> {
+        // The iterator borrows the lineage's tree straight out of the backend's storage.
+        let tree = self.trees.get(&lineage).ok_or(BackendError::UnknownLineage(lineage))?;
+        Ok(InMemoryBackendEntriesIterator::new(&tree.tree))
+    }
+
+    /// Adds the provided `lineage` to the forest.
+    ///
+    /// The new tree is built by applying `updates` to an empty tree and is stored under `lineage`
+    /// together with the given `version`. The returned value is constructed from the lineage, the
+    /// version and the root of the newly stored tree.
+    ///
+    /// # Errors
+    ///
+    /// - [`BackendError::DuplicateLineage`] if the provided `lineage` is the same as an
+    ///   already-known lineage. No data is changed in this case.
+    /// - [`BackendError::Merkle`] if the provided `updates` cannot be applied to the empty tree.
+    fn add_lineage(
+        &mut self,
+        lineage: LineageId,
+        version: VersionId,
+        updates: SmtUpdateBatch,
+    ) -> Result {
+        // Returning this in the case of a duplicate lineage is required by the method contract on
+        // the `Backend` trait.
+        if self.trees.contains_key(&lineage) {
+            return Err(BackendError::DuplicateLineage(lineage));
+        }
+
+        // The tree is built locally first, so `self.trees` is only touched once it is complete.
+        let mut tree = Smt::new();
+
+        // A failure to compute mutations is a failure derived from user input, so we forward it as
+        // appropriate.
+        let mutations = tree.compute_mutations(updates.into_iter().map(|o| o.into()))?;
+
+        // If computation of the mutations has succeeded but the application fails, then this should
+        // be reported as an internal error, not a merkle error, to allow the caller to decide what
+        // to do.
+        tree.apply_mutations(mutations).map_err(BackendError::internal_from)?;
+
+        // The following has had its preconditions checked, so we can change the state without
+        // worrying about consistency. The root is read before the tree is moved into the map.
+        let tree_data = TreeData { version, tree };
+        let root = tree_data.tree.root();
+        self.trees.insert(lineage, tree_data);
+        Ok(TreeWithRoot::new(lineage, version, root))
+    }
+
+    /// Performs the provided `updates` on the tree with the specified `lineage`, returning the
+    /// mutation set that will revert the changes made to the tree.
+    ///
+    /// At most one new root is added to the backend for the entire batch.
+    ///
+    /// If `updates` do not change the tree, nothing is applied, the stored version is left as it
+    /// is, and the (empty) mutation set is returned unchanged.
+    ///
+    /// # Errors
+    ///
+    /// - [`BackendError::Merkle`] if the application of `updates` to the tree fails for any reason.
+    /// - [`BackendError::UnknownLineage`] If the provided `lineage` is one not known by this
+    ///   backend.
+    fn update_tree(
+        &mut self,
+        lineage: LineageId,
+        new_version: VersionId,
+        updates: SmtUpdateBatch,
+    ) -> Result {
+        // The method contract requires raising this error in the case that `lineage` is unknown to
+        // the backend.
+        let tree_data =
+            self.trees.get_mut(&lineage).ok_or(BackendError::UnknownLineage(lineage))?;
+        let tree = &mut tree_data.tree;
+
+        // We compute the mutations as a precondition check, which will leave the underlying tree in
+        // the same state if anything errors. Any error this yields is considered to be derived from
+        // user-input and hence is forwarded as-is.
+        let mutations = tree.compute_mutations(updates.into_iter().map(|o| o.into()))?;
+
+        // The invariants on this method given by the `Backend` trait state that no new allocations
+        // should be performed if the updates do not change the tree. As a result, we can
+        // short-circuit without even trying to apply anything.
+        if mutations.is_empty() {
+            // As the reverse of an empty mutation set is also an empty mutation set, we can just
+            // return the one we already have. Note that the version is NOT bumped on this path.
+            return Ok(mutations);
+        }
+
+        // Any failure to apply the mutations here is considered an internal error, so we transform
+        // it as such.
+        let reversion_set = tree
+            .apply_mutations_with_reversion(mutations)
+            .map_err(BackendError::internal_from)?;
+
+        // The tree has been updated successfully, so the recorded version can now follow it. Doing
+        // this last keeps the version untouched on every error path above.
+        tree_data.version = new_version;
+
+        Ok(reversion_set)
+    }
+
+    /// Performs the provided `updates` on the entire forest, returning the mutation
+    /// sets that would reverse the changes to each tree in the forest.
+    ///
+    /// The order of application of these mutations is unspecified.
+    ///
+    /// # Errors
+    ///
+    /// - [`BackendError::Merkle`] if any set of operations on any lineage in the batch fail for any
+    ///   reason.
+    /// - [`BackendError::UnknownLineage`] if any lineage in the `updates` is not known by the
+    ///   backend.
+    ///
+    /// # Panics
+    ///
+    /// - If a tree that has been checked to be present is not present upon later access.
+    fn update_forest(
+        &mut self,
+        new_version: VersionId,
+        updates: SmtForestUpdateBatch,
+    ) -> Result> {
+        // Phase 1: check that all lineages referred to in the batch of `updates` are valid,
+        // failing early with an error if need be. Nothing has been modified at this point.
+        let updates = updates
+            .into_iter()
+            .map(|(lineage, ops)| {
+                if !self.trees.contains_key(&lineage) {
+                    return Err(BackendError::UnknownLineage(lineage));
+                }
+
+                Ok((lineage, ops))
+            })
+            .collect::>>()?;
+
+        // Phase 2: compute all the relevant mutations to each tree, also failing with an error
+        // where relevant. The trees are only read here (via `self.trees.get`), so an error in this
+        // phase still leaves the forest as it was.
+        let mutations = updates
+            .into_iter()
+            .map(|(lineage, ops)| {
+                let tree = self.trees.get(&lineage).expect("Tree known to be present was not");
+                let mutations = tree.tree.compute_mutations(ops.into_iter().map(|o| o.into()))?;
+                Ok((lineage, mutations))
+            })
+            .collect::>>()?;
+
+        // Phase 3: with the preconditions checked, apply the changes tree by tree.
+        // NOTE(review): if applying to one tree fails, the `?` below returns early and any trees
+        // applied before it appear to keep their new state and version -- confirm this is
+        // acceptable for what is reported as an internal error.
+        let reversion_sets = mutations
+            .into_iter()
+            .map(|(lineage, mutations)| {
+                if mutations.is_empty() {
+                    // The inverse of empty mutations is empty mutations. The tree's version is
+                    // left untouched in this case.
+                    Ok((lineage, mutations))
+                } else {
+                    let tree =
+                        self.trees.get_mut(&lineage).expect("Tree known to be present was not");
+                    let reversion = tree
+                        .tree
+                        .apply_mutations_with_reversion(mutations)
+                        .map_err(BackendError::internal_from)?;
+                    // Only trees that actually changed are moved to `new_version`.
+                    tree.version = new_version;
+                    Ok((lineage, reversion))
+                }
+            })
+            .collect::>>()?;
+
+        Ok(reversion_sets)
+    }
+}
+
+// TRAIT IMPLEMENTATIONS
+// ================================================================================================
+
+impl Default for InMemoryBackend {
+    /// Equivalent to [`InMemoryBackend::new`].
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+// TREE DATA
+// ================================================================================================
+
+/// A container for the data associated with the latest tree in a given lineage within the backend.
+#[derive(Clone, Debug, Eq, PartialEq)]
+struct TreeData {
+    /// The version currently recorded for `tree`.
+    version: VersionId,
+    /// The full tree itself.
+    tree: Smt,
+}
+
+// ENTRIES ITERATOR
+// ================================================================================================
+
+/// An iterator over entries in a given tree in the backend.
+///
+/// It is guaranteed to yield entries such that they are sorted by their leaf index, and then for
+/// entries that share the same leaf index they are sorted by their key. It should never yield
+/// entries that have `value == EMPTY_WORD`.
+#[derive(Clone, Debug)]
+struct InMemoryBackendEntriesIterator<'backend> {
+    /// A reference to the tree over which the iterator is running.
+    tree: &'backend Smt,
+
+    /// The leaves that are yet to have their entries iterated over.
+    remaining_leaves: BTreeSet>,
+
+    /// The current iteration state of the iterator.
+    state: InMemoryBackendEntriesIteratorState,
+}
+impl<'backend> InMemoryBackendEntriesIterator<'backend> {
+    /// Constructs a new iterator over the entries for a tree.
+    pub fn new(tree: &'backend Smt) -> Self {
+        // A `BTreeSet` always iterates in ascending order, so collecting the leaf indices into
+        // `remaining_leaves` is all that is needed for `pop_first` to visit the leaves in
+        // leaf-index order; no separate sortedness check is required.
+        Self {
+            tree,
+            remaining_leaves: tree.leaves().map(|(ix, _)| ix).collect(),
+            state: InMemoryBackendEntriesIteratorState::NotInLeaf,
+        }
+    }
+}
+
+impl<'backend> Iterator for InMemoryBackendEntriesIterator<'backend> {
+    type Item = TreeEntry;
+
+    /// Yields the next entry, visiting leaves in index order and, within a leaf, entries in key
+    /// order. Returns `None` once every leaf has been exhausted.
+    ///
+    /// # Panics
+    ///
+    /// - If an index taken from the tree has no leaf, or a leaf has no entries.
+    fn next(&mut self) -> Option<Self::Item> {
+        match &mut self.state {
+            InMemoryBackendEntriesIteratorState::NotInLeaf => {
+                // With no leaf in progress we move on to the next one; running out of leaves ends
+                // the iteration.
+                let ix = self.remaining_leaves.pop_first()?;
+                let leaf = self
+                    .tree
+                    .get_leaf_by_index(ix)
+                    .expect("Leaf should exist for index derived from tree");
+
+                // Collecting into a `BTreeMap` orders the leaf's entries by key as required. A
+                // leaf that came from the tree's leaf iterator is expected to hold at least one.
+                let mut entries: BTreeMap<_, _> =
+                    leaf.to_entries().map(|(k, v)| (*k, *v)).collect();
+                let (key, value) = entries.pop_first().expect(
+                    "The source iterator should have provided only leaves with at least one entry.",
+                );
+
+                // Only remain inside the leaf if it still has entries left to yield, so that
+                // `InEntry` never holds an empty map.
+                if !entries.is_empty() {
+                    self.state =
+                        InMemoryBackendEntriesIteratorState::InEntry { remaining_entries: entries };
+                }
+
+                Some(TreeEntry { key, value })
+            },
+            InMemoryBackendEntriesIteratorState::InEntry { remaining_entries } => {
+                // The front of the map is the next entry to yield.
+                let (key, value) = remaining_entries
+                    .pop_first()
+                    .expect("InEntry implies there should be at least one entry");
+
+                // Once the leaf is exhausted the following call has to start a new leaf.
+                if remaining_entries.is_empty() {
+                    self.state = InMemoryBackendEntriesIteratorState::NotInLeaf;
+                }
+
+                Some(TreeEntry { key, value })
+            },
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+enum InMemoryBackendEntriesIteratorState {
+    /// The iterator is currently not in a leaf.
+    NotInLeaf,
+
+    /// The iterator is part-way through a leaf.
+    InEntry {
+        /// The entries of the current leaf that have not been yielded yet; never empty.
+        remaining_entries: BTreeMap<Word, Word>,
+    },
+}
diff --git a/miden-crypto/src/merkle/smt/large_forest/backend/memory/tests.rs b/miden-crypto/src/merkle/smt/large_forest/backend/memory/tests.rs
new file mode 100644
index 000000000..dc424fc6e
--- /dev/null
+++ b/miden-crypto/src/merkle/smt/large_forest/backend/memory/tests.rs
@@ -0,0 +1,581 @@
+#![cfg(test)]
+//! This module contains the tests for the in-memory backend for the SMT forest.
+//!
+//! Rather than hard-code specific values for the trees, these tests rely on the correctness of the
+//! existing [`Smt`] implementation, comparing the results of the in-memory backend against it
+//! wherever relevant.
+
+use assert_matches::assert_matches;
+use itertools::Itertools;
+
+use crate::{
+    EMPTY_WORD, Word,
+    merkle::smt::{
+        Backend, BackendError, Smt, SmtForestUpdateBatch, SmtUpdateBatch, VersionId,
+        large_forest::{
+            InMemoryBackend,
+            backend::Result,
+            root::{LineageId, TreeEntry, TreeWithRoot},
+        },
+    },
+    rand::test_utils::ContinuousRng,
+};
+
+// CONSTRUCTION
+// ================================================================================================
+
+#[test]
+fn new() -> Result<()> {
+    let backend = InMemoryBackend::new();
+
+    // A newly created in-memory backend should not know about any lineages.
+ assert_eq!(backend.lineages()?.count(), 0); + + // It should similarly not know about any trees. + assert_eq!(backend.trees()?.count(), 0); + + Ok(()) +} + +// BACKEND TRAIT +// ================================================================================================ + +#[test] +fn open() -> Result<()> { + let mut backend = InMemoryBackend::new(); + let mut rng = ContinuousRng::new([0x42; 32]); + + // When we `open` for a lineage that has never been added to the backend, it should yield an + // error. + let ne_lineage: LineageId = rng.value(); + let random_key: Word = rng.value(); + let result = backend.open(ne_lineage, random_key); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::UnknownLineage(l) if l == ne_lineage); + + // Let's now add a tree with a few items in it to the forest. + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let key_1: Word = rng.value(); + let value_1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2: Word = rng.value(); + + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1, value_1); + operations.add_insert(key_2, value_2); + + backend.add_lineage(lineage_1, version_1, operations)?; + + // We also want to match this against a reference merkle tree to check correctness, so let's + // create that now. + let mut tree = Smt::new(); + tree.insert(key_1, value_1)?; + tree.insert(key_2, value_2)?; + + // Let's first get the backend's opening for a key that hasn't been inserted. This should still + // return properly, and should match the opening provided by the reference tree. + let backend_result = backend.open(lineage_1, random_key)?; + let smt_result = tree.open(&random_key); + assert_eq!(backend_result, smt_result); + + // It should also generate correct openings for both of the inserted values. 
+ assert_eq!(backend.open(lineage_1, key_1)?, tree.open(&key_1)); + assert_eq!(backend.open(lineage_1, key_2)?, tree.open(&key_2)); + + Ok(()) +} + +#[test] +fn get() -> Result<()> { + let mut backend = InMemoryBackend::new(); + let mut rng = ContinuousRng::new([0x71; 32]); + + // When we `get` for a lineage that has never been added to the backend, it should yield an + // error. + let ne_lineage: LineageId = rng.value(); + let random_key: Word = rng.value(); + let result = backend.get(ne_lineage, random_key); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::UnknownLineage(l) if l == ne_lineage); + + // Let's now add a tree with a few items in it to the forest. + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let key_1: Word = rng.value(); + let value_1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2: Word = rng.value(); + + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1, value_1); + operations.add_insert(key_2, value_2); + + backend.add_lineage(lineage_1, version_1, operations)?; + + // We also want to match this against a reference merkle tree to check correctness, so let's + // create that now. + let mut tree = Smt::new(); + tree.insert(key_1, value_1)?; + tree.insert(key_2, value_2)?; + + // Let's first get the backend's result for a key that hasn't been inserted. This should return + // `None` in our case. + assert!(backend.get(lineage_1, random_key)?.is_none()); + + // It should also provide correct values for both of the inserted values. + assert_eq!(backend.get(lineage_1, key_1)?.unwrap(), tree.get_value(&key_1)); + assert_eq!(backend.get(lineage_1, key_2)?.unwrap(), tree.get_value(&key_2)); + + Ok(()) +} + +#[test] +fn version() -> Result<()> { + let mut backend = InMemoryBackend::new(); + let mut rng = ContinuousRng::new([0x96; 32]); + + // Getting the version for a lineage that the backend doesn't know about should yield an error. 
+ let ne_lineage: LineageId = rng.value(); + let result = backend.version(ne_lineage); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::UnknownLineage(l) if l == ne_lineage); + + // Let's now shove a tree into the backend. + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let key_1: Word = rng.value(); + let value_1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2: Word = rng.value(); + + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1, value_1); + operations.add_insert(key_2, value_2); + + backend.add_lineage(lineage_1, version_1, operations)?; + + // The forest should return the correct version if asked for the version of the lineage. + assert_eq!(backend.version(lineage_1)?, version_1); + + Ok(()) +} + +#[test] +fn lineages() -> Result<()> { + let mut backend = InMemoryBackend::new(); + let mut rng = ContinuousRng::new([0x91; 32]); + + // Initially there should be no lineages. + assert_eq!(backend.lineages()?.count(), 0); + + // We'll use the same data for each tree here to simplify the test. + let key_1: Word = rng.value(); + let value_1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1, value_1); + operations.add_insert(key_2, value_2); + + let version: VersionId = rng.value(); + + // Let's start by adding one lineage and checking that the iterator contains it. 
+ let lineage_1: LineageId = rng.value(); + backend.add_lineage(lineage_1, version, operations.clone())?; + assert_eq!(backend.lineages()?.count(), 1); + assert!(backend.lineages()?.contains(&lineage_1)); + + // We add another + let lineage_2: LineageId = rng.value(); + backend.add_lineage(lineage_2, version, operations.clone())?; + assert_eq!(backend.lineages()?.count(), 2); + assert!(backend.lineages()?.contains(&lineage_1)); + assert!(backend.lineages()?.contains(&lineage_2)); + + // And yet another + let lineage_3: LineageId = rng.value(); + backend.add_lineage(lineage_3, version, operations.clone())?; + assert_eq!(backend.lineages()?.count(), 3); + assert!(backend.lineages()?.contains(&lineage_1)); + assert!(backend.lineages()?.contains(&lineage_2)); + assert!(backend.lineages()?.contains(&lineage_3)); + + Ok(()) +} + +#[test] +fn trees() -> Result<()> { + let mut backend = InMemoryBackend::new(); + let mut rng = ContinuousRng::new([0x91; 32]); + + // Initially there should be no lineages. + assert_eq!(backend.lineages()?.count(), 0); + + // We need individual trees and versions here to check on the roots, so let's add our first + // tree. + let key_1_1: Word = rng.value(); + let value_1_1: Word = rng.value(); + let key_1_2: Word = rng.value(); + let value_1_2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1_1, value_1_1); + operations.add_insert(key_1_2, value_1_2); + + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + + backend.add_lineage(lineage_1, version_1, operations)?; + + let mut tree_1 = Smt::new(); + tree_1.insert(key_1_1, value_1_1)?; + tree_1.insert(key_1_2, value_1_2)?; + + // With one tree added we should only see one root. + assert_eq!(backend.trees()?.count(), 1); + assert!( + backend + .trees()? + .contains(&TreeWithRoot::new(lineage_1, version_1, tree_1.root())) + ); + + // Let's add another tree. 
+ let key_2_1: Word = rng.value(); + let value_2_1: Word = rng.value(); + let key_2_2: Word = rng.value(); + let value_2_2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_2_1, value_2_1); + operations.add_insert(key_2_2, value_2_2); + + let lineage_2: LineageId = rng.value(); + let version_2: VersionId = rng.value(); + + backend.add_lineage(lineage_2, version_2, operations)?; + + let mut tree_2 = Smt::new(); + tree_2.insert(key_2_1, value_2_1)?; + tree_2.insert(key_2_2, value_2_2)?; + + // With two added we should see two roots. + assert_eq!(backend.trees()?.count(), 2); + assert!( + backend + .trees()? + .contains(&TreeWithRoot::new(lineage_1, version_1, tree_1.root())) + ); + assert!( + backend + .trees()? + .contains(&TreeWithRoot::new(lineage_2, version_2, tree_2.root())) + ); + + // Let's add one more, just as a sanity check. + let key_3_1: Word = rng.value(); + let value_3_1: Word = rng.value(); + let key_3_2: Word = rng.value(); + let value_3_2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_3_1, value_3_1); + operations.add_insert(key_3_2, value_3_2); + + let lineage_3: LineageId = rng.value(); + let version_3: VersionId = rng.value(); + + backend.add_lineage(lineage_3, version_3, operations)?; + + let mut tree_3 = Smt::new(); + tree_3.insert(key_3_1, value_3_1)?; + tree_3.insert(key_3_2, value_3_2)?; + + // With that added, we should see three. + assert_eq!(backend.trees()?.count(), 3); + assert!( + backend + .trees()? + .contains(&TreeWithRoot::new(lineage_1, version_1, tree_1.root())) + ); + assert!( + backend + .trees()? + .contains(&TreeWithRoot::new(lineage_2, version_2, tree_2.root())) + ); + assert!( + backend + .trees()? 
+            .contains(&TreeWithRoot::new(lineage_3, version_3, tree_3.root()))
+    );
+
+    Ok(())
+}
+
+#[test]
+fn entry_count() -> Result<()> {
+    let mut backend = InMemoryBackend::new();
+    let mut rng = ContinuousRng::new([0x67; 32]);
+
+    // It should yield an error for a lineage that doesn't exist.
+    let ne_lineage: LineageId = rng.value();
+    let result = backend.entry_count(ne_lineage);
+    assert!(result.is_err());
+    assert_matches!(result.unwrap_err(), BackendError::UnknownLineage(l) if l == ne_lineage);
+
+    let version: VersionId = rng.value();
+
+    // Let's start by adding a new lineage with an entirely empty tree.
+    let lineage_1: LineageId = rng.value();
+    backend.add_lineage(lineage_1, version, SmtUpdateBatch::default())?;
+
+    // When queried, this should yield zero entries.
+    assert_eq!(backend.entry_count(lineage_1)?, 0);
+
+    // Now let's modify that tree to add entries.
+    let key_1_1: Word = rng.value();
+    let value_1_1: Word = rng.value();
+    let key_1_2: Word = rng.value();
+    let value_1_2: Word = rng.value();
+    let mut operations = SmtUpdateBatch::default();
+    operations.add_insert(key_1_1, value_1_1);
+    operations.add_insert(key_1_2, value_1_2);
+
+    backend.update_tree(lineage_1, version, operations)?;
+
+    // Now if we query we should get two entries.
+    assert_eq!(backend.entry_count(lineage_1)?, 2);
+
+    Ok(())
+}
+
+#[test]
+fn entries() -> Result<()> {
+    let mut backend = InMemoryBackend::new();
+    let mut rng = ContinuousRng::new([0x67; 32]);
+
+    // `entries` itself (not `entry_count`, which has its own test) should yield an error for a
+    // lineage that doesn't exist. The success value is an iterator, so we inspect the error
+    // through `err()` rather than `unwrap_err()`, which would need the iterator to be `Debug`.
+    let ne_lineage: LineageId = rng.value();
+    let result = backend.entries(ne_lineage);
+    assert!(result.is_err());
+    assert_matches!(result.err(), Some(BackendError::UnknownLineage(l)) if l == ne_lineage);
+
+    let version: VersionId = rng.value();
+
+    // If we add an empty lineage, the iterator should yield no items.
+    let lineage_1: LineageId = rng.value();
+    backend.add_lineage(lineage_1, version, SmtUpdateBatch::default())?;
+    assert_eq!(backend.entries(lineage_1)?.count(), 0);
+
+    // So let's add some entries. The third key copies element 3 of the first key so that the
+    // within-leaf ordering checked at the end of the test is exercised as well.
+    let key_1_1: Word = rng.value();
+    let value_1_1: Word = rng.value();
+    let key_1_2: Word = rng.value();
+    let value_1_2: Word = rng.value();
+    let mut key_1_3: Word = rng.value();
+    key_1_3[3] = key_1_1[3];
+    let value_1_3: Word = rng.value();
+    let mut operations = SmtUpdateBatch::default();
+    operations.add_insert(key_1_1, value_1_1);
+    operations.add_insert(key_1_2, value_1_2);
+    operations.add_insert(key_1_3, value_1_3);
+    backend.update_tree(lineage_1, version, operations)?;
+
+    // Now, the iterator should yield the expected three items.
+    assert_eq!(backend.entries(lineage_1)?.count(), 3);
+    assert!(
+        backend
+            .entries(lineage_1)?
+            .contains(&TreeEntry { key: key_1_1, value: value_1_1 }),
+    );
+    assert!(
+        backend
+            .entries(lineage_1)?
+            .contains(&TreeEntry { key: key_1_2, value: value_1_2 }),
+    );
+    assert!(
+        backend
+            .entries(lineage_1)?
+            .contains(&TreeEntry { key: key_1_3, value: value_1_3 }),
+    );
+
+    // Importantly, the iterator should also be sorted in two stages. First by leaf index, and then
+    // by key.
+    assert!(backend.entries(lineage_1)?.is_sorted_by(|l, r| {
+        if l.index() == r.index() {
+            l.key < r.key
+        } else {
+            l.index() < r.index()
+        }
+    }));
+
+    Ok(())
+}
+
+#[test]
+fn add_lineage() -> Result<()> {
+    let mut backend = InMemoryBackend::new();
+    let mut rng = ContinuousRng::new([0x76; 32]);
+    let version: VersionId = rng.value();
+
+    // We should be able to add a lineage without actually changing the empty tree.
+    let lineage_1: LineageId = rng.value();
+    backend.add_lineage(lineage_1, version, SmtUpdateBatch::default())?;
+    assert_eq!(backend.entry_count(lineage_1)?, 0);
+
+    // Adding a lineage with a duplicate lineage identifier should yield an error.
+ let result = backend.add_lineage(lineage_1, version, SmtUpdateBatch::default()); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::DuplicateLineage(l) if l == lineage_1); + + // But we should also be able to add lineages that _contain data_ from the get-go. + let key_2_1: Word = rng.value(); + let value_2_1: Word = rng.value(); + let key_2_2: Word = rng.value(); + let value_2_2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_2_1, value_2_1); + operations.add_insert(key_2_2, value_2_2); + + let lineage_2: LineageId = rng.value(); + backend.add_lineage(lineage_2, version, operations)?; + assert_eq!(backend.entry_count(lineage_2)?, 2); + + Ok(()) +} + +#[test] +fn update_tree() -> Result<()> { + let mut backend = InMemoryBackend::new(); + let mut rng = ContinuousRng::new([0x76; 32]); + + // Updating a lineage that does not exist should result in an error. + let ne_lineage: LineageId = rng.value(); + let result = backend.update_tree(ne_lineage, rng.value(), SmtUpdateBatch::default()); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::UnknownLineage(l) if l == ne_lineage); + + // So let's add an actual lineage. + let key_1_1: Word = rng.value(); + let value_1_1: Word = rng.value(); + let key_1_2: Word = rng.value(); + let value_1_2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1_1, value_1_1); + operations.add_insert(key_1_2, value_1_2); + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + + backend.add_lineage(lineage_1, version_1, operations)?; + + // And check that it agrees with a standard tree. + let mut tree_1 = Smt::new(); + tree_1.insert(key_1_1, value_1_1)?; + tree_1.insert(key_1_2, value_1_2)?; + + assert_eq!(backend.trees()?.count(), 1); + assert!(backend.trees()?.any(|e| e.root() == tree_1.root())); + + // Now let's add another node to the tree! 
Note that reusing the same version does not matter; + // version consistency is enforced by the FOREST and not the backend. + let key_1_3: Word = rng.value(); + let value_1_3: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1_3, value_1_3); + backend.update_tree(lineage_1, version_1, operations)?; + + // And we can check against our other tree for consistency again. + tree_1.insert(key_1_3, value_1_3)?; + assert_eq!(backend.trees()?.count(), 1); + assert!(backend.trees()?.any(|e| e.root() == tree_1.root())); + + // Now let's try a remove operation. + let mut operations = SmtUpdateBatch::default(); + operations.add_remove(key_1_2); + backend.update_tree(lineage_1, version_1, operations)?; + + // And check it against our other tree for consistency. + let mutations = tree_1.compute_mutations([(key_1_2, EMPTY_WORD)])?; + tree_1.apply_mutations(mutations)?; + assert_eq!(backend.trees()?.count(), 1); + assert!(backend.trees()?.any(|e| e.root() == tree_1.root())); + + Ok(()) +} + +#[test] +fn update_forest() -> Result<()> { + let mut backend = InMemoryBackend::new(); + let mut rng = ContinuousRng::new([0x76; 32]); + let version: VersionId = rng.value(); + + // Let's start by adding two trees to the forest. + let lineage_1: LineageId = rng.value(); + let key_1_1: Word = rng.value(); + let value_1_1: Word = rng.value(); + let key_1_2: Word = rng.value(); + let value_1_2: Word = rng.value(); + let mut operations_1 = SmtUpdateBatch::default(); + operations_1.add_insert(key_1_1, value_1_1); + operations_1.add_insert(key_1_2, value_1_2); + + let lineage_2: LineageId = rng.value(); + let key_2_1: Word = rng.value(); + let value_2_1: Word = rng.value(); + let mut operations_2 = SmtUpdateBatch::default(); + operations_2.add_insert(key_2_1, value_2_1); + + backend.add_lineage(lineage_1, version, operations_1)?; + backend.add_lineage(lineage_2, version, operations_2)?; + + // Let's replicate them with SMTs to check correctness. 
+ let mut tree_1 = Smt::new(); + tree_1.insert(key_1_1, value_1_1)?; + tree_1.insert(key_1_2, value_1_2)?; + + let mut tree_2 = Smt::new(); + tree_2.insert(key_2_1, value_2_1)?; + + // At this point we should have two trees in the forest, and their roots should match the trees + // we're checking against. + assert_eq!(backend.trees()?.count(), 2); + assert!(backend.trees()?.any(|e| e.root() == tree_1.root())); + assert!(backend.trees()?.any(|e| e.root() == tree_2.root())); + + // Let's do a batch modification to start with, doing an insert into both trees. + let key_1_3: Word = rng.value(); + let value_1_3: Word = rng.value(); + let key_2_2: Word = rng.value(); + let value_2_2: Word = rng.value(); + + let mut forest_ops = SmtForestUpdateBatch::empty(); + forest_ops.operations(lineage_1).add_insert(key_1_3, value_1_3); + forest_ops.operations(lineage_2).add_insert(key_2_2, value_2_2); + + backend.update_forest(version, forest_ops)?; + + // We can check these results against our trees. + tree_1.insert(key_1_3, value_1_3)?; + tree_2.insert(key_2_2, value_2_2)?; + + assert_eq!(backend.trees()?.count(), 2); + assert!(backend.trees()?.any(|e| e.root() == tree_1.root())); + assert!(backend.trees()?.any(|e| e.root() == tree_2.root())); + + // We should see an error when performing operations on a lineage that does not exist... + let ne_lineage: LineageId = rng.value(); + let key_1_4: Word = rng.value(); + let value_1_4: Word = rng.value(); + + let mut forest_ops = SmtForestUpdateBatch::empty(); + forest_ops.operations(lineage_1).add_insert(key_1_4, value_1_4); + forest_ops.operations(ne_lineage).add_insert(key_1_4, value_1_4); + + let result = backend.update_forest(version, forest_ops); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::UnknownLineage(l) if l == ne_lineage); + + // ... but it should also leave the existing data unchanged. 
+ assert_eq!(backend.trees()?.count(), 2); + assert!(backend.trees()?.any(|e| e.root() == tree_1.root())); + assert!(backend.trees()?.any(|e| e.root() == tree_2.root())); + + Ok(()) +} diff --git a/miden-crypto/src/merkle/smt/large_forest/backend/mod.rs b/miden-crypto/src/merkle/smt/large_forest/backend/mod.rs new file mode 100644 index 000000000..a7261b8f5 --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/backend/mod.rs @@ -0,0 +1,221 @@ +//! This file contains the [`Backend`] trait for the SMT forest implementation and the supporting +//! types it needs. + +pub mod memory; + +use alloc::{boxed::Box, vec::Vec}; +use core::fmt::Debug; + +use thiserror::Error; + +use crate::{ + Word, + merkle::{ + MerkleError, + smt::{ + SmtProof, + large_forest::{ + operation::{SmtForestUpdateBatch, SmtUpdateBatch}, + root::{LineageId, TreeEntry, TreeWithRoot, VersionId}, + utils::MutationSet, + }, + }, + }, +}; + +// BACKEND +// ================================================================================================ + +/// The backing storage for the SMT forest, providing the necessary high-level methods for +/// performing operations on the full trees that make up the forest, while allowing the forest +/// itself to be storage agnostic. +/// +/// # Backend Data Storage +/// +/// Having a generic [`Backend`] provides no guarantees to the user about how it stores data and +/// what patterns are used for data access under the hood. It is, however, guaranteed to store +/// _only_ the data necessary to describe the latest state of each tree in the forest. +/// +/// # Error Handling +/// +/// We separate errors in backend implementations into two semantic categories: +/// +/// 1. **User-Derived Errors:** These are errors that arise downstream of data provided by the user. +/// These errors must be signaled by returning an [`Err`] variant with an appropriate error. +/// 2. **Internal Errors:** These are errors that are not derived from data provided by the user. 
+/// Signaling such an error is up to the implementation, but can be done through both panicking +/// and returning the [`BackendError::Internal`] variant as appropriate. These **may leave the +/// backend in an inconsistent state** as they are designed to effect program termination or +/// perform it directly. +/// +/// The only reason that [`BackendError::Internal`] exists is to allow certain failures to result in +/// termination at the level of the _forest_ instead of the _backend_ as this can sometimes lead to +/// cleaner logic. If this is not appropriate, a panic is a better option. +/// +/// # Expected Behavior +/// +/// Certain methods on this trait (e.g. [`Backend::update_tree`]) provide behaviors expected for +/// that method. These combine with the following trait-level behavior requirements to become part +/// of the contract of the method, but a portion that cannot be encoded in the type system. Any +/// failure to conform to these expected behaviors is **considered a bug in the implementation** of +/// the backend, and must be rectified. +/// +/// The following behavior is expected of all methods in implementations of this trait: +/// +/// - For any failure derived from user input (see _User-Derived Errors_ above), the data and the +/// backend must be **left in a consistent state** when the error is returned to the caller. +/// - Failures derived from user input (see _User-Derived Errors_ above) must be signaled to the +/// caller by returning a variant of [`BackendError`] that is **not [`BackendError::Internal`]**. +/// Methods may place additional constraints on which errors are used to signal certain failures. +pub trait Backend +where + Self: Debug, +{ + // QUERIES + // ============================================================================================ + + /// Returns an opening for the specified `key` in the SMT with the specified `lineage`. 
+ /// + /// It is the responsibility of the forest to ensure lineage existence before querying the + /// backend. The backend must return an error if the lineage does not exist. + fn open(&self, lineage: LineageId, key: Word) -> Result; + + /// Returns the value associated with the provided `key` in the SMT with the specified + /// `lineage`, or [`None`] if no such value exists. + /// + /// It is the responsibility of the forest to ensure lineage existence before querying the + /// backend. The backend must return an error if the lineage does not exist. + fn get(&self, lineage: LineageId, key: Word) -> Result>; + + /// Returns the version of the tree with the specified `lineage`. + /// + /// It is the responsibility of the forest to ensure lineage existence before querying the + /// backend. The backend must return an error if the lineage does not exist. + fn version(&self, lineage: LineageId) -> Result; + + /// Returns an iterator over all the lineages that the backend knows about. + /// + /// The iteration order is unspecified. + fn lineages(&self) -> Result>; + + /// Returns an iterator over all the trees (and their corresponding roots) that the backend + /// knows about. + /// + /// The iteration order is unspecified. + fn trees(&self) -> Result>; + + /// Returns the total number of (key-value) entries in the specified `lineage`. + /// + /// It is the responsibility of the forest to ensure lineage existence before querying the + /// backend. The backend must return an error if the lineage does not exist. + fn entry_count(&self, lineage: LineageId) -> Result; + + /// Returns an iterator that yields the populated (key-value) entries for the specified + /// `lineage`. + /// + /// It is the responsibility of the forest to ensure lineage existence before querying the + /// backend. The backend must return an error if the lineage does not exist. 
+ /// + /// This iterator must yield entries in an order such that they are sorted by their leaf index, + /// and entries that share a leaf index are sorted by key. It must not include key-value pairs + /// where the value is the empty word. + fn entries(&self, lineage: LineageId) -> Result>; + + // SINGLE-TREE MODIFIERS + // ============================================================================================ + + /// Adds a new `lineage` to the forest with the provided `version` and sets the associated SMT + /// to have the value created by applying `updates` to the empty tree, returning the new root of + /// that tree. + /// + /// # Expected Behavior + /// + /// Implementations must guarantee the following behavior in addition to the global invariants: + /// + /// - If the provided `lineage` conflicts with an already-existing lineage in the backend, it + /// must return [`BackendError::DuplicateLineage`]. + fn add_lineage( + &mut self, + lineage: LineageId, + version: VersionId, + updates: SmtUpdateBatch, + ) -> Result; + + /// Performs the provided `updates` on the tree with the specified `lineage`, returning the + /// mutation set that will revert the changes made to the tree. + /// + /// # Expected Behavior + /// + /// Implementations must guarantee the following behavior in addition to the global invariants: + /// + /// - At most one new root must be added to the forest for the entire batch. + /// - If applying the provided `updates` results in no changes to the tree, no new tree must be + /// allocated. 
+ fn update_tree( + &mut self, + lineage: LineageId, + new_version: VersionId, + updates: SmtUpdateBatch, + ) -> Result; + + // MULTI-TREE MODIFIERS + // ============================================================================================ + + /// Performs the provided `updates` on the forest, setting all new tree states to have the + /// provided `new_version` and returning a vector of the mutation sets that reverse the changes + /// to each changed tree. + /// + /// # Expected Behavior + /// + /// Implementations must guarantee the following behavior in addition to the global invariants: + /// + /// - At most one new root must be added to the forest for each target root in the provided + /// `updates`. + /// - If applying the provided `updates` results in no changes to a given lineage of trees in + /// the forest, then no new tree must be allocated in that lineage. + fn update_forest( + &mut self, + new_version: VersionId, + updates: SmtForestUpdateBatch, + ) -> Result>; +} + +// BACKEND ERROR +// ================================================================================================ + +/// The error type for use within Backends. +#[derive(Debug, Error)] +pub enum BackendError { + /// Raised when there is a conflict between an existing lineage ID and one already in the + /// forest. + #[error("Duplicate lineage ID {0} provided")] + DuplicateLineage(LineageId), + + /// Raised for arbitrary errors that are not derived from user-input. These should be considered + /// fatal by callers, but exist to forward the termination decision up to an appropriate level. + #[error(transparent)] + Internal(Box), + + /// Raised when there is an error with the merkle tree semantics within the backend. + #[error(transparent)] + Merkle(#[from] MerkleError), + + /// Raised for arbitrary other errors within the backend that are derived from user-input and + /// hence non-fatal. 
+ #[error(transparent)] + Other(Box), + + /// Raised when the backend is queried for a lineage it doesn't know about. + #[error("Lineage {0} is not known by the backend")] + UnknownLineage(LineageId), +} + +impl BackendError { + /// Constructs an internal error variant from the provided concrete error `e`. + fn internal_from(e: E) -> Self { + Self::Internal(Box::new(e)) + } +} + +/// The result type for use with backends. +pub type Result = core::result::Result; diff --git a/miden-crypto/src/merkle/smt/large_forest/config.rs b/miden-crypto/src/merkle/smt/large_forest/config.rs new file mode 100644 index 000000000..dd7c63d39 --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/config.rs @@ -0,0 +1,66 @@ +//! This module contains the configuration structure for the forest. + +// CONSTANTS +// ================================================================================================ + +/// The default number of historical versions of each tree to keep. +pub const DEFAULT_MAX_HISTORY_VERSIONS: usize = 10; + +/// The minimum number of historical versions per lineage that the forest can store. +pub const MIN_HISTORY_VERSIONS: usize = 1; + +// CONFIG +// ================================================================================================ + +/// The configuration for the forest's behavior. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Config { + /// The maximum number of historical versions that the forest will keep for any given lineage. + max_historical_versions: usize, +} + +/// This block contains the accessors for the configuration options. +impl Config { + /// The maximum number of historical versions that the forest will keep for any given lineage. + /// + /// If this field is set to `n`, the forest will implicitly store `n + 1` versions of a given + /// lineage once the latest version in that lineage is accounted for. + /// + /// Defaults to [`DEFAULT_MAX_HISTORY_VERSIONS`]. 
+ pub fn max_history_versions(&self) -> usize { + self.max_historical_versions + } +} + +// BUILDERS +// ================================================================================================ + +/// This impl block contains the builder functions for the configuration options. +impl Config { + /// Sets the maximum number of historical versions that the forest will store for any given + /// lineage, clamping to [`MIN_HISTORY_VERSIONS`] on the low end. + /// + /// If this field is set to `n`, the forest will implicitly store `n + 1` versions of a given + /// lineage once the latest version in that lineage is accounted for. + /// + /// This defaults to [`DEFAULT_MAX_HISTORY_VERSIONS`]. + pub fn with_max_history_versions(mut self, max_historical_versions: usize) -> Self { + self.max_historical_versions = if max_historical_versions < MIN_HISTORY_VERSIONS { + MIN_HISTORY_VERSIONS + } else { + max_historical_versions + }; + self + } +} + +// TRAIT IMPLS +// ================================================================================================ + +/// Please see individual methods on [`Config`] for the default value of each configuration option. +impl Default for Config { + fn default() -> Self { + let max_historical_versions = DEFAULT_MAX_HISTORY_VERSIONS; + Self { max_historical_versions } + } +} diff --git a/miden-crypto/src/merkle/smt/large_forest/error.rs b/miden-crypto/src/merkle/smt/large_forest/error.rs new file mode 100644 index 000000000..42ef1cfaf --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/error.rs @@ -0,0 +1,93 @@ +//! This module contains the error types and helpers for working with errors from the large SMT +//! forest. 
+ +use alloc::boxed::Box; + +use thiserror::Error; + +use crate::merkle::{ + MerkleError, + smt::{ + SmtLeafError, SmtProofError, TreeId, VersionId, + large_forest::{backend::BackendError, history::error::HistoryError, root::LineageId}, + }, +}; + +// LARGE SMT FOREST ERROR +// ================================================================================================ + +/// The type of errors returned by operations on the large SMT forest. +#[derive(Debug, Error)] +pub enum LargeSmtForestError { + /// Raised when the provided version for any update is older than the latest-known version for + /// the lineage being updated. + #[error("Version {provided} is not newer than latest-known {latest}")] + BadVersion { provided: VersionId, latest: VersionId }, + + /// Raised when there is a conflict between an existing lineage ID and one already in the + /// forest. + #[error("Duplicate lineage ID {0} provided")] + DuplicateLineage(LineageId), + + /// Raised for arbitrary errors that are not derived from user-input. These **must be considered + /// fatal by the caller**, but exist to provide the caller with control over process termination + /// (e.g. for improved diagnostics or error reporting) wherever possible. + #[error(transparent)] + Fatal(Box), + + /// Errors in the history subsystem of the forest. + #[error(transparent)] + History(#[from] HistoryError), + + /// Errors with the merkle tree operations of the forest. + #[error(transparent)] + Merkle(#[from] MerkleError), + + /// Errors in working with leaves in the merkle trees. + #[error(transparent)] + SmtLeaf(#[from] SmtLeafError), + + /// Errors in the construction and manipulation of SMT proofs. + #[error(transparent)] + SmtProof(#[from] SmtProofError), + + /// Raised when an operation specifies a lineage that is not known. + #[error("The lineage {0:?} is not in the forest")] + UnknownLineage(LineageId), + + /// Raised when an operation specifies a tree that is not known. 
+ #[error("The tree {0} is not in the forest")] + UnknownTree(TreeId), + + /// Raised when an operation requests a version that is not known. + #[error("The version {0} is not known by the forest")] + UnknownVersion(VersionId), + + /// Raised for arbitrary other errors. + #[error(transparent)] + Other(#[from] Box), +} + +impl LargeSmtForestError { + /// Constructs a fatal error variant from the provided concrete error `e`. + pub fn fatal_from(e: E) -> Self { + Self::Fatal(Box::new(e)) + } +} + +/// We want to forward backend errors specifically when we can, so we manually implement the +/// conversion. +impl From for LargeSmtForestError { + fn from(value: BackendError) -> Self { + match value { + BackendError::DuplicateLineage(l) => LargeSmtForestError::DuplicateLineage(l), + BackendError::Internal(e) => LargeSmtForestError::Fatal(e), + BackendError::Merkle(e) => LargeSmtForestError::from(e), + BackendError::Other(e) => LargeSmtForestError::from(e), + BackendError::UnknownLineage(t) => LargeSmtForestError::UnknownLineage(t), + } + } +} + +/// The result type for use within the large SMT forest portion of the library. +pub type Result = core::result::Result; diff --git a/miden-crypto/src/merkle/smt/large_forest/history/error.rs b/miden-crypto/src/merkle/smt/large_forest/history/error.rs new file mode 100644 index 000000000..6f6c0e2af --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/history/error.rs @@ -0,0 +1,23 @@ +//! The error type and utility types for working with errors from the SMT history construct. +use thiserror::Error; + +use crate::merkle::smt::large_forest::history::VersionId; + +/// The type of errors returned by the history container. +#[derive(Debug, Error, PartialEq)] +pub enum HistoryError { + /// Raised when a query expects the history to contain at least one entry, but it is empty. + #[error("The history was empty")] + HistoryEmpty, + + /// Raised when a version is added to the history and is not newer than the previous. 
+ #[error("Version {0} is not monotonic with respect to {1}")] + NonMonotonicVersions(VersionId, VersionId), + + /// Raised when no version exists in the history for an arbitrary query. + #[error("The specified version is too old to be served by the history")] + VersionTooOld, +} + +/// The result type for use within the history container. +pub type Result = core::result::Result; diff --git a/miden-crypto/src/merkle/smt/large_forest/history/mod.rs b/miden-crypto/src/merkle/smt/large_forest/history/mod.rs new file mode 100644 index 000000000..7510594bc --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/history/mod.rs @@ -0,0 +1,640 @@ +//! This module contains the definition of [`History`], a simple container for some number of +//! historical versions of a given merkle tree. +//! +//! This history consists of a series of _deltas_ from the current state of the tree, moving +//! backward in history away from that current state. These deltas are then used to form a "merged +//! overlay" that represents the changes to be made on top of the current tree to put it _back_ in +//! that historical state. +//! +//! It provides functionality for adding new states to the history, as well as for querying the +//! history at a given point in time. +//! +//! # Complexity +//! +//! Versions in this structure are _cumulative_. To get the entire picture of an arbitrary node or +//! leaf at version `v` it may be necessary to check for changes in all versions between `v` and the +//! current tree state. This gives worst-case complexity `O(v)` when querying a node or leaf for the +//! version `v`. +//! +//! This is acceptable overhead as we assert that newer versions are far more likely to be queried +//! than older versions. Nevertheless, it may be improved in future using a sharing approach, but +//! that potential improvement is being ignored for now for the sake of simplicity. +//! +//! # Performance +//! +//! 
This structure operates entirely in memory, and is hence reasonably quick to query. As of the +//! current time, no detailed benchmarking has taken place for the history, but based on some basic +//! profiling the major time taken is in chasing pointers throughout memory due to the use of +//! [`Map`]s, but this is unavoidable in the current structure and may need to be revisited in +//! the future. + +pub mod error; + +mod tests; + +use alloc::collections::{BTreeMap, BTreeSet, VecDeque}; +use core::fmt::Debug; + +use error::{HistoryError, Result}; + +use crate::{ + Map, Word, + merkle::{ + EmptySubtreeRoots, NodeIndex, + smt::{ + LeafIndex, NodeMutation, SMT_DEPTH, + large_forest::{ + root::{RootValue, TreeEntry, VersionId}, + utils::MutationSet, + }, + }, + }, +}; + +// UTILITY TYPE ALIASES +// ================================================================================================ + +/// A compact leaf is a mapping from full word-length keys to word-length values, intended to be +/// stored in the leaves of an otherwise shallower merkle tree. +/// +/// We use a BTreeMap as we need a guaranteed iteration order over the keys. +pub type CompactLeaf = BTreeMap; + +/// A collection of changes to arbitrary non-leaf nodes in a merkle tree. +/// +/// All changes to nodes between versions `v` and `v + 1` must be explicitly "undone" in the +/// `NodeChanges` representing version `v`. This includes nodes that were defaulted in version `v` +/// that were given an explicit value in version `v + 1`, where the `NodeChanges` must explicitly +/// set those nodes back to the default. +/// +/// Failure to do so will result in incorrect values when those nodes are queried at a point in the +/// history corresponding to version `v`. +pub type NodeChanges = Map; + +/// A collection of changes to arbitrary leaf nodes in a merkle tree. 
+/// +/// While represented as a single leaf, it only contains the changes to the leaf as part of the +/// delta, and still needs to be combined with the actual leaf data for querying. +/// +/// Note that if the version of the tree represented by these `LeafChanges` had the default value +/// at the leaf, this default value must be made concrete in the map. Failure to do so will retain a +/// newer, non-default value for that leaf, and thus result in incorrect query results at this point +/// in the history. +pub type LeafChanges = Map, CompactLeaf>; + +// HISTORY +// ================================================================================================ + +/// A History contains a sequence of versions atop a given tree. +/// +/// The versions are _cumulative_, meaning that querying the history must account for changes from +/// the current tree that take place in versions that are not the queried version or the current +/// tree. +#[derive(Clone, Debug)] +pub struct History { + /// The maximum number of historical versions to be stored. + max_count: usize, + + /// The deltas that make up the history for this tree. + /// + /// It will never contain more than `max_count` deltas, and is ordered with the oldest data at + /// the lowest index. + /// + /// # Implementation Note + /// + /// As we are targeting small numbers of history items (e.g. 30), having a sequence with an + /// allocated capacity equal to the small maximum number of items is perfectly sane. This will + /// avoid costly reallocations in the fast path. + /// + /// We use a [`VecDeque`] instead of a [`Vec`] or [`alloc::collections::LinkedList`] as we + /// estimate that the vast majority of removals will be the oldest entries as new ones are + /// pushed. This means that we can optimize for those removals along with indexing performance, + /// rather than optimizing for more rare removals from the middle of the sequence. 
+ deltas: VecDeque, +} + +impl History { + /// Constructs a new history container, containing at most `max_count` historical versions for + /// a tree. + #[must_use] + pub fn empty(max_count: usize) -> Self { + // We allocate one more than we actually need to store to allow us to insert and THEN + // remove, rather than the other way around. This leads to negligible increases in memory + // usage while allowing for cleaner code. + let deltas = VecDeque::with_capacity(max_count + 1); + Self { max_count, deltas } + } + + /// Gets the maximum number of versions that this history can store. + #[must_use] + pub fn max_versions(&self) -> usize { + self.max_count + } + + /// Gets the current number of versions in the history. + #[must_use] + pub fn num_versions(&self) -> usize { + self.deltas.len() + } + + /// Returns all the roots that the history knows about. + /// + /// The iteration order of the roots is guaranteed to move backward in time, with earlier items + /// being roots from versions closer to the present. + /// + /// # Complexity + /// + /// Calling this method provides an iterator whose consumption requires a traversal of all the + /// versions. The method's complexity is thus `O(n)` in the number of versions. + pub fn roots(&self) -> impl Iterator { + self.deltas.iter().rev().map(|d| d.root) + } + + /// Returns the root value that corresponds to the provided `version`. + pub fn root_for_version(&self, version: VersionId) -> Result { + let ix = self.find_latest_corresponding_version(version)?; + + // The direct index is safe here because `find_latest_...` will have returned an error if + // there is no such version, and is hence guaranteed to have returned a valid index. + Ok(self.deltas[ix].root) + } + + /// Adds a version to the history with the provided `root` and represented by the changes from + /// the current tree given in `nodes` and `leaves`. 
+ /// + /// If adding this version would result in exceeding `self.max_count` historical versions, then + /// the oldest of the versions is automatically removed. + /// + /// # Gotchas + /// + /// When constructing the `nodes` and `leaves`, keep in mind that those collections must contain + /// entries for the **default value of a node or leaf** at any position where the tree was + /// sparse in the state represented by `root`. If this is not done, incorrect values may be + /// returned. + /// + /// This is necessary because the changes are the _reverse_ from what one might expect. Namely, + /// the changes in a given version `v` must "_revert_" the changes made in the transition from + /// version `v` to version `v + 1`. + /// + /// # Errors + /// + /// - [`HistoryError::NonMonotonicVersions`] if the provided version is not greater than the + /// previously added version. + pub fn add_version( + &mut self, + root: RootValue, + version_id: VersionId, + nodes: NodeChanges, + leaves: LeafChanges, + ) -> Result<()> { + // Versions must be strictly increasing, so anything not newer than the latest stored + // version is rejected before the history is touched. + if let Some(latest) = self.deltas.back() { + if latest.version_id >= version_id { + return Err(HistoryError::NonMonotonicVersions(version_id, latest.version_id)); + } + } + + // Insert and THEN trim. Trimming on every insertion (including the first) keeps the + // documented `max_count` bound intact even when `max_count` is zero. + self.deltas.push_back(Delta::new(root, version_id, nodes, leaves)); + if self.num_versions() > self.max_versions() { + self.deltas.pop_front(); + } + + Ok(()) + } + + /// Adds a version to the history, represented by the changes from the current tree given in + /// `mutations`. + /// + /// If adding this version would result in exceeding `self.max_count` historical versions, then + /// the oldest of the versions is automatically removed. + /// + /// # Gotchas + /// + /// When constructing the `mutations`, keep in mind that the set must contain entries for the + /// **default value of a node or leaf** at any position where the tree was sparse in the state + /// represented by `mutations.new_root`. 
If this is not done, incorrect values may be returned. + /// + /// This is necessary because the changes are the _reverse_ from what one might expect. Namely, + /// the changes in a given version `v` must "_revert_" the changes made in the transition from + /// version `v` to version `v + 1`. + /// + /// # Errors + /// + /// - [`HistoryError::NonMonotonicVersions`] if the provided version is not greater than the + /// previously added version. + pub fn add_version_from_mutation_set( + &mut self, + version_id: VersionId, + mutations: MutationSet, + ) -> Result<()> { + // The leaf changes must be grouped by parent leaf when being inserted, so we do that here. + let mut leaf_changes = LeafChanges::default(); + for (key, val) in mutations.new_pairs { + leaf_changes.entry(LeafIndex::from(key)).or_default().insert(key, val); + } + + // The node changes are more complex, as we have to explicitly handle reversions to empty + // specially. + let node_changes: NodeChanges = mutations + .node_mutations + .into_iter() + .map(|(ix, m)| match m { + NodeMutation::Removal => (ix, *EmptySubtreeRoots::entry(SMT_DEPTH, ix.depth())), + NodeMutation::Addition(n) => (ix, n.hash()), + }) + .collect(); + + // Now we can simply delegate to the standard function. + self.add_version(mutations.new_root, version_id, node_changes, leaf_changes) + } + + /// Returns the index in the sequence of deltas of the version that corresponds to the provided + /// `version_id`. + /// + /// To "correspond" means that it either has the provided `version_id`, or is the newest version + /// with a `version_id` less than the provided id. In either case, it is the correct version to + /// be used to query the tree state in the provided `version_id`. + /// + /// # Complexity + /// + /// Finding the latest corresponding version in the history requires a linear traversal of the + /// history entries, and hence has complexity `O(n)` in the number of versions. 
+ /// + /// # Errors + /// + /// - [`HistoryError::HistoryEmpty`] if the history is empty and hence there is no version to + /// find. + /// - [`HistoryError::VersionTooOld`] if the history does not contain the data to provide a + /// coherent overlay for the provided `version_id` due to `version_id` being older than the + /// oldest version stored. + fn find_latest_corresponding_version(&self, version_id: VersionId) -> Result { + // If the version is older than the oldest, we error. + if let Some(oldest_version) = self.deltas.front() { + if oldest_version.version_id > version_id { + return Err(HistoryError::VersionTooOld); + } + } else { + return Err(HistoryError::VersionTooOld); + } + + let ix = self + .deltas + .iter() + .position(|d| d.version_id > version_id) + .unwrap_or_else(|| self.num_versions()) + .checked_sub(1) + .expect( + "Subtraction should not overflow as we have ruled out the no-version \ + case, and in the other cases the left operand will be >= 1", + ); + + Ok(ix) + } + + /// Returns a view of the history that allows querying as a single unified overlay on the + /// current state of the merkle tree as if the overlay was reverting the tree to the state + /// corresponding to the specified `version_id`. + /// + /// Note that the history may not contain a version that directly corresponds to `version_id`. + /// In such a case, the view will instead use the newest version coherent with the provided + /// `version_id`, as this is the correct version for the provided id. Note that this will be + /// incorrect if the versions stored in the history do not represent contiguous changes from the + /// current tree. + /// + /// # Complexity + /// + /// The computational complexity of this method is linear in the number of versions stored in + /// the history. 
+ /// + /// # Errors + /// + /// - [`HistoryError::VersionTooOld`] if the history does not contain the data to provide a + /// coherent overlay for the provided `version_id` due to `version_id` being older than the + /// oldest version stored. + pub fn get_view_at(&self, version_id: VersionId) -> Result> { + HistoryView::new_of(version_id, self) + } + + /// Removes all versions in the history that are older than the version denoted by the provided + /// `version_id`. + /// + /// If `version_id` is not a version known by the history, it will keep the newest version that + /// is capable of serving as that version in queries. + /// + /// # Complexity + /// + /// The computational complexity of this method is linear in the number of versions stored in + /// the history prior to any removals. + pub fn truncate(&mut self, version_id: VersionId) -> usize { + // We start by getting the index to truncate to, though it is not an error to remove + // something too old. + let truncate_ix = self.find_latest_corresponding_version(version_id).unwrap_or(0); + + for _ in 0..truncate_ix { + self.deltas.pop_front(); + } + + truncate_ix + } + + /// Removes all versions from the history. + pub fn clear(&mut self) { + self.deltas.clear(); + } +} + +/// The functions in this impl block are specifically used for testing and are not available for +/// general API usage. +#[cfg(test)] +impl History { + /// Returns `true` if `root` is in the history and `false` otherwise. + #[must_use] + pub fn is_known_root(&self, root: RootValue) -> bool { + self.deltas.iter().any(|r| r.root == root) + } +} + +// HISTORY VIEW +// ================================================================================================ + +/// A read-only view of the history overlay on the tree at a specified place in the history. +#[derive(Debug)] +pub struct HistoryView<'history> { + /// The version of the history pointed to by the history view. 
+ version: VersionId, + + /// The index of the target version in the history. + version_ix: usize, + + /// The history that actually stores the data that will be queried. + history: &'history History, +} + +impl<'history> HistoryView<'history> { + /// Constructs a new history view that acts as a single overlay of the state represented by the + /// history at the provided `version`. + /// + /// # Complexity + /// + /// The computational complexity of this method is linear in the number of versions stored in + /// the history. + /// + /// # Errors + /// + /// - [`HistoryError::VersionTooOld`] if the history does not contain the data to provide a + /// coherent overlay for the provided `version`. + fn new_of(version: VersionId, history: &'history History) -> Result { + let version_ix = history.find_latest_corresponding_version(version)?; + Ok(Self { version, version_ix, history }) + } + + /// Gets the value of the node in the history at the provided `index`, or returns `None` if the + /// version does not overlay the current tree at that node. + /// + /// # Complexity + /// + /// The computational complexity of this method is linear in the number of versions due to the + /// need to traverse to find the correct overlay value. + #[must_use] + pub fn node_value(&self, index: &NodeIndex) -> Option<&Word> { + self.history + .deltas + .iter() + .skip(self.version_ix) + .find_map(|v| v.nodes.get(index)) + } + + /// Gets a single leaf that represents the delta from the current version of the tree to the + /// point in the history at the specified `index`. + /// + /// If the specified version does not overlay the current tree at that leaf, it will return an + /// empty compact leaf. + /// + /// # Complexity + /// + /// The computational complexity of this method is linear in the number of versions due to the + /// need to traverse to find the correct overlay value. 
+ #[must_use] + pub fn leaf_delta(&self, index: &LeafIndex) -> CompactLeaf { + let mut leaf = CompactLeaf::default(); + + // We want to keep the _oldest_ change for any particular key in a leaf. + for delta in self.history.deltas.iter().skip(self.version_ix) { + if let Some(leaf_delta) = delta.leaves.get(index) { + for (key, value) in leaf_delta { + leaf.entry(*key).or_insert(*value); + } + } + } + + leaf + } + + /// Queries the value of a specific `key` in a leaf in the overlay, returning the value for that + /// `key` if it has been changed, and [`None`] otherwise. + /// + /// # Complexity + /// + /// The computational complexity of this method is linear in the number of versions due to the + /// need to traverse to find the correct overlay value. + #[must_use] + pub fn value(&self, key: &Word) -> Option { + self.leaf_delta(&LeafIndex::from(*key)).get(key).copied() + } + + /// Returns an iterator which yields the entries that are changed by this view. + /// + /// This iterator yields entries in an order such that they are sorted by their leaf index, and + /// entries that share a leaf index are sorted by key. It includes key-value pairs where the + /// value is the empty word, as these are necessary for merging with entries in the full tree. + pub fn entries(&self) -> impl Iterator + 'history { + // It is safe to call this directly here as the construction of `HistoryView` has ensured + // that we have such a version. + HistoricalEntriesIterator::new(self.history, self.version) + } +} + +// DELTA +// ================================================================================================ + +/// A delta for a state `n` represents the changes (to both nodes and leaves) that need to be +/// applied on top of the state `n + 1` to yield the correct tree for state `n`. 
+/// +/// # Cumulative Deltas and Temporal Ordering +/// +/// In order to best represent the history of a merkle tree, these deltas are constructed to take +/// advantage of two main properties: +/// +/// - They are _cumulative_, which reduces their practical memory usage. This does, however, mean +/// that querying the state of older blocks is more expensive than querying newer ones. +/// - Deltas are applied in **temporally reversed order** from what one might expect. Most +/// conventional applications of deltas bring something from the past into the future through +/// application. In our case, the application of one or more deltas moves the tree into a **past +/// state**. +/// +/// # Construction +/// +/// While the [`Delta`] type is visible in the interface of the history, it is only intended to be +/// constructed by the history. Users should not be allowed to construct it directly. +#[derive(Clone, Debug, PartialEq)] +struct Delta { + /// The root of the tree in the `version` corresponding to the application of the reversions in + /// this delta to the previous tree state. + root: RootValue, + + /// The version of the tree represented by the delta. + version_id: VersionId, + + /// Any changes to the non-leaf nodes in the tree for this delta. + nodes: NodeChanges, + + /// Any changes to the leaf nodes in the tree for this delta. + /// + /// Note that the leaf state is **not represented compactly**, and describes the entire state + /// of the leaf in the corresponding version. + leaves: LeafChanges, +} + +impl Delta { + /// Creates a new delta with the provided `root`, and representing the provided + /// changes to `nodes` and `leaves` in the merkle tree. 
+ #[must_use] + fn new( + root: RootValue, + version_id: VersionId, + nodes: NodeChanges, + leaves: LeafChanges, + ) -> Self { + Self { root, version_id, nodes, leaves } + } +} + +// ENTRIES ITERATOR +// ================================================================================================ + +/// An iterator over the historical value for each changed entry at a given point in the history. +/// +/// This iterator yields entries in an order such that they are sorted by their leaf index, and +/// entries that share a leaf index are sorted by key. It includes key-value pairs where the value +/// is the empty word, as these are necessary for merging with entries in the full tree. +#[derive(Debug)] +pub struct HistoricalEntriesIterator<'history> { + /// The history over which the iterator is defined. + history: &'history History, + + /// The version in the history to be working from. + version: VersionId, + + /// The set of all changed leaves in the deltas that make up this iterator that have not yet + /// been visited by the iterator. + /// + /// We use a BTreeSet specifically as we need sorted iteration behavior. + changed_leaves: BTreeSet>, + + /// The current state of the iterator's iteration behavior. + position: HistoricalEntriesIteratorState, +} + +impl<'history> HistoricalEntriesIterator<'history> { + /// Creates a new historical entries iterator that represents a coherent set of delta entries at + /// the position in the history given by `version_ix`. + fn new(history: &'history History, version: VersionId) -> Self { + let changed_leaves = history + .deltas + .iter() + .skip( + history + .find_latest_corresponding_version(version) + .expect("Caller has guaranteed existence of a corresponding version"), + ) + .flat_map(|d| d.leaves.keys()) + .copied() + .collect(); + + // We want to start not pointing to any leaf as we can only advance when `next` is called. 
+ let current_leaf_index = HistoricalEntriesIteratorState::NotInLeaf; + + Self { + history, + version, + changed_leaves, + position: current_leaf_index, + } + } +} + +impl<'history> Iterator for HistoricalEntriesIterator<'history> { + type Item = TreeEntry; + + fn next(&mut self) -> Option { + match &mut self.position { + HistoricalEntriesIteratorState::NotInLeaf => { + // If we are not inside a leaf we need to see if we can become so. + if let Some(ix) = self.changed_leaves.pop_first() { + // If we can move into a new leaf, we transition the state into that leaf and + // return the entry. + let leaf_delta = self + .history + .get_view_at(self.version) + .expect( + "Version was guaranteed to exist before construction of the iterator", + ) + .leaf_delta(&ix); + + // As we are querying based on `changed_leaves`, each of the `leaf_delta` + // results should contain at least one item. + let (key, value) = leaf_delta + .first_key_value() + .expect("At least one item guaranteed by construction"); + let item = TreeEntry { key: *key, value: *value }; + + // At this point we now have the item, but we need to set up the state to point + // to this item as we return it. + self.position = HistoricalEntriesIteratorState::InLeaf { value: leaf_delta }; + + Some(item) + } else { + // If we cannot move to a new leaf index, the iterator is done. + None + } + }, + HistoricalEntriesIteratorState::InLeaf { value } => { + // If we are already inside a leaf, there are two cases that can occur when + // advancing. + value.pop_first().expect("InLeaf implies there is at least one entry in value"); + if let Some((k, v)) = value.first_key_value() { + // The first (and simplest) case is that we have another entry in the current + // leaf value. In this case, the item is just the front of the leaf value, and + // we re-write the key to point to it while leaving the leaf index the same. 
+ let item = TreeEntry { key: *k, value: *v }; + + Some(item) + } else { + // Here, we have no further entries in the current leaf, so we have to check if + // there is another leaf to move to. In other words, we are implicitly in the + // `NotInLeaf` state, so we can just call `next` recursively. + // + // This is not a stack overflow risk as it should only ever recurse once. + self.position = HistoricalEntriesIteratorState::NotInLeaf; + self.next() + } + }, + } + } +} + +/// The state that tracks where the iterator is in the iteration process. +#[derive(Debug)] +enum HistoricalEntriesIteratorState { + /// It currently does not point to any underlying leaf index. + NotInLeaf, + + /// It is currently pointing to the specified key within the specified index. + InLeaf { + /// The combined full delta that represents the compact leaf. + value: CompactLeaf, + }, +} diff --git a/miden-crypto/src/merkle/smt/large_forest/history/tests.rs b/miden-crypto/src/merkle/smt/large_forest/history/tests.rs new file mode 100644 index 000000000..47e5b6c21 --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/history/tests.rs @@ -0,0 +1,526 @@ +#![cfg(test)] +//! The functional tests for the history component. 
+ +use alloc::vec::Vec; +use core::iter::once; + +use itertools::Itertools; + +use super::{CompactLeaf, History, LeafChanges, NodeChanges, error::Result}; +use crate::{ + EMPTY_WORD, Felt, Word, + merkle::{ + NodeIndex, + smt::{LeafIndex, Smt, VersionId, large_forest::root::TreeEntry}, + }, + rand::test_utils::ContinuousRng, +}; + +// TESTS +// ================================================================================================ + +#[test] +fn empty() { + let history = History::empty(5); + assert_eq!(history.num_versions(), 0); + assert_eq!(history.max_versions(), 5); +} + +#[test] +fn roots() -> Result<()> { + let mut rng = ContinuousRng::new([0x12; 32]); + + // Set up our test state + let nodes = NodeChanges::default(); + let leaves = LeafChanges::default(); + let mut history = History::empty(2); + let root_1: Word = rng.value(); + let root_2: Word = rng.value(); + history.add_version(root_1, 0, nodes.clone(), leaves.clone())?; + history.add_version(root_2, 1, nodes.clone(), leaves.clone())?; + + // We should be able to get all the roots. + let roots = history.roots().collect::>(); + assert_eq!(roots.len(), 2); + assert!(roots.contains(&root_1)); + assert!(roots.contains(&root_2)); + + Ok(()) +} + +#[test] +fn find_latest_corresponding_version() -> Result<()> { + let mut rng = ContinuousRng::new([0x14; 32]); + + // Start by setting up our test data. 
+ let nodes = NodeChanges::default(); + let leaves = LeafChanges::default(); + let mut history = History::empty(5); + + let v1 = 10; + let v2 = 20; + let v3 = 30; + let v4 = 31; + let v5 = 45; + + history.add_version(rng.value(), v1, nodes.clone(), leaves.clone())?; + history.add_version(rng.value(), v2, nodes.clone(), leaves.clone())?; + history.add_version(rng.value(), v3, nodes.clone(), leaves.clone())?; + history.add_version(rng.value(), v4, nodes.clone(), leaves.clone())?; + history.add_version(rng.value(), v5, nodes.clone(), leaves.clone())?; + + // When we query for a version that is older than the oldest in the history we should get an + // error. + assert!(history.find_latest_corresponding_version(0).is_err()); + assert!(history.find_latest_corresponding_version(9).is_err()); + + // When we query for the oldest version we should get its index. + assert_eq!(history.find_latest_corresponding_version(v1), Ok(0)); + + // And that goes for any other known version + assert_eq!(history.find_latest_corresponding_version(v2), Ok(1)); + assert_eq!(history.find_latest_corresponding_version(v3), Ok(2)); + assert_eq!(history.find_latest_corresponding_version(v4), Ok(3)); + assert_eq!(history.find_latest_corresponding_version(v5), Ok(4)); + + // But we can also query for versions in between. 
+ assert_eq!(history.find_latest_corresponding_version(11), Ok(0)); + assert_eq!(history.find_latest_corresponding_version(19), Ok(0)); + assert_eq!(history.find_latest_corresponding_version(21), Ok(1)); + assert_eq!(history.find_latest_corresponding_version(29), Ok(1)); + assert_eq!(history.find_latest_corresponding_version(32), Ok(3)); + assert_eq!(history.find_latest_corresponding_version(44), Ok(3)); + assert_eq!(history.find_latest_corresponding_version(46), Ok(4)); + + Ok(()) +} + +#[test] +fn add_version() -> Result<()> { + let nodes = NodeChanges::default(); + let leaves = LeafChanges::default(); + let mut rng = ContinuousRng::new([0x15; 32]); + + // We start with an empty state, and we should be able to add deltas up until the limit we + // set. + let mut history = History::empty(2); + assert_eq!(history.num_versions(), 0); + assert_eq!(history.max_versions(), 2); + + let root_1: Word = rng.value(); + let id_1 = 0; + history.add_version(root_1, id_1, nodes.clone(), leaves.clone())?; + assert_eq!(history.num_versions(), 1); + + let root_2: Word = rng.value(); + let id_2 = 1; + history.add_version(root_2, id_2, nodes.clone(), leaves.clone())?; + assert_eq!(history.num_versions(), 2); + + // At this point, adding any version should remove the oldest. + let root_3: Word = rng.value(); + let id_3 = 2; + history.add_version(root_3, id_3, nodes.clone(), leaves.clone())?; + assert_eq!(history.num_versions(), 2); + + // If we then query for that first version it won't be there anymore, but the other two + // should. + assert!(history.get_view_at(id_1).is_err()); + assert!(history.get_view_at(id_2).is_ok()); + assert!(history.get_view_at(id_3).is_ok()); + + // If we try and add a version with a non-monotonic version number, we should see an error. 
+ assert!(history.add_version(root_3, id_1, nodes, leaves).is_err()); + + Ok(()) +} + +#[test] +fn add_version_from_mutation_set() -> Result<()> { + let mut rng = ContinuousRng::new([0x16; 32]); + + // We start by producing values. + let l1_k1: Word = rng.value(); + let leaf_1_ix = LeafIndex::from(l1_k1); + let l1_v1: Word = rng.value(); + let mut l1_k2: Word = rng.value(); + l1_k2[3] = Felt::new(leaf_1_ix.position()); + let l1_v2: Word = rng.value(); + + let l2_k1: Word = rng.value(); + let leaf_2_ix = LeafIndex::from(l2_k1); + let l2_v1: Word = rng.value(); + let mut l2_k2: Word = rng.value(); + l2_k2[3] = Felt::new(leaf_2_ix.position()); + let l2_v2: Word = rng.value(); + + // We produce a changeset by applying these changes to a merkle tree to put things back in the + // right state. + let tree = Smt::new(); + let mutations = tree + .compute_mutations([(l1_k1, l1_v1), (l1_k2, l1_v2), (l2_k1, l2_v1), (l2_k2, l2_v2)]) + .expect("Failed to compute mutations"); + + // We then set up our history and apply it. + let mut history = History::empty(2); + let version: VersionId = rng.value(); + + history.add_version_from_mutation_set(version, mutations)?; + + // Now we can check that it did things correctly. 
+ let view = history.get_view_at(version)?; + let expected_leaf_1 = CompactLeaf::from([(l1_k1, l1_v1), (l1_k2, l1_v2)]); + assert_eq!(view.leaf_delta(&leaf_1_ix), expected_leaf_1); + let expected_leaf_2 = CompactLeaf::from([(l2_k1, l2_v1), (l2_k2, l2_v2)]); + assert_eq!(view.leaf_delta(&leaf_2_ix), expected_leaf_2); + + Ok(()) +} + +#[test] +fn truncate() -> Result<()> { + let mut rng = ContinuousRng::new([0x17; 32]); + + // Start by setting up the test data + let mut history = History::empty(4); + + let nodes = NodeChanges::default(); + let leaves = LeafChanges::default(); + + let root_1: Word = rng.value(); + let id_1 = 5; + history.add_version(root_1, id_1, nodes.clone(), leaves.clone())?; + + let root_2: Word = rng.value(); + let id_2 = 10; + history.add_version(root_2, id_2, nodes.clone(), leaves.clone())?; + + let root_3: Word = rng.value(); + let id_3 = 15; + history.add_version(root_3, id_3, nodes.clone(), leaves.clone())?; + + let root_4: Word = rng.value(); + let id_4 = 20; + history.add_version(root_4, id_4, nodes.clone(), leaves.clone())?; + + assert_eq!(history.num_versions(), 4); + + // If we truncate to the oldest version or before, nothing should be removed. + assert_eq!(history.truncate(0), 0); + assert_eq!(history.num_versions(), 4); + assert_eq!(history.truncate(4), 0); + assert_eq!(history.num_versions(), 4); + assert_eq!(history.truncate(id_1), 0); + assert_eq!(history.num_versions(), 4); + + // If we truncate to a specific known version, it should remove all previous versions. + assert_eq!(history.truncate(id_2), 1); + assert_eq!(history.num_versions(), 3); + + // If we truncate to a version that is not known, the newest relevant version should be + // retained. + assert_eq!(history.truncate(16), 1); + assert_eq!(history.num_versions(), 2); + + // If we truncate to a version beyond the newest known, only that should be retained. 
+ assert_eq!(history.truncate(25), 1); + assert_eq!(history.num_versions(), 1); + + Ok(()) +} + +#[test] +fn clear() -> Result<()> { + let mut rng = ContinuousRng::new([0x18; 32]); + + // Start by setting up the test data + let mut history = History::empty(4); + + let nodes = NodeChanges::default(); + let leaves = LeafChanges::default(); + + let root_1: Word = rng.value(); + let id_1 = 0; + history.add_version(root_1, id_1, nodes.clone(), leaves.clone())?; + + let root_2: Word = rng.value(); + let id_2 = 1; + history.add_version(root_2, id_2, nodes.clone(), leaves.clone())?; + + assert_eq!(history.num_versions(), 2); + + // We can clear the history entirely in one go. + history.clear(); + assert_eq!(history.num_versions(), 0); + + Ok(()) +} + +#[test] +fn view_at() -> Result<()> { + // Starting in an empty state we should be able to add deltas up until the limit we set. + let mut history = History::empty(3); + let mut rng = ContinuousRng::new([0x19; 32]); + assert_eq!(history.num_versions(), 0); + assert_eq!(history.max_versions(), 3); + + // We can add an initial version with some changes in both nodes and leaves. 
+ let root_1: Word = rng.value(); + let id_1 = 3; + let mut nodes_1 = NodeChanges::default(); + let n1_value: Word = rng.value(); + let n2_value: Word = rng.value(); + nodes_1.insert(NodeIndex::new(2, 1).unwrap(), n1_value); + nodes_1.insert(NodeIndex::new(8, 128).unwrap(), n2_value); + + let mut leaf_1 = CompactLeaf::new(); + let l1_e1_key: Word = rng.value(); + let l1_e1_value: Word = rng.value(); + let leaf_1_ix = LeafIndex::from(l1_e1_key); + leaf_1.insert(l1_e1_key, l1_e1_value); + + let mut leaf_2 = CompactLeaf::new(); + let l2_e1_key: Word = rng.value(); + let l2_e1_value: Word = rng.value(); + let leaf_2_ix = LeafIndex::from(l2_e1_key); + let mut l2_e2_key: Word = rng.value(); + l2_e2_key[3] = Felt::new(leaf_2_ix.position()); + let l2_e2_value: Word = rng.value(); + leaf_2.insert(l2_e1_key, l2_e1_value); + leaf_2.insert(l2_e2_key, l2_e2_value); + + let mut leaves_1 = LeafChanges::default(); + leaves_1.insert(leaf_1_ix, leaf_1.clone()); + leaves_1.insert(leaf_2_ix, leaf_2.clone()); + + history.add_version(root_1, id_1, nodes_1.clone(), leaves_1.clone())?; + assert_eq!(history.num_versions(), 1); + + // We then add another version that overlaps with the older version. + let root_2: Word = rng.value(); + let id_2 = 5; + + let mut nodes_2 = NodeChanges::default(); + let n3_value: Word = rng.value(); + let n4_value: Word = rng.value(); + nodes_2.insert(NodeIndex::new(2, 1).unwrap(), n3_value); + nodes_2.insert(NodeIndex::new(10, 256).unwrap(), n4_value); + + let mut leaf_3 = CompactLeaf::new(); + let leaf_3_ix = leaf_2_ix; + let mut l3_e1_key: Word = rng.value(); + l3_e1_key[3] = Felt::new(leaf_3_ix.position()); + let l3_e1_value: Word = rng.value(); + leaf_3.insert(l3_e1_key, l3_e1_value); + + let mut leaves_2 = LeafChanges::default(); + leaves_2.insert(leaf_3_ix, leaf_3.clone()); + history.add_version(root_2, id_2, nodes_2.clone(), leaves_2.clone())?; + assert_eq!(history.num_versions(), 2); + + // And another version for the sake of the test. 
+ let root_3: Word = rng.value(); + let id_3 = 6; + + let mut nodes_3 = NodeChanges::default(); + let n5_value: Word = rng.value(); + nodes_3.insert(NodeIndex::new(30, 1).unwrap(), n5_value); + + let mut leaf_4 = CompactLeaf::new(); + let l4_e1_key: Word = rng.value(); + let l4_e1_value: Word = rng.value(); + let leaf_4_ix = LeafIndex::from(l4_e1_key); + leaf_4.insert(l4_e1_key, l4_e1_value); + + let mut leaf_1n = CompactLeaf::new(); + let l1n_e1_key = l1_e1_key; + let l1n_e1_value: Word = rng.value(); + leaf_1n.insert(l1n_e1_key, l1n_e1_value); + + let mut leaves_3 = LeafChanges::default(); + leaves_3.insert(leaf_4_ix, leaf_4.clone()); + leaves_3.insert(leaf_1_ix, leaf_1n); + + history.add_version(root_3, id_3, nodes_3.clone(), leaves_3.clone())?; + assert_eq!(history.num_versions(), 3); + + // At this point, we can grab a view into the history. If we grab something older than the + // history knows about we should get an error. + assert!(history.get_view_at(2).is_err()); + + // If we grab something valid, then we should get the right results. Let's grab the oldest + // possible version to test the overlay logic. + let view = history.get_view_at(id_1)?; + + // Getting a node in the targeted version should just return it. + assert_eq!(view.node_value(&NodeIndex::new(2, 1).unwrap()), Some(&n1_value)); + assert_eq!(view.node_value(&NodeIndex::new(8, 128).unwrap()), Some(&n2_value)); + + // Getting a node that is _not_ in the targeted delta directly should search through the + // versions in between the targeted version at the current tree and return the oldest value + // it can find for it. + assert_eq!(view.node_value(&NodeIndex::new(10, 256).unwrap()), Some(&n4_value)); + assert_eq!(view.node_value(&NodeIndex::new(30, 1).unwrap()), Some(&n5_value)); + + // Getting a node that doesn't exist in ANY versions should return none. 
+ assert!(view.node_value(&NodeIndex::new(45, 100).unwrap()).is_none()); + + // Getting a leaf from the targeted version will compose with other (newer) deltas to yield the + // correct changes. The first test here checks that a value updated in a newer delta is + // nevertheless reverted to the correct value. + assert_eq!(view.leaf_delta(&leaf_1_ix), leaf_1); + + // This test checks that the delta for a single leaf correctly combines non-overlapping key + // reversions. + let leaf_2_delta: CompactLeaf = once((l3_e1_key, l3_e1_value)) + .chain(leaf_2.iter().map(|(k, v)| (*k, *v))) + .collect(); + assert_eq!(view.leaf_delta(&leaf_2_ix), leaf_2_delta); + + // But getting a leaf that is not in the target delta directly should result in the same + // traversal. + assert_eq!(view.leaf_delta(&leaf_4_ix), leaf_4); + + // And getting a leaf that does not exist in any of the versions should return an empty delta. + assert!(view.leaf_delta(&LeafIndex::new(1024).unwrap()).is_empty()); + + // Finally, getting a full value from a compact leaf should yield the value directly from + // the target version if the target version overlays it AND contains it. + assert_eq!(view.value(&l1_e1_key), Some(l1_e1_value)); + assert_eq!(view.value(&l2_e1_key), Some(l2_e1_value)); + assert_eq!(view.value(&l2_e2_key), Some(l2_e2_value)); + + // However, if the leaf exists but does not contain the provided word, it should return the + // sentinel `Some(None)`. + let mut ne_key_in_existing_leaf: Word = rng.value(); + ne_key_in_existing_leaf[3] = Felt::new(leaf_1_ix.position()); + assert_eq!(view.value(&ne_key_in_existing_leaf), None); + + // If the leaf is not overlaid, then the lookup should go up the chain just as in the other + // cases. 
+ assert_eq!(view.value(&l4_e1_key), Some(l4_e1_value)); + + // But if nothing is found, it should just return None; + let ne_key: Word = rng.value(); + assert!(view.value(&ne_key).is_none()); + + // We can also get views for versions that are not directly contained, such as a version newer + // than the newest. This should just use the newest version to service the query. + let view = history.get_view_at(7)?; + assert_eq!(view.node_value(&NodeIndex::new_unchecked(30, 1)), Some(&n5_value)); + assert!(view.node_value(&NodeIndex::new_unchecked(30, 2)).is_none()); + + // We can also get an iterator over the entries for a given view. This should yield all the + // correctly-collapsed key-value pairs in the overlay. We start with the most recent view. + let view = history.get_view_at(id_3)?; + assert_eq!(view.entries().count(), 2); + assert!(view.entries().contains(&TreeEntry { key: l4_e1_key, value: l4_e1_value })); + assert!(view.entries().contains(&TreeEntry { key: l1n_e1_key, value: l1n_e1_value })); + assert!(view.entries().is_sorted_by(|l, r| { + if l.index() == r.index() { + l.key < r.key + } else { + l.index() < r.index() + } + })); + + Ok(()) +} + +// SMT INTEGRATION TESTS +// ================================================================================================ + +/// Tests History integration using real SMT mutations. +/// +/// This test creates an actual SMT, computes mutations via the SMT API, +/// and verifies that History correctly tracks the resulting node and leaf changes. 
+#[test] +fn history_from_smt_non_overlapping() -> Result<()> { + let mut rng = ContinuousRng::new([0x1a; 32]); + + // Create an empty SMT + let mut smt = Smt::new(); + let initial_root = smt.root(); + + // Generate test key-value pairs + let key_1: Word = rng.value(); + let value_1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2: Word = rng.value(); + + // Create history to track versions + let mut history = History::empty(3); + + // Version 0: Insert first key-value pair using real SMT mutation while getting the reversion + // set for the history. + let mutations_v0 = smt.compute_mutations(vec![(key_1, value_1)]).unwrap(); + let reversion_set = smt.apply_mutations_with_reversion(mutations_v0).unwrap(); + let root_v0 = smt.root(); + history.add_version_from_mutation_set(0, reversion_set)?; + assert_eq!(history.num_versions(), 1); + + // Version 1: Insert second key-value pair + let mutations_v1 = smt.compute_mutations(vec![(key_2, value_2)]).unwrap(); + let reversion_set = smt.apply_mutations_with_reversion(mutations_v1).unwrap(); + let root_v1 = smt.root(); + history.add_version_from_mutation_set(1, reversion_set)?; + + // Verify the roots for older states are tracked correctly in the history. + assert!(history.is_known_root(initial_root)); + assert!(history.is_known_root(root_v0)); + + // And that the latest root of the tree is not. + assert!(!history.is_known_root(root_v1)); + + // We can start by checking that version 0 performs the correct reversion operations, + // encompassing _both_ changes made to obtain the current version. + let view_v0 = history.get_view_at(0)?; + assert_eq!(view_v0.value(&key_1), Some(EMPTY_WORD)); + assert_eq!(view_v0.value(&key_2), Some(EMPTY_WORD)); + assert_eq!(view_v0.leaf_delta(&key_1.into()).len(), 1); + assert_eq!(view_v0.leaf_delta(&key_2.into()).len(), 1); + + // When we query version 1 it should only make revert one change on top of the current tree. 
+ let view_v1 = history.get_view_at(1)?; + assert_eq!(view_v0.value(&key_2), Some(EMPTY_WORD)); + assert_eq!(view_v0.leaf_delta(&key_2.into()).len(), 1); + + // Verify querying a non-existent key returns None + let nonexistent_key: Word = rng.value(); + assert!(view_v1.value(&nonexistent_key).is_none()); + + Ok(()) +} + +/// Tests History with SMT value updates (replacing existing values). +#[test] +fn history_from_smt_overlapping() -> Result<()> { + let mut rng = ContinuousRng::new([0x1b; 32]); + let mut smt = Smt::new(); + + let key: Word = rng.value(); + let value_v0: Word = rng.value(); + let value_v1: Word = rng.value(); + + let mut history = History::empty(2); + + // Version 0: Insert initial value + let mutations_v0 = smt.compute_mutations(vec![(key, value_v0)]).unwrap(); + let reversion_set = smt.apply_mutations_with_reversion(mutations_v0).unwrap(); + history.add_version_from_mutation_set(0, reversion_set)?; + + // Version 1: Update to new value + let mutations_v1 = smt.compute_mutations(vec![(key, value_v1)]).unwrap(); + let reversion_set = smt.apply_mutations_with_reversion(mutations_v1).unwrap(); + history.add_version_from_mutation_set(1, reversion_set)?; + + // In version 0 we should have the correct (empty) value when reverted. + let view_v0 = history.get_view_at(0)?; + assert_eq!(view_v0.value(&key), Some(EMPTY_WORD)); + + // In version 1 we should have the value set in the transition to version 0. + let view_v1 = history.get_view_at(1)?; + assert_eq!(view_v1.value(&key), Some(value_v0)); + + Ok(()) +} diff --git a/miden-crypto/src/merkle/smt/large_forest/iterator.rs b/miden-crypto/src/merkle/smt/large_forest/iterator.rs new file mode 100644 index 000000000..a1950f9b3 --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/iterator.rs @@ -0,0 +1,461 @@ +//! This module contains the implementation of the iterator over the entries of an arbitrary tree in +//! the forest. +//! +//! # Performance +//! +//! 
The performance of this iterator has a significant dependency on the tree that it is running +//! over. Due to the differing performance characteristics of backends, we cannot provide exact +//! performance bounds, but the following general rules apply. +//! +//! - Iterating over the entries of the **latest tree in a lineage** is going to be **the fastest +//! possible query**. This depends only on the direct iteration performance of the backend in +//! question. +//! - Iterating over the entries of **a historical tree is going to be slower**. This is because it +//! has to do work to merge the entries provided by the history with the entries of the full tree +//! in order to create a coherent picture of the historical tree. +//! +//! We highly recommend benchmarking the iteration behavior on the concrete workload(s) you are +//! concerned about, rather than trying to statically reason about performance of this iterator. + +use alloc::boxed::Box; +use core::iter::Peekable; + +use crate::{ + EMPTY_WORD, + merkle::smt::{LeafIndex, large_forest::root::TreeEntry}, +}; + +// ENTRIES ITERATOR +// ================================================================================================ + +/// An iterator over the entries of an arbitrary tree in the forest, yielding entries in an +/// arbitrary order. +/// +/// It is split into two variants for performance, as iterating over a full tree is significantly +/// simpler than iterating over a historical tree. While it would be nice to be able to return one +/// of two different iterators depending on the circumstances of construction, Rust's `impl Trait` +/// bounds do not allow for this. +/// +/// The iterator **must never transition between variants** during the process of iteration. +pub(super) enum EntriesIterator<'forest> { + /// An iterator over a tree in the forest that is formed from a merger of the full tree and a + /// historical overlay. + WithHistory { + /// The iterator over the entries in the full tree. 
+ /// + /// This iterator should never yield any entries where `value == EMPTY_WORD`. + full_tree_iter: Peekable + 'forest>>, + + /// The iterator over the entries in the history. + /// + /// This iterator may yield entries with `value == EMPTY_WORD`. These are explicit + /// reversions of entries newly-set in newer versions, and so should be used. While they + /// technically should only ever correspond to a case where they _are_ reverting a + /// newly-set entry, care must be taken to remove them regardless if they do not match up + /// for some reason. + history_entries_iter: Peekable + 'forest>>, + + /// The current state of the iteration state machine. + state: EntriesIteratorState, + }, + + /// An iterator over a tree in the forest that is simply an iterator over the full tree. + WithoutHistory { + /// The iterator over the entries in the full tree. + full_tree_iter: Box + 'forest>, + }, +} + +impl<'forest> EntriesIterator<'forest> { + /// Constructs a new entries iterator pointing to the first item in the designated `tree` in the + /// `forest`, formed by combining a historical overlay with the current tree. + /// + /// Note that it _does not_ perform checks as to the correctness of the provided iterators. If + /// these are not an iterator over the full tree and the historical entries in turn, the results + /// the iterator yields will be invalid. + pub(super) fn new_with_history( + full_tree_iter: impl Iterator + 'forest, + history_entries_iter: impl Iterator + 'forest, + ) -> Self { + // This type gymnastics is unfortunately necessary to let us easily store the `Peekable` + // which we need to avoid carrying additional state in the state machine. + let full_tree_iter: Box> = Box::new(full_tree_iter); + let history_entries_iter: Box> = Box::new(history_entries_iter); + + // We begin in `NotInLeaf`. 
This is implicitly `Start -> NotInLeaf` + Self::WithHistory { + full_tree_iter: full_tree_iter.peekable(), + history_entries_iter: history_entries_iter.peekable(), + state: EntriesIteratorState::NotInLeaf, + } + } + + /// Constructs a new entries iterator pointing to the first item in the designated `tree` in the + /// `forest` without any associated history. + /// + /// Note that it _does not_ check whether `full_tree_iter` is actually an iterator over the + /// full tree. If it is not, the iterator will yield invalid results. + pub(super) fn new_without_history( + full_tree_iter: impl Iterator + 'forest, + ) -> Self { + let full_tree_iter = Box::new(full_tree_iter); + Self::WithoutHistory { full_tree_iter } + } + + /// Advances the iterator and returns the next value in the case where it is iterating over a + /// historical tree version. + /// + /// For the details of the state machine that this implements, please see the documentation for + /// the [`EntriesIteratorState`]. It explains the valid state transitions and the conditions + /// under which they occur. This implementation does not match them directly in order to + /// simplify the logic, but matches the intended semantics. + /// + /// # Panics + /// + /// - If the method is called with a `self` that is not in the [`Self::WithHistory`] variant. + #[inline(always)] // To help the optimizer eliminate the redundant check in Iterator::next() + fn next_with_history(&mut self) -> Option { + let EntriesIterator::WithHistory { + full_tree_iter, + history_entries_iter, + state, + } = self + else { + panic!("EntriesIterator::next_with_history called without history") + }; + + match state { + EntriesIteratorState::NotInLeaf => { + // Here we are (semantically) not pointing to any specific leaf, so we need to work + // out which of our possible outgoing transitions take place. This state does not + // actually return anything except in the `-> End` case. 
+ match (full_tree_iter.peek(), history_entries_iter.peek()) { + (None, None) => { + // No more entries exist in either of the iterators. `NotInLeaf -> End`. + None + }, + (Some(_), None) => { + // Entries only exist in the full tree iterator. `NotInLeaf -> TreeOnly` + *state = EntriesIteratorState::TreeOnly; + self.next_with_history() + }, + (None, Some(_)) => { + // Entries only exist in the history entries iterator. `NotInLeaf -> HistOnly` + *state = EntriesIteratorState::HistOnly; + self.next_with_history() + }, + (Some(full), Some(hist)) => { + // Entries exist in both, but the exact state transition has not yet been + // determined. We have three other possible outgoing edges from `NotInLeaf`. + let full_idx = LeafIndex::from(full.key); + let hist_idx = LeafIndex::from(hist.key); + + if full_idx == hist_idx { + // We are in the same leaf. `NotInLeaf -> InLeafShared` + *state = EntriesIteratorState::InLeafShared; + } else if full_idx < hist_idx { + // We are in different leaves with full_idx coming first. `NotInLeaf -> + // InLeafTreeOnly` + *state = EntriesIteratorState::InLeafTreeOnly; + } else { + // We are in different leaves with hist_idx coming first. `NotInLeaf -> + // InLeafHistOnly`. + *state = EntriesIteratorState::InLeafHistOnly; + } + + self.next_with_history() + }, + } + }, + EntriesIteratorState::HistOnly => { + // In this state we simply can continue yielding the history entries iterator until + // it is empty. We just have to check that we're not yielding EMPTY_WORD entries + // directly as these should not be seen. + history_entries_iter.next().and_then(|e| { + if e.value == EMPTY_WORD { + self.next_with_history() + } else { + Some(e) + } + }) + }, + EntriesIteratorState::TreeOnly => { + // In this state we can simply continue yielding the tree entries iterator until it + // is empty. 
When it returns `None` we have `TreeOnly -> End` + full_tree_iter.next() + }, + EntriesIteratorState::InLeafHistOnly => { + // Here, we are in a leaf that is only in the history. We technically only want to + // transition out of this state once we have exhausted the leaf, but in actuality we + // can rely on the logic for `NotInLeaf` to do the right thing here. We only have to + // skip empty words as these should never be yielded. + *state = EntriesIteratorState::NotInLeaf; + history_entries_iter.next().and_then(|e| { + if e.value == EMPTY_WORD { + self.next_with_history() + } else { + Some(e) + } + }) + }, + EntriesIteratorState::InLeafTreeOnly => { + // Here we are in a leaf that is only in the full tree. We technically only want to + // transition out of this state once we have exhausted the leaf, but in actuality we + // can rely on the logic for `NotInleaf` to do the right thing here. + *state = EntriesIteratorState::NotInLeaf; + full_tree_iter.next() + }, + EntriesIteratorState::InLeafShared => { + // Here we have both iterators in the same LEAF but that does not mean they have the + // same item. + let hist_item = + history_entries_iter.peek().expect("Entry already checked to exist"); + let tree_item = full_tree_iter.peek().expect("Entry already checked to exist"); + + if hist_item.key == tree_item.key { + *state = EntriesIteratorState::InLeafBothKeysEq; + } else if hist_item.key < tree_item.key { + *state = EntriesIteratorState::InLeafBothHistPrio; + } else { + *state = EntriesIteratorState::InLeafBothTreePrio; + } + + self.next_with_history() + }, + EntriesIteratorState::InLeafBothKeysEq => { + // If the keys are equal we want to pop both entries and only return the history's + // one. We can again rely on `NotInLeaf` to do our logic correctly. + *state = EntriesIteratorState::NotInLeaf; + + // We can discard this entry entirely as it has been overwritten. + full_tree_iter.next(); + + // But this one may or may not need to be returned. 
+ let hist_item = + history_entries_iter.next().expect("Entry already checked to exist"); + if hist_item.value == EMPTY_WORD { + // We never want to yield empty items, so we skip them. + self.next_with_history() + } else { + // Otherwise the item is real and we want to yield it. + Some(hist_item) + } + }, + EntriesIteratorState::InLeafBothHistPrio => { + // Here we have a history item with a key < the full tree item, so we want to return + // that, unless it is an explicit `EMPTY_WORD` reversion with no matching tree entry: + // empty items must never be yielded, so those are skipped as in `InLeafHistOnly`. We + // can again rely on `NotInLeaf` to do our logic correctly. + *state = EntriesIteratorState::NotInLeaf; + history_entries_iter.next().and_then(|e| { + if e.value == EMPTY_WORD { + self.next_with_history() + } else { + Some(e) + } + }) + }, + EntriesIteratorState::InLeafBothTreePrio => { + // Here we have a full tree item with a key < the history item, so we want to return + // that. We can again rely on `NotInLeaf` to do our logic correctly. + *state = EntriesIteratorState::NotInLeaf; + full_tree_iter.next() + }, + } + } + + /// Advances the iterator and returns the next value in the case where it is iterating over the + /// current tree version. + /// + /// # Panics + /// + /// - If the method is called with a `self` that is not the [`Self::WithoutHistory`] variant. + #[inline(always)] // To help the optimizer eliminate the redundant check in Iterator::next() + fn next_without_history(&mut self) -> Option { + let EntriesIterator::WithoutHistory { full_tree_iter } = self else { + panic!("EntriesIterator::next_without_history called with history") + }; + + full_tree_iter.next() + } +} + +// ITERATOR TRAIT +// ================================================================================================ + +impl Iterator for EntriesIterator<'_> { + type Item = TreeEntry; + + fn next(&mut self) -> Option { + match self { + EntriesIterator::WithHistory { .. } => self.next_with_history(), + EntriesIterator::WithoutHistory { .. 
} => self.next_without_history(), + } + } +} + +// ENTRIES ITERATOR STATE +// ================================================================================================ + +/// The state machine that is the entries iterator for the forest. +/// +/// We do not represent the ghost states of `Start` and `End`, so [`Self::NotInLeaf`] serves as the +/// initial state of the machine in practice. A full diagram of the state machine's allowable +/// transitions can be found below. See the individual variants for the conditions under which these +/// transitions take place. +/// +/// ```text +/// ┌─────────┐ +/// │ Start │ +/// └─────────┘ +/// │ +/// │ +/// ▼ +/// ┌───────────┐ +/// ┌─────────────┬────────────│ │◀──────────────┬──────────────────┐ +/// │ │ │ NotInLeaf │ │ │ +/// │ │ ┌────│ │────────────┬──┼───────────────┐ │ +/// │ │ │ └───────────┘ │ │ │ │ +/// │ │ │ │ ▲ │ │ │ │ +/// │ │ │ │ │ │ │ │ │ +/// │ │ │ │ │ │ │ │ │ +/// │ │ │ │ │ │ │ │ │ +/// ▼ ▼ │ ▼ │ ▼ │ ▼ │ +/// ┌──────────┐ ┌──────────┐ │ ┌────────────────┐ ┌────────────────┐ ┌──────────────┐ +/// │ TreeOnly │ │ HistOnly │ │ │ InLeafHistOnly │ │ InLeafTreeOnly │ │ InLeafShared │◀─────────────┐ +/// └──────────┘ └──────────┘ │ └────────────────┘ └────────────────┘ └──────────────┘ │ +/// │ │ │ │ │ +/// │ │ │ │ │ +/// │ │ │ ┌──────────────────────┬──────────────┴────────┐ │ +/// │ │ │ │ │ │ │ +/// │ │ │ ▼ ▼ ▼ │ +/// │ │ │ ┌──────────────────┐ ┌────────────────────┐ ┌────────────────────┐ │ +/// └─────────────┴─────┐ │ │ InLeafBothKeysEq │ │ InLeafBothHistPrio │ │ InLeafBothTreePrio │ │ +/// │ │ └──────────────────┘ └────────────────────┘ └────────────────────┘ │ +/// │ │ │ │ │ │ +/// │ │ │ │ │ │ +/// │ │ └──────────────────────┴───────────────────────┴────────────┘ +/// ▼ ▼ +/// ┌─────────┐ +/// │ End │ +/// └─────────┘ +/// ``` +/// +/// Note that this describes the _semantics_ of the transitions between states, and may not directly +/// correspond to the implementation in 
[`EntriesIterator::next_with_history`] for reasons of +/// performance and maintainability. +pub(super) enum EntriesIteratorState { + /// The iterator is currently not in any leaf. + /// + /// This state should not advance the underlying iterators directly, and the iterator is not + /// intended to return a value for `next` while in this state. + /// + /// Incoming state transitions: + /// + /// - `Start -> NotInLeaf`: The initial state of the state machine. + /// - `InLeafHistOnly -> NotInLeaf`: Upon completing the leaf in the history. + /// - `InLeafTreeOnly -> NotInLeaf`: Upon completing the leaf in the tree. + /// - `InLeafShared -> NotInLeaf`: Upon completing the leaf that exists in both. + /// + /// Outgoing state transitions: + /// + /// - `NotInLeaf -> End`: If neither iterator has remaining entries. + /// - `NotInLeaf -> HistOnly`: If the tree entries iterator is empty. + /// - `NotInLeaf -> TreeOnly`: If the history entries iterator is empty. + /// - `NotInLeaf -> InLeafHistOnly`: If the next leaf is only in the history. + /// - `NotInLeaf -> InLeafTreeOnly`: If the next leaf is only in the tree. + /// - `NotInLeaf -> InLeafShared`: If the leaf exists in both iterators. + NotInLeaf, + + /// The iterator over the full tree has no entries, so we can iterate only over the history + /// until completion. + /// + /// Incoming state transitions: + /// + /// - `NotInLeaf -> HistOnly`: The tree entries iterator is empty. + /// + /// Outgoing state transitions: + /// + /// - `HistOnly -> End`: The history entries iterator is empty. + HistOnly, + + /// The iterator over the history has no entries, so we can iterate only over the full tree + /// until completion. + /// + /// Incoming state transitions: + /// + /// - `NotInLeaf -> TreeOnly`: The history entries iterator is empty. + /// + /// Outgoing state transitions: + /// + /// - `TreeOnly -> End`: The tree entries iterator is empty. 
+ TreeOnly, + + /// The iterator is operating over a leaf that only exists in the history iterator. + /// + /// Incoming state transitions: + /// + /// - `NotInLeaf -> InLeafHistOnly`: The tree entries iterator has items but the latest is not + /// in the same leaf as the history's latest. + /// + /// Outgoing state transitions: + /// + /// - `InLeafHistOnly -> NotInLeaf`: Upon completing iteration through the current leaf. + InLeafHistOnly, + + /// The iterator is operating over a leaf that only exists in the tree iterator. + /// + /// Incoming state transitions: + /// + /// - `NotInLeaf -> InLeafTreeOnly`: The history entries iterator has items but the latest is + /// not in the same leaf as the tree's latest. + /// + /// Outgoing state transitions: + /// + /// - `InLeafTreeOnly -> NotInLeaf`: Upon completing iteration through the current leaf. + InLeafTreeOnly, + + /// The iterator is operating over a leaf that exists in both iterators. + /// + /// Incoming state transitions: + /// + /// - `NotInLeaf -> InLeafShared`: Both iterators have their latest entry in the same leaf. + /// + /// Outgoing state transitions: + /// + /// - `InLeafShared -> InLeafBothKeysEq`: If the two keys in the shared leaf are equal. + /// - `InLeafShared -> InLeafBothHistPrio`: If the key in the history < the key in the tree. + /// - `InLeafShared -> InLeafBothTreePrio`: If the key in the tree < the key in the history. + /// - `InLeafShared -> NotInLeaf`: Upon completing iteration through the current leaf. + InLeafShared, + + /// The iterator is operating over a leaf that exists in both iterators, and the current keys + /// are the same. + /// + /// Incoming state transitions: + /// + /// - `InLeafShared -> InLeafBothKeysEq`: If the key in each iterator is the same. + /// + /// Outgoing state transitions: + /// + /// - `InLeafBothKeysEq -> InLeafShared`: When needing to check the next element. 
+ InLeafBothKeysEq, + + /// The iterator is operating over a leaf that exists in both iterators, and the current key + /// in the history is less than the current key in the tree. + /// + /// Incoming state transitions: + /// + /// - `InLeafShared -> InLeafBothHistPrio`: If the key in the history iterator < the key in the + /// tree iterator. + /// + /// Outgoing state transitions: + /// + /// - `InLeafBothHistPrio -> InLeafShared`: When needing to check the next element. + InLeafBothHistPrio, + + /// The iterator is operating over a leaf that exists in both iterators, and the current key in + /// the tree is less than the current key in the history. + /// + /// Incoming state transitions: + /// + /// - `InLeafShared -> InLeafBothTreePrio`: If the key in the tree iterator < the key in the + /// history iterator. + /// + /// Outgoing state transitions: + /// + /// - `InLeafBothTreePrio -> InLeafShared`: When needing to check the next element. + InLeafBothTreePrio, +} diff --git a/miden-crypto/src/merkle/smt/large_forest/lineage.rs b/miden-crypto/src/merkle/smt/large_forest/lineage.rs new file mode 100644 index 000000000..fc925a73c --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/lineage.rs @@ -0,0 +1,67 @@ +//! This module contains the data types used by the forest to store and manage the lineages that it +//! knows about. + +use core::iter::once; + +use crate::merkle::smt::{ + VersionId, + large_forest::{history::History, root::RootValue}, +}; + +// LINEAGE DATA +// ================================================================================================ + +/// The data that the forest stores in memory for each lineage of trees. +#[derive(Clone, Debug)] +pub(super) struct LineageData { + /// The history of changes made to the lineage, representing a contiguous set of historical + /// trees in the lineage up to the configured maximum number of versions. + pub history: History, + + /// The version of the latest tree in the lineage. 
+ pub latest_version: VersionId, + + /// The value of the root for the latest tree in the lineage. + pub latest_root: RootValue, +} + +impl LineageData { + /// Gets an iterator that yields all roots in the lineage. + /// + /// The iteration order of the roots is guaranteed to move backward in time, with earlier items + /// in the iterator being roots from versions closer to the present. The current root of the + /// lineage will always be the first item that the iterator yields. + pub(super) fn roots(&self) -> impl Iterator { + once(self.latest_root).chain(self.history.roots()) + } + + /// Truncates the information on this tree to the provided `version`, returning `true` if the + /// history is empty after truncation, and `false` otherwise. + /// + /// If the latest version in the lineage is older than the specified `version`, this latest + /// version is always retained. In other words, the method cannot prune a lineage from the + /// forest entirely. + pub(super) fn truncate(&mut self, version: VersionId) -> bool { + if version >= self.latest_version { + // Truncation in the history is defined such that it never removes a version that could + // possibly serve as the latest delta for a newer version. This is because it cannot + // safely know if a version `v` is between the latest delta `d` and the current version + // `c`, as it has no knowledge of the current version. + // + // Thus, if we have a version `v` such that `d <= v < c`, we need to retain the + // reversion delta `d` in the history to correctly service queries for `v`. If, however, + // we have `d < c <= v` we need to explicitly remove the last delta as well. + // + // To that end, we handle the latter case first, by explicitly calling + // `History::clear()`. + self.history.clear(); + true + } else { + // The other case is `v < c`, which is handled simply by the truncation mechanism in the + // history as we want. In other words, it retains the necessary delta, and so we can + // just call it here. 
+ self.history.truncate(version); + false + } + } +} diff --git a/miden-crypto/src/merkle/smt/large_forest/mod.rs b/miden-crypto/src/merkle/smt/large_forest/mod.rs new file mode 100644 index 000000000..ea5f482a0 --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/mod.rs @@ -0,0 +1,1092 @@ +//! A high-performance sparse merkle tree forest with pluggable backends. +//! +//! # Semantic Layout +//! +//! Much like the `SparseMerkleTree`, the forest stores its trees at depth [`SMT_DEPTH`] and then +//! relies on the compact leaf optimization to uniquely store the 256-bit elements that it contains. +//! This is done to both reduce the size of a merkle path, and to reduce the computational work +//! necessary to perform queries into the trees. +//! +//! It also has the benefit of significantly reducing the memory usage for the forest. Even in cases +//! where it relies on a persistent backend, the other peripheral structures are able to be smaller +//! and thus use less memory. +//! +//! # Backends +//! +//! The forest is implemented to rely on the API and contract conformance of an arbitrary +//! [`Backend`] implementation. These backends provide the storage for full trees in the forest, and +//! are the main extension point for the way the forest functions. +//! +//! The [`InMemoryBackend`] provides simple, in-memory storage for the full trees in the forest. It +//! is _primarily_ intended to be used for testing purposes, but should nevertheless be correct and +//! functional for production use-cases if no persistence is required. +//! +//! While any given [`Backend`] may choose to share data between lineages, this behavior is not +//! guaranteed, and must not be relied upon. +//! +//! ## Performance +//! +//! Each [`Backend`] provides the same set of functionality to the forest, but may exhibit +//! significant variance in their performance characteristics. As a result, **any performance +//! 
analysis of the forest should be done in conjunction with a specific backend**. +//! +//! Take care to read the documentation of the specific [`Backend`] that you are planning to use in +//! order to understand its performance, potential gotchas, and other such details. +//! +//! # Storing Trees and Versions +//! +//! An SMT forest conceptually performs two roles. Firstly, it acts as a collection that is able to +//! store **multiple, unrelated trees**. Secondly, it is a container for **multiple versions of a +//! given tree**. In order to make it tractable to implement a performant forest with pluggable +//! backends, this type makes an explicit delineation between these use-cases in both the API and +//! the implementation. +//! +//! ## Lineages +//! +//! We term a set of trees, where each tree is derived from changing the previous version, to be a +//! **lineage** of trees. A single lineage contains the information necessary to reconstruct any +//! previous version of the tree, within the bounds of the history that the forest stores. +//! +//! Users must take care to ensure that each lineage identifier is unique, as reuse of these +//! identifiers can result in data corruption and hence queries that return incorrect results. +//! +//! # Tree Identification +//! +//! It is possible for a tree with identical leaves (and hence an identical root) to exist in +//! multiple lineages in the forest. As lineages are stored separately, there needs to be a way to +//! specify the precise instance of a given tree. +//! +//! Trees are thus identified using the [`TreeId`], which combines the **lineage** in which the tree +//! exists with the **version** in that lineage. +//! +//! ## Potential Gotchas +//! +//! The separation of the forest into lineages of trees has a few impacts that a client of the +//! forest must understand: +//! +//! - When using a [`Backend`] that offers data persistence, **only the state of the current version +//! 
of each lineage is persisted**, while **the historical data is not persisted**. This is part +//! of the way the forest is structured, and does not depend on the choice of backend. +//! - It is always going to be more expensive to query a given lineage at **an older point** in its +//! history than it is to query at a newer point. +//! - Querying **the latest tree in a lineage will take the least time**. +//! +//! # Batch Operations +//! +//! The [`LargeSmtForest::update_tree`] and [`LargeSmtForest::update_forest`] methods are what is +//! known as **batch operations**. In other words, they are performed in one go and only produce a +//! one-stage update to the forest, rather than a sequence of updates. +//! +//! These methods should be used wherever possible (especially preferring `update_forest` over a +//! sequence of `update_tree` calls) as this will allow the forest and its backend to exploit as +//! much parallelism as possible in the updates. +//! +//! # Examples +//! +//! The following section contains usage examples for the forest. They rely on the included +//! [`InMemoryBackend`] for simplicity, but will work with any conformant [`Backend`] +//! implementation. Each example is designed to build upon the last. +//! +//! ## Constructing a Forest +//! +//! A new forest can be constructed by calling either [`LargeSmtForest::new`], which will use a +//! default [`Config`], or by explicitly providing the config in [`LargeSmtForest::with_config`]. +//! +//! ``` +//! use miden_crypto::merkle::smt::{ForestInMemoryBackend, LargeSmtForest}; +//! # use miden_crypto::merkle::smt::LargeSmtForestError; +//! # +//! # fn main() -> Result<(), LargeSmtForestError> { +//! +//! let backend = ForestInMemoryBackend::new(); +//! let forest = LargeSmtForest::new(backend)?; +//! # +//! # Ok(()) +//! # } +//! ``` +//! +//! Upon startup, the forest has to read the lineages it knows from the provided storage. If it +//! 
cannot get this information, it cannot start up properly and the constructor may return an +//! error. +//! +//! ## Adding a Lineage +//! +//! Each tree in the forest belongs to a _lineage_, identified by a [`LineageId`]. In order to work +//! with a lineage in the forest, that lineage first has to be added to it! Adding a lineage can +//! either add the empty tree, or specify a set of modifications on the empty tree to create a +//! starting state. +//! +//! ``` +//! # use miden_crypto::merkle::smt::LargeSmtForestError; +//! # use miden_crypto::merkle::smt::{ForestInMemoryBackend, LargeSmtForest}; +//! use miden_crypto::{ +//! Word, +//! merkle::smt::{LineageId, SmtUpdateBatch}, +//! }; +//! +//! # fn main() -> Result<(), LargeSmtForestError> { +//! # let backend = ForestInMemoryBackend::new(); +//! # let mut forest = LargeSmtForest::new(backend)?; +//! # +//! // We can just make some arbitrary values here for demonstration. +//! let key_1 = Word::parse("0x42").unwrap(); +//! let value_1 = Word::parse("0x80").unwrap(); +//! let key_2 = Word::parse("0xAB").unwrap(); +//! let value_2 = Word::parse("0xCD").unwrap(); +//! +//! // Operations are most cleanly specified using a builder. +//! let mut operations = SmtUpdateBatch::empty(); +//! operations.add_insert(key_1, value_1); +//! operations.add_insert(key_2, value_2); +//! +//! // To add a new lineage we also need to give it a lineage ID, and a version. +//! let lineage = LineageId::new([0x42; 32]); +//! let version_1 = 1; +//! +//! // Now we can add the lineage to the forest! +//! assert!(forest.add_lineage(lineage, version_1, operations).is_ok()); +//! # +//! # Ok(()) +//! # } +//! ``` +//! +//! ## Modifying a Lineage +//! +//! A forest is not all that useful if we cannot update it! Modifying a lineage is much like adding +//! a new one, in that we specify operations to be performed on the latest tree in that lineage. +//! +//! ``` +//! # use miden_crypto::merkle::smt::LargeSmtForestError; +//! 
# use miden_crypto::{ +//! # Word, +//! # merkle::smt::{ForestInMemoryBackend, LargeSmtForest, LineageId, SmtUpdateBatch}, +//! # }; +//! # +//! # fn main() -> Result<(), LargeSmtForestError> { +//! # let backend = ForestInMemoryBackend::new(); +//! # let mut forest = LargeSmtForest::new(backend)?; +//! # +//! # // We can just make some arbitrary values here for demonstration. +//! # let key_1 = Word::parse("0x42").unwrap(); +//! # let value_1 = Word::parse("0x80").unwrap(); +//! # let key_2 = Word::parse("0xAB").unwrap(); +//! # let value_2 = Word::parse("0xCD").unwrap(); +//! # +//! # // Operations are most cleanly specified using a builder. +//! # let mut operations = SmtUpdateBatch::empty(); +//! # operations.add_insert(key_1, value_1); +//! # operations.add_insert(key_2, value_2); +//! # +//! # // To add a new lineage we also need to give it a lineage ID, and a version. +//! # let lineage = LineageId::new([0x42; 32]); +//! # let version_1 = 1; +//! # +//! # // Now we can add the lineage to the forest! +//! # forest.add_lineage(lineage, version_1, operations)?; +//! # +//! // Let's make another arbitrary value. +//! let key_3 = Word::parse("0x67").unwrap(); +//! let value_3 = Word::parse("0x96").unwrap(); +//! +//! // And build a batch of operations again. +//! let mut operations = SmtUpdateBatch::empty(); +//! operations.add_insert(key_3, value_3); +//! operations.add_remove(key_1); +//! +//! // Now we can simply update the tree all in one go with our changes. +//! let version_2 = version_1 + 1; +//! assert!(forest.update_tree(lineage, version_2, operations).is_ok()); +//! # +//! # Ok(()) +//! # } +//! ``` +//! +//! Multiple lineages can be modified at once using the [`LargeSmtForest::update_forest`] method, +//! which works very similarly to the [`LargeSmtForest::update_tree`] method shown above. +//! +//! ## Querying a Lineage +//! +//! Modification is just one part of the puzzle, however. It is just as important to be able to get +//! 
data _out_ of the forest too! +//! +//! ``` +//! # use miden_crypto::merkle::smt::LargeSmtForestError; +//! # use miden_crypto::{ +//! # Word, +//! # merkle::smt::{ForestInMemoryBackend, LargeSmtForest, LineageId, SmtUpdateBatch}, +//! # }; +//! use miden_crypto::merkle::smt::{TreeEntry, TreeId}; +//! +//! # fn main() -> Result<(), LargeSmtForestError> { +//! # let backend = ForestInMemoryBackend::new(); +//! # let mut forest = LargeSmtForest::new(backend)?; +//! # +//! # // We can just make some arbitrary values here for demonstration. +//! # let key_1 = Word::parse("0x42").unwrap(); +//! # let value_1 = Word::parse("0x80").unwrap(); +//! # let key_2 = Word::parse("0xAB").unwrap(); +//! # let value_2 = Word::parse("0xCD").unwrap(); +//! # +//! # // Operations are most cleanly specified using a builder. +//! # let mut operations = SmtUpdateBatch::empty(); +//! # operations.add_insert(key_1, value_1); +//! # operations.add_insert(key_2, value_2); +//! # +//! # // To add a new lineage we also need to give it a lineage ID, and a version. +//! # let lineage = LineageId::new([0x42; 32]); +//! # let version_1 = 1; +//! # +//! # // Now we can add the lineage to the forest! +//! # forest.add_lineage(lineage, version_1, operations)?; +//! # +//! # // Let's make another arbitrary value. +//! # let key_3 = Word::parse("0x67").unwrap(); +//! # let value_3 = Word::parse("0x96").unwrap(); +//! # +//! # // And build a batch of operations again. +//! # let mut operations = SmtUpdateBatch::empty(); +//! # operations.add_insert(key_3, value_3); +//! # operations.add_remove(key_1); +//! # +//! # // Now we can simply update the tree all in one go with our changes. +//! # let version_2 = version_1 + 1; +//! # forest.update_tree(lineage, version_2, operations)?; +//! # +//! // As discussed above, trees are identified by a combination of their lineage and version. +//! let old_tree = TreeId::new(lineage, version_1); +//! let current_tree = TreeId::new(lineage, version_2); +//! +//! 
// The first really useful query is `open`, which gets the opening for the specified key. We can +//! // get openings for the current tree AND the historical trees. +//! assert!(forest.open(old_tree, key_1).is_ok()); +//! assert!(forest.open(current_tree, key_3).is_ok()); +//! +//! // We can also just `get` the value associated with a key, which returns `None` if the key is +//! // not populated. +//! assert_eq!(forest.get(old_tree, key_1)?, Some(value_1)); +//! assert_eq!(forest.get(current_tree, key_3)?, Some(value_3)); +//! assert!(forest.get(current_tree, key_1)?.is_none()); +//! +//! // We can also get an iterator over all the entries in the tree. +//! let entries_old: Vec<_> = forest.entries(old_tree)?.collect(); +//! let entries_current: Vec<_> = forest.entries(current_tree)?.collect(); +//! assert!(entries_old.contains(&TreeEntry { key: key_1, value: value_1 })); +//! assert!(entries_old.contains(&TreeEntry { key: key_2, value: value_2 })); +//! assert!(!entries_old.contains(&TreeEntry { key: key_3, value: value_3 })); +//! assert!(!entries_current.contains(&TreeEntry { key: key_1, value: value_1 })); +//! assert!(entries_current.contains(&TreeEntry { key: key_2, value: value_2 })); +//! assert!(entries_current.contains(&TreeEntry { key: key_3, value: value_3 })); +//! # +//! # Ok(()) +//! # } +//! ``` +//! +//! There are many other kinds of queries of course, so taking a look at the methods available on +//! [`LargeSmtForest`] is a good starting point. 
+ +mod backend; +mod config; +mod error; +mod history; +mod iterator; +mod lineage; +mod operation; +mod property_tests; +mod root; +mod tests; +mod utils; + +use alloc::vec::Vec; +use core::num::NonZeroU8; + +pub use backend::{Backend, BackendError, memory::InMemoryBackend}; +pub use config::{Config, DEFAULT_MAX_HISTORY_VERSIONS, MIN_HISTORY_VERSIONS}; +pub use error::{LargeSmtForestError, Result}; +pub use operation::{ForestOperation, SmtForestUpdateBatch, SmtUpdateBatch}; +pub use root::{LineageId, RootInfo, TreeEntry, TreeId, TreeWithRoot, VersionId}; + +use crate::{ + EMPTY_WORD, Map, Set, Word, + merkle::{ + NodeIndex, SparseMerklePath, + smt::{ + LeafIndex, SMT_DEPTH, SmtLeaf, SmtProof, + large_forest::{ + history::{CompactLeaf, History, HistoryView}, + iterator::EntriesIterator, + lineage::LineageData, + root::{RootValue, UniqueRoot}, + }, + }, + }, +}; + +// SPARSE MERKLE TREE FOREST +// ================================================================================================ + +/// A high-performance forest of sparse merkle trees with pluggable storage backends. +/// +/// See the module documentation for more information. +#[derive(Debug)] +pub struct LargeSmtForest { + /// The configuration for how the forest functions. + config: Config, + + /// The backend for storing the full trees that exist as part of the forest. + /// + /// It makes no guarantees as to where the tree data is stored, and **must not be exposed** in + /// the API of the forest to ensure that internal invariants are maintained. + backend: B, + + /// The container for the in-memory data associated with each lineage in the forest. + /// + /// It must contain an entry for every tree lineage in the forest. + lineage_data: Map, + + /// A set tracking which lineages have histories containing actual deltas in order to speed up + /// querying. + /// + /// It must always be maintained as a subset of `lineage_data.keys()`. 
+ non_empty_histories: Set, +} + +// CONSTRUCTION AND BASIC QUERIES +// ================================================================================================ + +/// These functions deal with the creation of new forest instances, and hence rely on the ability to +/// query the backend to do so. +/// +/// # Performance +/// +/// All the methods in this impl block require access to the underlying [`Backend`] instance to +/// return results. This means that their performance will depend heavily on the specific instance +/// with which the forest was constructed. +/// +/// Where anything more specific can be said about performance, the method documentation will +/// contain more detail. +impl LargeSmtForest { + /// Constructs a new forest backed by the provided `backend` using the default [`Config`] for + /// the forest's behavior. + /// + /// This constructor will treat whatever state is contained within the provided `backend` as the + /// starting state for the forest. This means that, if you pass a newly-initialized storage, the + /// forest will start in an empty state. Similarly, if you pass a `backend` that already + /// contains some data (loaded from disk, for example), then the forest will start in that state + /// instead. + /// + /// # Performance + /// + /// For performance notes on this method, see [`Self::with_config`] instead. + /// + /// # Errors + /// + /// - [`LargeSmtForestError::Fatal`] if the forest cannot be started up correctly using the + /// provided `backend`. + pub fn new(backend: B) -> Result { + Self::with_config(backend, Config::default()) + } + + /// Constructs a new forest backed by the provided `backend` and configuring behavior using the + /// provided `config`. + /// + /// This constructor will treat whatever state is contained within the provided `backend` as the + /// starting state for the forest. This means that, if you pass a newly-initialized storage, the + /// forest will start in an empty state. 
Similarly, if you pass a `backend` that already + /// contains some data (loaded from disk, for example), then the forest will start in that state + /// instead. + /// + /// # Performance + /// + /// This method is required to load the basic tree metadata from the backend during forest + /// construction. This metadata should be stored separately, and hence this method should take a + /// relatively small amount of time. + /// + /// # Errors + /// + /// - [`LargeSmtForestError::Fatal`] if the forest cannot be started up correctly using the + /// provided `backend`. + pub fn with_config(backend: B, config: Config) -> Result { + // The lineages at initialization time are whichever ones the backend knows about. To that + // end, we read from the backend and construct the starting state for each known lineage. + let lineage_data = backend + .trees()? + .map(|t| { + let data = LineageData { + history: History::empty(config.max_history_versions()), + latest_version: t.version(), + latest_root: t.root(), + }; + (t.lineage(), data) + }) + .collect::>(); + + // As no backend is able to preserve history, we can unconditionally initialize the tracking + // for non-empty histories as empty. + let non_empty_histories = Set::default(); + + Ok(Self { + config, + backend, + lineage_data, + non_empty_histories, + }) + } +} + +/// These methods provide the ability to perform basic operations on the forest without the need to +/// query the backend. +/// +/// # Performance +/// +/// All of these methods can be performed fully in-memory, and hence their performance is +/// predictable on a given machine regardless of the choice of [`Backend`] instance being used by +/// the forest. +impl LargeSmtForest { + /// Returns an iterator that yields all the (uniquely identified) roots that the forest knows + /// about, including those from historical versions. + /// + /// The iteration order of these roots is unspecified. 
+ pub fn roots(&self) -> impl Iterator { + // As the history container does not deal in roots with domains, we have to attach the + // corresponding domain to each root, and do this as lazily as possible to avoid + // materializing more things than we need to. + self.lineage_data + .iter() + .flat_map(|(l, d)| d.roots().map(|r| UniqueRoot::new(*l, r))) + } + + /// Gets the latest version of the tree for the provided `lineage`, if that lineage is in the + /// forest, or returns [`None`] otherwise. + pub fn latest_version(&self, lineage: LineageId) -> Option { + self.lineage_data.get(&lineage).map(|d| d.latest_version) + } + + /// Returns an iterator that yields the root values for trees within the specified `lineage`, or + /// [`None`] if the lineage is not known. + /// + /// The iteration order of the roots is guaranteed to move backward in time as the iterator + /// advances, with earlier items being roots from versions closer to the present. The current + /// root of the lineage will thus always be the first item yielded by the iterator. + pub fn lineage_roots(&self, lineage: LineageId) -> Option> { + self.lineage_data.get(&lineage).map(|d| d.roots()) + } + + /// Gets the value root of the newest tree in the provided `lineage`, if that lineage is in the + /// forest, or returns [`None`] otherwise. + pub fn latest_root(&self, lineage: LineageId) -> Option { + self.lineage_data.get(&lineage).map(|d| d.latest_root) + } + + /// Returns the number of trees in the forest that have unique identity. + /// + /// This is **not** the number of unique tree lineages in the forest, as it includes all + /// historical trees as well. For that, see [`Self::lineage_count`]. + pub fn tree_count(&self) -> usize { + self.roots().count() + } + + /// Returns the number of unique tree lineages in the forest. + /// + /// This is **not** the number of unique trees in the forest, as it does not include all + /// versions in each lineage. For that, see [`Self::tree_count`]. 
+ pub fn lineage_count(&self) -> usize { + self.lineage_data.len() + } + + /// Returns data describing what information the forest knows about the provided `root`. + /// + /// The result is [`RootInfo::LatestVersion`] if `root` is the latest version of its lineage, + /// [`RootInfo::HistoricalVersion`] if the lineage's history can provide a root for that + /// version, and [`RootInfo::Missing`] otherwise (unknown lineage, a version newer than the + /// latest, or a version that the history cannot resolve). + pub fn root_info(&self, root: TreeId) -> RootInfo { + let Some(d) = self.lineage_data.get(&root.lineage()) else { + return RootInfo::Missing; + }; + + if d.latest_version == root.version() { + return RootInfo::LatestVersion(d.latest_root); + } + + if root.version() > d.latest_version { + return RootInfo::Missing; + } + + match d.history.root_for_version(root.version()) { + Ok(r) => RootInfo::HistoricalVersion(r), + Err(_) => RootInfo::Missing, + } + } + + /// Removes all tree versions in the forest that are older than the provided `version`, but + /// always retains the latest tree in each lineage. + pub fn truncate(&mut self, version: VersionId) { + // The lineages to stop tracking are gathered into a separate set first, as + // `non_empty_histories` cannot be modified while it is being iterated over. + let mut newly_empty = Set::default(); + + // NOTE(review): this presumes that `LineageData::truncate` returns `true` exactly when the + // truncation leaves the lineage's history empty; confirm against its definition. 
+ self.non_empty_histories.iter().for_each(|l| { + if let Some(d) = self.lineage_data.get_mut(l) + && d.truncate(version) + { + newly_empty.insert(*l); + } + }); + + for l in &newly_empty { + self.non_empty_histories.remove(l); + } + } +} + +// QUERIES +// ================================================================================================ + +/// These methods pertain to non-mutating queries about the data stored in the forest. They differ +/// from the simple queries in the previous block by requiring access to the backend to function. +/// +/// # Performance +/// +/// All the methods in this impl block require access to the underlying [`Backend`] instance to +/// return results. This means that their performance will depend heavily on the specific instance +/// with which the forest was constructed. +/// +/// Where anything more specific can be said about performance, the method documentation will +/// contain more detail. +impl LargeSmtForest { + /// Returns an opening for the specified `key` in the specified `tree`, regardless of whether + /// the `tree` has a value associated with `key` or not. 
+ /// + /// # Errors + /// + /// - [`LargeSmtForestError::Fatal`] if the backend fails to operate properly during the query. + /// - [`LargeSmtForestError::UnknownLineage`] if the provided `tree` specifies a lineage that is + /// not one known by the forest. + /// - [`LargeSmtForestError::UnknownTree`] if the provided `tree` refers to a tree that is not a + /// member of the forest. + /// - [`LargeSmtForestError::Merkle`] if there is insufficient data in the specified `tree` to + /// provide an opening for `key`. + pub fn open(&self, tree: TreeId, key: Word) -> Result { + // We want to return an error if the lineage is unknown to comply with the stated contract + // for the function. + let lineage_data = self + .lineage_data + .get(&tree.lineage()) + .ok_or(LargeSmtForestError::UnknownLineage(tree.lineage()))?; + + // We then check if the version exists in the forest. We do this before fetching the full + // tree as to do so otherwise would represent a possible denial-of-service vector. + if tree.version() > lineage_data.latest_version { + // Here the tree is newer than we know about, and so we should error. + return Err(LargeSmtForestError::UnknownTree(tree)); + } + + if tree.version() == lineage_data.latest_version { + // In this case we can service the opening directly from the backend as the query is for + // the latest version of the tree. + return self.backend.open(tree.lineage(), key).map_err(Into::into); + } + + let Ok(view) = lineage_data.history.get_view_at(tree.version()) else { + // Versions newer than the latest have already been rejected above, so a failure here + // means that the requested version is not serviceable by the history. In that case, + // the specified tree is unknown to the forest. 
+ return Err(LargeSmtForestError::UnknownTree(tree)); + }; + + // We start by computing the relevant leaf index and getting the opening from the full + // tree to do our (potentially) most-expensive work up front. 
+ let leaf_index = LeafIndex::from(key); + let opening = self + .backend + .open(tree.lineage(), key) + .map_err(Into::::into)?; + + // We compute the new leaf and new path by applying any reversions from the history on + // top of the current state. + let new_leaf = self.merge_leaves(opening.leaf(), &view.leaf_delta(&leaf_index))?; + let new_path = self.merge_paths(leaf_index, opening.path(), view)?; + + // Finally we can compose our combined opening. + Ok(SmtProof::new(new_path, new_leaf)?) + } + + /// Returns the value associated with the provided `key` in the specified `tree`, or [`None`] if + /// there is no non-default value corresponding to the provided `key` in that tree. + /// + /// # Errors + /// + /// - [`LargeSmtForestError::Fatal`] if the backend fails to operate properly during the query. + /// - [`LargeSmtForestError::UnknownLineage`] if the provided `tree` specifies a lineage that is + /// not one known by the forest. + /// - [`LargeSmtForestError::UnknownTree`] if the provided `tree` refers to a tree that is not a + /// member of the forest. + pub fn get(&self, tree: TreeId, key: Word) -> Result> { + // We want to return an error if the lineage is unknown to comply with the stated contract + // for the function. + let lineage_data = self + .lineage_data + .get(&tree.lineage()) + .ok_or(LargeSmtForestError::UnknownLineage(tree.lineage()))?; + + if tree.version() > lineage_data.latest_version { + // Here the tree is newer than we know about, and so we should error. + return Err(LargeSmtForestError::UnknownTree(tree)); + } + + if tree.version() == lineage_data.latest_version { + // In this case we can service the lookup directly from the backend as the query is for + // the latest version of the tree. 
+ return self.backend.get(tree.lineage(), key).map_err(Into::into); + } + + let Ok(view) = lineage_data.history.get_view_at(tree.version()) else { + // Versions newer than the latest have already been rejected above, so a failure here + // means that the requested version is not serviceable by the history. In that case, + // the specified tree is unknown to the forest. + return Err(LargeSmtForestError::UnknownTree(tree)); + }; + + // We prioritize the value in the history if one exists, falling back to the full tree + // if none does. We don't use `or` here because we don't want to query the backend + // unless we have to, and we can't use `or_else` due to lack of support for `Result`. + let result = if let Some(value) = view.value(&key) { + // If the history value is an empty word, the value was unset in the historical tree + // version, so we have to conform to our interface by returning `None` here. + if value == EMPTY_WORD { None } else { Some(value) } + } else { + self.backend.get(tree.lineage(), key)? + }; + + // We can just return that directly. + Ok(result) + } + + /// Returns the number of populated entries in the specified `tree`. + /// + /// # Performance + /// + /// Due to the way that tree data is stored, this method exhibits a split performance profile. + /// + /// - If querying for a `tree` that is the latest in its lineage, the time to return a result + /// should be constant. + /// - If querying for a `tree` that is a historical version, the time to return a result will be + /// linear in the number of entries in the tree. This is because an overlaid iterator has to + /// be created to yield the correct entries for the historical version, and then queried for + /// its length. + /// + /// # Errors + /// + /// - [`LargeSmtForestError::Fatal`] if the backend fails to operate properly during the query. + /// - [`LargeSmtForestError::UnknownLineage`] if the provided `tree` specifies a lineage that is + /// not one known by the forest. 
+ /// - [`LargeSmtForestError::UnknownTree`] if the provided `tree` refers to a tree that is not a + /// member of the forest. + pub fn entry_count(&self, tree: TreeId) -> Result { + // We start by yielding an error if we cannot get the lineage data for the specified tree. + let Some(lineage_data) = self.lineage_data.get(&tree.lineage()) else { + return Err(LargeSmtForestError::UnknownLineage(tree.lineage())); + }; + + if tree.version() > lineage_data.latest_version { + // Here the tree is newer than we know about, and so we should error. + return Err(LargeSmtForestError::UnknownTree(tree)); + } + + if tree.version() == lineage_data.latest_version { + // We can fast-path the current tree using the backend. + return Ok(self.backend.entry_count(tree.lineage())?); + } + + let Ok(view) = lineage_data.history.get_view_at(tree.version()) else { + // If neither of these are the case, we do not know the version and so fail out. + return Err(LargeSmtForestError::UnknownTree(tree)); + }; + + // In the general case there is no faster path than doing the iteration to merge the + // history with the full tree, so we just count the iterator. + Ok( + EntriesIterator::new_with_history( + self.backend.entries(tree.lineage())?, + view.entries(), + ) + .count(), + ) + } + + /// Returns an iterator that yields the entries in the specified `tree`. + /// + /// # Performance + /// + /// The performance of the iterator depends both on the choice of backend _and_ the type of tree + /// that is queried for. We cannot give exact performance figures, but in general querying over + /// **the current tree** in a lineage will be faster than querying over **a historical tree** in + /// a lineage. + /// + /// # Errors + /// + /// - [`LargeSmtForestError::Fatal`] if the backend fails to operate properly during the query. + /// - [`LargeSmtForestError::UnknownLineage`] if the provided `tree` specifies a lineage that is + /// not one known by the forest. 
+ /// - [`LargeSmtForestError::UnknownTree`] if the provided `tree` refers to a tree that is not a + /// member of the forest. + pub fn entries(&self, tree: TreeId) -> Result> { + // We start by yielding an error if we cannot get the lineage data for the specified tree. + let Some(lineage_data) = self.lineage_data.get(&tree.lineage()) else { + return Err(LargeSmtForestError::UnknownLineage(tree.lineage())); + }; + + if tree.version() > lineage_data.latest_version { + // Here the tree is newer than we know about, and so we should error. + return Err(LargeSmtForestError::UnknownTree(tree)); + } + + if tree.version() == lineage_data.latest_version { + // If we match the current version, we can construct the simple iterator variant. + return Ok(EntriesIterator::new_without_history(self.backend.entries(tree.lineage())?)); + } + + let Ok(view) = lineage_data.history.get_view_at(tree.version()) else { + // If neither of these are the case, we do not know the version and so fail out. + return Err(LargeSmtForestError::UnknownTree(tree)); + }; + + // If we can serve it from the history we need to instead construct the complex version. + Ok(EntriesIterator::new_with_history( + self.backend.entries(tree.lineage())?, + view.entries(), + )) + } +} + +// SINGLE-TREE MODIFIERS +// ================================================================================================ + +/// These methods pertain to modifications that can be made to a single tree in the forest. They +/// exploit parallelism within the single target tree wherever possible. +/// +/// # Performance +/// +/// All the methods in this impl block require access to the underlying [`Backend`] instance to +/// return results. This means that their performance will depend heavily on the specific instance +/// with which the forest was constructed. +/// +/// Where anything more specific can be said about performance, the method documentation will +/// contain more detail. 
+impl LargeSmtForest { + /// Adds a new `lineage` to the forest, creating an empty tree and modifying it as specified by + /// `updates`, with the result taking the provided `new_version`. + /// + /// If the provided `updates` batch is empty, then the **empty tree will be added** as the first + /// version in the lineage. + /// + /// # Errors + /// + /// - [`LargeSmtForestError::DuplicateLineage`] if the provided `lineage` is the same as an + /// already-known lineage. + /// - [`LargeSmtForestError::Fatal`] if the backend fails while being accessed. + /// - [`BackendError::Merkle`] if the provided `updates` cannot be applied to the empty tree. + pub fn add_lineage( + &mut self, + lineage: LineageId, + new_version: VersionId, + updates: SmtUpdateBatch, + ) -> Result { + // We can immediately add the lineage in the backend, as by its contract it should return + // `DuplicateLineage` if the new lineage is a duplicate. We forward that, and any other + // errors, as this is the correct behavior for correctly-implemented backends. + let tree_info = self.backend.add_lineage(lineage, new_version, updates)?; + + // We then construct the lineage tracking data and shove it into the corresponding map. The + // history is guaranteed to be empty here, so we do not need to put an entry in the + // non-empty histories set. + let lineage_data = LineageData { + history: History::empty(self.config.max_history_versions()), + latest_version: tree_info.version(), + latest_root: tree_info.root(), + }; + self.lineage_data.insert(lineage, lineage_data); + + Ok(tree_info) + } + + /// Performs the provided `updates` on the latest tree in the specified `lineage`, adding a + /// single new root to the forest (corresponding to `new_version`) for the entire batch, and + /// returning the data for the new root of the tree. 
+ /// + /// If applying the provided `updates` results in no changes to the tree, then the root data + /// will be returned unchanged and no new tree will be allocated. 
It will retain its original + /// version, and not be returned with `new_version`. + /// + /// # Errors + /// + /// - [`LargeSmtForestError::BadVersion`] if the `new_version` is not newer than the latest + /// version for the provided `lineage`. + /// - [`LargeSmtForestError::Fatal`] if the backend fails while being accessed. + /// - [`LargeSmtForestError::UnknownLineage`] if the provided `lineage` is not one known by the + /// forest. + pub fn update_tree( + &mut self, + lineage: LineageId, + new_version: VersionId, + updates: SmtUpdateBatch, + ) -> Result { + // We initially check that the lineage is known and that the version is greater than the + // last known version for that lineage. + let lineage_data = if let Some(lineage_data) = self.lineage_data.get_mut(&lineage) { + if lineage_data.latest_version < new_version { + lineage_data + } else { + return Err(LargeSmtForestError::BadVersion { + provided: new_version, + latest: lineage_data.latest_version, + }); + } + } else { + return Err(LargeSmtForestError::UnknownLineage(lineage)); + }; + + // We now know that we have a valid lineage and a valid version, so we perform the update in + // the backend. + let reversion_set = self.backend.update_tree(lineage, new_version, updates)?; + + // We do not want to actually change anything if the tree would not change. + if reversion_set.is_empty() { + return Ok(TreeWithRoot::new( + lineage, + lineage_data.latest_version, + lineage_data.latest_root, + )); + } + + // The new root of the latest tree is actually given by the **old root** in our reverse + // mutation set. + let updated_root = reversion_set.old_root; + + // The call to `add_version_from_mutation_set` should only yield an error if the + // provided version does not pass the version check. This check has already been + // performed as a precondition for reaching this point of the tree update, and + // hence should only ever fail due to a programmer bug so we panic if it does fail. 
+ lineage_data + .history + .add_version_from_mutation_set(lineage_data.latest_version, reversion_set) + .unwrap_or_else(|_| { + panic!("Unable to add valid version {} to history", lineage_data.latest_version) + }); + + // At this point we now have a historical version added, so we track that the lineage has a + // non-empty history. + self.non_empty_histories.insert(lineage); + + // Now we just have to update the other portions of the lineage data in place... + lineage_data.latest_root = updated_root; + lineage_data.latest_version = new_version; + + // ...and return the correct value. + Ok(TreeWithRoot::new(lineage, new_version, updated_root)) + } +} + +// MULTI-TREE MODIFIERS +// ================================================================================================ + +/// These methods pertain to modifications that can be made to multiple trees in the forest at once. +/// They exploit parallelism both between trees and within trees wherever possible. +/// +/// # Performance +/// +/// All the methods in this impl block require access to the underlying [`Backend`] instance to +/// return results. This means that their performance will depend heavily on the specific instance +/// with which the forest was constructed. +/// +/// Where anything more specific can be said about performance, the method documentation will +/// contain more detail. +impl LargeSmtForest { + /// Performs the provided `updates` on the forest, adding at most one new root with version + /// `new_version` to the forest for each target lineage in `updates` and returning the + /// resulting root data for each of those lineages. + /// + /// If applying the associated batch to any given lineage in the forest results in no changes to + /// that tree, the initial root for that lineage will be returned and no new tree will be + /// allocated. 
+ /// + /// # Errors + /// + /// - [`LargeSmtForestError::UnknownLineage`] if any lineage in the batch of modifications is + /// one that is not known by the forest. + /// - [`LargeSmtForestError::Fatal`] if any error occurs to leave the forest in an inconsistent + /// state. + /// - [`LargeSmtForestError::BadVersion`] if the `new_version` is not newer than the latest + /// version of any lineage targeted by `updates`. + pub fn update_forest( + &mut self, + new_version: VersionId, + updates: SmtForestUpdateBatch, + ) -> Result> { + // We start by performing our precondition checks on the lineages and versions. We have to + // ensure both that all the lineages exist, and that the specified version transition is + // valid for all of those lineages. + updates + .lineages() + .map(|lineage| { + let Some(lineage_data) = self.lineage_data.get(lineage) else { + return Err(LargeSmtForestError::UnknownLineage(*lineage)); + }; + + if lineage_data.latest_version < new_version { + Ok(()) + } else { + Err(LargeSmtForestError::BadVersion { + provided: new_version, + latest: lineage_data.latest_version, + }) + } + }) + .collect::>>()?; + + // With the preconditions checked we can call into the backend to perform the updates, and + // we forward all errors as this will be correct for conformant backend implementations. + let reversion_sets = self.backend.update_forest(new_version, updates)?; + + // Now we have to update the lineage data (including the history) to ensure that the state + // remains consistent, and we build our return values while doing so. + reversion_sets + .into_iter() + .map(|(lineage, reversion)| { + let lineage_data = self + .lineage_data + .get_mut(&lineage) + .expect("Lineage has been checked to be present"); + + // If the operations change nothing we want to short-circuit for that tree. 
+ if reversion.is_empty() { + return Ok(TreeWithRoot::new( + lineage, + lineage_data.latest_version, + lineage_data.latest_root, + )); + } + + let updated_root = reversion.old_root; + + // The call to `add_version_from_mutation_set` should only yield an error if the + // provided version does not pass the version check. This check has already been + // performed as a precondition for reaching this point of the forest update, and + // hence should only ever fail due to a programmer bug so we panic if it does fail. + lineage_data + .history + .add_version_from_mutation_set(lineage_data.latest_version, reversion) + .unwrap_or_else(|_| { + panic!( + "Unable to add valid version {} to history", + lineage_data.latest_version + ) + }); + + // At this point we know that we have a historical version for that tree, so we + // should track it as having a non-empty history. + self.non_empty_histories.insert(lineage); + + lineage_data.latest_root = updated_root; + lineage_data.latest_version = new_version; + + Ok(TreeWithRoot::new(lineage, new_version, updated_root)) + }) + .collect::>>() + } +} + +// INTERNAL UTILITY FUNCTIONS +// ================================================================================================ + +/// This block contains internal functions that exist to de-duplicate or modularize functionality +/// within the forest. These should not be exposed. +impl LargeSmtForest { + /// Applies the provided `historical_delta` on top of the provided `full_tree_leaf` to produce + /// the correct leaf for a historical opening. + fn merge_leaves( + &self, + full_tree_leaf: &SmtLeaf, + historical_delta: &CompactLeaf, + ) -> Result { + // We apply the historical delta on top of the existing entries to perform the reversion + // back to the previous state. 
+ let mut leaf_entries = Map::new(); + leaf_entries.extend(full_tree_leaf.to_entries().map(|(k, v)| (*k, *v))); + leaf_entries.extend(historical_delta); + + // Any entries that are still empty at this point should be removed. + let non_empties_only = leaf_entries.into_iter().filter(|(_, v)| *v != EMPTY_WORD).collect(); + Ok(SmtLeaf::new(non_empties_only, full_tree_leaf.index())?) + } + + /// Applies any historical changes contained in `history_view` on top of the merkle path + /// obtained from the full tree to produce the correct path for a historical opening. + fn merge_paths( + &self, + leaf_index: LeafIndex, + full_tree_path: &SparseMerklePath, + history_view: HistoryView, + ) -> Result { + let mut path_elems = [EMPTY_WORD; SMT_DEPTH as usize]; + let mut current_node_ix = NodeIndex::from(leaf_index); + for depth in (1..=SMT_DEPTH).rev() { + // This is the sibling node of the currently-tracked node. In other words, it is the + // node that needs to become part of the path. + let path_node_ix = current_node_ix.sibling(); + + if let Some(historical_value) = history_view.node_value(&path_node_ix) { + // If there is a historical value we need to use it, and so we write it to the + // correct slot in the path elements array. + path_elems[depth as usize - 1] = *historical_value; + } else { + // If there isn't a historical value, we should delegate to the corresponding + // element in the path from the full-tree opening. + let bounded_depth = NonZeroU8::new(depth).expect("depth ∈ 1 ..= SMT_DEPTH]"); + path_elems[depth as usize - 1] = full_tree_path.at_depth(bounded_depth)? + } + + // We then need to move upward in the tree of the nodes we know. + current_node_ix = current_node_ix.parent(); + } + + // Now that we have filled in our `path_elems` we can use the construction of a sparse + // merkle path from a sized iterator, and thus not compute the mask ourselves. We + // reverse the iterator to make it go from deepest to shallowest as required. 
+ Ok(SparseMerklePath::from_sized_iter(path_elems.into_iter().rev())?) + } +} + +// TESTING FUNCTIONALITY +// ================================================================================================ + +/// This block contains functions that are exclusively for testing, providing some extra tools to +/// inspect the internal state of the forest that are unsafe to make part of the forest's public +/// API. +#[cfg(test)] +impl LargeSmtForest { + /// Gets an immutable reference to the underlying backend of the forest. + pub fn get_backend(&self) -> &B { + &self.backend + } + + /// Gets a mutable reference to the underlying backend of the forest. + pub fn get_backend_mut(&mut self) -> &mut B { + &mut self.backend + } + + /// Gets an immutable reference to the underlying configuration object for the forest. + pub fn get_config(&self) -> &Config { + &self.config + } + + /// Gets the history container corresponding to the provided `lineage`. + /// + /// # Panics + /// + /// - If the `lineage` is not one that the tree knows about. + pub fn get_history(&self, lineage: LineageId) -> &History { + self.lineage_data + .get(&lineage) + .map(|d| &d.history) + .unwrap_or_else(|| panic!("Lineage {lineage} had no data")) + } + + /// Gets an immutable reference to the set tracking the lineages that have non-empty histories. + pub fn get_non_empty_histories(&self) -> &Set { + &self.non_empty_histories + } +} diff --git a/miden-crypto/src/merkle/smt/large_forest/operation.rs b/miden-crypto/src/merkle/smt/large_forest/operation.rs new file mode 100644 index 000000000..c9912b485 --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/operation.rs @@ -0,0 +1,321 @@ +//! This module contains the definition of the [`ForestOperation`] type that encapsulates the +//! possible modifications made to a tree, as well as the concept of a [`SmtUpdateBatch`] of +//! operations to be performed on a single tree in the forest. This is then extended to +//! 
[`SmtForestUpdateBatch`], which defines a batch of operations across multiple trees. + +use alloc::vec::Vec; + +use crate::{EMPTY_WORD, Map, Set, Word, merkle::smt::large_forest::root::LineageId}; + +// FOREST OPERATION +// ================================================================================================ + +/// The operations that can be performed on an arbitrary leaf in a tree in a forest. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum ForestOperation { + /// An insertion of `value` under `key` into the tree. + /// + /// If `key` already exists in the tree, the associated value will be replaced with `value` + /// instead. + Insert { key: Word, value: Word }, + + /// The removal of the `key` and its associated value from the tree. + Remove { key: Word }, +} +impl ForestOperation { + /// Insert the provided `value` into a tree under the provided `key`. + pub fn insert(key: Word, value: Word) -> Self { + Self::Insert { key, value } + } + + /// Remove the provided `key` and its associated value from a tree. + pub fn remove(key: Word) -> Self { + Self::Remove { key } + } + + /// Retrieves the key from the operation. + pub fn key(&self) -> Word { + match self { + ForestOperation::Insert { key, .. } => *key, + ForestOperation::Remove { key } => *key, + } + } +} + +impl From for (Word, Word) { + fn from(value: ForestOperation) -> Self { + match value { + ForestOperation::Insert { key, value } => (key, value), + ForestOperation::Remove { key } => (key, EMPTY_WORD), + } + } +} + +// TREE BATCH +// ================================================================================================ + +/// A batch of operations that can be performed on an arbitrary tree in a forest. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SmtUpdateBatch { + /// The operations to be performed on a tree. 
+ operations: Vec, +} +impl SmtUpdateBatch { + /// Creates an empty batch, containing no operations. Operations can subsequently be added via + /// [`Self::add_operations`], [`Self::add_insert`], and [`Self::add_remove`]. + pub fn empty() -> Self { + Self { operations: vec![] } + } + + /// Creates a batch containing the provided `operations`, retaining them in the order in which + /// the iterator yields them. + pub fn new(operations: impl Iterator) -> Self { + Self { + operations: operations.collect::>(), + } + } + + /// Adds the provided `operations` to the batch. + pub fn add_operations(&mut self, operations: impl Iterator) { + self.operations.extend(operations); + } + + /// Adds the [`ForestOperation::Insert`] operation for the provided `key` and `value` pair to + /// the batch. + pub fn add_insert(&mut self, key: Word, value: Word) { + self.operations.push(ForestOperation::insert(key, value)); + } + + /// Adds the [`ForestOperation::Remove`] operation for the provided `key` to the batch. + pub fn add_remove(&mut self, key: Word) { + self.operations.push(ForestOperation::remove(key)); + } + + /// Consumes the batch as a vector of operations, containing the last operation for any given + /// `key` in the case that multiple operations per key are encountered. + /// + /// This vector is guaranteed to be sorted by the key on which an operation is performed. + pub fn consume(self) -> Vec { + // As we want to keep the LAST operation for each key, rather than the first, we filter in + // reverse. + let mut seen_keys: Set = Set::new(); + let mut ops = self + .operations + .into_iter() + .rev() + .filter(|o| seen_keys.insert(o.key())) + .collect::>(); + ops.sort_by_key(|o| o.key()); + ops + } +} + +impl IntoIterator for SmtUpdateBatch { + type Item = ForestOperation; + type IntoIter = alloc::vec::IntoIter; + + /// Consumes the batch as an iterator yielding operations while respecting the guarantees given + /// by [`Self::consume`]. + /// + /// The operations are yielded sorted by key, exactly as returned by [`Self::consume`].
+ fn into_iter(self) -> Self::IntoIter { + self.consume().into_iter() + } +} + +impl From for Vec<(Word, Word)> { + fn from(value: SmtUpdateBatch) -> Self { + value + .consume() + .into_iter() + .map(|op| match op { + ForestOperation::Insert { key, value } => (key, value), + ForestOperation::Remove { key } => (key, EMPTY_WORD), + }) + .collect() + } +} + +impl From for SmtUpdateBatch +where + I: Iterator, +{ + fn from(value: I) -> Self { + Self::new(value) + } +} + +impl From for Vec { + /// The vector is guaranteed to be sorted by the key on which an operation is performed, and to + /// only contain the _last_ operation to be performed on any given key. + fn from(value: SmtUpdateBatch) -> Self { + value.consume() + } +} + +impl Default for SmtUpdateBatch { + fn default() -> Self { + Self::empty() + } +} + +// FOREST BATCH +// ================================================================================================ + +/// A batch of operations that can be performed on an arbitrary forest, consisting of operations +/// associated with specified trees in that forest. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SmtForestUpdateBatch { + /// The operations associated with each targeted lineage in the forest. + operations: Map, +} + +impl SmtForestUpdateBatch { + /// Creates a new, empty, batch of operations. + pub fn empty() -> Self { + Self { operations: Map::new() } + } + + /// Adds the provided `operations` to be performed on the tree with the specified `lineage`. + pub fn add_operations( + &mut self, + lineage: LineageId, + operations: impl Iterator, + ) { + let batch = self.operations.entry(lineage).or_insert_with(SmtUpdateBatch::empty); + batch.add_operations(operations); + } + + /// Gets the batch of operations for the tree with the specified `lineage` for inspection and/or + /// modification. 
+ /// + /// It is assumed that calling this means that the caller wants to insert operations into the + /// associated batch, so a batch will be created even if one was not previously present. + pub fn operations(&mut self, lineage: LineageId) -> &mut SmtUpdateBatch { + self.operations.entry(lineage).or_insert_with(SmtUpdateBatch::empty) + } + + /// Gets an iterator over the lineages + pub fn lineages(&self) -> impl Iterator { + self.operations.keys() + } + + /// Consumes the batch as a map of batches, with each individual batch guaranteed to be in + /// sorted order and contain only the last operation in the batch for any given key. + pub fn consume(self) -> Map> { + self.operations.into_iter().map(|(k, v)| (k, v.consume())).collect() + } +} + +impl IntoIterator for SmtForestUpdateBatch { + type Item = (LineageId, Vec); + type IntoIter = crate::MapIntoIter>; + + /// Consumes the batch as an iterator yielding pairs of `(lineage, operations)` while respecting + /// the guarantees given by [`Self::consume`]. + /// + /// The iteration order is unspecified. + fn into_iter(self) -> Self::IntoIter { + self.consume().into_iter() + } +} + +// TESTS +// ================================================================================================ + +#[cfg(test)] +mod test { + use itertools::Itertools; + + use super::*; + use crate::rand::test_utils::ContinuousRng; + + #[test] + fn tree_batch() { + let mut rng = ContinuousRng::new([0x12; 32]); + + // We start by creating an empty tree batch. + let mut batch = SmtUpdateBatch::empty(); + + // Let's make three operations on different keys... + let o1_key: Word = rng.value(); + let o1_value: Word = rng.value(); + let o2_key: Word = rng.value(); + let o3_key: Word = rng.value(); + let o3_value: Word = rng.value(); + + let o1 = ForestOperation::insert(o1_key, o1_value); + let o2 = ForestOperation::remove(o2_key); + let o3 = ForestOperation::insert(o3_key, o3_value); + + // ... 
and stick them in the batch in various ways + batch.add_operations(vec![o1.clone()].into_iter()); + batch.add_remove(o2_key); + batch.add_insert(o3_key, o3_value); + + // We save a copy of the batch for later as we have more testing to do. + let batch_tmp = batch.clone(); + + // If we then consume the batch, we should have the operations ordered by their key. + let ops = batch.consume(); + assert!(ops.is_sorted_by_key(|o| o.key())); + + // Let's now make two additional operations with keys that overlap with keys from the first + // three... + let o4_key = o2_key; + let o4_value: Word = rng.value(); + let o5_key = o1_key; + + let o4 = ForestOperation::insert(o4_key, o4_value); + let o5 = ForestOperation::remove(o5_key); + + // ... and also stick them into the batch. + let mut batch = batch_tmp; + batch.add_operations(vec![o4.clone(), o5.clone()].into_iter()); + + // Now if we consume the batch we should have three operations, and they should be the last + // operation for each key. + let ops = batch.consume(); + + assert_eq!(ops.len(), 3); + assert!(ops.is_sorted_by_key(|o| o.key())); + + assert!(ops.contains(&o3)); + assert!(ops.contains(&o4)); + assert!(!ops.contains(&o2)); + assert!(ops.contains(&o5)); + assert!(!ops.contains(&o1)); + } + + #[test] + fn forest_batch() { + let mut rng = ContinuousRng::new([0x13; 32]); + + // We can start by creating an empty forest batch. + let mut batch = SmtForestUpdateBatch::empty(); + + // Let's start by adding a few operations to a tree. + let t1_lineage: LineageId = rng.value(); + let t1_o1 = ForestOperation::insert(rng.value(), rng.value()); + let t1_o2 = ForestOperation::remove(rng.value()); + batch.add_operations(t1_lineage, vec![t1_o1, t1_o2].into_iter()); + + // We can also add them differently.
+ let t2_lineage: LineageId = rng.value(); + let t2_o1 = ForestOperation::remove(rng.value()); + let t2_o2 = ForestOperation::insert(rng.value(), rng.value()); + batch.operations(t2_lineage).add_operations(vec![t2_o1, t2_o2].into_iter()); + + // When we consume the batch, each per-tree batch should be unique by key and sorted. + let ops = batch.consume(); + assert_eq!(ops.len(), 2); + + let t1_ops = ops.get(&t1_lineage).unwrap(); + assert!(t1_ops.is_sorted_by_key(|o| o.key())); + assert_eq!(t1_ops.iter().unique_by(|o| o.key()).count(), 2); + + let t2_ops = ops.get(&t2_lineage).unwrap(); + assert!(t2_ops.is_sorted_by_key(|o| o.key())); + assert_eq!(t2_ops.iter().unique_by(|o| o.key()).count(), 2); + } +} diff --git a/miden-crypto/src/merkle/smt/large_forest/property_tests.rs b/miden-crypto/src/merkle/smt/large_forest/property_tests.rs new file mode 100644 index 000000000..2eb3064bd --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/property_tests.rs @@ -0,0 +1,186 @@ +#![cfg(test)] +//! This module contains the property tests for the SMT forest. + +use alloc::{collections::BTreeSet, string::ToString, vec::Vec}; +use core::error::Error; + +use itertools::Itertools; +use proptest::prelude::*; + +use crate::{ + EMPTY_WORD, Felt, Map, ONE, Word, ZERO, + merkle::smt::{ + ForestInMemoryBackend, ForestOperation, LargeSmtForest, LeafIndex, LineageId, + MAX_LEAF_ENTRIES, SMT_DEPTH, Smt, SmtUpdateBatch, TreeEntry, TreeId, VersionId, + }, +}; + +// CONSTANTS +// ================================================================================================ + +/// The minimum number of entries that can be included in a batch. +const MIN_BATCH_ENTRIES: usize = 0; + +/// The maximum number of entries that can be included in a batch. +const MAX_BATCH_ENTRIES: usize = 10_000; + +// GENERATORS +// ================================================================================================ + +/// Generates an arbitrary lineage id. 
+fn arbitrary_lineage() -> impl Strategy { + prop::array::uniform32(any::()).prop_map(LineageId::new) +} + +/// Generates an arbitrary version identifier. +fn arbitrary_version() -> impl Strategy { + any::() +} + +/// Generates an arbitrary valid felt value. +fn arbitrary_felt() -> impl Strategy { + prop_oneof![any::().prop_map(Felt::new), Just(ZERO), Just(ONE)] +} + +/// Generates an arbitrary valid word value. +fn arbitrary_word() -> impl Strategy { + prop_oneof![prop::array::uniform4(arbitrary_felt()).prop_map(Word::new), Just(Word::empty()),] +} + +/// Generates a random number of unique (non-overlapping) key-value pairs. +/// +/// Note that the generated pairs may well have the same leaf index. +fn arbitrary_entries() -> impl Strategy> { + prop::collection::vec( + (arbitrary_word(), arbitrary_word()), + MIN_BATCH_ENTRIES..=MAX_BATCH_ENTRIES, + ) + .prop_map(move |entries| { + // We want to avoid duplicate entries. It is well-defined, but it helps with test simplicity + // to avoid it here. + let mut used_keys = BTreeSet::new(); + let mut keys_in_leaf: Map, usize> = Map::default(); + + entries + .into_iter() + .flat_map(|(k, v)| { + let leaf_index = LeafIndex::from(k); + let count = keys_in_leaf.entry(leaf_index).or_default(); + + // We don't want to overfill a leaf, nor do we want to emit the same key twice. The + // duplicate check short-circuits after the leaf check so that `used_keys` only ever + // contains keys that were actually emitted. + if *count >= MAX_LEAF_ENTRIES || !used_keys.insert(k) { + return None; + } + *count += 1; + + Some((k, v)) + }) + .collect() + }) +} + +/// Generates an arbitrary batch of updates to be performed on an arbitrary tree. +fn arbitrary_batch() -> impl Strategy { + arbitrary_entries().prop_map(|e| { + SmtUpdateBatch::new(e.into_iter().map(|(k, v)| { + if v == EMPTY_WORD { + ForestOperation::remove(k) + } else { + ForestOperation::insert(k, v) + } + })) + }) +} + +// ENTRIES +// ================================================================================================ + +proptest!
{ + #![proptest_config(ProptestConfig::with_cases(20))] + + /// This test ensures that the `entries` iterator for the forest always returns the exact same + /// values as the `entries` iterator over a basic SMT with the same state. + #[test] + fn entries_correct( + lineage in arbitrary_lineage(), + version in arbitrary_version(), + entries_v1 in arbitrary_batch(), + entries_v2 in arbitrary_batch(), + ) { + // We now create a forest and add the lineage to it using the first set of entries. + let mut forest = LargeSmtForest::new(ForestInMemoryBackend::new()).map_err(to_fail)?; + forest.add_lineage(lineage, version, entries_v1.clone()).map_err(to_fail)?; + forest.update_tree(lineage, version + 1, entries_v2.clone()).map_err(to_fail)?; + + // We then create two auxiliary trees to work with, to compare our results against. + let mut tree_v1 = Smt::new(); + let tree_v1_mutations = + tree_v1.compute_mutations(Vec::from(entries_v1).into_iter()).map_err(to_fail)?; + tree_v1.apply_mutations(tree_v1_mutations).map_err(to_fail)?; + + let mut tree_v2 = tree_v1.clone(); + let tree_v2_mutations = + tree_v2.compute_mutations(Vec::from(entries_v2).into_iter()).map_err(to_fail)?; + tree_v2.apply_mutations(tree_v2_mutations).map_err(to_fail)?; + + // Iterating over the historical version of the lineage in the forest should produce exactly + // the same entries as iterating over V1 of our test tree. + let old_version = TreeId::new(lineage, version); + let forest_entries = forest.entries(old_version).map_err(to_fail)?.sorted().collect_vec(); + let tree_entries = tree_v1 + .entries() + .map(|(k, v)| TreeEntry { key: *k, value: *v }) + .sorted() + .collect_vec(); + assert_eq!(forest_entries, tree_entries); + + // Iterating over the newest version of the lineage in the forest should provide exactly the + // same entries as iterating over V2 of our test tree. 
+ let current_version = TreeId::new(lineage, version + 1); + let forest_entries = forest.entries(current_version).map_err(to_fail)?.sorted().collect_vec(); + let tree_entries = tree_v2 + .entries() + .map(|(k, v)| TreeEntry { key: *k, value: *v }) + .sorted() + .collect_vec(); + assert_eq!(forest_entries, tree_entries); + } + + /// This test ensures that the `entries` iterator for the forest will never return entries where + /// the value is the empty word. + #[test] + fn entries_never_yields_empty_values( + lineage in arbitrary_lineage(), + version in arbitrary_version(), + entries_v1 in arbitrary_batch(), + entries_v2 in arbitrary_batch(), + ) { + // We create a forest, add the lineage using the first batch, and update it using the second. + let mut forest = LargeSmtForest::new(ForestInMemoryBackend::new()).map_err(to_fail)?; + forest.add_lineage(lineage, version, entries_v1.clone()).map_err(to_fail)?; + forest.update_tree(lineage, version + 1, entries_v2.clone()).map_err(to_fail)?; + + // Iterating over the historical version of the lineage in the forest should never yield an + // entry whose value is the empty word. + let old_version = TreeId::new(lineage, version); + assert!(forest.entries(old_version).map_err(to_fail)?.all(|e| e.value != EMPTY_WORD)); + + // Iterating over the newest version of the lineage in the forest should likewise never yield + // an entry whose value is the empty word. + let current_version = TreeId::new(lineage, version + 1); + assert!(forest.entries(current_version).map_err(to_fail)?.all(|e| e.value != EMPTY_WORD)); + } +} + +// UTILS +// ================================================================================================ + +/// Converts the provided `error` into a test case failure. +/// +/// This is necessary because the `From` implementation is only available in builds with +/// `std` enabled, and we want error forwarding to remain ergonomic.
+fn to_fail(error: impl Error) -> TestCaseError { + TestCaseError::fail(error.to_string()) +} diff --git a/miden-crypto/src/merkle/smt/large_forest/root.rs b/miden-crypto/src/merkle/smt/large_forest/root.rs new file mode 100644 index 000000000..e8c4c2631 --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/root.rs @@ -0,0 +1,208 @@ +//! This module contains utility types for working with roots and trees as part of the forest. + +#[cfg(test)] +use crate::rand::Randomizable; +use crate::{ + Word, + merkle::smt::{LeafIndex, SMT_DEPTH}, +}; + +// TYPES +// ================================================================================================ + +/// A root for a tree in the forest. +pub type RootValue = Word; + +/// An identifier for the version of a tree in a given lineage +pub type VersionId = u64; + +// LINEAGE ID +// ================================================================================================ + +/// An identifier for a lineage of trees. +/// +/// This is an arbitrary, user-provided identifier that is used to disambiguate cases where trees in +/// distinct lineages are otherwise identical and have the same root. +#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct LineageId([u8; 32]); + +impl LineageId { + /// Constructs a new lineage ID from the provided bytes. 
+ pub fn new(bytes: [u8; 32]) -> Self { + Self(bytes) + } +} + +impl core::fmt::Display for LineageId { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "[")?; + for i in 0..4 { + let byte = self.0[i]; + write!(f, "{byte:x}, ")?; + } + write!(f, "...]") + } +} + +#[cfg(test)] +impl Randomizable for LineageId { + const VALUE_SIZE: usize = size_of::(); + + fn from_random_bytes(source: &[u8]) -> Option { + let bytes = source.get(..Self::VALUE_SIZE)?; + let mut buffer = [0u8; 32]; + buffer.copy_from_slice(bytes); + Some(Self::new(buffer)) + } +} + +// TREE IDENTIFIER +// ================================================================================================ + +/// An identifier that is capable of uniquely referring to a tree in the forest. +#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct TreeId { + lineage: LineageId, + version: VersionId, +} + +/// The base API of the identifier. +impl TreeId { + /// Constructs a new tree identifier for the tree with the specified `version` in the specified + /// `lineage`. + pub fn new(lineage: LineageId, version: VersionId) -> Self { + Self { lineage, version } + } + + /// Gets the tree's lineage from the identifier. + pub fn lineage(&self) -> LineageId { + self.lineage + } + + /// Gets the tree's version from the identifier. 
+ pub fn version(&self) -> VersionId { + self.version + } +} + +impl core::fmt::Display for TreeId { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "TreeId(lineage = {}, version = {})", self.lineage, self.version) + } +} + +#[cfg(test)] +impl Randomizable for TreeId { + const VALUE_SIZE: usize = size_of::(); + + fn from_random_bytes(source: &[u8]) -> Option { + let domain = Randomizable::from_random_bytes(source)?; + let version = Randomizable::from_random_bytes(source)?; + Some(Self::new(domain, version)) + } +} + +// UNIQUE ROOT +// ================================================================================================ + +/// A root in the forest that is anchored to a lineage. +#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct UniqueRoot { + lineage: LineageId, + value: RootValue, +} + +impl UniqueRoot { + /// Constructs a new unique root with the provided `value` and `lineage`. + pub fn new(lineage: LineageId, value: RootValue) -> Self { + Self { lineage, value } + } + + /// Gets the lineage in which the root is found. + pub fn lineage(&self) -> LineageId { + self.lineage + } + + /// Gets the value of the tree root itself. + pub fn value(&self) -> RootValue { + self.value + } +} + +// TREE ID WITH ROOT +// ================================================================================================ + +/// The unique identifier for a given tree, along with the value of its root. +#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct TreeWithRoot { + id: TreeId, + root: RootValue, +} + +impl TreeWithRoot { + /// Constructs a new tree identifier from the provided `lineage`, `version`, and `root`. + pub fn new(lineage: LineageId, version: VersionId, root: RootValue) -> Self { + let id = TreeId::new(lineage, version); + Self { id, root } + } + + /// Gets the tree's lineage.
+ pub fn lineage(&self) -> LineageId { + self.id.lineage + } + + /// Gets the tree's version. + pub fn version(&self) -> VersionId { + self.id.version + } + + /// Gets the tree's root value. + pub fn root(&self) -> RootValue { + self.root + } +} + +impl From for TreeId { + fn from(value: TreeWithRoot) -> Self { + value.id + } +} + +impl From for UniqueRoot { + fn from(value: TreeWithRoot) -> Self { + UniqueRoot::new(value.id.lineage, value.root) + } +} + +// ROOT INFO +// ================================================================================================ + +/// Information about the role that a queried root plays in the forest. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum RootInfo { + /// The queried root corresponds to a tree that is the latest version of a given tree in the + /// forest. + LatestVersion(RootValue), + + /// The queried root corresponds to a tree that is _not_ the latest version of a given tree in + /// the forest. + HistoricalVersion(RootValue), + + /// The queried root does not belong to any tree that the forest knows about. + Missing, +} + +// TREE ENTRY +// ================================================================================================ + +/// An entry in a given tree. +#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub struct TreeEntry { + pub key: Word, + pub value: Word, +} +impl TreeEntry { + pub fn index(&self) -> LeafIndex { + LeafIndex::from(self.key) + } +} diff --git a/miden-crypto/src/merkle/smt/large_forest/tests.rs b/miden-crypto/src/merkle/smt/large_forest/tests.rs new file mode 100644 index 000000000..b06036a58 --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/tests.rs @@ -0,0 +1,1208 @@ +#![cfg(test)] +//! This module contains the handwritten tests of the functionality for the SMT forest. These tests +//! are for the basic functionality of the forest. +//! +//! Wherever possible, these tests rely on the correctness of the existing [`Smt`] implementation. +//!
It is used as a point of comparison to avoid the need to hard-code specific values and scenarios +//! for the trees, instead allowing us to compare things directly. + +use alloc::vec::Vec; + +use assert_matches::assert_matches; +use itertools::Itertools; + +use super::{Config, Result}; +use crate::{ + EMPTY_WORD, Map, Set, Word, + merkle::{ + EmptySubtreeRoots, + smt::{ + Backend, ForestInMemoryBackend, ForestOperation, LargeSmtForest, LargeSmtForestError, + LeafIndex, RootInfo, Smt, SmtForestUpdateBatch, SmtUpdateBatch, TreeId, VersionId, + large_forest::{ + LineageData, + history::{History, LeafChanges, NodeChanges}, + root::{LineageId, TreeEntry, TreeWithRoot}, + }, + }, + }, + rand::test_utils::{ContinuousRng, rand_value}, +}; + +// TYPE ALIASES +// ================================================================================================ + +/// We only care about testing with the in-memory backend here for correct functionality. +type Forest = LargeSmtForest; + +// CONSTRUCTION TESTS +// ================================================================================================ + +#[test] +fn new() -> Result<()> { + // Constructing a forest using the default constructor should yield the default configuration. + let backend = ForestInMemoryBackend::new(); + let forest = Forest::new(backend)?; + + // We can just sanity-check the configuration to ensure that things started up right. + let config = forest.get_config(); + + assert_eq!(config.max_history_versions(), 10); + + Ok(()) +} + +#[test] +fn with_config() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let forest = Forest::with_config(backend, Config::default().with_max_history_versions(30))?; + + // Let us sanity check using the config again. 
+ let config = forest.get_config(); + + assert_eq!(config.max_history_versions(), 30); + + Ok(()) +} + +// BASIC QUERIES TESTS +// ================================================================================================ + +#[test] +fn roots() -> Result<()> { + // We start by constructing our forest. + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x96; 32]); + + // We add a number of lineages to the forest, some of which have the same _root_ value. + let version_1: VersionId = rng.value(); + let lineage_1: LineageId = rng.value(); + let lineage_2: LineageId = rng.value(); + let lineage_3: LineageId = rng.value(); + + let root_1 = forest.add_lineage(lineage_1, version_1, SmtUpdateBatch::default())?; + assert_eq!( + root_1, + TreeWithRoot::new(lineage_1, version_1, *EmptySubtreeRoots::entry(64, 0)) + ); + let root_2 = forest.add_lineage(lineage_2, version_1, SmtUpdateBatch::default())?; + assert_eq!( + root_2, + TreeWithRoot::new(lineage_2, version_1, *EmptySubtreeRoots::entry(64, 0)) + ); + let root_3 = forest.add_lineage(lineage_3, version_1, SmtUpdateBatch::default())?; + assert_eq!( + root_3, + TreeWithRoot::new(lineage_3, version_1, *EmptySubtreeRoots::entry(64, 0)) + ); + + // We then update one of them to make sure it ends up with a historical root as well. + let k1: Word = rng.value(); + let v1: Word = rng.value(); + let k2: Word = rng.value(); + let v2: Word = rng.value(); + + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(k1, v1); + operations.add_insert(k2, v2); + + let version_2: VersionId = version_1 + 1; + let root_4 = forest.update_tree(lineage_1, version_2, operations)?; + + // We can now check that the roots iterator contains the items we expect. 
+ let roots = forest.roots().collect::>(); + assert_eq!(roots.len(), 4); + assert!(roots.contains(&root_1.into())); + assert!(roots.contains(&root_2.into())); + assert!(roots.contains(&root_3.into())); + assert!(roots.contains(&root_4.into())); + + Ok(()) +} + +#[test] +fn latest_version() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x69; 32]); + + // Let's add some trees to the forest. Two are empty and one is added with data. + let version_1: VersionId = rng.value(); + let version_2: VersionId = version_1 + 1; + let version_3: VersionId = version_2 + 1; + + let lineage_1: LineageId = rng.value(); + let lineage_2: LineageId = rng.value(); + let lineage_3: LineageId = rng.value(); + + let k1: Word = rng.value(); + let v1: Word = rng.value(); + let k2: Word = rng.value(); + let v2: Word = rng.value(); + + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(k1, v1); + operations.add_insert(k2, v2); + + forest.add_lineage(lineage_1, version_1, SmtUpdateBatch::default())?; + forest.add_lineage(lineage_2, version_1, SmtUpdateBatch::default())?; + forest.add_lineage(lineage_3, version_1, operations)?; + + // Now let's update one of the empty ones twice... + let k3: Word = rng.value(); + let v3: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(k3, v3); + forest.update_tree(lineage_1, version_2, operations)?; + + let k4: Word = rng.value(); + let v4: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(k4, v4); + forest.update_tree(lineage_1, version_3, operations)?; + + // ...and the non-empty one once with a non-contiguous version. 
+ let k5: Word = rng.value(); + let v5: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(k5, v5); + forest.update_tree(lineage_3, version_3, operations)?; + + // Now let's query the latest version for all of them. + assert_eq!(forest.latest_version(lineage_1).unwrap(), version_3); + assert_eq!(forest.latest_version(lineage_2).unwrap(), version_1); + assert_eq!(forest.latest_version(lineage_3).unwrap(), version_3); + + // Finally, if we look for a lineage that doesn't exist, we should get `None` back. + let ne_lineage: LineageId = rng.value(); + assert!(forest.latest_version(ne_lineage).is_none()); + + Ok(()) +} + +#[test] +fn lineage_roots() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x42; 32]); + + // Let's add a lineage to the forest and update it a few times. + let lineage: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let version_2 = version_1 + 1; + let version_3 = version_2 + 1; + let root_1 = forest.add_lineage(lineage, version_1, SmtUpdateBatch::default())?; + + let k1: Word = rng.value(); + let v1: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(k1, v1); + let root_2 = forest.update_tree(lineage, version_2, operations)?; + + let k2: Word = rng.value(); + let v2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(k2, v2); + let root_3 = forest.update_tree(lineage, version_3, operations)?; + + // Now we can query for the roots in this lineage. + let lineage_roots = forest + .lineage_roots(lineage) + .expect("Existing lineage should have roots") + .collect::>(); + assert_eq!(lineage_roots.len(), 3); + + // For this method, the contract insists that it is ordered from newer roots in the lineage to + // older roots. 
+ assert_eq!(lineage_roots[0], root_3.root()); + assert_eq!(lineage_roots[1], root_2.root()); + assert_eq!(lineage_roots[2], root_1.root()); + + // If, however, we query for the roots of a non-existent lineage, we should get `None` back. + let ne_lineage: LineageId = rng.value(); + assert!(forest.lineage_roots(ne_lineage).is_none()); + + Ok(()) +} + +#[test] +fn latest_root() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x97; 32]); + + // Let's add a lineage to the forest. + let lineage: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let version_2 = version_1 + 1; + let root_1 = forest.add_lineage(lineage, version_1, SmtUpdateBatch::default())?; + + // We can get its latest root. + assert_eq!(forest.latest_root(lineage), Some(root_1.root())); + + // And then update it... + let k1: Word = rng.value(); + let v1: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(k1, v1); + let root_2 = forest.update_tree(lineage, version_2, operations)?; + + // ...to check that we get the updated root. + assert_eq!(forest.latest_root(lineage), Some(root_2.root())); + + // However, if we query for a nonexistent lineage, we should get `None` back. + let ne_lineage: LineageId = rng.value(); + assert!(forest.latest_root(ne_lineage).is_none()); + + Ok(()) +} + +#[test] +fn tree_count() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x67; 32]); + + // A newly-initialized forest should know about only the trees that its backend knows about. + assert_eq!(forest.tree_count(), forest.get_backend().trees()?.count()); + + // Now let's add some trees. 
+ let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let version_2 = version_1 + 1; + let version_3 = version_2 + 1; + forest.add_lineage(lineage_1, version_1, SmtUpdateBatch::default())?; + + let k1: Word = rng.value(); + let v1: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(k1, v1); + forest.update_tree(lineage_1, version_2, operations)?; + + let k2: Word = rng.value(); + let v2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(k2, v2); + forest.update_tree(lineage_1, version_3, operations)?; + + let lineage_2: LineageId = rng.value(); + forest.add_lineage(lineage_2, version_1, SmtUpdateBatch::default())?; + + // As there are two current trees and two historical versions, we should see four trees total. + assert_eq!(forest.tree_count(), 4); + + Ok(()) +} + +#[test] +fn lineage_count() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x64; 32]); + + // A newly-initialized forest should know about only the lineages that its backend knows about. + assert_eq!(forest.lineage_count(), forest.get_backend().lineages()?.count()); + + // So now let's add some lineages. + let version: VersionId = rng.value(); + let lineage_1: LineageId = rng.value(); + forest.add_lineage(lineage_1, version, SmtUpdateBatch::default())?; + let lineage_2: LineageId = rng.value(); + forest.add_lineage(lineage_2, version, SmtUpdateBatch::default())?; + let lineage_3: LineageId = rng.value(); + forest.add_lineage(lineage_3, version, SmtUpdateBatch::default())?; + + // We should see three lineages. + assert_eq!(forest.lineage_count(), 3); + + // This should stay the same if we update a tree. 
+ let operations = + SmtUpdateBatch::new([ForestOperation::insert(rng.value(), rng.value())].into_iter()); + forest.update_tree(lineage_1, version + 1, operations)?; + assert_eq!(forest.lineage_count(), 3); + + Ok(()) +} + +#[test] +fn root_info() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x32; 32]); + + // Let's start by adding a lineage and updating it. + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let operations = + SmtUpdateBatch::new([ForestOperation::insert(rng.value(), rng.value())].into_iter()); + let historical_root = forest.add_lineage(lineage_1, version_1, operations)?; + + let version_2 = version_1 + 1; + let operations = + SmtUpdateBatch::new([ForestOperation::insert(rng.value(), rng.value())].into_iter()); + let current_root = forest.update_tree(lineage_1, version_2, operations)?; + + // When we query for a root (lineage_1, version_1), we should get back HistoricalVersion. + assert_eq!( + forest.root_info(TreeId::new(lineage_1, version_1)), + RootInfo::HistoricalVersion(historical_root.root()) + ); + + // When we query for a root (lineage_1, version_2), we should get back LatestVersion. + assert_eq!( + forest.root_info(TreeId::new(lineage_1, version_2)), + RootInfo::LatestVersion(current_root.root()) + ); + + // When we query for a nonexistent version in an existing lineage we should get back Missing. + let version_3 = version_2 + 1; + assert_eq!(forest.root_info(TreeId::new(lineage_1, version_3)), RootInfo::Missing); + + // As we should also get back when the lineage doesn't exist. 
+ let lineage_2: LineageId = rng.value(); + assert_eq!(forest.root_info(TreeId::new(lineage_2, version_1)), RootInfo::Missing); + + Ok(()) +} + +// QUERIES TESTS +// ================================================================================================ + +#[test] +fn open() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x08; 32]); + + // When we query for a tree with a lineage that is not known by the forest, we should get an + // error back. + let missing_lineage: LineageId = rng.value(); + let missing_version: VersionId = rng.value(); + let missing_key: Word = rng.value(); + + let result = forest.open(TreeId::new(missing_lineage, missing_version), missing_key); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), LargeSmtForestError::UnknownLineage(l) if l == missing_lineage); + + // Now let's add a lineage with two entries to the forest. + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let key_1: Word = rng.value(); + let value_1_v1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2_v1: Word = rng.value(); + forest.add_lineage( + lineage_1, + version_1, + SmtUpdateBatch::new( + [ + ForestOperation::insert(key_1, value_1_v1), + ForestOperation::insert(key_2, value_2_v1), + ] + .into_iter(), + ), + )?; + + // If we query for a tree with a known lineage but unknown version, we should also get an error + // back. + let missing_tree = TreeId::new(lineage_1, missing_version); + let result = forest.open(missing_tree, missing_key); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), LargeSmtForestError::UnknownTree(t) if t == missing_tree); + + // We should also get an error back if we query for a version that is NEWER than the + // latest-known version. 
+ let too_new_version = version_1 + 1; + let too_new_tree = TreeId::new(lineage_1, too_new_version); + let result = forest.open(too_new_tree, missing_key); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), LargeSmtForestError::UnknownTree(t) if t == too_new_tree); + + // Let's set up a basic SMT to compare the forest's openings against for correctness. + let mut tree_v1 = Smt::new(); + tree_v1.insert(key_1, value_1_v1)?; + tree_v1.insert(key_2, value_2_v1)?; + + // And get a random opening on the initial tree. + let random_key: Word = rng.value(); + let forest_opening = forest.open(TreeId::new(lineage_1, version_1), random_key)?; + let tree_v1_opening = tree_v1.open(&random_key); + assert_eq!(forest_opening, tree_v1_opening); + + // Now let's make some modifications to the tree. + let version_2: VersionId = rng.value(); + let value_1_v2: Word = rng.value(); + let key_3: Word = rng.value(); + let value_3_v1: Word = rng.value(); + forest.update_tree( + lineage_1, + version_2, + SmtUpdateBatch::new( + [ + ForestOperation::insert(key_1, value_1_v2), + ForestOperation::insert(key_3, value_3_v1), + ForestOperation::remove(key_2), + ] + .into_iter(), + ), + )?; + + // And mirror it on our tree. + let mut tree_v2 = tree_v1.clone(); + tree_v2.insert(key_1, value_1_v2)?; + tree_v2.insert(key_3, value_3_v1)?; + tree_v2.insert(key_2, EMPTY_WORD)?; + + // These two should again produce the same opening when we query for the latest version. + let random_key: Word = rng.value(); + let forest_opening = forest.open(TreeId::new(lineage_1, version_2), random_key)?; + let tree_v2_opening = tree_v2.open(&random_key); + assert_eq!(forest_opening, tree_v2_opening); + + // Most importantly, however, we should get the same opening from the forest when querying a + // historical tree version as we do from the actual tree. 
+ let forest_opening = forest.open(TreeId::new(lineage_1, version_1), random_key)?; + let tree_v1_opening = tree_v1.open(&random_key); + assert_eq!(forest_opening, tree_v1_opening); + + Ok(()) +} + +#[test] +fn get() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x12; 32]); + + // When we query for a tree with a lineage that is not known by the forest, we should get an + // error back. + let missing_lineage: LineageId = rng.value(); + let missing_version: VersionId = rng.value(); + let missing_key: Word = rng.value(); + + let result = forest.get(TreeId::new(missing_lineage, missing_version), missing_key); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), LargeSmtForestError::UnknownLineage(l) if l == missing_lineage); + + // Now let's add a lineage with two entries to the forest. + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let key_1: Word = rng.value(); + let value_1_v1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2_v1: Word = rng.value(); + forest.add_lineage( + lineage_1, + version_1, + SmtUpdateBatch::new( + [ + ForestOperation::insert(key_1, value_1_v1), + ForestOperation::insert(key_2, value_2_v1), + ] + .into_iter(), + ), + )?; + + // If we query for a tree with a known lineage but unknown version, we should also get an error + // back. + let missing_tree = TreeId::new(lineage_1, missing_version); + let result = forest.get(missing_tree, missing_key); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), LargeSmtForestError::UnknownTree(t) if t == missing_tree); + + // We should also get an error back if we query for a version that is NEWER than the + // latest-known version. 
+ let too_new_version = version_1 + 1; + let too_new_tree = TreeId::new(lineage_1, too_new_version); + let result = forest.get(too_new_tree, missing_key); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), LargeSmtForestError::UnknownTree(t) if t == too_new_tree); + + // If we query for a key that has never been inserted we want to get back `None`. + let tree_v1 = TreeId::new(lineage_1, version_1); + let non_inserted_key: Word = rng.value(); + assert!(forest.get(tree_v1, non_inserted_key)?.is_none()); + + // But if we query for a key that has been, we should get back the corresponding value. + assert_eq!(forest.get(tree_v1, key_1)?, Some(value_1_v1)); + assert_eq!(forest.get(tree_v1, key_2)?, Some(value_2_v1)); + + // Now let's add another version. + let version_2: VersionId = version_1 + 1; + let value_1_v2: Word = rng.value(); + let key_3: Word = rng.value(); + let value_3_v1: Word = rng.value(); + forest.update_tree( + lineage_1, + version_2, + SmtUpdateBatch::new( + [ + ForestOperation::insert(key_1, value_1_v2), + ForestOperation::insert(key_3, value_3_v1), + ] + .into_iter(), + ), + )?; + + // When we query at the new version we should see the updated values for all extant keys. + let tree_v2 = TreeId::new(lineage_1, version_2); + assert_eq!(forest.get(tree_v2, key_1)?, Some(value_1_v2)); + assert_eq!(forest.get(tree_v2, key_2)?, Some(value_2_v1)); + assert_eq!(forest.get(tree_v2, key_3)?, Some(value_3_v1)); + + // But if we query for the older version we should still see the older values. + assert_eq!(forest.get(tree_v1, key_1)?, Some(value_1_v1)); + assert_eq!(forest.get(tree_v1, key_2)?, Some(value_2_v1)); + assert!(forest.get(tree_v1, key_3)?.is_none()); + + Ok(()) +} + +#[test] +fn entry_count() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x22; 32]); + + // Let's start by adding a lineage with some values. 
+ let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let key_1: Word = rng.value(); + let value_1_v1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2_v1: Word = rng.value(); + let mut key_3: Word = rng.value(); + key_3[3] = key_1[3]; + let value_3_v1: Word = rng.value(); + + let mut operations = SmtUpdateBatch::empty(); + operations.add_insert(key_1, value_1_v1); + operations.add_insert(key_2, value_2_v1); + operations.add_insert(key_3, value_3_v1); + + forest.add_lineage(lineage_1, version_1, operations)?; + + // We'll also update this so we have a historical version in play to be sure things work. + let version_2: VersionId = version_1 + 1; + let value_1_v2: Word = rng.value(); + let mut key_4: Word = rng.value(); + key_4[3] = key_2[3]; + let value_4_v1: Word = rng.value(); + + let mut operations = SmtUpdateBatch::empty(); + operations.add_remove(key_3); + operations.add_insert(key_1, value_1_v2); + operations.add_insert(key_4, value_4_v1); + + forest.update_tree(lineage_1, version_2, operations)?; + + // If we try and get the entry count over a lineage that does not exist we should see an error. + let ne_lineage: LineageId = rng.value(); + match forest.entry_count(TreeId::new(ne_lineage, version_1)) { + Err(e) => assert_matches!(e, LargeSmtForestError::UnknownLineage(l) if l == ne_lineage), + Ok(_) => panic!("Result was not an error"), + }; + + // Similarly, if we try and get the entry count for a nonexistent version in an existing lineage + // we should also see an error. + let tree = TreeId::new(lineage_1, version_1 - 1); + match forest.entry_count(tree) { + Err(e) => assert_matches!(e, LargeSmtForestError::UnknownTree(t) if t == tree), + Ok(_) => panic!("Result was not an error"), + }; + + // We should also get an error back if we query for a version that is NEWER than the + // latest-known version. 
+ let too_new_version = version_2 + 1; + let too_new_tree = TreeId::new(lineage_1, too_new_version); + let result = forest.entry_count(too_new_tree); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), LargeSmtForestError::UnknownTree(t) if t == too_new_tree); + + // If we query for extant trees we should see the correct count regardless of whether it is the + // current tree or a historical tree. + assert_eq!(forest.entry_count(TreeId::new(lineage_1, version_1))?, 3); + assert_eq!(forest.entry_count(TreeId::new(lineage_1, version_2))?, 3); + + Ok(()) +} + +#[test] +fn entries() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x47; 32]); + + // Let's start by adding a lineage with some values. + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let key_1: Word = rng.value(); + let value_1_v1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2_v1: Word = rng.value(); + let mut key_3: Word = rng.value(); + key_3[3] = key_1[3]; + let value_3_v1: Word = rng.value(); + + let mut operations = SmtUpdateBatch::empty(); + operations.add_insert(key_1, value_1_v1); + operations.add_insert(key_2, value_2_v1); + operations.add_insert(key_3, value_3_v1); + + forest.add_lineage(lineage_1, version_1, operations)?; + + // We'll also update this so we have a historical version in play to be sure things work. + let version_2: VersionId = version_1 + 1; + let value_1_v2: Word = rng.value(); + let mut key_4: Word = rng.value(); + key_4[3] = key_2[3]; + let value_4_v1: Word = rng.value(); + + let mut operations = SmtUpdateBatch::empty(); + operations.add_remove(key_3); + operations.add_insert(key_1, value_1_v2); + operations.add_insert(key_4, value_4_v1); + + forest.update_tree(lineage_1, version_2, operations)?; + + // If we try and get entries over a lineage that does not exist we should see an error. 
+ let ne_lineage: LineageId = rng.value(); + match forest.entries(TreeId::new(ne_lineage, version_1)) { + Err(e) => assert_matches!(e, LargeSmtForestError::UnknownLineage(l) if l == ne_lineage), + Ok(_) => panic!("Result was not an error"), + }; + + // Similarly, if we try and get entries for a nonexistent version in an existing lineage we + // should also see an error. + let tree = TreeId::new(lineage_1, version_1 - 1); + match forest.entries(tree) { + Err(e) => assert_matches!(e, LargeSmtForestError::UnknownTree(t) if t == tree), + Ok(_) => panic!("Result was not an error"), + }; + + // We should also get an error back if we query for a version that is NEWER than the + // latest-known version. + let too_new_version = version_2 + 1; + let too_new_tree = TreeId::new(lineage_1, too_new_version); + match forest.entries(too_new_tree) { + Err(e) => assert_matches!(e, LargeSmtForestError::UnknownTree(t) if t == too_new_tree), + Ok(_) => panic!("Result was not an error"), + } + + // Grabbing the entries for the latest version in a lineage should do the right thing. + let current_tree = TreeId::new(lineage_1, version_2); + assert_eq!(forest.entries(current_tree)?.count(), 3); + assert!( + forest + .entries(current_tree)? + .contains(&TreeEntry { key: key_1, value: value_1_v2 }) + ); + assert!( + forest + .entries(current_tree)? + .contains(&TreeEntry { key: key_2, value: value_2_v1 }) + ); + assert!( + forest + .entries(current_tree)? + .contains(&TreeEntry { key: key_4, value: value_4_v1 }) + ); + assert!( + !forest + .entries(current_tree)? + .contains(&TreeEntry { key: key_3, value: value_3_v1 }) + ); + + // If we ask for a historical version, things are more complex but should still work. + let historical_tree = TreeId::new(lineage_1, version_1); + assert_eq!(forest.entries(historical_tree)?.count(), 3); + assert!( + forest + .entries(historical_tree)? + .contains(&TreeEntry { key: key_1, value: value_1_v1 }) + ); + assert!( + forest + .entries(historical_tree)? 
+ .contains(&TreeEntry { key: key_2, value: value_2_v1 }) + ); + assert!( + forest + .entries(historical_tree)? + .contains(&TreeEntry { key: key_3, value: value_3_v1 }) + ); + assert!( + !forest + .entries(historical_tree)? + .contains(&TreeEntry { key: key_4, value: value_4_v1 }) + ); + + Ok(()) +} + +#[test] +fn entries_never_returns_empty_entry() -> Result<()> { + // We risk yielding empty entries in a few situations, but all of those situations involve + // iterating over the history on its own. Let's go through them one by one. + // + // For more detailed testing of this behavior, see the `property_tests`. + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x44; 32]); + + // The FIRST such situation is when the iterator contains _only_ historical entries in its + // remaining tail. We can produce such a state by adding an empty lineage and then setting + // values in that lineage. + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + forest.add_lineage(lineage_1, version_1, SmtUpdateBatch::empty())?; + + // We now set values in that lineage. + let version_2 = version_1 + 1; + let key_1: Word = rng.value(); + let value_1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2: Word = rng.value(); + let operations = SmtUpdateBatch::new( + [ForestOperation::insert(key_1, value_1), ForestOperation::insert(key_2, value_2)] + .into_iter(), + ); + forest.update_tree(lineage_1, version_2, operations)?; + + // At this point, we should see an empty iterator for entries if we query in the history. + let historical_tree = TreeId::new(lineage_1, version_1); + assert_eq!(forest.entries(historical_tree)?.count(), 0); + + // The SECOND scenario is where only some entries are added, so we end up with entire leaves + // that are history only and contain empty values. 
+ let lineage_2: LineageId = rng.value(); + let key_1 = Word::from([1u32, 0, 0, 42]); + let value_1: Word = rng.value(); + forest.add_lineage( + lineage_2, + version_1, + SmtUpdateBatch::new([ForestOperation::insert(key_1, value_1)].into_iter()), + )?; + + // Now we add an update to a different leaf. + let key_2 = Word::from([2u32, 0, 0, 43]); + let value_2: Word = rng.value(); + forest.update_tree( + lineage_2, + version_2, + SmtUpdateBatch::new([ForestOperation::insert(key_2, value_2)].into_iter()), + )?; + + // Now, when we query for entries on the historical version, we should only see one entry, and + // no entries should be the empty word. + let historical_tree = TreeId::new(lineage_2, version_1); + assert_eq!(forest.entries(historical_tree)?.count(), 1); + assert!(forest.entries(historical_tree)?.all(|e| e.value != EMPTY_WORD)); + + // The third scenario is where entries are added within a shared leaf, where we should only see + // the historical leaf entries and not their reversions. + let lineage_3: LineageId = rng.value(); + let key_1 = Word::from([1u32, 0, 0, 42]); + let value_1: Word = rng.value(); + forest.add_lineage( + lineage_3, + version_1, + SmtUpdateBatch::new([ForestOperation::insert(key_1, value_1)].into_iter()), + )?; + + // We now add an update in the same leaf. + let key_2 = Word::from([2u32, 0, 0, 42]); + let value_2: Word = rng.value(); + forest.update_tree( + lineage_3, + version_2, + SmtUpdateBatch::new([ForestOperation::insert(key_2, value_2)].into_iter()), + )?; + + // Now when we query the historical version, we should only see one entry, and no reversions. 
+ let historical_tree = TreeId::new(lineage_3, version_1); + assert_eq!(forest.entries(historical_tree)?.count(), 1); + assert!(forest.entries(historical_tree)?.all(|e| e.value != EMPTY_WORD)); + + Ok(()) +} + +// SINGLE-TREE MODIFIER TESTS +// ================================================================================================ + +#[test] +fn add_lineage() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x42; 32]); + + // We can add an initial lineage to the forest, starting with no changes from the default tree. + let lineage: LineageId = rng.value(); + let version: VersionId = rng.value(); + let result = forest.add_lineage(lineage, version, SmtUpdateBatch::default()); + assert!(result.is_ok()); + + // This should yield the correct value, which we'll check using a Smt. + let tree = Smt::new(); + + let result = result?; + assert_eq!(result.root(), tree.root()); + assert_eq!(result.lineage(), lineage); + assert_eq!(result.version(), version); + + // The newly-added lineage should also not be listed as having a non-empty history. + assert!(!forest.get_non_empty_histories().contains(&lineage)); + + // If we try and add a duplicated lineage again, we should get an error. + let result = forest.add_lineage(lineage, version, SmtUpdateBatch::default()); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), LargeSmtForestError::DuplicateLineage(l) if l == lineage); + + Ok(()) +} + +#[test] +fn update_tree() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x69; 32]); + + // Let's start by adding a lineage to the forest... 
+ let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let key_1: Word = rng.value(); + let value_1: Word = rng.value(); + + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1, value_1); + + let result = forest.add_lineage(lineage_1, version_1, operations)?; + + // ... and creating an auxiliary tree with the same value to check consistency. + let mut tree = Smt::new(); + tree.insert(key_1, value_1)?; + + assert_eq!(result.root(), tree.root()); + + // Initially, this new lineage should not be listed as having a non-empty history. + assert!(!forest.get_non_empty_histories().contains(&lineage_1)); + + // If we try and update a lineage that is unknown, we should see an error. + let unknown_lineage: LineageId = rng.value(); + let result = forest.update_tree(unknown_lineage, version_1, SmtUpdateBatch::default()); + assert!(result.is_err()); + assert_matches!( + result.unwrap_err(), + LargeSmtForestError::UnknownLineage(l) if l == unknown_lineage + ); + + // If we add a version that is older than the latest known version for that lineage, we should + // see an error. + let older_version = version_1 - 1; + let result = forest.update_tree(lineage_1, older_version, SmtUpdateBatch::default()); + assert!(result.is_err()); + assert_matches!( + result.unwrap_err(), + LargeSmtForestError::BadVersion { provided, latest } + if provided == older_version && latest == version_1 + ); + + // Let's create some data and actually add it. + let key_2: Word = rng.value(); + let value_2: Word = rng.value(); + let key_3: Word = rng.value(); + let value_3: Word = rng.value(); + + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_2, value_2); + operations.add_insert(key_3, value_3); + operations.add_remove(key_1); + + let version_2: VersionId = rng.value(); + let result = forest.update_tree(lineage_1, version_2, operations)?; + + // And we can check this against the tree. 
+ let mutations = + tree.compute_mutations(vec![(key_1, EMPTY_WORD), (key_2, value_2), (key_3, value_3)])?; + tree.apply_mutations(mutations)?; + + assert_eq!(result.root(), tree.root()); + + // And we should also now have a history version that corresponds to the previous version, which + // we are going to get at via some test helpers. + let history = forest.get_history(lineage_1); + assert_eq!(history.num_versions(), 1); + + // If we query for each value, we should see the correct reversions. + let view = history.get_view_at(version_1)?; + + assert_eq!(view.leaf_delta(&LeafIndex::from(key_1)).get(&key_1), Some(&value_1)); + assert_eq!(view.leaf_delta(&LeafIndex::from(key_2)).get(&key_2), Some(&EMPTY_WORD)); + assert_eq!(view.leaf_delta(&LeafIndex::from(key_3)).get(&key_3), Some(&EMPTY_WORD)); + + // We should also now see this lineage listed as having a non-empty history. + assert!(forest.get_non_empty_histories().contains(&lineage_1)); + + // Finally, if we provide an update that does not change the tree, the method should succeed but + // not result in any state changes. + assert_eq!(forest.lineage_roots(lineage_1).unwrap().count(), 2); + let empty_ops = SmtUpdateBatch::default(); + let version_3 = version_2 + 1; + forest.update_tree(lineage_1, version_3, empty_ops)?; + assert_eq!(forest.lineage_roots(lineage_1).unwrap().count(), 2); + let history = forest.get_history(lineage_1); + assert_eq!(history.num_versions(), 1); + + Ok(()) +} + +// MULTI-TREE MODIFIER TESTS +// ================================================================================================ + +#[test] +fn update_forest() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x69; 32]); + + // Let's start by adding a few empty lineages to the forest, just so we have a starting point. + // Adding all of these should succeed as they are disjoint lineages. 
+ let version_1: VersionId = rng.value(); + let lineage_1: LineageId = rng.value(); + let lineage_2: LineageId = rng.value(); + let lineage_3: LineageId = rng.value(); + let lineage_4: LineageId = rng.value(); + + let l1_r1 = forest.add_lineage(lineage_1, version_1, SmtUpdateBatch::default())?; + let l2_r1 = forest.add_lineage(lineage_2, version_1, SmtUpdateBatch::default())?; + let l3_r1 = forest.add_lineage(lineage_3, version_1, SmtUpdateBatch::default())?; + let l4_r1 = forest.add_lineage(lineage_4, version_1, SmtUpdateBatch::default())?; + + // Let's compose some updates. + let l1_key_1: Word = rng.value(); + let l1_value_1: Word = rng.value(); + let l2_key_1: Word = rng.value(); + let l2_value_1: Word = rng.value(); + let l3_key_1: Word = rng.value(); + let l3_value_1: Word = rng.value(); + let l4_key_1: Word = rng.value(); + let l4_value_1: Word = rng.value(); + + // First we want to test the case where we refer to a lineage that doesn't exist. In this case, + // we should get an error. + let ne_lineage: LineageId = rng.value(); + let version_bad = version_1 - 1; + let version_2 = version_1 + 1; + let mut operations_ne_lineage = SmtForestUpdateBatch::empty(); + operations_ne_lineage.operations(lineage_1).add_insert(l1_key_1, l1_value_1); + operations_ne_lineage.operations(lineage_2).add_insert(l2_key_1, l2_value_1); + operations_ne_lineage.operations(lineage_3).add_insert(l3_key_1, l3_value_1); + operations_ne_lineage.operations(lineage_4).add_insert(l4_key_1, l4_value_1); + let operations_basic = operations_ne_lineage.clone(); + operations_ne_lineage.operations(ne_lineage); + + let result = forest.update_forest(version_2, operations_ne_lineage); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), LargeSmtForestError::UnknownLineage(l) if l == ne_lineage); + + // When a precondition check like this fails, we should also have unchanged state. 
+ assert_eq!( + forest.root_info(TreeId::new(lineage_1, version_1)), + RootInfo::LatestVersion(l1_r1.root()) + ); + assert_eq!( + forest.root_info(TreeId::new(lineage_2, version_1)), + RootInfo::LatestVersion(l2_r1.root()) + ); + assert_eq!( + forest.root_info(TreeId::new(lineage_3, version_1)), + RootInfo::LatestVersion(l3_r1.root()) + ); + assert_eq!( + forest.root_info(TreeId::new(lineage_4, version_1)), + RootInfo::LatestVersion(l4_r1.root()) + ); + + // We also want to test that we get an error when we ask for a bad version transition. + let result = forest.update_forest(version_bad, operations_basic.clone()); + assert!(result.is_err()); + assert_matches!( + result.unwrap_err(), + LargeSmtForestError::BadVersion { provided, latest } + if provided == version_bad && latest == version_1 + ); + + // This should also leave the internal state unchanged. + assert_eq!( + forest.root_info(TreeId::new(lineage_1, version_1)), + RootInfo::LatestVersion(l1_r1.root()) + ); + assert_eq!( + forest.root_info(TreeId::new(lineage_2, version_1)), + RootInfo::LatestVersion(l2_r1.root()) + ); + assert_eq!( + forest.root_info(TreeId::new(lineage_3, version_1)), + RootInfo::LatestVersion(l3_r1.root()) + ); + assert_eq!( + forest.root_info(TreeId::new(lineage_4, version_1)), + RootInfo::LatestVersion(l4_r1.root()) + ); + + // When a batch goes ahead successfully we should just get back the new roots to the trees, + // which can be associated by their lineages. + let roots = forest.update_forest(version_2, operations_basic)?; + assert_eq!(roots.len(), 4); + + // We can check that the updates went correctly by using auxiliary trees, and checking the + // values in the returned roots. 
+ let mut tree_1 = Smt::new(); + tree_1.insert(l1_key_1, l1_value_1)?; + let mut tree_2 = Smt::new(); + tree_2.insert(l2_key_1, l2_value_1)?; + let mut tree_3 = Smt::new(); + tree_3.insert(l3_key_1, l3_value_1)?; + let mut tree_4 = Smt::new(); + tree_4.insert(l4_key_1, l4_value_1)?; + + assert!(roots.iter().any(|e| e.root() == tree_1.root() + && e.version() == version_2 + && e.lineage() == lineage_1)); + assert!(roots.iter().any(|e| e.root() == tree_2.root() + && e.version() == version_2 + && e.lineage() == lineage_2)); + assert!(roots.iter().any(|e| e.root() == tree_3.root() + && e.version() == version_2 + && e.lineage() == lineage_3)); + assert!(roots.iter().any(|e| e.root() == tree_4.root() + && e.version() == version_2 + && e.lineage() == lineage_4)); + + // We also want to see that each of the updated lineages is now listed as having a non-empty + // history. + assert!(forest.get_non_empty_histories().contains(&lineage_1)); + assert!(forest.get_non_empty_histories().contains(&lineage_2)); + assert!(forest.get_non_empty_histories().contains(&lineage_3)); + assert!(forest.get_non_empty_histories().contains(&lineage_4)); + + // We also want to see that if a batch is processed that does not result in changes for a given + // tree, no state changes are made to that lineage. We check both the case where there are + // operations that result in no changes, and where no operations are specified. + let version_3 = version_2 + 1; + let key_5: Word = rng.value(); + let value_5: Word = rng.value(); + let mut operations_with_nop = SmtForestUpdateBatch::empty(); + operations_with_nop.operations(lineage_1).add_insert(l1_key_1, l1_value_1); + operations_with_nop.operations(lineage_2); + operations_with_nop.operations(lineage_3).add_insert(key_5, value_5); + + // Before we make these batches happen, let's check where things stand. 
+ assert_eq!(forest.lineage_roots(lineage_1).unwrap().count(), 2); + assert_eq!(forest.lineage_roots(lineage_2).unwrap().count(), 2); + assert_eq!(forest.lineage_roots(lineage_3).unwrap().count(), 2); + assert_eq!(forest.lineage_roots(lineage_4).unwrap().count(), 2); + + // Then we should apply the batch. + let roots = forest.update_forest(version_3, operations_with_nop)?; + assert_eq!(roots.len(), 3); + + // And for the no-op or unchanged cases we should not have new roots. + assert_eq!(forest.lineage_roots(lineage_1).unwrap().count(), 2); + assert_eq!(forest.lineage_roots(lineage_2).unwrap().count(), 2); + assert_eq!(forest.lineage_roots(lineage_3).unwrap().count(), 3); + assert_eq!(forest.lineage_roots(lineage_4).unwrap().count(), 2); + + Ok(()) +} + +// TRUNCATION +// ================================================================================================ + +#[test] +fn truncate_removes_emptied_lineages_from_non_empty_histories() { + let lineage: LineageId = rand_value(); + let root: Word = rand_value(); + + // Build a lineage with one historical version at version 5, and a latest version of 10. + let mut history = History::empty(4); + let nodes = NodeChanges::default(); + let leaves = LeafChanges::default(); + history.add_version(rand_value(), 5, nodes, leaves).unwrap(); + assert_eq!(history.num_versions(), 1); + + let lineage_data = LineageData { + history, + latest_version: 10, + latest_root: root, + }; + + let mut lineage_map = Map::default(); + lineage_map.insert(lineage, lineage_data); + + let mut non_empty = Set::default(); + non_empty.insert(lineage); + + let mut forest = LargeSmtForest { + config: Config::default(), + backend: ForestInMemoryBackend::new(), + lineage_data: lineage_map, + non_empty_histories: non_empty, + }; + + // Sanity: the lineage is tracked as having a non-empty history. + assert!(forest.non_empty_histories.contains(&lineage)); + + // Truncate to a version >= latest_version, which clears the history entirely. 
+ forest.truncate(10); + + // The lineage's history should now be empty, and it must have been removed from the set. + assert!( + !forest.non_empty_histories.contains(&lineage), + "emptied lineage must be removed from non_empty_histories after truncation" + ); +} + +#[test] +fn truncate_retains_non_empty_lineages_in_non_empty_histories() { + let lineage: LineageId = rand_value(); + let root: Word = rand_value(); + + // Build a lineage with two historical versions (5 and 8), latest version 15. + let mut history = History::empty(4); + let nodes = NodeChanges::default(); + let leaves = LeafChanges::default(); + history.add_version(rand_value(), 5, nodes.clone(), leaves.clone()).unwrap(); + history.add_version(rand_value(), 8, nodes, leaves).unwrap(); + assert_eq!(history.num_versions(), 2); + + let lineage_data = LineageData { + history, + latest_version: 15, + latest_root: root, + }; + + let mut lineage_map = Map::new(); + lineage_map.insert(lineage, lineage_data); + + let mut non_empty = Set::default(); + non_empty.insert(lineage); + + let mut forest = LargeSmtForest { + config: Config::default(), + backend: ForestInMemoryBackend::new(), + lineage_data: lineage_map, + non_empty_histories: non_empty, + }; + + // Truncate to version 7: removes versions older than 7, but version 8 should remain. + // Since version < latest_version (15), LineageData::truncate returns false. + forest.truncate(7); + + // The history still has data, so the lineage must stay in non_empty_histories. + assert!( + forest.non_empty_histories.contains(&lineage), + "lineage with remaining history must stay in non_empty_histories" + ); +} diff --git a/miden-crypto/src/merkle/smt/large_forest/utils.rs b/miden-crypto/src/merkle/smt/large_forest/utils.rs new file mode 100644 index 000000000..ff0a2f4bf --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/utils.rs @@ -0,0 +1,10 @@ +//! Contains utility type aliases and functions for use as part of the SMT forest. 
+ +use crate::{Word, merkle::smt::full::SMT_DEPTH}; + +// TYPE ALIASES +// ================================================================================================ + +/// The mutation set used by the forest backends to provide reverse mutations that describe the +/// changes necessary to revert the tree to its previous state. +pub type MutationSet = crate::merkle::smt::MutationSet; diff --git a/miden-crypto/src/merkle/smt/mod.rs b/miden-crypto/src/merkle/smt/mod.rs index 5648d99c5..2dcf4ea8e 100644 --- a/miden-crypto/src/merkle/smt/mod.rs +++ b/miden-crypto/src/merkle/smt/mod.rs @@ -26,6 +26,15 @@ pub use large::{ #[cfg(feature = "rocksdb")] pub use large::{RocksDbConfig, RocksDbStorage}; +mod large_forest; +pub use large_forest::{ + Backend, BackendError, Config as ForestConfig, + DEFAULT_MAX_HISTORY_VERSIONS as FOREST_DEFAULT_MAX_HISTORY_VERSIONS, ForestOperation, + InMemoryBackend as ForestInMemoryBackend, LargeSmtForest, LargeSmtForestError, LineageId, + MIN_HISTORY_VERSIONS as FOREST_MIN_HISTORY_VERSIONS, RootInfo, SmtForestUpdateBatch, + SmtUpdateBatch, TreeEntry, TreeId, TreeWithRoot, VersionId, +}; + mod simple; pub use simple::{SimpleSmt, SimpleSmtProof}; @@ -546,9 +555,13 @@ impl LeafIndex { } /// Returns the numeric value of this leaf index. 
- pub fn value(&self) -> u64 { + pub fn position(&self) -> u64 { self.index.value() } + + pub fn value(&self) -> u64 { + self.position() + } } impl LeafIndex { @@ -595,7 +608,7 @@ impl Deserializable for LeafIndex { impl Display for LeafIndex { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "DEPTH={}, value={}", DEPTH, self.value()) + write!(f, "DEPTH={}, position={}", DEPTH, self.position()) } } @@ -660,6 +673,12 @@ impl MutationSet { pub fn new_pairs(&self) -> &Map { &self.new_pairs } + + pub fn is_empty(&self) -> bool { + self.node_mutations.is_empty() + && self.new_pairs.is_empty() + && self.old_root == self.new_root + } } // SERIALIZATION diff --git a/miden-crypto/src/rand/mod.rs b/miden-crypto/src/rand/mod.rs index 6e371a2d7..f4e0a6a4f 100644 --- a/miden-crypto/src/rand/mod.rs +++ b/miden-crypto/src/rand/mod.rs @@ -11,6 +11,9 @@ mod rpx; pub use rpo::RpoRandomCoin; pub use rpx::RpxRandomCoin; +#[cfg(any(test, feature = "std"))] +pub mod test_utils; + /// Pseudo-random element generator. /// /// An instance can be used to draw, uniformly at random, base field elements as well as [Word]s. diff --git a/miden-crypto/src/rand/test_utils.rs b/miden-crypto/src/rand/test_utils.rs new file mode 100644 index 000000000..1a5bef3eb --- /dev/null +++ b/miden-crypto/src/rand/test_utils.rs @@ -0,0 +1,145 @@ +//! Test and benchmark utilities for generating random data. +//! +//! This module provides helper functions for tests and benchmarks that need +//! random data generation. These functions replace the functionality previously +//! provided by winter-rand-utils. +//! +//! # no_std Compatibility +//! +//! This module provides both `std`-dependent and `no_std`-compatible functions: +//! +//! - **`std` required**: [`rand_value`], [`rand_array`], [`rand_vector`] use the thread-local RNG +//! and require the `std` feature. +//! - **`no_std` compatible**: [`seeded_rng`], [`prng_array`], [`prng_vector`] use deterministic +//! 
seeded PRNGs and work in `no_std` environments. +//! +//! For tests that should run in `no_std` mode, prefer using [`seeded_rng`] to obtain +//! a deterministic RNG instead of `rand::rng()`. + +use alloc::{vec, vec::Vec}; + +use rand::{Rng, SeedableRng}; +use rand_chacha::ChaCha20Rng; + +use crate::rand::Randomizable; + +/// Creates a deterministic seeded RNG suitable for tests. +/// +/// This function returns a ChaCha20 PRNG seeded with the provided seed, providing +/// deterministic random number generation that works in `no_std` environments. +/// +/// # Examples +/// ``` +/// # use miden_crypto::rand::test_utils::seeded_rng; +/// let mut rng = seeded_rng([0u8; 32]); +/// // Use rng with any function that accepts impl RngCore +/// ``` +pub fn seeded_rng(seed: [u8; 32]) -> ChaCha20Rng { + ChaCha20Rng::from_seed(seed) +} + +/// Generates a random value of type T from an RNG. +fn rng_value(rng: &mut impl Rng) -> T { + let mut bytes = vec![0u8; T::VALUE_SIZE]; + rng.fill(&mut bytes[..]); + T::from_random_bytes(&bytes).expect("failed to generate random value") +} + +/// Generates a random value of type T using the thread-local random number generator. +/// +/// # Examples +/// ``` +/// # use miden_crypto::rand::test_utils::rand_value; +/// let x: u64 = rand_value(); +/// let y: u128 = rand_value(); +/// ``` +pub fn rand_value() -> T { + rng_value(&mut rand::rng()) +} + +/// Generates a random array of type T with N elements. +/// +/// # Examples +/// ``` +/// # use miden_crypto::rand::test_utils::rand_array; +/// let arr: [u64; 4] = rand_array(); +/// ``` +#[cfg(feature = "std")] +pub fn rand_array() -> [T; N] { + let mut rng = rand::rng(); + core::array::from_fn(|_| rng_value(&mut rng)) +} + +/// Generates a random vector of type T with the specified length. 
+/// +/// # Examples +/// ``` +/// # use miden_crypto::rand::test_utils::rand_vector; +/// let vec: Vec = rand_vector(100); +/// ``` +#[cfg(feature = "std")] +pub fn rand_vector(length: usize) -> Vec { + let mut rng = rand::rng(); + (0..length).map(|_| rng_value(&mut rng)).collect() +} + +/// Generates a deterministic value using a PRNG seeded with the provided seed. +/// +/// This function uses ChaCha20 PRNG for deterministic random generation, which is +/// useful for reproducible tests and benchmarks. +/// +/// # Examples +/// ``` +/// # use miden_crypto::rand::test_utils::prng_value; +/// let seed = [0u8; 32]; +/// let val: u64 = prng_value(seed); +/// ``` +pub fn prng_value(seed: [u8; 32]) -> T { + rng_value(&mut seeded_rng(seed)) +} + +/// Generates a deterministic array using a PRNG seeded with the provided seed. +/// +/// # Examples +/// ``` +/// # use miden_crypto::rand::test_utils::prng_array; +/// let seed = [0u8; 32]; +/// let arr: [u64; 4] = prng_array(seed); +/// ``` +pub fn prng_array(seed: [u8; 32]) -> [T; N] { + let mut rng = seeded_rng(seed); + core::array::from_fn(|_| rng_value(&mut rng)) +} + +/// Generates a deterministic vector using a PRNG seeded with the provided seed. +/// +/// # Examples +/// ``` +/// # use miden_crypto::rand::test_utils::prng_vector; +/// let seed = [0u8; 32]; +/// let vec: Vec = prng_vector(seed, 100); +/// ``` +pub fn prng_vector(seed: [u8; 32], length: usize) -> Vec { + let mut rng = seeded_rng(seed); + (0..length).map(|_| rng_value(&mut rng)).collect() +} + +// CONTINUOUS RNG +// ================================================================================================ + +/// A continuous random number generator that works in `no-std` contexts. +#[derive(Debug)] +pub struct ContinuousRng { + rng: ChaCha20Rng, +} +impl ContinuousRng { + /// Creates a new instance of the random number generator from the seed. 
+ pub fn new(seed: [u8; 32]) -> ContinuousRng { + ContinuousRng { rng: ChaCha20Rng::from_seed(seed) } + } + + /// Generates a random value of the [`Randomizable`] type `T`. + pub fn value(&mut self) -> T { + rng_value(&mut self.rng) + } +}