diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 000000000..444acfba6 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,52 @@ +# Generates and deploys crate documentation to GitHub Pages + +name: docs + +on: + push: + branches: + - next + paths: + - "**.rs" + - "Cargo.toml" + - "Cargo.lock" + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: write + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + docs: + name: Generate and deploy crate documentation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@main + + - name: Cleanup large tools for build space + uses: ./.github/actions/cleanup-runner + + - name: Install LLVM/Clang + uses: ./.github/actions/install-llvm + with: + version: "17" + + - name: Generate documentation + run: | + rustup update --no-self-update + rustup default stable + make doc + + - name: Deploy documentation + uses: peaceiris/actions-gh-pages@373f7f263a76c20808c831209c920827a82a2847 # pin@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./target/doc + destination_dir: docs diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index cb39c35e3..0d324e3dc 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -23,6 +23,7 @@ jobs: runs-on: ubuntu-latest strategy: fail-fast: false + max-parallel: 1 matrix: target: [primitives, collections, string, vint64, goldilocks, budgeted] timeout-minutes: 15 @@ -31,19 +32,22 @@ jobs: - name: Cleanup large tools for build space uses: ./.github/actions/cleanup-runner - uses: dtolnay/rust-toolchain@nightly + with: + toolchain: nightly - uses: Swatinem/rust-cache@v2 - name: Install cargo-fuzz - run: cargo install cargo-fuzz --locked + run: cargo +nightly install 
cargo-fuzz --locked - name: Run fuzz target (smoke test) working-directory: miden-serde-utils run: | - cargo fuzz run ${{ matrix.target }} -- -max_total_time=60 -runs=10000 + cargo +nightly fuzz run ${{ matrix.target }} -- -max_total_time=60 -runs=10000 fuzz-miden-crypto: name: fuzz miden-crypto (${{ matrix.target }}) runs-on: ubuntu-latest strategy: fail-fast: false + max-parallel: 1 matrix: target: [word, merkle, smt_serde] timeout-minutes: 15 @@ -52,12 +56,14 @@ jobs: - name: Cleanup large tools for build space uses: ./.github/actions/cleanup-runner - uses: dtolnay/rust-toolchain@nightly + with: + toolchain: nightly - uses: Swatinem/rust-cache@v2 - name: Install cargo-fuzz - run: cargo install cargo-fuzz --locked + run: cargo +nightly install cargo-fuzz --locked - name: Run fuzz target (smoke test) run: | # Build the fuzz target first - cargo fuzz build --fuzz-dir miden-crypto-fuzz ${{ matrix.target }} + cargo +nightly fuzz build --fuzz-dir miden-crypto-fuzz ${{ matrix.target }} # Run directly to avoid cargo-fuzz wrapper SIGPIPE issue miden-crypto-fuzz/target/x86_64-unknown-linux-gnu/release/${{ matrix.target }} -max_total_time=60 -runs=10000 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2d6b3a00a..05936a19f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -25,7 +25,7 @@ jobs: matrix: toolchain: [stable, nightly] os: [ubuntu] - args: [default, hashmaps, no-std, large-smt] + args: [default, no-std, large-smt] timeout-minutes: 30 steps: - uses: actions/checkout@main diff --git a/CHANGELOG.md b/CHANGELOG.md index 376952ed4..79a9057ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +## 0.23.0 (TBD) + +- [BREAKING] `PartialMmr::open()` now returns `Option` instead of `Option` ([#787](https://github.com/0xMiden/crypto/pull/787)). +- Fixed `SmtForest` to remove nodes with zero reference count from store ([#821](https://github.com/0xMiden/crypto/pull/821)). 
+- [BREAKING] Refactored BLAKE3 to use `Digest` struct, added `Digest192` type alias ([#811](https://github.com/0xMiden/crypto/pull/811)). +- [BREAKING] Removed `hashbrown` dependency and `hashmaps` feature; `Map`/`Set` type aliases are now tied to the `std` feature ([#813](https://github.com/0xMiden/crypto/pull/813)). +- [BREAKING] Renamed `NodeIndex::value()` to `NodeIndex::position()`, `NodeIndex::is_value_odd()` to `NodeIndex::is_position_odd()`, and `LeafIndex::value()` to `LeafIndex::position()` ([#814](https://github.com/0xMiden/crypto/pull/814)). +- Fixed tuple `min_serialized_size()` to exclude alignment padding, fixing `BudgetedReader` rejecting valid data ([#827](https://github.com/0xMiden/crypto/pull/827)). + ## 0.22.2 (2026-02-01) - Re-exported `p3_keccak::VECTOR_LEN`. diff --git a/Cargo.lock b/Cargo.lock index 950d51c26..d8dbe25d4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -21,12 +21,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "allocator-api2" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" - [[package]] name = "anes" version = "0.1.6" @@ -190,9 +184,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.54" +version = "1.2.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6354c81bbfd62d9cfa9cb3c773c2b7b2a3a482d569de977fd0e961f6e7c00583" +checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" dependencies = [ "find-msvc-tools", "jobserver", @@ -290,9 +284,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.55" +version = "4.5.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e34525d5bbbd55da2bb745d34b36121baac88d07619a9a09cfcf4a6c0832785" +checksum = "a75ca66430e33a14957acc24c5077b503e7d374151b2b4b3a10c83b4ceb4be0e" dependencies = [ "clap_builder", 
"clap_derive", @@ -300,9 +294,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.55" +version = "4.5.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59a20016a20a3da95bef50ec7238dbd09baeef4311dcdd38ec15aba69812fb61" +checksum = "793207c7fa6300a0608d1080b858e5fdbe713cdc1c8db9fb17777d8a13e63df0" dependencies = [ "anstream", "anstyle", @@ -595,9 +589,9 @@ checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" [[package]] name = "find-msvc-tools" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "flume" @@ -611,12 +605,6 @@ dependencies = [ "spin 0.9.8", ] -[[package]] -name = "foldhash" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" - [[package]] name = "futures-core" version = "0.3.31" @@ -735,14 +723,6 @@ name = "hashbrown" version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" -dependencies = [ - "allocator-api2", - "equivalent", - "foldhash", - "rayon", - "serde", - "serde_core", -] [[package]] name = "heck" @@ -946,7 +926,7 @@ checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "miden-crypto" -version = "0.22.2" +version = "0.23.0" dependencies = [ "assert_matches", "blake3", @@ -958,7 +938,6 @@ dependencies = [ "ed25519-dalek", "flume", "glob", - "hashbrown", "hex", "hkdf", "itertools 0.14.0", @@ -1004,7 +983,7 @@ dependencies = [ [[package]] name = "miden-crypto-derive" -version = "0.22.2" +version = "0.23.0" dependencies = [ "quote", "syn", @@ -1012,7 +991,7 @@ dependencies = [ [[package]] name 
= "miden-serde-utils" -version = "0.22.2" +version = "0.23.0" dependencies = [ "p3-field", "p3-goldilocks", @@ -2293,18 +2272,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.34" +version = "0.8.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71ddd76bcebeed25db614f82bf31a9f4222d3fbba300e6fb6c00afa26cbd4d9d" +checksum = "dafd85c832c1b68bbb4ec0c72c7f6f4fc5179627d2bc7c26b30e4c0cc11e76cc" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.34" +version = "0.8.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8187381b52e32220d50b255276aa16a084ec0a9017a0ca2152a1f55c539758d" +checksum = "7cb7e4e8436d9db52fbd6625dbf2f45243ab84994a72882ec8227b99e72b439a" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index e08a2245b..23786e671 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,11 +11,11 @@ keywords = ["crypto", "hash", "merkle", "miden"] license = "MIT OR Apache-2.0" repository = "https://github.com/0xMiden/crypto" rust-version = "1.90" -version = "0.22.2" +version = "0.23.0" [workspace.dependencies] -miden-crypto-derive = { path = "miden-crypto-derive", version = "0.22" } -miden-serde-utils = { path = "miden-serde-utils", version = "0.22" } +miden-crypto-derive = { path = "miden-crypto-derive", version = "0.23" } +miden-serde-utils = { path = "miden-serde-utils", version = "0.23" } [workspace.lints.rust] # Suppress warnings about `cfg(fuzzing)`, which is automatically set when using `cargo-fuzz`. 
diff --git a/Makefile b/Makefile index ff45aa857..537f1674c 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ help: # -- variables -------------------------------------------------------------------------------------- -ALL_FEATURES_EXCEPT_ROCKSDB="concurrent executable hashmaps internal serde std" +ALL_FEATURES_EXCEPT_ROCKSDB="concurrent executable internal serde std" WARNINGS=RUSTDOCFLAGS="-D warnings" # -- linting -------------------------------------------------------------------------------------- @@ -60,8 +60,9 @@ lint: format fix clippy toml typos-check machete cargo-deny ## Run all linting t # --- docs ---------------------------------------------------------------------------------------- .PHONY: doc -doc: ## Generate and check documentation - $(WARNINGS) cargo doc --all-features --keep-going --release +doc: ## Generate and check documentation for workspace crates only + rm -rf "${CARGO_TARGET_DIR:-target}/doc" + RUSTDOCFLAGS="--enable-index-page -Zunstable-options -D warnings" cargo +nightly doc --all-features --keep-going --release --no-deps # --- testing ------------------------------------------------------------------------------------- @@ -69,10 +70,6 @@ doc: ## Generate and check documentation test-default: ## Run tests with default features cargo nextest run --profile default --cargo-profile test-release --features ${ALL_FEATURES_EXCEPT_ROCKSDB} -.PHONY: test-hashmaps -test-hashmaps: ## Run tests with `hashmaps` feature enabled - cargo nextest run --profile default --cargo-profile test-release --features hashmaps - .PHONY: test-no-std test-no-std: ## Run tests with `no-default-features` (std) cargo nextest run --profile default --cargo-profile test-release --no-default-features @@ -87,10 +84,10 @@ test-docs: .PHONY: test-large-smt test-large-smt: ## Run only large SMT tests - cargo nextest run --success-output immediate --profile large-smt --cargo-profile test-release --features hashmaps,rocksdb + cargo nextest run --success-output immediate 
--profile large-smt --cargo-profile test-release --features rocksdb .PHONY: test -test: test-default test-hashmaps test-no-std test-docs test-large-smt ## Run all tests except concurrent SMT tests +test: test-default test-no-std test-docs test-large-smt ## Run all tests except concurrent SMT tests # --- checking ------------------------------------------------------------------------------------ @@ -136,15 +133,15 @@ bench-smt-concurrent: ## Run SMT benchmarks with concurrent feature .PHONY: bench-large-smt-memory bench-large-smt-memory: ## Run large SMT benchmarks with memory storage - cargo run --release --features concurrent,hashmaps,executable -- --size 1000000 + cargo run --release --features concurrent,executable -- --size 1000000 .PHONY: bench-large-smt-rocksdb bench-large-smt-rocksdb: ## Run large SMT benchmarks with rocksdb storage - cargo run --release --features concurrent,hashmaps,rocksdb,executable -- --storage rocksdb --size 1000000 + cargo run --release --features concurrent,rocksdb,executable -- --storage rocksdb --size 1000000 .PHONY: bench-large-smt-rocksdb-open bench-large-smt-rocksdb-open: ## Run large SMT benchmarks with rocksdb storage and open existing database - cargo run --release --features concurrent,hashmaps,rocksdb,executable -- --storage rocksdb --open + cargo run --release --features concurrent,rocksdb,executable -- --storage rocksdb --open # --- fuzzing -------------------------------------------------------------------------------- diff --git a/README.md b/README.md index 5cdfa24ac..8d9ce9e39 100644 --- a/README.md +++ b/README.md @@ -142,7 +142,6 @@ This crate can be compiled with the following features: - `concurrent`- enabled by default; enables multi-threaded implementation of `Smt::with_entries()` which significantly improves performance on multi-core CPUs. - `std` - enabled by default and relies on the Rust standard library. - `no_std` does not rely on the Rust standard library and enables compilation to WebAssembly. 
-- `hashmaps` - uses hashbrown hashmaps in SMT and Merkle Store implementation which significantly improves performance of updates. Keys ordering in iterators is not guaranteed when this feature is enabled. - `rocksdb` - enables the RocksDB-backed storage for `LargeSmt` and related utilities. Implies `concurrent`. All of these features imply the use of [alloc](https://doc.rust-lang.org/alloc/) to support heap-allocated collections. diff --git a/miden-crypto/Cargo.toml b/miden-crypto/Cargo.toml index 2c55d4373..faa9302f1 100644 --- a/miden-crypto/Cargo.toml +++ b/miden-crypto/Cargo.toml @@ -70,23 +70,15 @@ name = "rand" required-features = ["std"] [features] -concurrent = [ - "dep:rayon", - "hashbrown?/rayon", - "p3-maybe-rayon/parallel", - "p3-miden-prover/parallel", - "p3-util/parallel", - "std", -] -default = ["concurrent", "std"] +concurrent = ["dep:rayon", "p3-maybe-rayon/parallel", "p3-miden-prover/parallel", "p3-util/parallel", "std"] +default = ["concurrent", "std"] executable = ["concurrent", "dep:clap", "dep:rand-utils"] -fuzzing = [] -hashmaps = ["dep:hashbrown"] -internal = ["concurrent"] -rocksdb = ["concurrent", "dep:rocksdb"] -serde = ["dep:serde", "serde?/alloc"] -std = ["blake3/std", "dep:cc", "miden-serde-utils/std", "rand/std", "rand/thread_rng"] -testing = ["dep:proptest"] +fuzzing = [] +internal = ["concurrent"] +rocksdb = ["concurrent", "dep:rocksdb"] +serde = ["dep:serde", "serde?/alloc"] +std = ["blake3/std", "dep:cc", "miden-serde-utils/std", "rand/std", "rand/thread_rng", "serde?/std"] +testing = ["dep:proptest"] [dependencies] blake3 = { default-features = false, version = "1.8" } @@ -95,7 +87,6 @@ clap = { features = ["derive"], optional = true, versio curve25519-dalek = { default-features = false, version = "4" } ed25519-dalek = { features = ["zeroize"], version = "2" } flume = { version = "0.11.1" } -hashbrown = { features = ["serde"], optional = true, version = "0.16" } hkdf = { default-features = false, version = "0.12" } k256 = { 
features = ["ecdh", "ecdsa"], version = "0.13" } miden-crypto-derive.workspace = true diff --git a/miden-crypto/benches/README.md b/miden-crypto/benches/README.md index bbb97ac4e..28b4d9848 100644 --- a/miden-crypto/benches/README.md +++ b/miden-crypto/benches/README.md @@ -84,7 +84,7 @@ For each algorithm, we benchmark three core operations: ### Sparse Merkle Tree -We build cryptographic data structures incorporating these hash functions. What follows are benchmarks of operations on sparse Merkle trees (SMTs) which use the `Poseidon2` hash function. We perform a batched modification of 1,000 values in a tree with 1,000,000 leaves (with the `hashmaps` feature to use the `hashbrown` crate). +We build cryptographic data structures incorporating these hash functions. What follows are benchmarks of operations on sparse Merkle trees (SMTs) which use the `Poseidon2` hash function. We perform a batched modification of 1,000 values in a tree with 1,000,000 leaves. ### Scenario 1: SMT Construction (1M pairs) @@ -155,7 +155,7 @@ cargo run --features=executable The `concurrent` feature enables the concurrent benchmark, and is enabled by default. To run a sequential benchmark, disable the crate's default features: ``` -cargo run --no-default-features --features=executable,hashmaps +cargo run --no-default-features --features=executable,std ``` The benchmark parameters may also be customized with the `-s`/`--size`, `-i`/`--insertions`, and `-u`/`--updates` options. diff --git a/miden-crypto/benches/smt.rs b/miden-crypto/benches/smt.rs index ce8766a34..f08be1473 100644 --- a/miden-crypto/benches/smt.rs +++ b/miden-crypto/benches/smt.rs @@ -464,7 +464,7 @@ benchmark_multi! { .iter() .map(|(key, value)| { let leaf = SmtLeaf::new_single(*key, *value); - let col = leaf.index().value(); + let col = leaf.index().position(); let hash = leaf.hash(); SubtreeLeaf { col, hash } }) @@ -509,7 +509,7 @@ benchmark_multi! 
{ .iter() .map(|(key, value)| { let leaf = SmtLeaf::new_single(*key, *value); - let col = leaf.index().value(); + let col = leaf.index().position(); let hash = leaf.hash(); SubtreeLeaf { col, hash } }) diff --git a/miden-crypto/benches/store.rs b/miden-crypto/benches/store.rs index c198f3fbd..d75c775b3 100644 --- a/miden-crypto/benches/store.rs +++ b/miden-crypto/benches/store.rs @@ -293,7 +293,7 @@ fn get_leaf_path_simplesmt(c: &mut Criterion) { b.iter_batched( || random_index(size_u64, SMT_MAX_DEPTH), |index| { - black_box(smt.open(&LeafIndex::::new(index.value()).unwrap())) + black_box(smt.open(&LeafIndex::::new(index.position()).unwrap())) }, BatchSize::SmallInput, ) diff --git a/miden-crypto/src/hash/blake/mod.rs b/miden-crypto/src/hash/blake/mod.rs index 429058584..481409f17 100644 --- a/miden-crypto/src/hash/blake/mod.rs +++ b/miden-crypto/src/hash/blake/mod.rs @@ -1,14 +1,12 @@ -use alloc::string::String; -use core::{mem::size_of, ops::Deref, slice}; +use core::mem::size_of; -use super::HasherExt; +use super::{ + HasherExt, + digest::{Digest, Digest192, Digest256}, +}; use crate::{ Felt, field::{BasedVectorSpace, PrimeField64}, - utils::{ - ByteReader, ByteWriter, Deserializable, DeserializationError, HexParseError, Serializable, - bytes_to_hex_string, hex_to_bytes, - }, }; #[cfg(test)] @@ -20,90 +18,11 @@ mod tests; /// Re-export of the Blake3 hasher from Plonky3 for use in the prover config downstream. pub use p3_blake3::Blake3 as Blake3Hasher; -// CONSTANTS +// TYPE ALIASES // ================================================================================================ -const DIGEST32_BYTES: usize = 32; -const DIGEST24_BYTES: usize = 24; - -// BLAKE3 N-BIT OUTPUT -// ================================================================================================ - -/// N-bytes output of a blake3 function. -/// -/// Note: `N` can't be greater than `32` because [`Blake3Digest::as_bytes`] currently supports only -/// 32 bytes. 
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)] -#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] -#[cfg_attr(feature = "serde", serde(into = "String", try_from = "&str"))] -#[repr(transparent)] -pub struct Blake3Digest([u8; N]); - -impl Blake3Digest { - pub fn as_bytes(&self) -> [u8; 32] { - // compile-time assertion - assert!(N <= 32, "digest currently supports only 32 bytes!"); - expand_bytes(&self.0) - } - - pub fn digests_as_bytes(digests: &[Blake3Digest]) -> &[u8] { - let p = digests.as_ptr(); - let len = digests.len() * N; - unsafe { slice::from_raw_parts(p as *const u8, len) } - } -} - -impl Default for Blake3Digest { - fn default() -> Self { - Self([0; N]) - } -} - -impl Deref for Blake3Digest { - type Target = [u8]; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl From> for [u8; N] { - fn from(value: Blake3Digest) -> Self { - value.0 - } -} - -impl From<[u8; N]> for Blake3Digest { - fn from(value: [u8; N]) -> Self { - Self(value) - } -} - -impl From> for String { - fn from(value: Blake3Digest) -> Self { - bytes_to_hex_string(value.as_bytes()) - } -} - -impl TryFrom<&str> for Blake3Digest { - type Error = HexParseError; - - fn try_from(value: &str) -> Result { - hex_to_bytes(value).map(|v| v.into()) - } -} - -impl Serializable for Blake3Digest { - fn write_into(&self, target: &mut W) { - target.write_bytes(&self.0); - } -} - -impl Deserializable for Blake3Digest { - fn read_from(source: &mut R) -> Result { - source.read_array().map(Self) - } -} +/// Alias for the generic `Digest` type, for consistency with other hash modules. 
+pub type Blake3Digest = Digest; // BLAKE3 256-BIT OUTPUT // ================================================================================================ @@ -113,14 +32,14 @@ impl Deserializable for Blake3Digest { pub struct Blake3_256; impl HasherExt for Blake3_256 { - type Digest = Blake3Digest<32>; + type Digest = Digest256; fn hash_iter<'a>(slices: impl Iterator) -> Self::Digest { let mut hasher = blake3::Hasher::new(); for slice in slices { hasher.update(slice); } - Blake3Digest(hasher.finalize().into()) + Digest::new(hasher.finalize().into()) } } @@ -128,37 +47,37 @@ impl Blake3_256 { /// Blake3 collision resistance is 128-bits for 32-bytes output. pub const COLLISION_RESISTANCE: u32 = 128; - pub fn hash(bytes: &[u8]) -> Blake3Digest<32> { - Blake3Digest(blake3::hash(bytes).into()) + pub fn hash(bytes: &[u8]) -> Digest256 { + Digest::new(blake3::hash(bytes).into()) } // Note: merge/merge_many/merge_with_int methods were previously trait delegations // (::merge). They're now direct implementations as part of removing // the Winterfell Hasher trait dependency. These are public API used in benchmarks. 
- pub fn merge(values: &[Blake3Digest<32>; 2]) -> Blake3Digest<32> { - Self::hash(Blake3Digest::digests_as_bytes(values)) + pub fn merge(values: &[Digest256; 2]) -> Digest256 { + Self::hash(Digest::digests_as_bytes(values)) } - pub fn merge_many(values: &[Blake3Digest<32>]) -> Blake3Digest<32> { - Blake3Digest(blake3::hash(Blake3Digest::digests_as_bytes(values)).into()) + pub fn merge_many(values: &[Digest256]) -> Digest256 { + Digest::new(blake3::hash(Digest::digests_as_bytes(values)).into()) } - pub fn merge_with_int(seed: Blake3Digest<32>, value: u64) -> Blake3Digest<32> { + pub fn merge_with_int(seed: Digest256, value: u64) -> Digest256 { let mut hasher = blake3::Hasher::new(); - hasher.update(&seed.0); + hasher.update(seed.as_bytes()); hasher.update(&value.to_le_bytes()); - Blake3Digest(hasher.finalize().into()) + Digest::new(hasher.finalize().into()) } /// Returns a hash of the provided field elements. #[inline(always)] - pub fn hash_elements>(elements: &[E]) -> Blake3Digest<32> { - Blake3Digest(hash_elements(elements)) + pub fn hash_elements>(elements: &[E]) -> Digest256 { + Digest::new(hash_elements(elements)) } /// Hashes an iterator of byte slices. #[inline(always)] - pub fn hash_iter<'a>(slices: impl Iterator) -> Blake3Digest { + pub fn hash_iter<'a>(slices: impl Iterator) -> Digest256 { ::hash_iter(slices) } } @@ -171,14 +90,14 @@ impl Blake3_256 { pub struct Blake3_192; impl HasherExt for Blake3_192 { - type Digest = Blake3Digest<24>; + type Digest = Digest192; fn hash_iter<'a>(slices: impl Iterator) -> Self::Digest { let mut hasher = blake3::Hasher::new(); for slice in slices { hasher.update(slice); } - Blake3Digest(shrink_array(hasher.finalize().into())) + Digest::new(shrink_array(hasher.finalize().into())) } } @@ -186,36 +105,36 @@ impl Blake3_192 { /// Blake3 collision resistance is 96-bits for 24-bytes output. 
pub const COLLISION_RESISTANCE: u32 = 96; - pub fn hash(bytes: &[u8]) -> Blake3Digest<24> { - Blake3Digest(shrink_array(blake3::hash(bytes).into())) + pub fn hash(bytes: &[u8]) -> Digest192 { + Digest::new(shrink_array(blake3::hash(bytes).into())) } // Note: Same as Blake3_256 - these methods replaced trait delegations to remove Winterfell. - pub fn merge_many(values: &[Blake3Digest<24>]) -> Blake3Digest<24> { - let bytes = Blake3Digest::digests_as_bytes(values); - Blake3Digest(shrink_array(blake3::hash(bytes).into())) + pub fn merge_many(values: &[Digest192]) -> Digest192 { + let bytes = Digest::digests_as_bytes(values); + Digest::new(shrink_array(blake3::hash(bytes).into())) } - pub fn merge(values: &[Blake3Digest<24>; 2]) -> Blake3Digest<24> { - Self::hash(Blake3Digest::digests_as_bytes(values)) + pub fn merge(values: &[Digest192; 2]) -> Digest192 { + Self::hash(Digest::digests_as_bytes(values)) } - pub fn merge_with_int(seed: Blake3Digest<24>, value: u64) -> Blake3Digest<24> { + pub fn merge_with_int(seed: Digest192, value: u64) -> Digest192 { let mut hasher = blake3::Hasher::new(); - hasher.update(&seed.0); + hasher.update(seed.as_bytes()); hasher.update(&value.to_le_bytes()); - Blake3Digest(shrink_array(hasher.finalize().into())) + Digest::new(shrink_array(hasher.finalize().into())) } /// Returns a hash of the provided field elements. #[inline(always)] - pub fn hash_elements>(elements: &[E]) -> Blake3Digest<32> { - Blake3Digest(hash_elements(elements)) + pub fn hash_elements>(elements: &[E]) -> Digest256 { + Digest::new(hash_elements(elements)) } /// Hashes an iterator of byte slices. #[inline(always)] - pub fn hash_iter<'a>(slices: impl Iterator) -> Blake3Digest { + pub fn hash_iter<'a>(slices: impl Iterator) -> Digest192 { ::hash_iter(slices) } } @@ -269,12 +188,3 @@ fn shrink_array(source: [u8; M]) -> [u8; N] { } core::array::from_fn(|i| source[i]) } - -/// Owned bytes expansion. 
-fn expand_bytes(bytes: &[u8; M]) -> [u8; N] { - // compile-time assertion - assert!(M <= N, "M should fit in N so M can be expanded!"); - let mut expanded = [0u8; N]; - expanded[..M].copy_from_slice(bytes); - expanded -} diff --git a/miden-crypto/src/hash/digest.rs b/miden-crypto/src/hash/digest.rs index af66d142b..f49cce666 100644 --- a/miden-crypto/src/hash/digest.rs +++ b/miden-crypto/src/hash/digest.rs @@ -15,6 +15,9 @@ use crate::utils::{ // CONSTANTS // ================================================================================================ +/// Size of a 192-bit digest in bytes. +pub const DIGEST192_BYTES: usize = 24; + /// Size of a 256-bit digest in bytes. pub const DIGEST256_BYTES: usize = 32; @@ -24,6 +27,9 @@ pub const DIGEST512_BYTES: usize = 64; // TYPE ALIASES // ================================================================================================ +/// A 192-bit (24-byte) digest. Type alias for `Digest<24>`. +pub type Digest192 = Digest; + /// A 256-bit (32-byte) digest. Type alias for `Digest<32>`. 
pub type Digest256 = Digest; @@ -137,7 +143,11 @@ mod tests { assert_eq!(size_of::>(), size_of::<[u8; 64]>()); assert_eq!(align_of::>(), align_of::<[u8; 64]>()); + assert_eq!(size_of::>(), size_of::<[u8; 24]>()); + assert_eq!(align_of::>(), align_of::<[u8; 24]>()); + // Verify type aliases as well + assert_eq!(size_of::(), 24); assert_eq!(size_of::(), 32); assert_eq!(size_of::(), 64); } @@ -198,6 +208,15 @@ mod tests { assert_eq!(recovered.as_bytes(), &bytes); } + #[test] + fn test_digest_hex_roundtrip_24() { + let bytes = [0xef; 24]; + let digest = Digest::<24>::from(bytes); + let hex: String = digest.into(); + let recovered = Digest::<24>::try_from(hex.as_str()).unwrap(); + assert_eq!(recovered.as_bytes(), &bytes); + } + #[test] fn test_digest_digests_as_bytes_32() { let d1 = Digest::<32>::from([1u8; 32]); diff --git a/miden-crypto/src/lib.rs b/miden-crypto/src/lib.rs index 18017ba3f..f1f6cfb8d 100644 --- a/miden-crypto/src/lib.rs +++ b/miden-crypto/src/lib.rs @@ -104,36 +104,40 @@ pub mod stark { /// An alias for a key-value map. /// -/// By default, this is an alias for the [`alloc::collections::BTreeMap`], however, when the -/// `hashmaps` feature is enabled, this is an alias for the `hashbrown`'s `HashMap`. -#[cfg(feature = "hashmaps")] -pub type Map = hashbrown::HashMap; +/// When the `std` feature is enabled, this is an alias for [`std::collections::HashMap`]. +/// Otherwise, this is an alias for [`alloc::collections::BTreeMap`]. +#[cfg(feature = "std")] +pub type Map = std::collections::HashMap; -#[cfg(feature = "hashmaps")] -pub use hashbrown::hash_map::Entry as MapEntry; +#[cfg(feature = "std")] +pub use std::collections::hash_map::Entry as MapEntry; +#[cfg(feature = "std")] +pub use std::collections::hash_map::IntoIter as MapIntoIter; /// An alias for a key-value map. /// -/// By default, this is an alias for the [`alloc::collections::BTreeMap`], however, when the -/// `hashmaps` feature is enabled, this is an alias for the `hashbrown`'s `HashMap`. 
-#[cfg(not(feature = "hashmaps"))] +/// When the `std` feature is enabled, this is an alias for [`std::collections::HashMap`]. +/// Otherwise, this is an alias for [`alloc::collections::BTreeMap`]. +#[cfg(not(feature = "std"))] pub type Map = alloc::collections::BTreeMap; -#[cfg(not(feature = "hashmaps"))] +#[cfg(not(feature = "std"))] pub use alloc::collections::btree_map::Entry as MapEntry; +#[cfg(not(feature = "std"))] +pub use alloc::collections::btree_map::IntoIter as MapIntoIter; /// An alias for a simple set. /// -/// By default, this is an alias for the [`alloc::collections::BTreeSet`]. However, when the -/// `hashmaps` feature is enabled, this becomes an alias for hashbrown's HashSet. -#[cfg(feature = "hashmaps")] -pub type Set = hashbrown::HashSet; +/// When the `std` feature is enabled, this is an alias for [`std::collections::HashSet`]. +/// Otherwise, this is an alias for [`alloc::collections::BTreeSet`]. +#[cfg(feature = "std")] +pub type Set = std::collections::HashSet; /// An alias for a simple set. /// -/// By default, this is an alias for the [`alloc::collections::BTreeSet`]. However, when the -/// `hashmaps` feature is enabled, this becomes an alias for hashbrown's HashSet. -#[cfg(not(feature = "hashmaps"))] +/// When the `std` feature is enabled, this is an alias for [`std::collections::HashSet`]. +/// Otherwise, this is an alias for [`alloc::collections::BTreeSet`]. 
+#[cfg(not(feature = "std"))] pub type Set = alloc::collections::BTreeSet; // CONSTANTS @@ -175,7 +179,6 @@ pub trait SequentialCommit { // ================================================================================================ mod batch_inversion { - use alloc::vec::Vec; use p3_maybe_rayon::prelude::*; diff --git a/miden-crypto/src/merkle/error.rs b/miden-crypto/src/merkle/error.rs index 11e833424..58b153f1d 100644 --- a/miden-crypto/src/merkle/error.rs +++ b/miden-crypto/src/merkle/error.rs @@ -16,8 +16,8 @@ pub enum MerkleError { DuplicateValuesForIndex(u64), #[error("entry {node} is not a leaf")] EntryIsNotLeaf { node: NodeIndex }, - #[error("node index value {value} is not valid for depth {depth}")] - InvalidNodeIndex { depth: u8, value: u64 }, + #[error("node index position {position} is not valid for depth {depth}")] + InvalidNodeIndex { depth: u8, position: u64 }, #[error("provided node index depth {provided} does not match expected depth {expected}")] InvalidNodeIndexDepth { expected: u8, provided: u8 }, #[error("provided node list should have a minimum length of {0}")] diff --git a/miden-crypto/src/merkle/index.rs b/miden-crypto/src/merkle/index.rs index 6a59f6753..522527e09 100644 --- a/miden-crypto/src/merkle/index.rs +++ b/miden-crypto/src/merkle/index.rs @@ -27,7 +27,7 @@ use crate::utils::{ByteReader, ByteWriter, Deserializable, DeserializationError, #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] pub struct NodeIndex { depth: u8, - value: u64, + position: u64, } impl NodeIndex { @@ -39,68 +39,68 @@ impl NodeIndex { /// # Errors /// Returns an error if: /// - `depth` is greater than 64. - /// - `value` is greater than or equal to 2^{depth}. - pub const fn new(depth: u8, value: u64) -> Result { + /// - `position` is greater than or equal to 2^{depth}. 
+ pub const fn new(depth: u8, position: u64) -> Result { if depth > 64 { Err(MerkleError::DepthTooBig(depth as u64)) - } else if (64 - value.leading_zeros()) > depth as u32 { - Err(MerkleError::InvalidNodeIndex { depth, value }) + } else if (64 - position.leading_zeros()) > depth as u32 { + Err(MerkleError::InvalidNodeIndex { depth, position }) } else { - Ok(Self { depth, value }) + Ok(Self { depth, position }) } } /// Creates a new node index without checking its validity. - pub const fn new_unchecked(depth: u8, value: u64) -> Self { + pub const fn new_unchecked(depth: u8, position: u64) -> Self { debug_assert!(depth <= 64); - debug_assert!((64 - value.leading_zeros()) <= depth as u32); - Self { depth, value } + debug_assert!((64 - position.leading_zeros()) <= depth as u32); + Self { depth, position } } /// Creates a new node index for testing purposes. /// /// # Panics - /// Panics if the `value` is greater than or equal to 2^{depth}. + /// Panics if the `position` is greater than or equal to 2^{depth}. #[cfg(test)] - pub fn make(depth: u8, value: u64) -> Self { - Self::new(depth, value).unwrap() + pub fn make(depth: u8, position: u64) -> Self { + Self::new(depth, position).unwrap() } - /// Creates a node index from a pair of field elements representing the depth and value. + /// Creates a node index from a pair of field elements representing the depth and position. /// /// # Errors /// Returns an error if: /// - `depth` is greater than 64. - /// - `value` is greater than or equal to 2^{depth}. - pub fn from_elements(depth: &Felt, value: &Felt) -> Result { + /// - `position` is greater than or equal to 2^{depth}. 
+ pub fn from_elements(depth: &Felt, position: &Felt) -> Result { let depth = depth.as_canonical_u64(); let depth = u8::try_from(depth).map_err(|_| MerkleError::DepthTooBig(depth))?; - let value = value.as_canonical_u64(); - Self::new(depth, value) + let position = position.as_canonical_u64(); + Self::new(depth, position) } /// Creates a new node index pointing to the root of the tree. pub const fn root() -> Self { - Self { depth: 0, value: 0 } + Self { depth: 0, position: 0 } } /// Computes sibling index of the current node. pub const fn sibling(mut self) -> Self { - self.value ^= 1; + self.position ^= 1; self } /// Returns left child index of the current node. pub const fn left_child(mut self) -> Self { self.depth += 1; - self.value <<= 1; + self.position <<= 1; self } /// Returns right child index of the current node. pub const fn right_child(mut self) -> Self { self.depth += 1; - self.value = (self.value << 1) + 1; + self.position = (self.position << 1) + 1; self } @@ -108,7 +108,7 @@ impl NodeIndex { /// a new value instead of mutating `self`. pub const fn parent(mut self) -> Self { self.depth = self.depth.saturating_sub(1); - self.value >>= 1; + self.position >>= 1; self } @@ -119,18 +119,18 @@ impl NodeIndex { /// /// Will evaluate the parity of the current instance to define the result. pub const fn build_node(&self, slf: Word, sibling: Word) -> [Word; 2] { - if self.is_value_odd() { + if self.is_position_odd() { [sibling, slf] } else { [slf, sibling] } } - /// Returns the scalar representation of the depth/value pair. + /// Returns the scalar representation of the depth/position pair. /// - /// It is computed as `2^depth + value`. + /// It is computed as `2^depth + position`. pub const fn to_scalar_index(&self) -> u64 { - (1 << self.depth as u64) + self.value + (1 << self.depth as u64) + self.position } /// Returns the depth of the current instance. @@ -138,19 +138,19 @@ impl NodeIndex { self.depth } - /// Returns the value of this index. 
- pub const fn value(&self) -> u64 { - self.value + /// Returns the position of this index within its depth layer. + pub const fn position(&self) -> u64 { + self.position } /// Returns `true` if the current instance points to a right sibling node. - pub const fn is_value_odd(&self) -> bool { - (self.value & 1) == 1 + pub const fn is_position_odd(&self) -> bool { + (self.position & 1) == 1 } /// Returns `true` if the n-th node on the path points to a right child. pub const fn is_nth_bit_odd(&self, n: u8) -> bool { - (self.value >> n) & 1 == 1 + (self.position >> n) & 1 == 1 } /// Returns `true` if the depth is `0`. @@ -164,7 +164,7 @@ impl NodeIndex { /// Traverses one level towards the root, decrementing the depth by `1`. pub fn move_up(&mut self) { self.depth = self.depth.saturating_sub(1); - self.value >>= 1; + self.position >>= 1; } /// Traverses towards the root until the specified depth is reached. @@ -174,7 +174,7 @@ impl NodeIndex { debug_assert!(depth < self.depth); let delta = self.depth.saturating_sub(depth); self.depth = self.depth.saturating_sub(delta); - self.value >>= delta as u32; + self.position >>= delta as u32; } // ITERATORS @@ -192,22 +192,22 @@ impl NodeIndex { impl Display for NodeIndex { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "depth={}, value={}", self.depth, self.value) + write!(f, "depth={}, position={}", self.depth, self.position) } } impl Serializable for NodeIndex { fn write_into(&self, target: &mut W) { target.write_u8(self.depth); - target.write_u64(self.value); + target.write_u64(self.position); } } impl Deserializable for NodeIndex { fn read_from(source: &mut R) -> Result { let depth = source.read_u8()?; - let value = source.read_u64()?; - NodeIndex::new(depth, value) + let position = source.read_u64()?; + NodeIndex::new(depth, position) .map_err(|_| DeserializationError::InvalidValue("Invalid index".into())) } } @@ -253,22 +253,22 @@ mod tests { use super::*; #[test] - fn 
test_node_index_value_too_high() { - assert_eq!(NodeIndex::new(0, 0).unwrap(), NodeIndex { depth: 0, value: 0 }); + fn test_node_index_position_too_high() { + assert_eq!(NodeIndex::new(0, 0).unwrap(), NodeIndex { depth: 0, position: 0 }); let err = NodeIndex::new(0, 1).unwrap_err(); - assert_matches!(err, MerkleError::InvalidNodeIndex { depth: 0, value: 1 }); + assert_matches!(err, MerkleError::InvalidNodeIndex { depth: 0, position: 1 }); - assert_eq!(NodeIndex::new(1, 1).unwrap(), NodeIndex { depth: 1, value: 1 }); + assert_eq!(NodeIndex::new(1, 1).unwrap(), NodeIndex { depth: 1, position: 1 }); let err = NodeIndex::new(1, 2).unwrap_err(); - assert_matches!(err, MerkleError::InvalidNodeIndex { depth: 1, value: 2 }); + assert_matches!(err, MerkleError::InvalidNodeIndex { depth: 1, position: 2 }); - assert_eq!(NodeIndex::new(2, 3).unwrap(), NodeIndex { depth: 2, value: 3 }); + assert_eq!(NodeIndex::new(2, 3).unwrap(), NodeIndex { depth: 2, position: 3 }); let err = NodeIndex::new(2, 4).unwrap_err(); - assert_matches!(err, MerkleError::InvalidNodeIndex { depth: 2, value: 4 }); + assert_matches!(err, MerkleError::InvalidNodeIndex { depth: 2, position: 4 }); - assert_eq!(NodeIndex::new(3, 7).unwrap(), NodeIndex { depth: 3, value: 7 }); + assert_eq!(NodeIndex::new(3, 7).unwrap(), NodeIndex { depth: 3, position: 7 }); let err = NodeIndex::new(3, 8).unwrap_err(); - assert_matches!(err, MerkleError::InvalidNodeIndex { depth: 3, value: 8 }); + assert_matches!(err, MerkleError::InvalidNodeIndex { depth: 3, position: 8 }); } #[test] @@ -277,13 +277,13 @@ mod tests { } prop_compose! 
{ - fn node_index()(value in 0..2u64.pow(u64::BITS - 1)) -> NodeIndex { + fn node_index()(position in 0..2u64.pow(u64::BITS - 1)) -> NodeIndex { // unwrap never panics because the range of depth is 0..u64::BITS - let mut depth = value.ilog2() as u8; - if value > (1 << depth) { // round up + let mut depth = position.ilog2() as u8; + if position > (1 << depth) { // round up depth += 1; } - NodeIndex::new(depth, value).unwrap() + NodeIndex::new(depth, position).unwrap() } } diff --git a/miden-crypto/src/merkle/mmr/inorder.rs b/miden-crypto/src/merkle/mmr/inorder.rs index 2efc4d4cc..344b9f1c3 100644 --- a/miden-crypto/src/merkle/mmr/inorder.rs +++ b/miden-crypto/src/merkle/mmr/inorder.rs @@ -112,6 +112,13 @@ impl InOrderIndex { pub fn inner(&self) -> usize { self.idx } + + /// Returns the leaf position if this index points to a leaf. + /// + /// Leaf positions are 0-indexed and correspond to `from_leaf_pos`. + pub fn to_leaf_pos(&self) -> Option { + if self.is_leaf() { Some((self.idx - 1) / 2) } else { None } + } } impl Serializable for InOrderIndex { diff --git a/miden-crypto/src/merkle/mmr/partial.rs b/miden-crypto/src/merkle/mmr/partial.rs index 2a5502016..2f2e2476f 100644 --- a/miden-crypto/src/merkle/mmr/partial.rs +++ b/miden-crypto/src/merkle/mmr/partial.rs @@ -3,7 +3,7 @@ use alloc::{ vec::Vec, }; -use super::{MmrDelta, MmrPath}; +use super::{MmrDelta, MmrPath, MmrProof}; use crate::{ Word, hash::poseidon2::Poseidon2, @@ -24,8 +24,7 @@ type NodeMap = BTreeMap; /// Partially materialized Merkle Mountain Range (MMR), used to efficiently store and update the /// authentication paths for a subset of the elements in a full MMR. /// -/// This structure store only the authentication path for a value, the value itself is stored -/// separately. +/// This structure stores both the authentication paths and the leaf values for tracked leaves. #[derive(Debug, Clone, PartialEq, Eq)] pub struct PartialMmr { /// The version of the MMR. 
@@ -53,12 +52,11 @@ pub struct PartialMmr { /// leaves, starting from the peak with most children, to the one with least. pub(crate) peaks: Vec, - /// Authentication nodes used to construct merkle paths for a subset of the MMR's leaves. + /// Nodes used to construct merkle paths for a subset of the MMR's leaves. /// - /// This does not include the MMR's peaks nor the tracked nodes, only the elements required to - /// construct their authentication paths. This property is used to detect when elements can be - /// safely removed, because they are no longer required to authenticate any element in the - /// [PartialMmr]. + /// This includes both: + /// - Tracked leaf values at their own in-order index + /// - Authentication nodes needed for the merkle paths /// /// The elements in the MMR are referenced using a in-order tree index. This indexing scheme /// permits for easy computation of the relative nodes (left/right children, sibling, parent), @@ -66,11 +64,8 @@ pub struct PartialMmr { /// trees in the MMR can be represented without rewrites of the indexes. pub(crate) nodes: NodeMap, - /// Flag indicating if the odd element should be tracked. - /// - /// This flag is necessary because the sibling of the odd doesn't exist yet, so it can not be - /// added into `nodes` to signal the value is being tracked. - pub(crate) track_latest: bool, + /// Set of leaf positions that are being tracked. 
+ pub(crate) tracked_leaves: BTreeSet, } impl Default for PartialMmr { @@ -79,9 +74,9 @@ impl Default for PartialMmr { let forest = Forest::empty(); let peaks = Vec::new(); let nodes = BTreeMap::new(); - let track_latest = false; + let tracked_leaves = BTreeSet::new(); - Self { forest, peaks, nodes, track_latest } + Self { forest, peaks, nodes, tracked_leaves } } } @@ -94,20 +89,21 @@ impl PartialMmr { let forest = peaks.forest(); let peaks = peaks.into(); let nodes = BTreeMap::new(); - let track_latest = false; + let tracked_leaves = BTreeSet::new(); - Self { forest, peaks, nodes, track_latest } + Self { forest, peaks, nodes, tracked_leaves } } /// Returns a new [PartialMmr] instantiated from the specified components. /// - /// This constructor does not check the consistency between peaks and nodes. If the specified - /// peaks are nodes are inconsistent, the returned partial MMR may exhibit undefined behavior. - pub fn from_parts(peaks: MmrPeaks, nodes: NodeMap, track_latest: bool) -> Self { + /// This constructor does not check the consistency between peaks, nodes, and tracked_leaves. + /// If the specified components are inconsistent, the returned partial MMR may exhibit + /// undefined behavior. + pub fn from_parts(peaks: MmrPeaks, nodes: NodeMap, tracked_leaves: BTreeSet) -> Self { let forest = peaks.forest(); let peaks = peaks.into(); - Self { forest, peaks, nodes, track_latest } + Self { forest, peaks, nodes, tracked_leaves } } // PUBLIC ACCESSORS @@ -136,21 +132,28 @@ impl PartialMmr { /// Returns true if this partial MMR tracks an authentication path for the leaf at the /// specified position. 
pub fn is_tracked(&self, pos: usize) -> bool { - let leaves = self.forest.num_leaves(); - if pos >= leaves { - return false; - } else if pos == leaves - 1 && self.forest.has_single_leaf_tree() { - // if the number of leaves in the MMR is odd and the position is for the last leaf - // whether the leaf is tracked is defined by the `track_latest` flag - return self.track_latest; + self.tracked_leaves.contains(&pos) + } + + /// Returns the leaf value at the specified position, or `None` if the leaf is not tracked. + pub fn get(&self, pos: usize) -> Option { + if !self.tracked_leaves.contains(&pos) { + return None; } + let leaf_idx = InOrderIndex::from_leaf_pos(pos); + self.nodes.get(&leaf_idx).copied() + } - let leaf_index = InOrderIndex::from_leaf_pos(pos); - self.is_tracked_node(&leaf_index) + /// Returns an iterator over the tracked leaves as (position, value) pairs. + pub fn leaves(&self) -> impl Iterator + '_ { + self.tracked_leaves.iter().map(|&pos| { + let leaf_idx = InOrderIndex::from_leaf_pos(pos); + let leaf = *self.nodes.get(&leaf_idx).expect("tracked leaf must have value in nodes"); + (pos, leaf) + }) } - /// Given a leaf position, returns the Merkle path to its corresponding peak, or None if this - /// partial MMR does not track an authentication paths for the specified leaf. + /// Returns an [MmrProof] for the leaf at the specified position, or `None` if not tracked. /// /// Note: The leaf position is the 0-indexed number corresponding to the order the leaves were /// added, this corresponds to the MMR size _prior_ to adding the element. So the 1st element @@ -159,30 +162,38 @@ impl PartialMmr { /// # Errors /// Returns an error if the specified position is greater-or-equal than the number of leaves /// in the underlying MMR. 
- pub fn open(&self, pos: usize) -> Result, MmrError> { + pub fn open(&self, pos: usize) -> Result, MmrError> { let tree_bit = self .forest .leaf_to_corresponding_tree(pos) .ok_or(MmrError::PositionNotFound(pos))?; - let depth = tree_bit as usize; - let mut nodes = Vec::with_capacity(depth); - let mut idx = InOrderIndex::from_leaf_pos(pos); + // Check if the leaf is tracked + if !self.tracked_leaves.contains(&pos) { + return Ok(None); + } - while let Some(node) = self.nodes.get(&idx.sibling()) { + // Get the leaf value from nodes + let leaf_idx = InOrderIndex::from_leaf_pos(pos); + let leaf = *self.nodes.get(&leaf_idx).expect("tracked leaf must have value in nodes"); + + // Collect authentication path nodes + let depth = tree_bit as usize; + let mut nodes = Vec::with_capacity(depth); + let mut idx = leaf_idx; + + for _ in 0..depth { + let Some(node) = self.nodes.get(&idx.sibling()) else { + // This is expected for partial MMRs that don't track all authentication paths. + // The sibling node is simply not available, so we can't construct a full proof. + return Ok(None); + }; nodes.push(*node); idx = idx.parent(); } - // If there are nodes then the path must be complete, otherwise it is a bug - debug_assert!(nodes.is_empty() || nodes.len() == depth); - - if nodes.len() != depth { - // The requested `pos` is not being tracked. - Ok(None) - } else { - Ok(Some(MmrPath::new(self.forest, pos, MerklePath::new(nodes)))) - } + let path = MmrPath::new(self.forest, pos, MerklePath::new(nodes)); + Ok(Some(MmrProof::new(path, leaf))) } // ITERATORS @@ -222,44 +233,61 @@ impl PartialMmr { /// Adds a new peak and optionally track it. Returns a vector of the authentication nodes /// inserted into this [PartialMmr] as a result of this operation. /// - /// When `track` is `true` the new leaf is tracked. + /// When `track` is `true` the new leaf is tracked and its value is stored. 
pub fn add(&mut self, leaf: Word, track: bool) -> Vec<(InOrderIndex, Word)> { self.forest.append_leaf(); - // We just incremented the forest, so this cannot panic. - let merges = self.forest.smallest_tree_height_unchecked(); - let mut new_nodes = Vec::with_capacity(merges); + // The smallest tree height equals the number of merges because adding a leaf is like + // adding 1 in binary: each carry corresponds to a merge. For example, forest 3 (0b11) + // + 1 = 4 (0b100) requires 2 carries/merges to form a tree of height 2. + let num_merges = self.forest.smallest_tree_height_unchecked(); + let mut new_nodes = Vec::with_capacity(num_merges + 1); + + // Store the leaf value at its own index if tracking + let leaf_pos = self.forest.num_leaves() - 1; + let leaf_idx = InOrderIndex::from_leaf_pos(leaf_pos); + if track { + self.tracked_leaves.insert(leaf_pos); + self.nodes.insert(leaf_idx, leaf); + new_nodes.push((leaf_idx, leaf)); + } - let peak = if merges == 0 { - self.track_latest = track; + let peak = if num_merges == 0 { leaf } else { let mut track_right = track; - let mut track_left = self.track_latest; + // Check if the previous dangling leaf was tracked. + // If num_merges > 0, there was a single-leaf tree that is now being merged. 
+ let prev_last_pos = self.forest.num_leaves() - 2; + let mut track_left = self.tracked_leaves.contains(&prev_last_pos); let mut right = leaf; let mut right_idx = self.forest.rightmost_in_order_index(); - for _ in 0..merges { + for _ in 0..num_merges { let left = self.peaks.pop().expect("Missing peak"); let left_idx = right_idx.sibling(); if track_right { let old = self.nodes.insert(left_idx, left); - new_nodes.push((left_idx, left)); - + // It's valid to insert if: nothing was there, or same value was there + // (tracked leaf value can match auth node for its sibling) debug_assert!( - old.is_none(), - "Idx {left_idx:?} already contained an element {old:?}", + old.is_none() || old == Some(left), + "Idx {left_idx:?} already contained a different element {old:?}", ); + if old.is_none() { + new_nodes.push((left_idx, left)); + } }; if track_left { let old = self.nodes.insert(right_idx, right); - new_nodes.push((right_idx, right)); - debug_assert!( - old.is_none(), - "Idx {right_idx:?} already contained an element {old:?}", + old.is_none() || old == Some(right), + "Idx {right_idx:?} already contained a different element {old:?}", ); + if old.is_none() { + new_nodes.push((right_idx, right)); + } }; // Update state for the next iteration. @@ -296,9 +324,8 @@ impl PartialMmr { /// this value corresponds to the values used in the MMR structure. /// /// The `leaf` corresponds to the value at `leaf_pos`, and `path` is the authentication path for - /// that element up to its corresponding Mmr peak. The `leaf` is only used to compute the root - /// from the authentication path to valid the data, only the authentication data is saved in - /// the structure. If the value is required it should be stored out-of-band. + /// that element up to its corresponding Mmr peak. Both the authentication path and the leaf + /// value are stored. 
pub fn track( &mut self, leaf_pos: usize, @@ -312,14 +339,6 @@ impl PartialMmr { return Err(MmrError::UnknownPeak(path.depth())); }; - if leaf_pos + 1 == self.forest.num_leaves() - && path.depth() == 0 - && self.peaks.last().is_some_and(|v| *v == leaf) - { - self.track_latest = true; - return Ok(()); - } - // ignore the trees smaller than the target (these elements are position after the current // target and don't affect the target leaf_pos) let target_forest = self.forest ^ (self.forest & tree.all_smaller_trees_unchecked()); @@ -337,9 +356,17 @@ impl PartialMmr { return Err(MmrError::PeakPathMismatch); } - let mut idx = InOrderIndex::from_leaf_pos(leaf_pos); - for leaf in path.nodes() { - self.nodes.insert(idx.sibling(), *leaf); + // Mark the leaf as tracked + self.tracked_leaves.insert(leaf_pos); + + // Store the leaf value at its own index + let leaf_idx = InOrderIndex::from_leaf_pos(leaf_pos); + self.nodes.insert(leaf_idx, leaf); + + // Store the authentication path nodes + let mut idx = leaf_idx; + for node in path.nodes() { + self.nodes.insert(idx.sibling(), *node); idx = idx.parent(); } @@ -354,16 +381,38 @@ impl PartialMmr { /// /// Note: `leaf_pos` corresponds to the position in the MMR and not on an individual tree. pub fn untrack(&mut self, leaf_pos: usize) -> Vec<(InOrderIndex, Word)> { + // Remove from tracked leaves set + self.tracked_leaves.remove(&leaf_pos); + let mut idx = InOrderIndex::from_leaf_pos(leaf_pos); let mut removed = Vec::new(); - // `idx` represent the element that can be computed by the authentication path, because - // these elements can be computed they are not saved for the authentication of the current - // target. In other words, if the idx is present it was added for the authentication of - // another element, and no more elements should be removed otherwise it would remove that - // element's authentication data. 
- while let Some(word) = self.nodes.remove(&idx.sibling()) { - removed.push((idx.sibling(), word)); + // Check if the sibling leaf is still tracked. If so, we need to keep our leaf value + // as an auth node for the sibling, and keep all auth nodes above. + let sibling_idx = idx.sibling(); + let sibling_pos = sibling_idx.to_leaf_pos().expect("sibling of a leaf is always a leaf"); + if self.tracked_leaves.contains(&sibling_pos) { + // Sibling is tracked, so don't remove anything - our leaf value and all auth + // nodes above are still needed for the sibling's proof. + return removed; + } + + // Remove the leaf value itself + if let Some(word) = self.nodes.remove(&idx) { + removed.push((idx, word)); + } + + // Remove authentication path nodes that are no longer needed. + loop { + let sibling_idx = idx.sibling(); + + // Try to remove the sibling auth node + let Some(word) = self.nodes.remove(&sibling_idx) else { + break; + }; + removed.push((sibling_idx, word)); + + // If `idx` is present, it was added for another element's authentication. if self.nodes.contains_key(&idx) { break; } @@ -393,25 +442,20 @@ impl PartialMmr { return Ok(inserted_nodes); } - // find the tree merges + // find the trees to merge (bitmask of existing trees that will be combined) let changes = self.forest ^ delta.forest; // `largest_tree_unchecked()` panics if `changes` is empty. `changes` cannot be empty // unless `self.forest == delta.forest`, which is guarded against above. let largest = changes.largest_tree_unchecked(); // The largest tree itself also cannot be an empty forest, so this cannot panic either. 
- let merges = self.forest & largest.all_smaller_trees_unchecked(); - - debug_assert!( - !self.track_latest || merges.has_single_leaf_tree(), - "if there is an odd element, a merge is required" - ); + let trees_to_merge = self.forest & largest.all_smaller_trees_unchecked(); // count the number elements needed to produce largest from the current state - let (merge_count, new_peaks) = if !merges.is_empty() { + let (merge_count, new_peaks) = if !trees_to_merge.is_empty() { let depth = largest.smallest_tree_height_unchecked(); - // `merges` also cannot be an empty forest, so this cannot panic either. - let skipped = merges.smallest_tree_height_unchecked(); - let computed = merges.num_trees() - 1; + // `trees_to_merge` also cannot be an empty forest, so this cannot panic either. + let skipped = trees_to_merge.smallest_tree_height_unchecked(); + let computed = trees_to_merge.num_trees() - 1; let merge_count = depth - skipped - computed; let new_peaks = delta.forest & largest.all_smaller_trees_unchecked(); @@ -429,32 +473,30 @@ impl PartialMmr { // keeps track of how many data elements from the update have been consumed let mut update_count = 0; - if !merges.is_empty() { + if !trees_to_merge.is_empty() { // starts at the smallest peak and follows the merged peaks let mut peak_idx = self.forest.root_in_order_index(); // match order of the update data while applying it self.peaks.reverse(); - // set to true when the data is needed for authentication paths updates - let mut track = self.track_latest; - self.track_latest = false; + let mut track = false; let mut peak_count = 0; - let mut target = merges.smallest_tree_unchecked(); + let mut target = trees_to_merge.smallest_tree_unchecked(); let mut new = delta.data[0]; update_count += 1; while target < largest { - // check if either the left or right subtrees have saved for authentication paths. - // If so, turn tracking on to update those paths. 
- if target != Forest::new(1) && !track { + // Check if either the left or right subtrees have nodes saved for authentication + // paths. If so, turn tracking on to update those paths. + if !track { track = self.is_tracked_node(&peak_idx); } // update data only contains the nodes from the right subtrees, left nodes are // either previously known peaks or computed values - let (left, right) = if !(target & merges).is_empty() { + let (left, right) = if !(target & trees_to_merge).is_empty() { let peak = self.peaks[peak_count]; let sibling_idx = peak_idx.sibling(); @@ -488,13 +530,13 @@ impl PartialMmr { target = target.next_larger_tree(); } - debug_assert!(peak_count == merges.num_trees()); + debug_assert!(peak_count == trees_to_merge.num_trees()); // restore the peaks order self.peaks.reverse(); // remove the merged peaks self.peaks.truncate(self.peaks.len() - peak_count); - // add the newly computed peak, the result of the merges + // add the newly computed peak, the result of the tree merges self.peaks.push(new); } @@ -515,8 +557,9 @@ impl PartialMmr { /// Returns true if this [PartialMmr] tracks authentication path for the node at the specified /// index. fn is_tracked_node(&self, node_index: &InOrderIndex) -> bool { - if node_index.is_leaf() { - self.nodes.contains_key(&node_index.sibling()) + if let Some(leaf_pos) = node_index.to_leaf_pos() { + // For leaf nodes, check if the leaf is in the tracked set. 
+ self.tracked_leaves.contains(&leaf_pos) } else { let left_child = node_index.left_child(); let right_child = node_index.right_child(); @@ -606,7 +649,9 @@ impl Serializable for PartialMmr { self.forest.num_leaves().write_into(target); self.peaks.write_into(target); self.nodes.write_into(target); - target.write_bool(self.track_latest); + // Serialize tracked_leaves as a Vec + let tracked: Vec = self.tracked_leaves.iter().copied().collect(); + tracked.write_into(target); } } @@ -617,9 +662,10 @@ impl Deserializable for PartialMmr { let forest = Forest::new(usize::read_from(source)?); let peaks = Vec::::read_from(source)?; let nodes = NodeMap::read_from(source)?; - let track_latest = source.read_bool()?; + let tracked: Vec = Vec::read_from(source)?; + let tracked_leaves: BTreeSet = tracked.into_iter().collect(); - Ok(Self { forest, peaks, nodes, track_latest }) + Ok(Self { forest, peaks, nodes, tracked_leaves }) } } @@ -682,7 +728,8 @@ mod tests { let node = mmr.get(12).unwrap(); let proof = mmr.open(12).unwrap(); partial_mmr.track(12, node, proof.path().merkle_path()).unwrap(); - assert!(partial_mmr.track_latest); + // Position 12 is the last leaf (dangling) and should now be tracked + assert!(partial_mmr.is_tracked(12)); } // by this point we are tracking authentication paths for positions: 1, 8, and 12 @@ -693,12 +740,8 @@ mod tests { } fn validate_apply_delta(mmr: &Mmr, partial: &mut PartialMmr) { - let tracked_leaves = partial - .nodes - .iter() - .filter(|&(index, _)| index.is_leaf()) - .map(|(index, _)| index.sibling()) - .collect::>(); + // Get tracked leaf positions + let tracked_positions: Vec<_> = partial.tracked_leaves.iter().copied().collect(); let nodes_before = partial.nodes.clone(); // compute and apply delta @@ -718,11 +761,10 @@ mod tests { assert_eq!(expected_nodes, partial.nodes); // make sure tracked leaves open to the same proofs as in the underlying MMR - for index in tracked_leaves { - let pos = index.inner() / 2; + for pos in 
tracked_positions { let proof1 = partial.open(pos).unwrap().unwrap(); let proof2 = mmr.open(pos).unwrap(); - assert_eq!(proof1, *proof2.path()); + assert_eq!(proof1, proof2); } } @@ -850,7 +892,7 @@ mod tests { for pos in 0..i { let mmr_proof = mmr.open(pos).unwrap(); let partialmmr_proof = partial_mmr.open(pos).unwrap().unwrap(); - assert_eq!(*mmr_proof.path(), partialmmr_proof); + assert_eq!(mmr_proof, partialmmr_proof); } } } @@ -871,7 +913,33 @@ mod tests { partial_mmr.add(leaf_at_7, false); // the openings should be the same - assert_eq!(*mmr.open(5).unwrap().path(), partial_mmr.open(5).unwrap().unwrap()); + assert_eq!(mmr.open(5).unwrap(), partial_mmr.open(5).unwrap().unwrap()); + } + + #[test] + fn test_partial_mmr_add_updates_tracked_dangling_leaf() { + // Track a dangling leaf, then add a new untracked leaf. + // The previously dangling leaf's proof should still work. + let mut mmr = Mmr::default(); + let mut partial_mmr = PartialMmr::default(); + + // Add leaf 0 with tracking - it's a dangling leaf (forest=1) + let leaf0 = int_to_node(0); + mmr.add(leaf0); + partial_mmr.add(leaf0, true); + + // Both should produce the same proof (empty path, leaf is a peak) + assert_eq!(mmr.open(0).unwrap(), partial_mmr.open(0).unwrap().unwrap()); + + // Add leaf 1 WITHOUT tracking - triggers merge, leaf 0 gets a sibling + let leaf1 = int_to_node(1); + mmr.add(leaf1); + partial_mmr.add(leaf1, false); + + // Leaf 0 should still be tracked with correct proof after merge + assert!(partial_mmr.is_tracked(0)); + assert!(!partial_mmr.is_tracked(1)); + assert_eq!(mmr.open(0).unwrap(), partial_mmr.open(0).unwrap().unwrap()); } #[test] @@ -948,7 +1016,7 @@ mod tests { // build the MMR let mmr: Mmr = LEAVES.into(); - // track two sibling leaves + // track two sibling leaves (positions 0 and 1) let node0 = mmr.get(0).unwrap(); let proof0 = mmr.open(0).unwrap(); @@ -962,26 +1030,115 @@ mod tests { partial_mmr.track(0, node0, proof0.path().merkle_path()).unwrap(); partial_mmr.track(1, 
node1, proof1.path().merkle_path()).unwrap(); - // There are 3 unique authentication nodes stored: - // - leaf0's sibling (stored at leaf1's index) - // - leaf1's sibling (stored at leaf0's index) - // - the parent sibling (shared by both openings) + // There are 3 unique nodes stored in `nodes`: + // - nodes[idx0] = leaf0 (tracked leaf value, also serves as auth sibling for leaf1) + // - nodes[idx1] = leaf1 (tracked leaf value, also serves as auth sibling for leaf0) + // - nodes[parent_sibling] = shared higher-level auth node + // + // Note: Each tracked leaf's value is stored at its own InOrderIndex so that `open()` + // can return an MmrProof containing the leaf value. These values also double as the + // authentication siblings for their neighboring leaves. assert_eq!(partial_mmr.nodes().count(), 3); // untrack position 0: - // removes the node stored at leaf1's index (the sibling of leaf0), - // then stops because leaf0's index is still present (needed to authenticate leaf1). + // Even though pos 0 is no longer tracked, we cannot remove any nodes because: + // - leaf0's value (at idx0) is still needed as the auth sibling for leaf1's path + // - leaf1's value (at idx1) is needed for open(1) to return MmrProof + // - parent_sibling is still needed for leaf1's path let removed0 = partial_mmr.untrack(0); - assert_eq!(removed0.len(), 1); - assert_eq!(partial_mmr.nodes().count(), 2); + assert_eq!(removed0.len(), 0); + assert_eq!(partial_mmr.nodes().count(), 3); assert!(partial_mmr.is_tracked(1)); + assert!(!partial_mmr.is_tracked(0)); // untrack position 1: - // removes the node stored at leaf0's index (the sibling of leaf1) and the shared parent - // sibling. 
+ // Now sibling (pos 0) is NOT tracked, so all nodes can be removed: + // - leaf1's value at idx1 (no longer needed for open()) + // - leaf0's value at idx0 (no longer needed as auth sibling) + // - parent_sibling (no longer needed for any path) let removed1 = partial_mmr.untrack(1); - assert_eq!(removed1.len(), 2); + assert_eq!(removed1.len(), 3); assert_eq!(partial_mmr.nodes().count(), 0); assert!(!partial_mmr.is_tracked(1)); } + + #[test] + fn test_partial_mmr_open_returns_proof_with_leaf() { + // build the MMR + let mmr: Mmr = LEAVES.into(); + + // get leaf and proof for position 1 + let leaf1 = mmr.get(1).unwrap(); + let mmr_proof = mmr.open(1).unwrap(); + + // create partial MMR and track position 1 + let mut partial_mmr: PartialMmr = mmr.peaks().into(); + partial_mmr.track(1, leaf1, mmr_proof.path().merkle_path()).unwrap(); + + // open should return MmrProof with the correct leaf value + let partial_proof = partial_mmr.open(1).unwrap().unwrap(); + assert_eq!(partial_proof.leaf(), leaf1); + assert_eq!(partial_proof, mmr_proof); + + // untrack and verify open returns None + partial_mmr.untrack(1); + assert!(partial_mmr.open(1).unwrap().is_none()); + } + + #[test] + fn test_partial_mmr_add_tracks_leaf() { + // create empty partial MMR + let mut partial_mmr = PartialMmr::default(); + + // add leaves, tracking some + let leaf0 = int_to_node(0); + let leaf1 = int_to_node(1); + let leaf2 = int_to_node(2); + + partial_mmr.add(leaf0, true); // track + partial_mmr.add(leaf1, false); // don't track + partial_mmr.add(leaf2, true); // track + + // verify tracked leaves can be opened + let proof0 = partial_mmr.open(0).unwrap(); + assert!(proof0.is_some()); + assert_eq!(proof0.unwrap().leaf(), leaf0); + + // verify untracked leaf returns None + let proof1 = partial_mmr.open(1).unwrap(); + assert!(proof1.is_none()); + + // verify tracked leaf can be opened + let proof2 = partial_mmr.open(2).unwrap(); + assert!(proof2.is_some()); + assert_eq!(proof2.unwrap().leaf(), leaf2); 
+ + // verify get() returns correct values + assert_eq!(partial_mmr.get(0), Some(leaf0)); + assert_eq!(partial_mmr.get(1), None); + assert_eq!(partial_mmr.get(2), Some(leaf2)); + + // verify leaves() iterator returns only tracked leaves + let tracked: Vec<_> = partial_mmr.leaves().collect(); + assert_eq!(tracked, vec![(0, leaf0), (2, leaf2)]); + } + + #[test] + fn test_partial_mmr_track_dangling_leaf() { + // Single-leaf MMR: forest = 1, leaf 0 is a peak with an empty path. + let mut mmr = Mmr::default(); + mmr.add(int_to_node(0)); + let mut partial_mmr: PartialMmr = mmr.peaks().into(); + + let leaf0 = mmr.get(0).unwrap(); + // depth-0 MerklePath + let proof0 = mmr.open(0).unwrap(); + + // Track the dangling leaf via `track` using the empty path. + partial_mmr.track(0, leaf0, proof0.path().merkle_path()).unwrap(); + + // It should now be tracked and open to the same proof as the full MMR. + assert!(partial_mmr.is_tracked(0)); + assert_eq!(partial_mmr.open(0).unwrap().unwrap(), proof0); + } } diff --git a/miden-crypto/src/merkle/mmr/peaks.rs b/miden-crypto/src/merkle/mmr/peaks.rs index 68dbddf69..37f84af23 100644 --- a/miden-crypto/src/merkle/mmr/peaks.rs +++ b/miden-crypto/src/merkle/mmr/peaks.rs @@ -162,15 +162,7 @@ impl MmrPeaks { }; let mut elements = Vec::with_capacity(len); - elements.extend_from_slice( - &self - .peaks - .as_slice() - .iter() - .map(|digest| digest.as_slice()) - .collect::>() - .concat(), - ); + elements.extend_from_slice(Word::words_as_elements(&self.peaks)); elements.resize(len, ZERO); elements } diff --git a/miden-crypto/src/merkle/mmr/tests.rs b/miden-crypto/src/merkle/mmr/tests.rs index 1fc02c142..aeabed8d9 100644 --- a/miden-crypto/src/merkle/mmr/tests.rs +++ b/miden-crypto/src/merkle/mmr/tests.rs @@ -31,7 +31,7 @@ fn test_empty_partial_mmr() { assert_eq!(mmr.forest(), Forest::empty()); assert_eq!(mmr.peaks(), MmrPeaks::default()); assert!(mmr.nodes.is_empty()); - assert!(!mmr.track_latest); + assert!(mmr.tracked_leaves.is_empty()); } 
#[test] @@ -1033,10 +1033,12 @@ fn test_partial_mmr_simple() { .track(proof1.path().position(), el1, proof1.path().merkle_path()) .unwrap(); - // check the number of nodes increased by the number of nodes in the proof - assert_eq!(partial.nodes.len(), proof1.path().merkle_path().len()); - // check the values match + // check the number of nodes: leaf value + authentication path nodes + assert_eq!(partial.nodes.len(), 1 + proof1.path().merkle_path().len()); + // check the leaf value is stored let idx = InOrderIndex::from_leaf_pos(proof1.path().position()); + assert_eq!(partial.nodes[&idx], el1); + // check the path values match assert_eq!(partial.nodes[&idx.sibling()], proof1.path().merkle_path()[0]); let idx = idx.parent(); assert_eq!(partial.nodes[&idx.sibling()], proof1.path().merkle_path()[1]); @@ -1047,10 +1049,14 @@ fn test_partial_mmr_simple() { .track(proof2.path().position(), el2, proof2.path().merkle_path()) .unwrap(); - // check the number of nodes increased by a single element (the one that is not shared) + // After tracking pos 1: leaf1 is stored at its own index, which is pos 0's sibling and so + // already held this value as an auth node. No new index is added by this call.
+ // Total: leaf0, leaf1 (=sibling of leaf0), shared path node = 3 unique indices assert_eq!(partial.nodes.len(), 3); - // check the values match + // check the leaf value is stored let idx = InOrderIndex::from_leaf_pos(proof2.path().position()); + assert_eq!(partial.nodes[&idx], el2); + // check the path values match assert_eq!(partial.nodes[&idx.sibling()], proof2.path().merkle_path()[0]); let idx = idx.parent(); assert_eq!(partial.nodes[&idx.sibling()], proof2.path().merkle_path()[1]); diff --git a/miden-crypto/src/merkle/partial_mt/mod.rs b/miden-crypto/src/merkle/partial_mt/mod.rs index fd1b2e3ce..d0cfa1af3 100644 --- a/miden-crypto/src/merkle/partial_mt/mod.rs +++ b/miden-crypto/src/merkle/partial_mt/mod.rs @@ -108,8 +108,8 @@ impl PartialMerkleTree { nodes.insert(node_index, hash); layers .entry(node_index.depth()) - .and_modify(|layer_vec| layer_vec.push(node_index.value())) - .or_insert(vec![node_index.value()]); + .and_modify(|layer_vec| layer_vec.push(node_index.position())) + .or_insert(vec![node_index.position()]); } // make sure the depth of the last layer is 64 or smaller @@ -143,7 +143,7 @@ impl PartialMerkleTree { // If parent already exists, check if it's user-provided (invalid) or computed // (skip) - if parent_layer.contains(&parent_node.value()) { + if parent_layer.contains(&parent_node.position()) { // If the parent was provided as a leaf, that's invalid - we can't have both // a node and its descendant in the input set. 
if leaves.contains(&parent_node) { @@ -165,7 +165,7 @@ impl PartialMerkleTree { let parent = Poseidon2::merge(&index.build_node(*node, *sibling)); // add index value of the calculated node to the parents layer - parent_layer.push(parent_node.value()); + parent_layer.push(parent_node.position()); // add index and hash to the nodes map nodes.insert(parent_node, parent); } @@ -421,7 +421,7 @@ impl PartialMerkleTree { for _ in 0..d { s.push_str(indent); } - s.push_str(&format!("({}, {}): ", index.depth(), index.value())); + s.push_str(&format!("({}, {}): ", index.depth(), index.position())); s.push_str(&word_to_hex(&node)?); s.push('\n'); } diff --git a/miden-crypto/src/merkle/partial_mt/tests.rs b/miden-crypto/src/merkle/partial_mt/tests.rs index 3e1c0d69e..1a07aafdd 100644 --- a/miden-crypto/src/merkle/partial_mt/tests.rs +++ b/miden-crypto/src/merkle/partial_mt/tests.rs @@ -105,7 +105,7 @@ fn err_with_leaves_entry_is_not_leaf() { match PartialMerkleTree::with_leaves(entries) { Err(MerkleError::EntryIsNotLeaf { node }) => { assert_eq!(node.depth(), 1); - assert_eq!(node.value(), 0); + assert_eq!(node.position(), 0); }, other => panic!("Expected EntryIsNotLeaf error, got {:?}", other), } diff --git a/miden-crypto/src/merkle/path.rs b/miden-crypto/src/merkle/path.rs index 268e4f00a..6aff5f29a 100644 --- a/miden-crypto/src/merkle/path.rs +++ b/miden-crypto/src/merkle/path.rs @@ -182,7 +182,7 @@ impl Iterator for InnerNodeIterator<'_> { fn next(&mut self) -> Option { if !self.index.is_root() { let sibling_pos = self.nodes.len() - self.index.depth() as usize; - let (left, right) = if self.index.is_value_odd() { + let (left, right) = if self.index.is_position_odd() { (self.nodes[sibling_pos], self.value) } else { (self.value, self.nodes[sibling_pos]) diff --git a/miden-crypto/src/merkle/smt/forest/store.rs b/miden-crypto/src/merkle/smt/forest/store.rs index 4f6104697..c0cd4e57a 100644 --- a/miden-crypto/src/merkle/smt/forest/store.rs +++ 
b/miden-crypto/src/merkle/smt/forest/store.rs @@ -169,9 +169,9 @@ impl SmtStore { #[allow(unused_mut)] let mut sorted_leaf_indices = leaves_by_index.keys().cloned().collect::>(); - #[cfg(feature = "hashmaps")] + #[cfg(feature = "std")] // Sort leaves by NodeIndex to easily detect when leaves share a parent (only neighboring - // leaves can share a parent). Hashbrown::HashMap doesn't maintain key ordering, so + // leaves can share a parent). std::collections::HashMap doesn't maintain key ordering, so // we need to sort the indices. sorted_leaf_indices.sort(); @@ -285,6 +285,7 @@ impl SmtStore { let left = smt_node.left; let right = smt_node.right; + self.nodes.remove(&node); let mut result = Vec::new(); result.extend(self.remove_node(left)); diff --git a/miden-crypto/src/merkle/smt/forest/tests.rs b/miden-crypto/src/merkle/smt/forest/tests.rs index e842669ef..0a68162c5 100644 --- a/miden-crypto/src/merkle/smt/forest/tests.rs +++ b/miden-crypto/src/merkle/smt/forest/tests.rs @@ -269,6 +269,35 @@ fn test_pop_roots() -> Result<(), MerkleError> { Ok(()) } +#[test] +fn test_pop_and_reinsert_same_tree() -> Result<(), MerkleError> { + let mut forest = SmtForest::new(); + + let empty_tree_root = *EmptySubtreeRoots::entry(SMT_DEPTH, 0); + let key = Word::new([ZERO; WORD_SIZE]); + let value = Word::new([ONE; WORD_SIZE]); + + // Insert a key, then pop the tree + let root1 = forest.insert(empty_tree_root, key, value)?; + forest.pop_smts(vec![root1]); + + // Re-insert the same key-value pair (produces the same tree hashes) + let root2 = forest.insert(empty_tree_root, key, value)?; + assert_eq!(root1, root2, "same key-value must produce the same root"); + + // Verify the proof is valid + let proof = forest.open(root2, key)?; + proof.verify_presence(&key, &value, &root2).unwrap(); + + // Pop again — without the fix this would panic due to rc underflow + forest.pop_smts(vec![root2]); + + assert_eq!(forest.roots.len(), 0); + assert_eq!(forest.leaves.len(), 0); + + Ok(()) +} + 
#[test] fn test_removing_empty_smt_from_forest() { let mut forest = SmtForest::new(); diff --git a/miden-crypto/src/merkle/smt/full/concurrent/mod.rs b/miden-crypto/src/merkle/smt/full/concurrent/mod.rs index f4ed54f46..2dc240083 100644 --- a/miden-crypto/src/merkle/smt/full/concurrent/mod.rs +++ b/miden-crypto/src/merkle/smt/full/concurrent/mod.rs @@ -104,7 +104,8 @@ impl Smt { { // Collect and sort key-value pairs by their corresponding leaf index let mut sorted_kv_pairs: Vec<_> = kv_pairs.into_iter().collect(); - sorted_kv_pairs.par_sort_unstable_by_key(|(key, _)| Self::key_to_leaf_index(key).value()); + sorted_kv_pairs + .par_sort_unstable_by_key(|(key, _)| Self::key_to_leaf_index(key).position()); // Convert sorted pairs into mutated leaves and capture any new pairs let (mut subtree_leaves, new_pairs) = @@ -199,7 +200,7 @@ impl Smt { // Add the parent node even if it is empty for proper upward updates next_leaves.push(SubtreeLeaf { - col: parent_index.value(), + col: parent_index.position(), hash: combined_hash, }); @@ -233,7 +234,7 @@ impl Smt { fn build_subtrees(mut entries: Vec<(Word, Word)>) -> Result<(InnerNodes, Leaves), MerkleError> { entries.par_sort_unstable_by_key(|item| { let index = Self::key_to_leaf_index(&item.0); - index.value() + index.position() }); build_subtrees_from_sorted_entries(entries) } @@ -278,7 +279,7 @@ impl Smt { // Check for duplicates in a sorted list by comparing adjacent pairs if let Some(window) = pairs.windows(2).find(|window| window[0].0 == window[1].0) { // If we find a duplicate, return an error - let col = Self::key_to_leaf_index(&window[0].0).index.value(); + let col = Self::key_to_leaf_index(&window[0].0).index.position(); return Err(MerkleError::DuplicateValuesForIndex(col)); } Ok(Some(SmtLeaf::new_multiple(pairs).unwrap())) @@ -473,7 +474,7 @@ pub(crate) fn process_sorted_pairs_to_leaves( where F: FnMut(Vec<(Word, Word)>) -> Result, MerkleError>, { - debug_assert!(pairs.is_sorted_by_key(|(key, _)| 
Smt::key_to_leaf_index(key).value())); + debug_assert!(pairs.is_sorted_by_key(|(key, _)| Smt::key_to_leaf_index(key).position())); let mut accumulator: PairComputations = Default::default(); // As we iterate, we'll keep track of the kv-pairs we've seen so far that correspond to a // single leaf. When we see a pair that's in a different leaf, we'll swap these pairs @@ -481,10 +482,10 @@ where let mut current_leaf_buffer: Vec<(Word, Word)> = Default::default(); let mut iter = pairs.into_iter().peekable(); while let Some((key, value)) = iter.next() { - let col = Smt::key_to_leaf_index(&key).index.value(); + let col = Smt::key_to_leaf_index(&key).index.position(); let peeked_col = iter.peek().map(|(key, _v)| { let index = Smt::key_to_leaf_index(key); - let next_col = index.index.value(); + let next_col = index.index.position(); // We panic if `pairs` is not sorted by column. debug_assert!(next_col >= col); next_col @@ -649,7 +650,7 @@ pub(crate) fn build_subtree( // as a leaf for the next depth. if hash != equivalent_empty_hash { inner_nodes.insert(index, node); - next_leaves.push(SubtreeLeaf { col: index.value(), hash }); + next_leaves.push(SubtreeLeaf { col: index.position(), hash }); } } // Stop borrowing `leaves`, so we can swap it. diff --git a/miden-crypto/src/merkle/smt/full/concurrent/tests.rs b/miden-crypto/src/merkle/smt/full/concurrent/tests.rs index 81d613ac4..ad7959480 100644 --- a/miden-crypto/src/merkle/smt/full/concurrent/tests.rs +++ b/miden-crypto/src/merkle/smt/full/concurrent/tests.rs @@ -22,7 +22,7 @@ use crate::{ fn smtleaf_to_subtree_leaf(leaf: &SmtLeaf) -> SubtreeLeaf { SubtreeLeaf { - col: leaf.index().index.value(), + col: leaf.index().index.position(), hash: leaf.hash(), } } @@ -89,7 +89,7 @@ fn test_sorted_pairs_to_leaves() { // Then finally we might as well check the computed leaf nodes too. 
let control_leaves: BTreeMap = control .leaves() - .map(|(index, value)| (index.index.value(), value.clone())) + .map(|(index, value)| (index.index.position(), value.clone())) .collect(); for (column, test_leaf) in subtrees.nodes { @@ -136,7 +136,7 @@ fn generate_updates(entries: Vec<(Word, Word)>, updates: usize) -> Vec<(Word, Wo (key, value) }) .collect(); - sorted_entries.sort_by_key(|(key, _)| Smt::key_to_leaf_index(key).value()); + sorted_entries.sort_by_key(|(key, _)| Smt::key_to_leaf_index(key).position()); sorted_entries } @@ -491,7 +491,7 @@ fn test_smt_construction_with_entries_duplicate_keys() { ([ONE; 4].into(), [ONE; 4].into()), ([ONE, ONE, ONE, Felt::new(16)].into(), [ONE; 4].into()), ]; - let expected_col = Smt::key_to_leaf_index(&entries[0].0).index.value(); + let expected_col = Smt::key_to_leaf_index(&entries[0].0).index.position(); let err = Smt::with_entries(entries).unwrap_err(); assert_matches!(err, MerkleError::DuplicateValuesForIndex(col) if col == expected_col); } @@ -637,7 +637,7 @@ fn arb_entries() -> impl Strategy> { let mut result = Vec::new(); for (key, value) in entries { - let leaf_index = LeafIndex::::from(key).value(); + let leaf_index = LeafIndex::::from(key).position(); if used_indices.insert(leaf_index) && used_keys.insert(key) { result.push((key, value)); } diff --git a/miden-crypto/src/merkle/smt/full/leaf.rs b/miden-crypto/src/merkle/smt/full/leaf.rs index 173082e43..7ea1dcca9 100644 --- a/miden-crypto/src/merkle/smt/full/leaf.rs +++ b/miden-crypto/src/merkle/smt/full/leaf.rs @@ -184,6 +184,12 @@ impl SmtLeaf { self.entries().iter().copied().flat_map(kv_to_elements) } + /// Returns an iterator over the key-value pairs in the leaf. + pub fn to_entries(&self) -> impl Iterator + '_ { + // Needed for type conversion from `&(T, T)` to `(&T, &T)`. + self.entries().iter().map(|(k, v)| (k, v)) + } + /// Converts a leaf to a list of field elements. 
pub fn into_elements(self) -> Vec { self.into_entries().into_iter().flat_map(kv_to_elements).collect() @@ -388,7 +394,7 @@ impl Serializable for SmtLeaf { self.num_entries().write_into(target); // Write: leaf index - let leaf_index: u64 = self.index().value(); + let leaf_index: u64 = self.index().position(); leaf_index.write_into(target); // Write: entries diff --git a/miden-crypto/src/merkle/smt/full/mod.rs b/miden-crypto/src/merkle/smt/full/mod.rs index b8afdba63..efa23f7f4 100644 --- a/miden-crypto/src/merkle/smt/full/mod.rs +++ b/miden-crypto/src/merkle/smt/full/mod.rs @@ -202,7 +202,7 @@ impl Smt { if old_value != EMPTY_WORD || key_set_to_zero.contains(&key) { return Err(MerkleError::DuplicateValuesForIndex( - LeafIndex::::from(key).value(), + LeafIndex::::from(key).position(), )); } @@ -260,6 +260,11 @@ impl Smt { >::get_leaf(self, key) } + /// Returns the leaf corresponding to the provided `index`. + pub fn get_leaf_by_index(&self, index: LeafIndex) -> Option { + self.leaves.get(&index.position()).cloned() + } + /// Returns the value associated with `key` pub fn get_value(&self, key: &Word) -> Word { >::get_value(self, key) @@ -402,7 +407,7 @@ impl Smt { let leaf_index: LeafIndex = Self::key_to_leaf_index(&key); - match self.leaves.get_mut(&leaf_index.value()) { + match self.leaves.get_mut(&leaf_index.position()) { Some(leaf) => { let prev_entries = leaf.num_entries(); let result = leaf.insert(key, value).map_err(|e| match e { @@ -416,7 +421,7 @@ impl Smt { Ok(result) }, None => { - self.leaves.insert(leaf_index.value(), SmtLeaf::Single((key, value))); + self.leaves.insert(leaf_index.position(), SmtLeaf::Single((key, value))); self.num_entries += 1; Ok(None) }, @@ -427,13 +432,13 @@ impl Smt { fn perform_remove(&mut self, key: Word) -> Option { let leaf_index: LeafIndex = Self::key_to_leaf_index(&key); - if let Some(leaf) = self.leaves.get_mut(&leaf_index.value()) { + if let Some(leaf) = self.leaves.get_mut(&leaf_index.position()) { let prev_entries = 
leaf.num_entries(); let (old_value, is_empty) = leaf.remove(key); let current_entries = leaf.num_entries(); self.num_entries -= prev_entries - current_entries; if is_empty { - self.leaves.remove(&leaf_index.value()); + self.leaves.remove(&leaf_index.position()); } old_value } else { @@ -510,7 +515,7 @@ impl SparseMerkleTree for Smt { } fn get_value(&self, key: &Self::Key) -> Self::Value { - let leaf_pos = LeafIndex::::from(*key).value(); + let leaf_pos = LeafIndex::::from(*key).position(); match self.leaves.get(&leaf_pos) { Some(leaf) => leaf.get_value(key).unwrap_or_default(), @@ -519,7 +524,7 @@ impl SparseMerkleTree for Smt { } fn get_leaf(&self, key: &Word) -> Self::Leaf { - let leaf_pos = LeafIndex::::from(*key).value(); + let leaf_pos = LeafIndex::::from(*key).position(); match self.leaves.get(&leaf_pos) { Some(leaf) => leaf.clone(), diff --git a/miden-crypto/src/merkle/smt/full/proof.rs b/miden-crypto/src/merkle/smt/full/proof.rs index e61503bf6..fb2fe3b70 100644 --- a/miden-crypto/src/merkle/smt/full/proof.rs +++ b/miden-crypto/src/merkle/smt/full/proof.rs @@ -132,7 +132,7 @@ impl SmtProof { /// Computes the root of a [`super::Smt`] to which this proof resolves. pub fn compute_root(&self) -> Word { self.path - .compute_root(self.leaf.index().value(), self.leaf.hash()) + .compute_root(self.leaf.index().position(), self.leaf.hash()) .expect("failed to compute Merkle path root") } @@ -149,7 +149,7 @@ impl SmtProof { /// Returns an iterator over every inner node of this proof's merkle path. 
pub fn authenticated_nodes(&self) -> impl Iterator + '_ { self.path - .authenticated_nodes(self.leaf.index().value(), self.leaf.hash()) + .authenticated_nodes(self.leaf.index().position(), self.leaf.hash()) .expect("leaf index is u64 and should be less than 2^SMT_DEPTH") } diff --git a/miden-crypto/src/merkle/smt/large/batch_ops.rs b/miden-crypto/src/merkle/smt/large/batch_ops.rs index 9c4b9da35..978e29f84 100644 --- a/miden-crypto/src/merkle/smt/large/batch_ops.rs +++ b/miden-crypto/src/merkle/smt/large/batch_ops.rs @@ -115,7 +115,7 @@ impl LargeSmt { // Collect the unique leaf indices let mut leaf_indices: Vec = sorted_kv_pairs .iter() - .map(|(key, _)| Self::key_to_leaf_index(key).value()) + .map(|(key, _)| Self::key_to_leaf_index(key).position()) .collect(); leaf_indices.dedup(); leaf_indices.par_sort_unstable(); @@ -149,7 +149,7 @@ impl LargeSmt { let accumulator = process_sorted_pairs_to_leaves(pairs, |leaf_pairs| { let leaf_index = LeafIndex::::from(leaf_pairs[0].0); - let old_leaf_opt = leaf_map.get(&leaf_index.value()).and_then(|opt| opt.as_ref()); + let old_leaf_opt = leaf_map.get(&leaf_index.position()).and_then(|opt| opt.as_ref()); let old_entry_count = old_leaf_opt.map(|leaf| leaf.entries().len()).unwrap_or(0); let mut leaf = old_leaf_opt @@ -265,7 +265,7 @@ impl LargeSmt { // Add the parent node even if it is empty for proper upward updates next_leaves.push(SubtreeLeaf { - col: parent_index.value(), + col: parent_index.position(), hash: combined_hash, }); @@ -345,7 +345,7 @@ impl LargeSmt { { // Sort key-value pairs by leaf index let mut sorted_kv_pairs: Vec<_> = kv_pairs.into_iter().collect(); - sorted_kv_pairs.par_sort_by_key(|(key, _)| Self::key_to_leaf_index(key).value()); + sorted_kv_pairs.par_sort_by_key(|(key, _)| Self::key_to_leaf_index(key).position()); // Load leaves from storage let (_leaf_indices, leaf_map) = self.load_leaves_for_pairs(&sorted_kv_pairs)?; @@ -509,12 +509,13 @@ impl LargeSmt { // Collect and sort key-value pairs by their 
corresponding leaf index let mut sorted_kv_pairs: Vec<_> = new_pairs.iter().map(|(k, v)| (*k, *v)).collect(); - sorted_kv_pairs.par_sort_by_key(|(key, _)| LargeSmt::::key_to_leaf_index(key).value()); + sorted_kv_pairs + .par_sort_by_key(|(key, _)| LargeSmt::::key_to_leaf_index(key).position()); // Collect the unique leaf indices let mut leaf_indices: Vec = sorted_kv_pairs .iter() - .map(|(key, _)| LargeSmt::::key_to_leaf_index(key).value()) + .map(|(key, _)| LargeSmt::::key_to_leaf_index(key).position()) .collect(); leaf_indices.par_sort_unstable(); leaf_indices.dedup(); @@ -599,7 +600,7 @@ impl LargeSmt { let mut entry_count_delta = 0isize; for (key, value) in new_pairs { - let idx = LargeSmt::::key_to_leaf_index(&key).value(); + let idx = LargeSmt::::key_to_leaf_index(&key).position(); let entry = leaf_map.entry(idx).or_insert(None); // New value is empty, handle deletion @@ -686,7 +687,7 @@ impl LargeSmt { // Collect and sort key-value pairs by their corresponding leaf index let mut sorted_kv_pairs: Vec<_> = kv_pairs.into_iter().collect(); sorted_kv_pairs - .par_sort_unstable_by_key(|(key, _)| LargeSmt::::key_to_leaf_index(key).value()); + .par_sort_unstable_by_key(|(key, _)| LargeSmt::::key_to_leaf_index(key).position()); // Load leaves from storage using helper let (_leaf_indices, leaf_map) = self.load_leaves_for_pairs(&sorted_kv_pairs)?; @@ -835,7 +836,7 @@ impl LargeSmt { .new_pairs .keys() .map(|key| { - let leaf_idx = LargeSmt::::key_to_leaf_index(key).value(); + let leaf_idx = LargeSmt::::key_to_leaf_index(key).position(); let old_value = prepared .leaf_map .get(&leaf_idx) diff --git a/miden-crypto/src/merkle/smt/large/construction.rs b/miden-crypto/src/merkle/smt/large/construction.rs index 8d184d5c2..f2282efc2 100644 --- a/miden-crypto/src/merkle/smt/large/construction.rs +++ b/miden-crypto/src/merkle/smt/large/construction.rs @@ -229,7 +229,7 @@ impl LargeSmt { fn build_subtrees(&mut self, mut entries: Vec<(Word, Word)>) -> Result<(), MerkleError> { 
entries.par_sort_unstable_by_key(|item| { let index = Self::key_to_leaf_index(&item.0); - index.value() + index.position() }); self.build_subtrees_from_sorted_entries(entries)?; Ok(()) diff --git a/miden-crypto/src/merkle/smt/large/mod.rs b/miden-crypto/src/merkle/smt/large/mod.rs index 9f48cc4c8..b19abd879 100644 --- a/miden-crypto/src/merkle/smt/large/mod.rs +++ b/miden-crypto/src/merkle/smt/large/mod.rs @@ -453,8 +453,8 @@ pub(super) fn is_empty_parent(left: Word, right: Word, child_depth: u8) -> bool /// For a node at index i: left child at 2*i, right child at 2*i+1. pub(super) fn to_memory_index(index: &NodeIndex) -> usize { debug_assert!(index.depth() < IN_MEMORY_DEPTH); - debug_assert!(index.value() < (1 << index.depth())); - (1usize << index.depth()) + index.value() as usize + debug_assert!(index.position() < (1 << index.depth())); + (1usize << index.depth()) + index.position() as usize } impl PartialEq for LargeSmt { diff --git a/miden-crypto/src/merkle/smt/large/property_tests.rs b/miden-crypto/src/merkle/smt/large/property_tests.rs index bcc99cee3..9314fcfb3 100644 --- a/miden-crypto/src/merkle/smt/large/property_tests.rs +++ b/miden-crypto/src/merkle/smt/large/property_tests.rs @@ -32,7 +32,7 @@ fn arb_entries(min_size: usize, max_size: usize) -> impl Strategy::from(key).value(); + let leaf_index = LeafIndex::::from(key).position(); if used_indices.insert(leaf_index) && used_keys.insert(key) { result.push((key, value)); } diff --git a/miden-crypto/src/merkle/smt/large/smt_trait.rs b/miden-crypto/src/merkle/smt/large/smt_trait.rs index 65174af1e..db5a7b07e 100644 --- a/miden-crypto/src/merkle/smt/large/smt_trait.rs +++ b/miden-crypto/src/merkle/smt/large/smt_trait.rs @@ -122,7 +122,7 @@ impl SparseMerkleTree for LargeSmt { value: Self::Value, ) -> Result, MerkleError> { // inserting an `EMPTY_VALUE` is equivalent to removing any value associated with `key` - let index = Self::key_to_leaf_index(&key).value(); + let index = 
Self::key_to_leaf_index(&key).position(); if value != Self::EMPTY_VALUE { match self.storage.insert_value(index, key, value) { Ok(prev) => Ok(prev), @@ -140,7 +140,7 @@ impl SparseMerkleTree for LargeSmt { fn get_value(&self, key: &Self::Key) -> Self::Value { let leaf_pos = LeafIndex::::from(*key); - match self.storage.get_leaf(leaf_pos.value()) { + match self.storage.get_leaf(leaf_pos.position()) { Ok(Some(leaf)) => leaf.get_value(key).unwrap_or_default(), Ok(None) => EMPTY_WORD, Err(_) => { @@ -150,7 +150,7 @@ impl SparseMerkleTree for LargeSmt { } fn get_leaf(&self, key: &Word) -> Self::Leaf { - let leaf_pos = LeafIndex::::from(*key).value(); + let leaf_pos = LeafIndex::::from(*key).position(); match self.storage.get_leaf(leaf_pos) { Ok(Some(leaf)) => leaf, Ok(None) => SmtLeaf::new_empty((*key).into()), @@ -210,7 +210,7 @@ impl SparseMerkleTree for LargeSmt { } let mut path = Vec::with_capacity(idx.depth() as usize); while idx.depth() > 0 { - let is_right = idx.is_value_odd(); + let is_right = idx.is_position_odd(); idx = idx.parent(); let sibling_hash = if idx.depth() < IN_MEMORY_DEPTH { diff --git a/miden-crypto/src/merkle/smt/large/storage/rocksdb.rs b/miden-crypto/src/merkle/smt/large/storage/rocksdb.rs index f639fa507..1865f99cd 100644 --- a/miden-crypto/src/merkle/smt/large/storage/rocksdb.rs +++ b/miden-crypto/src/merkle/smt/large/storage/rocksdb.rs @@ -227,7 +227,7 @@ impl RocksDbStorage { 56 => 7, d => panic!("unsupported depth {d}"), }; - KeyBytes::new(index.value(), keep) + KeyBytes::new(index.position(), keep) } /// Retrieves a handle to a RocksDB column family by its name. 
@@ -642,7 +642,7 @@ impl SmtStorage for RocksDbStorage { .hash(); let depth24_cf = self.cf_handle(DEPTH_24_CF)?; - let hash_key = Self::index_db_key(subtree.root_index().value()); + let hash_key = Self::index_db_key(subtree.root_index().position()); batch.put_cf(depth24_cf, hash_key, root_hash.to_bytes()); } @@ -677,7 +677,7 @@ impl SmtStorage for RocksDbStorage { if subtree.root_index().depth() == IN_MEMORY_DEPTH && let Some(root_node) = subtree.get_inner_node(subtree.root_index()) { - let hash_key = Self::index_db_key(subtree.root_index().value()); + let hash_key = Self::index_db_key(subtree.root_index().position()); batch.put_cf(depth24_cf, hash_key, root_node.hash().to_bytes()); } } @@ -701,7 +701,7 @@ impl SmtStorage for RocksDbStorage { // Also remove level 24 hash cache if this is a level 24 subtree if index.depth() == IN_MEMORY_DEPTH { let depth24_cf = self.cf_handle(DEPTH_24_CF)?; - let hash_key = Self::index_db_key(index.value()); + let hash_key = Self::index_db_key(index.position()); batch.delete_cf(depth24_cf, hash_key); } @@ -844,14 +844,14 @@ impl SmtStorage for RocksDbStorage { .then(|| subtree.get_inner_node(index)) .flatten() .map(|root_node| { - let hash_key = Self::index_db_key(index.value()); + let hash_key = Self::index_db_key(index.position()); (hash_key, Some(root_node.hash().to_bytes())) }); (index, Some(bytes), depth24_op) }, SubtreeUpdate::Delete { index } => { let depth24_op = is_depth_24(index).then(|| { - let hash_key = Self::index_db_key(index.value()); + let hash_key = Self::index_db_key(index.position()); (hash_key, None) }); (index, None, depth24_op) diff --git a/miden-crypto/src/merkle/smt/large/subtree/mod.rs b/miden-crypto/src/merkle/smt/large/subtree/mod.rs index bc7348fb3..fae5586c8 100644 --- a/miden-crypto/src/merkle/smt/large/subtree/mod.rs +++ b/miden-crypto/src/merkle/smt/large/subtree/mod.rs @@ -220,14 +220,14 @@ impl Subtree { let base_offset = (1 << relative_depth) - 1; // Mask out the lower `relative_depth` bits to 
find the local position in the subtree let mask = (1 << relative_depth) - 1; - let local_position = (global.value() & mask) as u8; + let local_position = (global.position() & mask) as u8; base_offset + local_position } pub fn subtree_key(root_index: NodeIndex) -> [u8; 9] { let mut key = [0u8; 9]; key[0] = root_index.depth(); - key[1..].copy_from_slice(&root_index.value().to_be_bytes()); + key[1..].copy_from_slice(&root_index.position().to_be_bytes()); key } @@ -238,7 +238,7 @@ impl Subtree { } else { let subtree_root_depth = depth - (depth % SUBTREE_DEPTH); let relative_depth = depth - subtree_root_depth; - let base_value = node_index.value() >> relative_depth; + let base_value = node_index.position() >> relative_depth; NodeIndex::new(subtree_root_depth, base_value).unwrap() } diff --git a/miden-crypto/src/merkle/smt/large/subtree/tests.rs b/miden-crypto/src/merkle/smt/large/subtree/tests.rs index 6c8e52742..7380dac8a 100644 --- a/miden-crypto/src/merkle/smt/large/subtree/tests.rs +++ b/miden-crypto/src/merkle/smt/large/subtree/tests.rs @@ -288,7 +288,7 @@ fn find_subtree_root_for_various_nodes() { subtree_0_root, "Node at depth {}, value {} should belong to subtree rooted at depth {}, value 0", node_idx.depth(), - node_idx.value(), + node_idx.position(), SUBTREE_DEPTH ); } @@ -307,7 +307,7 @@ fn find_subtree_root_for_various_nodes() { subtree_1_root, "Node at depth {}, value {} should belong to subtree rooted at depth {}, value 1", node_idx.depth(), - node_idx.value(), + node_idx.position(), SUBTREE_DEPTH ); } @@ -325,7 +325,7 @@ fn find_subtree_root_for_various_nodes() { deep_subtree_root, "Node at depth {}, value {} should belong to deep subtree", node_idx.depth(), - node_idx.value() + node_idx.position() ); } } diff --git a/miden-crypto/src/merkle/smt/large_forest/backend.rs b/miden-crypto/src/merkle/smt/large_forest/backend.rs deleted file mode 100644 index 9c83b7805..000000000 --- a/miden-crypto/src/merkle/smt/large_forest/backend.rs +++ /dev/null @@ -1,150 
+0,0 @@ -//! This file contains the [`Backend`] trait for the [`LargeSmtForest`] implementation and the -//! supporting types it needs. - -use alloc::{boxed::Box, vec::Vec}; -use core::fmt::Debug; - -use thiserror::Error; - -use crate::{ - Word, - merkle::{ - MerkleError, - smt::{ - SmtProof, TreeId, - full::SMT_DEPTH, - large_forest::{ - operation::{SmtForestUpdateBatch, SmtUpdateBatch}, - root::{LineageId, RootValue, TreeEntry, VersionId}, - }, - }, - }, -}; -// TYPE ALIASES -// ================================================================================================ - -/// The mutation set used by the forest backends. -/// -/// At the moment this is used for _reverse_ mutations that "undo" the changes made to the tree(s), -/// but may be harmonised with [`SmtUpdateBatch`] in the future. For more information on its use for -/// reverse mutations, see [`crate::merkle::smt::SparseMerkleTree::apply_mutations_with_reversion`]. -pub type MutationSet = crate::merkle::smt::MutationSet; - -// BACKEND -// ================================================================================================ - -/// The backing storage for the SMT forest, providing the necessary high-level methods for -/// performing operations on the full trees that make up the forest, while allowing the forest -/// itself to be storage agnostic. -/// -/// # Backend Data Storage -/// -/// Having a generic [`Backend`] provides no guarantees to the user about how it stores data and -/// what patterns are used for data access under the hood. It is, however, guaranteed to store -/// _only_ the data necessary to describe the latest state of each tree in the forest. -pub trait Backend -where - Self: Debug, -{ - // QUERIES - // ============================================================================================ - - /// Returns an opening for the specified `key` in the SMT with the specified `lineage`. 
- /// - /// It is the responsibility of the forest to ensure lineage existence before querying the - /// backend. The backend must return an error if the lineage does not exist. - fn open(&self, lineage: LineageId, key: Word) -> Result; - - /// Returns the value associated with the provided `key` in the SMT with the specified - /// `lineage`, or [`None`] if no such value exists. - /// - /// It is the responsibility of the forest to ensure lineage existence before querying the - /// backend. The backend must return an error if the lineage does not exist. - fn get(&self, lineage: LineageId, key: Word) -> Result>; - - /// Returns the version of the tree with the specified `lineage`. - /// - /// It is the responsibility of the forest to ensure lineage existence before querying the - /// backend. The backend must return an error if the lineage does not exist. - fn version(&self, lineage: LineageId) -> Result; - - /// Returns an iterator over all the lineages that the backend knows about. - /// - /// The iteration order is unspecified. - fn lineages(&self) -> Result>; - - /// Returns an iterator over all the trees (and their corresponding roots) that the backend - /// knows about. - /// - /// The iteration order is unspecified. - fn trees(&self) -> Result>; - - /// Returns the total number of non-empty leaves in the specified `tree`. - /// - /// It is the responsibility of the forest to ensure lineage existence before querying the - /// backend. The backend must return an error if the lineage does not exist. - fn entry_count(&self, tree: TreeId) -> Result; - - /// Returns an iterator that yields the populated entries for the specified `tree`. - /// - /// It is the responsibility of the forest to ensure lineage existence before querying the - /// backend. The backend must return an error if the lineage does not exist. 
- fn entries(&self, tree: TreeId) -> Result>; - - // SINGLE-TREE MODIFIERS - // ============================================================================================ - - /// Performs the provided `updates` on the tree with the specified `lineage`, returning the - /// mutation set that will revert the changes made to the tree. - /// - /// Implementations must guarantee the following behavior, with non-conforming implementations - /// considered to be a bug: - /// - /// - At most one new root must be added to the forest for the entire batch. - /// - If applying the provided `updates` results in no changes to the tree, no new tree must be - /// allocated. - fn update_tree( - &mut self, - lineage: LineageId, - new_version: VersionId, - updates: SmtUpdateBatch, - ) -> Result; - - // MULTI-TREE MODIFIERS - // ============================================================================================ - - /// Performs the provided `updates` on the forest, setting all new tree states to have the - /// provided `new_version` and returning a vector of the mutation sets that reverse the changes - /// to each changed tree. - /// - /// Implementations must guarantee the following behavior, with non-conforming implementations - /// considered to be a bug: - /// - /// - At most one new root must be added to the forest for each target root in the provided - /// `updates`. - /// - If applying the provided `updates` results in no changes to a given lineage of trees in - /// the forest, then no new tree must be allocated in that lineage. - fn update_forest( - &mut self, - new_version: VersionId, - updates: SmtForestUpdateBatch, - ) -> Result>; -} - -// BACKEND ERROR -// ================================================================================================ - -/// The error type for use within Backends. -#[derive(Debug, Error)] -pub enum BackendError { - /// Raised when there is an error with the merkle tree semantics within the backend. 
- #[error(transparent)] - Merkle(#[from] MerkleError), - - /// Raised for arbitrary other errors within the backend. - #[error(transparent)] - Other(#[from] Box), -} - -/// The result type for use with backends. -pub type Result = core::result::Result; diff --git a/miden-crypto/src/merkle/smt/large_forest/backend/memory/mod.rs b/miden-crypto/src/merkle/smt/large_forest/backend/memory/mod.rs new file mode 100644 index 000000000..8e8ca502a --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/backend/memory/mod.rs @@ -0,0 +1,399 @@ +//! This module contains a non-persistent, in-memory [`Backend`] for the SMT forest. It is +//! non-parallel and is not intended to be such, allowing its use on effectively any platform where +//! this library can be built. +//! +//! # Performance + +mod tests; + +use alloc::{ + collections::{BTreeMap, BTreeSet}, + vec::Vec, +}; + +use crate::{ + EMPTY_WORD, Map, Word, + merkle::smt::{ + LeafIndex, SMT_DEPTH, Smt, SmtProof, VersionId, + large_forest::{ + Backend, + backend::{BackendError, MutationSet, Result}, + operation::{SmtForestUpdateBatch, SmtUpdateBatch}, + root::{LineageId, TreeEntry, TreeWithRoot}, + }, + }, +}; + +// IN-MEMORY BACKEND +// ================================================================================================ + +/// The in-memory backend itself. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct InMemoryBackend { + /// The storage for the full trees that are stored in this backend, always guaranteed to be the + /// latest tree in the lineage. + trees: Map, +} + +impl InMemoryBackend { + /// Constructs a new instance of the in-memory backend. + pub fn new() -> Self { + let trees = Map::default(); + Self { trees } + } +} + +// BACKEND TRAIT +// ================================================================================================ + +impl Backend for InMemoryBackend { + /// Returns an opening for the specified `key` in the SMT with the specified `lineage`. 
+ /// + /// # Errors + /// + /// - [`BackendError::UnknownLineage`] If the provided `lineage` is one not known by this + /// backend. + fn open(&self, lineage: LineageId, key: Word) -> Result { + let tree = self.trees.get(&lineage).ok_or(BackendError::UnknownLineage(lineage))?; + Ok(tree.tree.open(&key)) + } + + /// Returns the value associated with the provided `key` in the SMT with the specified + /// `lineage`, or [`None`] if no such value exists. + /// + /// # Errors + /// + /// - [`BackendError::UnknownLineage`] If the provided `lineage` is one not known by this + /// backend. + fn get(&self, lineage: LineageId, key: Word) -> Result> { + let tree = self.trees.get(&lineage).ok_or(BackendError::UnknownLineage(lineage))?; + let value = tree.tree.get_value(&key); + let value = if value == EMPTY_WORD { None } else { Some(value) }; + + Ok(value) + } + + /// Returns the version of the tree with the specified `lineage`. + /// + /// # Errors + /// + /// - [`BackendError::UnknownLineage`] If the provided `lineage` is one not known by this + /// backend. + fn version(&self, lineage: LineageId) -> Result { + let tree = self.trees.get(&lineage).ok_or(BackendError::UnknownLineage(lineage))?; + Ok(tree.version) + } + + /// Returns an iterator over all the lineages that the backend knows about. + fn lineages(&self) -> Result> { + Ok(self.trees.keys().cloned()) + } + + /// Returns an iterator over all the trees that the backend knows about. + /// + /// The iteration order is unspecified. + fn trees(&self) -> Result> { + Ok(self.trees.iter().map(|(l, t)| TreeWithRoot::new(*l, t.version, t.tree.root()))) + } + + /// Returns the total number of (key-value) entries in the specified `tree`. + /// + /// # Errors + /// + /// - [`BackendError::UnknownLineage`] If the provided `lineage` is one not known by this + /// backend. 
+ fn entry_count(&self, lineage: LineageId) -> Result { + let tree = self.trees.get(&lineage).ok_or(BackendError::UnknownLineage(lineage))?; + Ok(tree.tree.num_entries()) + } + + /// Returns an iterator that yields the populated (key-value) entries for the specified + /// `lineage`. + /// + /// This iterator yields entries in an order such that they are sorted by their leaf index, + /// within which entries that share a leaf index are sorted by key. + /// + /// # Errors + /// + /// - [`BackendError::UnknownLineage`] If the provided `lineage` is one not known by this + /// backend. + fn entries(&self, lineage: LineageId) -> Result> { + let tree = self.trees.get(&lineage).ok_or(BackendError::UnknownLineage(lineage))?; + Ok(InMemoryBackendEntriesIterator::new(&tree.tree)) + } + + /// Adds the provided `lineage` to the forest. + /// + /// # Errors + /// + /// - [`BackendError::DuplicateLineage`] if the provided `lineage` is the same as an + /// already-known lineage. No data is changed in this case. + /// - [`BackendError::Merkle`] if the provided `updates` cannot be applied to the empty tree. + fn add_lineage( + &mut self, + lineage: LineageId, + version: VersionId, + updates: SmtUpdateBatch, + ) -> Result { + // Returning this in the case of a duplicate lineage is required by the method contract on + // the `Backend` trait. + if self.trees.contains_key(&lineage) { + return Err(BackendError::DuplicateLineage(lineage)); + } + + let mut tree = Smt::new(); + + // A failure to compute mutations is a failure derived from user input, so we forward it as + // appropriate. + let mutations = tree.compute_mutations(updates.into_iter().map(|o| o.into()))?; + + // If computation of the mutations has succeeded but the application fails, then this should + // be reported as an internal error, not a merkle error, to allow the caller to decide what + // to do. 
+ tree.apply_mutations(mutations).map_err(BackendError::internal_from)?; + + // The following has had its preconditions checked, so we can change the state without + // worrying about consistency. + let tree_data = TreeData { version, tree }; + let root = tree_data.tree.root(); + self.trees.insert(lineage, tree_data); + Ok(TreeWithRoot::new(lineage, version, root)) + } + + /// Performs the provided `updates` on the tree with the specified `lineage`, returning the + /// mutation set that will revert the changes made to the tree. + /// + /// At most one new root is added to the backend for the entire batch. + /// + /// # Errors + /// + /// - [`BackendError::Merkle`] if the application of `updates` to the tree fails for any reason. + /// - [`BackendError::UnknownLineage`] If the provided `lineage` is one not known by this + /// backend. + fn update_tree( + &mut self, + lineage: LineageId, + new_version: VersionId, + updates: SmtUpdateBatch, + ) -> Result { + // The method contract requires raising this error in the case that `lineage` is unknown to + // the backend. + let tree_data = + self.trees.get_mut(&lineage).ok_or(BackendError::UnknownLineage(lineage))?; + let tree = &mut tree_data.tree; + + // We compute the mutations as a precondition check, which will leave the underlying tree in + // the same state if anything errors. Any error this yields is considered to be derived from + // user-input and hence is forwarded as-is. + let mutations = tree.compute_mutations(updates.into_iter().map(|o| o.into()))?; + + // The invariants on this method given by the `Backend` trait states that no new allocations + // should be performed if the updates do not change the tree. As a result, we can + // short-circuit even trying. + if mutations.is_empty() { + // As the reverse of an empty mutations is also empty mutations, we can just return + // that. 
+ return Ok(mutations); + } + + // Any failure to apply the mutations here is considered an internal error, so we transform + // it as such. + let reversion_set = tree + .apply_mutations_with_reversion(mutations) + .map_err(BackendError::internal_from)?; + + // With preconditions checked, we can actually perform our modifications as it should yield + // a consistent state. + tree_data.version = new_version; + + Ok(reversion_set) + } + + /// Performs the provided `updates` on the entire forest, returning the mutation + /// sets that would reverse the changes to each tree in the forest. + /// + /// The order of application of these mutations is unspecified. + /// + /// # Errors + /// + /// - [`BackendError::Merkle`] if any set of operations on any lineage in the batch fail for any + /// reason. + /// - [`BackendError::UnknownLineage`] if any lineage in the `updates` is not known by the + /// backend. + /// + /// # Panics + /// + /// - If a tree that has been checked to be present is not present upon later access. + fn update_forest( + &mut self, + new_version: VersionId, + updates: SmtForestUpdateBatch, + ) -> Result> { + // We start by checking that all lineages referred to in the batch of `updates` are valid, + // failing early with an error if need be. + let updates = updates + .into_iter() + .map(|(lineage, ops)| { + if !self.trees.contains_key(&lineage) { + return Err(BackendError::UnknownLineage(lineage)); + } + + Ok((lineage, ops)) + }) + .collect::>>()?; + + // Next, we compute all the relevant mutations to each tree, also failing with an error + // where relevant. + let mutations = updates + .into_iter() + .map(|(lineage, ops)| { + let tree = self.trees.get(&lineage).expect("Tree known to be present was not"); + let mutations = tree.tree.compute_mutations(ops.into_iter().map(|o| o.into()))?; + Ok((lineage, mutations)) + }) + .collect::>>()?; + + // With the preconditions checked, we can unconditionally perform the changes on all trees. 
+ let reversion_sets = mutations + .into_iter() + .map(|(lineage, mutations)| { + if mutations.is_empty() { + // The inverse of empty mutations is empty mutations. + Ok((lineage, mutations)) + } else { + let tree = + self.trees.get_mut(&lineage).expect("Tree known to be present was not"); + let reversion = tree + .tree + .apply_mutations_with_reversion(mutations) + .map_err(BackendError::internal_from)?; + tree.version = new_version; + Ok((lineage, reversion)) + } + }) + .collect::>>()?; + + Ok(reversion_sets) + } +} + +// TRAIT IMPLEMENTATIONS +// ================================================================================================ + +impl Default for InMemoryBackend { + fn default() -> Self { + Self::new() + } +} + +// TREE DATA +// ================================================================================================ + +/// A container for the data associated with the latest tree in a given lineage within the backend. +#[derive(Clone, Debug, Eq, PartialEq)] +struct TreeData { + version: VersionId, + tree: Smt, +} + +// ENTRIES ITERATOR +// ================================================================================================ + +/// An iterator over entries in a given tree in the backend. +/// +/// It is guaranteed to yield entries such that they are sorted by their leaf index, and then for +/// entries that share the same leaf index they are sorted by their key. It should never yield +/// entries that have `value == EMPTY_WORD`. +#[derive(Clone, Debug)] +struct InMemoryBackendEntriesIterator<'backend> { + /// A reference to the tree over which the iterator is running. + tree: &'backend Smt, + + /// The leaves that are yet to have their entries iterated over. + remaining_leaves: BTreeSet>, + + /// The current iteration state of the iterator. + state: InMemoryBackendEntriesIteratorState, +} +impl<'backend> InMemoryBackendEntriesIterator<'backend> { + /// Constructs a new iterator over the entries for a tree. 
+ pub fn new(tree: &'backend Smt) -> Self { + let remaining_leaves = tree.leaves().map(|(ix, _)| ix).collect::>(); + assert!(remaining_leaves.iter().is_sorted()); + + let state = InMemoryBackendEntriesIteratorState::NotInLeaf; + + Self { tree, remaining_leaves, state } + } +} + +impl<'backend> Iterator for InMemoryBackendEntriesIterator<'backend> { + type Item = TreeEntry; + + fn next(&mut self) -> Option { + match &mut self.state { + InMemoryBackendEntriesIteratorState::NotInLeaf => { + // If we are not inside a leaf we need to see if we can become so. + if let Some(ix) = self.remaining_leaves.pop_first() { + // If we can move into a new leaf, we transition the state into that leaf and + // return the entry. + let leaf = self + .tree + .get_leaf_by_index(ix) + .expect("Leaf should exist for index derived from tree"); + + // We can now grab the entries from the leaf, and we know that if it was in the + // source iterator it must have at least one. We smoosh them into a BTreeMap to + // ensure that they are sorted by key as required. + let entries: BTreeMap<_, _> = + leaf.to_entries().map(|(k, v)| (*k, *v)).collect(); + let (key, value) = entries.first_key_value() + .expect("The source iterator should have provided only leaves with at least one entry."); + let item = TreeEntry { key: *key, value: *value }; + + self.state = + InMemoryBackendEntriesIteratorState::InEntry { remaining_entries: entries }; + + Some(item) + } else { + // If we can't move into a new leaf, the iterator is done. + None + } + }, + InMemoryBackendEntriesIteratorState::InEntry { remaining_entries } => { + // If we are already inside a leaf when `next` is called, we need to pop the front + // value. + remaining_entries + .pop_first() + .expect("InEntry implies there should be at least one entry"); + + // There are then two cases that can happen. + if let Some((k, v)) = remaining_entries.first_key_value() { + // The simple case is that we have another entry in the current leaf. 
In that + // case, we just re-write the current state to track this. + let item = TreeEntry { key: *k, value: *v }; + + Some(item) + } else { + // If we reach here there are no further entries in the leaf, so we are + // implicitly in the `NotInLeaf` state. We make this explicit and then recurse + // the once. + self.state = InMemoryBackendEntriesIteratorState::NotInLeaf; + self.next() + } + }, + } + } +} + +#[derive(Clone, Debug)] +enum InMemoryBackendEntriesIteratorState { + /// The iterator is currently not in a leaf. + NotInLeaf, + + /// The iterator is pointing to a specific entry in a leaf. + InEntry { + /// The remaining entries in the leaf. + remaining_entries: BTreeMap, + }, +} diff --git a/miden-crypto/src/merkle/smt/large_forest/backend/memory/tests.rs b/miden-crypto/src/merkle/smt/large_forest/backend/memory/tests.rs new file mode 100644 index 000000000..dc424fc6e --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/backend/memory/tests.rs @@ -0,0 +1,581 @@ +#![cfg(test)] +//! This module contains the tests for the in-memory backend for the SMT forest. +//! +//! Rather than hard-code specific values for the trees, these tests rely on the correctness of the +//! existing [`Smt`] implementation, comparing the results of the in-memory backend against it +//! wherever relevant. + +use assert_matches::assert_matches; +use itertools::Itertools; + +use crate::{ + EMPTY_WORD, Word, + merkle::smt::{ + Backend, BackendError, Smt, SmtForestUpdateBatch, SmtUpdateBatch, VersionId, + large_forest::{ + InMemoryBackend, + backend::Result, + root::{LineageId, TreeEntry, TreeWithRoot}, + }, + }, + rand::test_utils::ContinuousRng, +}; + +// CONSTRUCTION +// ================================================================================================ + +#[test] +fn new() -> Result<()> { + let backend = InMemoryBackend::new(); + + // A newly created in-memory backend should not know about any lineages. 
+ assert_eq!(backend.lineages()?.count(), 0); + + // It should similarly not know about any trees. + assert_eq!(backend.trees()?.count(), 0); + + Ok(()) +} + +// BACKEND TRAIT +// ================================================================================================ + +#[test] +fn open() -> Result<()> { + let mut backend = InMemoryBackend::new(); + let mut rng = ContinuousRng::new([0x42; 32]); + + // When we `open` for a lineage that has never been added to the backend, it should yield an + // error. + let ne_lineage: LineageId = rng.value(); + let random_key: Word = rng.value(); + let result = backend.open(ne_lineage, random_key); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::UnknownLineage(l) if l == ne_lineage); + + // Let's now add a tree with a few items in it to the forest. + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let key_1: Word = rng.value(); + let value_1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2: Word = rng.value(); + + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1, value_1); + operations.add_insert(key_2, value_2); + + backend.add_lineage(lineage_1, version_1, operations)?; + + // We also want to match this against a reference merkle tree to check correctness, so let's + // create that now. + let mut tree = Smt::new(); + tree.insert(key_1, value_1)?; + tree.insert(key_2, value_2)?; + + // Let's first get the backend's opening for a key that hasn't been inserted. This should still + // return properly, and should match the opening provided by the reference tree. + let backend_result = backend.open(lineage_1, random_key)?; + let smt_result = tree.open(&random_key); + assert_eq!(backend_result, smt_result); + + // It should also generate correct openings for both of the inserted values. 
+ assert_eq!(backend.open(lineage_1, key_1)?, tree.open(&key_1)); + assert_eq!(backend.open(lineage_1, key_2)?, tree.open(&key_2)); + + Ok(()) +} + +#[test] +fn get() -> Result<()> { + let mut backend = InMemoryBackend::new(); + let mut rng = ContinuousRng::new([0x71; 32]); + + // When we `get` for a lineage that has never been added to the backend, it should yield an + // error. + let ne_lineage: LineageId = rng.value(); + let random_key: Word = rng.value(); + let result = backend.get(ne_lineage, random_key); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::UnknownLineage(l) if l == ne_lineage); + + // Let's now add a tree with a few items in it to the forest. + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let key_1: Word = rng.value(); + let value_1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2: Word = rng.value(); + + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1, value_1); + operations.add_insert(key_2, value_2); + + backend.add_lineage(lineage_1, version_1, operations)?; + + // We also want to match this against a reference merkle tree to check correctness, so let's + // create that now. + let mut tree = Smt::new(); + tree.insert(key_1, value_1)?; + tree.insert(key_2, value_2)?; + + // Let's first get the backend's result for a key that hasn't been inserted. This should return + // `None` in our case. + assert!(backend.get(lineage_1, random_key)?.is_none()); + + // It should also provide correct values for both of the inserted values. + assert_eq!(backend.get(lineage_1, key_1)?.unwrap(), tree.get_value(&key_1)); + assert_eq!(backend.get(lineage_1, key_2)?.unwrap(), tree.get_value(&key_2)); + + Ok(()) +} + +#[test] +fn version() -> Result<()> { + let mut backend = InMemoryBackend::new(); + let mut rng = ContinuousRng::new([0x96; 32]); + + // Getting the version for a lineage that the backend doesn't know about should yield an error. 
+ let ne_lineage: LineageId = rng.value(); + let result = backend.version(ne_lineage); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::UnknownLineage(l) if l == ne_lineage); + + // Let's now shove a tree into the backend. + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let key_1: Word = rng.value(); + let value_1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2: Word = rng.value(); + + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1, value_1); + operations.add_insert(key_2, value_2); + + backend.add_lineage(lineage_1, version_1, operations)?; + + // The forest should return the correct version if asked for the version of the lineage. + assert_eq!(backend.version(lineage_1)?, version_1); + + Ok(()) +} + +#[test] +fn lineages() -> Result<()> { + let mut backend = InMemoryBackend::new(); + let mut rng = ContinuousRng::new([0x91; 32]); + + // Initially there should be no lineages. + assert_eq!(backend.lineages()?.count(), 0); + + // We'll use the same data for each tree here to simplify the test. + let key_1: Word = rng.value(); + let value_1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1, value_1); + operations.add_insert(key_2, value_2); + + let version: VersionId = rng.value(); + + // Let's start by adding one lineage and checking that the iterator contains it. 
+ let lineage_1: LineageId = rng.value(); + backend.add_lineage(lineage_1, version, operations.clone())?; + assert_eq!(backend.lineages()?.count(), 1); + assert!(backend.lineages()?.contains(&lineage_1)); + + // We add another + let lineage_2: LineageId = rng.value(); + backend.add_lineage(lineage_2, version, operations.clone())?; + assert_eq!(backend.lineages()?.count(), 2); + assert!(backend.lineages()?.contains(&lineage_1)); + assert!(backend.lineages()?.contains(&lineage_2)); + + // And yet another + let lineage_3: LineageId = rng.value(); + backend.add_lineage(lineage_3, version, operations.clone())?; + assert_eq!(backend.lineages()?.count(), 3); + assert!(backend.lineages()?.contains(&lineage_1)); + assert!(backend.lineages()?.contains(&lineage_2)); + assert!(backend.lineages()?.contains(&lineage_3)); + + Ok(()) +} + +#[test] +fn trees() -> Result<()> { + let mut backend = InMemoryBackend::new(); + let mut rng = ContinuousRng::new([0x91; 32]); + + // Initially there should be no lineages. + assert_eq!(backend.lineages()?.count(), 0); + + // We need individual trees and versions here to check on the roots, so let's add our first + // tree. + let key_1_1: Word = rng.value(); + let value_1_1: Word = rng.value(); + let key_1_2: Word = rng.value(); + let value_1_2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1_1, value_1_1); + operations.add_insert(key_1_2, value_1_2); + + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + + backend.add_lineage(lineage_1, version_1, operations)?; + + let mut tree_1 = Smt::new(); + tree_1.insert(key_1_1, value_1_1)?; + tree_1.insert(key_1_2, value_1_2)?; + + // With one tree added we should only see one root. + assert_eq!(backend.trees()?.count(), 1); + assert!( + backend + .trees()? + .contains(&TreeWithRoot::new(lineage_1, version_1, tree_1.root())) + ); + + // Let's add another tree. 
+ let key_2_1: Word = rng.value(); + let value_2_1: Word = rng.value(); + let key_2_2: Word = rng.value(); + let value_2_2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_2_1, value_2_1); + operations.add_insert(key_2_2, value_2_2); + + let lineage_2: LineageId = rng.value(); + let version_2: VersionId = rng.value(); + + backend.add_lineage(lineage_2, version_2, operations)?; + + let mut tree_2 = Smt::new(); + tree_2.insert(key_2_1, value_2_1)?; + tree_2.insert(key_2_2, value_2_2)?; + + // With two added we should see two roots. + assert_eq!(backend.trees()?.count(), 2); + assert!( + backend + .trees()? + .contains(&TreeWithRoot::new(lineage_1, version_1, tree_1.root())) + ); + assert!( + backend + .trees()? + .contains(&TreeWithRoot::new(lineage_2, version_2, tree_2.root())) + ); + + // Let's add one more, just as a sanity check. + let key_3_1: Word = rng.value(); + let value_3_1: Word = rng.value(); + let key_3_2: Word = rng.value(); + let value_3_2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_3_1, value_3_1); + operations.add_insert(key_3_2, value_3_2); + + let lineage_3: LineageId = rng.value(); + let version_3: VersionId = rng.value(); + + backend.add_lineage(lineage_3, version_3, operations)?; + + let mut tree_3 = Smt::new(); + tree_3.insert(key_3_1, value_3_1)?; + tree_3.insert(key_3_2, value_3_2)?; + + // With that added, we should see three. + assert_eq!(backend.trees()?.count(), 3); + assert!( + backend + .trees()? + .contains(&TreeWithRoot::new(lineage_1, version_1, tree_1.root())) + ); + assert!( + backend + .trees()? + .contains(&TreeWithRoot::new(lineage_2, version_2, tree_2.root())) + ); + assert!( + backend + .trees()? 
+ .contains(&TreeWithRoot::new(lineage_3, version_3, tree_3.root())) + ); + + Ok(()) +} + +#[test] +fn entry_count() -> Result<()> { + let mut backend = InMemoryBackend::new(); + let mut rng = ContinuousRng::new([0x67; 32]); + + // It should yield an error for a lineage that doesn't exist. + let ne_lineage: LineageId = rng.value(); + let result = backend.entry_count(ne_lineage); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::UnknownLineage(l) if l == ne_lineage); + + let version: VersionId = rng.value(); + + // Let's start by adding a new lineage with an entirely empty tree. + let lineage_1: LineageId = rng.value(); + backend.add_lineage(lineage_1, version, SmtUpdateBatch::default())?; + + // When queried, this should yield zero entries. + assert_eq!(backend.entry_count(lineage_1)?, 0); + + // Now let's modify that tree to add entries. + let key_1_1: Word = rng.value(); + let value_1_1: Word = rng.value(); + let key_1_2: Word = rng.value(); + let value_1_2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1_1, value_1_1); + operations.add_insert(key_1_2, value_1_2); + + backend.update_tree(lineage_1, version, operations)?; + + // Now if we query we should get two entries. + assert_eq!(backend.entry_count(lineage_1)?, 2); + + Ok(()) +} + +#[test] +fn entries() -> Result<()> { + let mut backend = InMemoryBackend::new(); + let mut rng = ContinuousRng::new([0x67; 32]); + + // It should yield an error for a lineage that doesn't exist. + let ne_lineage: LineageId = rng.value(); + let result = backend.entry_count(ne_lineage); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::UnknownLineage(l) if l == ne_lineage); + + let version: VersionId = rng.value(); + + // If we add an empty lineage, the iterator should yield no items. 
+ let lineage_1: LineageId = rng.value(); + backend.add_lineage(lineage_1, version, SmtUpdateBatch::default())?; + assert_eq!(backend.entries(lineage_1)?.count(), 0); + + // So let's add some entries. + let key_1_1: Word = rng.value(); + let value_1_1: Word = rng.value(); + let key_1_2: Word = rng.value(); + let value_1_2: Word = rng.value(); + let mut key_1_3: Word = rng.value(); + key_1_3[3] = key_1_1[3]; + let value_1_3: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1_1, value_1_1); + operations.add_insert(key_1_2, value_1_2); + operations.add_insert(key_1_3, value_1_3); + backend.update_tree(lineage_1, version, operations)?; + + // Now, the iterator should yield the expected three items. + assert_eq!(backend.entries(lineage_1)?.count(), 3); + assert!( + backend + .entries(lineage_1)? + .contains(&TreeEntry { key: key_1_1, value: value_1_1 }), + ); + assert!( + backend + .entries(lineage_1)? + .contains(&TreeEntry { key: key_1_2, value: value_1_2 }), + ); + assert!( + backend + .entries(lineage_1)? + .contains(&TreeEntry { key: key_1_3, value: value_1_3 }), + ); + + // Importantly, the iterator should also be sorted in two stages. First by leaf index, and then + // by key. + assert!(backend.entries(lineage_1)?.is_sorted_by(|l, r| { + if l.index() == r.index() { + l.key < r.key + } else { + l.index() < r.index() + } + })); + + Ok(()) +} + +#[test] +fn add_lineage() -> Result<()> { + let mut backend = InMemoryBackend::new(); + let mut rng = ContinuousRng::new([0x76; 32]); + let version: VersionId = rng.value(); + + // We should be able to add a lineage without actually changing the empty tree. + let lineage_1: LineageId = rng.value(); + backend.add_lineage(lineage_1, version, SmtUpdateBatch::default())?; + assert_eq!(backend.entry_count(lineage_1)?, 0); + + // Adding a lineage with a duplicate lineage identifier should yield an error. 
+ let result = backend.add_lineage(lineage_1, version, SmtUpdateBatch::default()); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::DuplicateLineage(l) if l == lineage_1); + + // But we should also be able to add lineages that _contain data_ from the get-go. + let key_2_1: Word = rng.value(); + let value_2_1: Word = rng.value(); + let key_2_2: Word = rng.value(); + let value_2_2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_2_1, value_2_1); + operations.add_insert(key_2_2, value_2_2); + + let lineage_2: LineageId = rng.value(); + backend.add_lineage(lineage_2, version, operations)?; + assert_eq!(backend.entry_count(lineage_2)?, 2); + + Ok(()) +} + +#[test] +fn update_tree() -> Result<()> { + let mut backend = InMemoryBackend::new(); + let mut rng = ContinuousRng::new([0x76; 32]); + + // Updating a lineage that does not exist should result in an error. + let ne_lineage: LineageId = rng.value(); + let result = backend.update_tree(ne_lineage, rng.value(), SmtUpdateBatch::default()); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::UnknownLineage(l) if l == ne_lineage); + + // So let's add an actual lineage. + let key_1_1: Word = rng.value(); + let value_1_1: Word = rng.value(); + let key_1_2: Word = rng.value(); + let value_1_2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1_1, value_1_1); + operations.add_insert(key_1_2, value_1_2); + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + + backend.add_lineage(lineage_1, version_1, operations)?; + + // And check that it agrees with a standard tree. + let mut tree_1 = Smt::new(); + tree_1.insert(key_1_1, value_1_1)?; + tree_1.insert(key_1_2, value_1_2)?; + + assert_eq!(backend.trees()?.count(), 1); + assert!(backend.trees()?.any(|e| e.root() == tree_1.root())); + + // Now let's add another node to the tree! 
Note that reusing the same version does not matter; + // version consistency is enforced by the FOREST and not the backend. + let key_1_3: Word = rng.value(); + let value_1_3: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1_3, value_1_3); + backend.update_tree(lineage_1, version_1, operations)?; + + // And we can check against our other tree for consistency again. + tree_1.insert(key_1_3, value_1_3)?; + assert_eq!(backend.trees()?.count(), 1); + assert!(backend.trees()?.any(|e| e.root() == tree_1.root())); + + // Now let's try a remove operation. + let mut operations = SmtUpdateBatch::default(); + operations.add_remove(key_1_2); + backend.update_tree(lineage_1, version_1, operations)?; + + // And check it against our other tree for consistency. + let mutations = tree_1.compute_mutations([(key_1_2, EMPTY_WORD)])?; + tree_1.apply_mutations(mutations)?; + assert_eq!(backend.trees()?.count(), 1); + assert!(backend.trees()?.any(|e| e.root() == tree_1.root())); + + Ok(()) +} + +#[test] +fn update_forest() -> Result<()> { + let mut backend = InMemoryBackend::new(); + let mut rng = ContinuousRng::new([0x76; 32]); + let version: VersionId = rng.value(); + + // Let's start by adding two trees to the forest. + let lineage_1: LineageId = rng.value(); + let key_1_1: Word = rng.value(); + let value_1_1: Word = rng.value(); + let key_1_2: Word = rng.value(); + let value_1_2: Word = rng.value(); + let mut operations_1 = SmtUpdateBatch::default(); + operations_1.add_insert(key_1_1, value_1_1); + operations_1.add_insert(key_1_2, value_1_2); + + let lineage_2: LineageId = rng.value(); + let key_2_1: Word = rng.value(); + let value_2_1: Word = rng.value(); + let mut operations_2 = SmtUpdateBatch::default(); + operations_2.add_insert(key_2_1, value_2_1); + + backend.add_lineage(lineage_1, version, operations_1)?; + backend.add_lineage(lineage_2, version, operations_2)?; + + // Let's replicate them with SMTs to check correctness. 
+ let mut tree_1 = Smt::new(); + tree_1.insert(key_1_1, value_1_1)?; + tree_1.insert(key_1_2, value_1_2)?; + + let mut tree_2 = Smt::new(); + tree_2.insert(key_2_1, value_2_1)?; + + // At this point we should have two trees in the forest, and their roots should match the trees + // we're checking against. + assert_eq!(backend.trees()?.count(), 2); + assert!(backend.trees()?.any(|e| e.root() == tree_1.root())); + assert!(backend.trees()?.any(|e| e.root() == tree_2.root())); + + // Let's do a batch modification to start with, doing an insert into both trees. + let key_1_3: Word = rng.value(); + let value_1_3: Word = rng.value(); + let key_2_2: Word = rng.value(); + let value_2_2: Word = rng.value(); + + let mut forest_ops = SmtForestUpdateBatch::empty(); + forest_ops.operations(lineage_1).add_insert(key_1_3, value_1_3); + forest_ops.operations(lineage_2).add_insert(key_2_2, value_2_2); + + backend.update_forest(version, forest_ops)?; + + // We can check these results against our trees. + tree_1.insert(key_1_3, value_1_3)?; + tree_2.insert(key_2_2, value_2_2)?; + + assert_eq!(backend.trees()?.count(), 2); + assert!(backend.trees()?.any(|e| e.root() == tree_1.root())); + assert!(backend.trees()?.any(|e| e.root() == tree_2.root())); + + // We should see an error when performing operations on a lineage that does not exist... + let ne_lineage: LineageId = rng.value(); + let key_1_4: Word = rng.value(); + let value_1_4: Word = rng.value(); + + let mut forest_ops = SmtForestUpdateBatch::empty(); + forest_ops.operations(lineage_1).add_insert(key_1_4, value_1_4); + forest_ops.operations(ne_lineage).add_insert(key_1_4, value_1_4); + + let result = backend.update_forest(version, forest_ops); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), BackendError::UnknownLineage(l) if l == ne_lineage); + + // ... but it should also leave the existing data unchanged. 
+ assert_eq!(backend.trees()?.count(), 2); + assert!(backend.trees()?.any(|e| e.root() == tree_1.root())); + assert!(backend.trees()?.any(|e| e.root() == tree_2.root())); + + Ok(()) +} diff --git a/miden-crypto/src/merkle/smt/large_forest/backend/mod.rs b/miden-crypto/src/merkle/smt/large_forest/backend/mod.rs new file mode 100644 index 000000000..a7261b8f5 --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/backend/mod.rs @@ -0,0 +1,221 @@ +//! This file contains the [`Backend`] trait for the SMT forest implementation and the supporting +//! types it needs. + +pub mod memory; + +use alloc::{boxed::Box, vec::Vec}; +use core::fmt::Debug; + +use thiserror::Error; + +use crate::{ + Word, + merkle::{ + MerkleError, + smt::{ + SmtProof, + large_forest::{ + operation::{SmtForestUpdateBatch, SmtUpdateBatch}, + root::{LineageId, TreeEntry, TreeWithRoot, VersionId}, + utils::MutationSet, + }, + }, + }, +}; + +// BACKEND +// ================================================================================================ + +/// The backing storage for the SMT forest, providing the necessary high-level methods for +/// performing operations on the full trees that make up the forest, while allowing the forest +/// itself to be storage agnostic. +/// +/// # Backend Data Storage +/// +/// Having a generic [`Backend`] provides no guarantees to the user about how it stores data and +/// what patterns are used for data access under the hood. It is, however, guaranteed to store +/// _only_ the data necessary to describe the latest state of each tree in the forest. +/// +/// # Error Handling +/// +/// We separate errors in backend implementations into two semantic categories: +/// +/// 1. **User-Derived Errors:** These are errors that arise downstream of data provided by the user. +/// These errors must be signaled by returning an [`Err`] variant with an appropriate error. +/// 2. **Internal Errors:** These are errors that are not derived from data provided by the user. 
+/// Signaling such an error is up to the implementation, but can be done through both panicking +/// and returning the [`BackendError::Internal`] variant as appropriate. These **may leave the +/// backend in an inconsistent state** as they are designed to effect program termination or +/// perform it directly. +/// +/// The only reason that [`BackendError::Internal`] exists is to allow certain failures to result in +/// termination at the level of the _forest_ instead of the _backend_ as this can sometimes lead to +/// cleaner logic. If this is not appropriate, a panic is a better option. +/// +/// # Expected Behavior +/// +/// Certain methods on this trait (e.g. [`Backend::update_tree`]) provide behaviors expected for +/// that method. These combine with the following trait-level behavior requirements to become part +/// of the contract of the method, but a portion that cannot be encoded in the type system. Any +/// failure to conform to these expected behaviors is **considered a bug in the implementation** of +/// the backend, and must be rectified. +/// +/// The following behavior is expected of all methods in implementations of this trait: +/// +/// - For any failure derived from user input (see _User-Derived Errors_ above), the data and the +/// backend must be **left in a consistent state** when the error is returned to the caller. +/// - Failures derived from user input (see _User-Derived Errors_ above) must be signaled to the +/// caller by returning a variant of [`BackendError`] that is **not [`BackendError::Internal`]**. +/// Methods may place additional constraints on which errors are used to signal certain failures. +pub trait Backend +where + Self: Debug, +{ + // QUERIES + // ============================================================================================ + + /// Returns an opening for the specified `key` in the SMT with the specified `lineage`. 
+ /// + /// It is the responsibility of the forest to ensure lineage existence before querying the + /// backend. The backend must return an error if the lineage does not exist. + fn open(&self, lineage: LineageId, key: Word) -> Result; + + /// Returns the value associated with the provided `key` in the SMT with the specified + /// `lineage`, or [`None`] if no such value exists. + /// + /// It is the responsibility of the forest to ensure lineage existence before querying the + /// backend. The backend must return an error if the lineage does not exist. + fn get(&self, lineage: LineageId, key: Word) -> Result>; + + /// Returns the version of the tree with the specified `lineage`. + /// + /// It is the responsibility of the forest to ensure lineage existence before querying the + /// backend. The backend must return an error if the lineage does not exist. + fn version(&self, lineage: LineageId) -> Result; + + /// Returns an iterator over all the lineages that the backend knows about. + /// + /// The iteration order is unspecified. + fn lineages(&self) -> Result>; + + /// Returns an iterator over all the trees (and their corresponding roots) that the backend + /// knows about. + /// + /// The iteration order is unspecified. + fn trees(&self) -> Result>; + + /// Returns the total number of (key-value) entries in the specified `lineage`. + /// + /// It is the responsibility of the forest to ensure lineage existence before querying the + /// backend. The backend must return an error if the lineage does not exist. + fn entry_count(&self, lineage: LineageId) -> Result; + + /// Returns an iterator that yields the populated (key-value) entries for the specified + /// `lineage`. + /// + /// It is the responsibility of the forest to ensure lineage existence before querying the + /// backend. The backend must return an error if the lineage does not exist. 
+ /// + /// This iterator must yield entries in an order such that they are sorted by their leaf index, + /// and entries that share a leaf index are sorted by key. It must not include key-value pairs + /// where the value is the empty word. + fn entries(&self, lineage: LineageId) -> Result>; + + // SINGLE-TREE MODIFIERS + // ============================================================================================ + + /// Adds a new `lineage` to the forest with the provided `version` and sets the associated SMT + /// to have the value created by applying `updates` to the empty tree, returning the new root of + /// that tree. + /// + /// # Expected Behavior + /// + /// Implementations must guarantee the following behavior in addition to the global invariants: + /// + /// - If the provided `lineage` conflicts with an already-existing lineage in the backend, it + /// must return [`BackendError::DuplicateLineage`]. + fn add_lineage( + &mut self, + lineage: LineageId, + version: VersionId, + updates: SmtUpdateBatch, + ) -> Result; + + /// Performs the provided `updates` on the tree with the specified `lineage`, returning the + /// mutation set that will revert the changes made to the tree. + /// + /// # Expected Behavior + /// + /// Implementations must guarantee the following behavior in addition to the global invariants: + /// + /// - At most one new root must be added to the forest for the entire batch. + /// - If applying the provided `updates` results in no changes to the tree, no new tree must be + /// allocated. 
+ fn update_tree( + &mut self, + lineage: LineageId, + new_version: VersionId, + updates: SmtUpdateBatch, + ) -> Result; + + // MULTI-TREE MODIFIERS + // ============================================================================================ + + /// Performs the provided `updates` on the forest, setting all new tree states to have the + /// provided `new_version` and returning a vector of the mutation sets that reverse the changes + /// to each changed tree. + /// + /// # Expected Behavior + /// + /// Implementations must guarantee the following behavior in addition to the global invariants: + /// + /// - At most one new root must be added to the forest for each target root in the provided + /// `updates`. + /// - If applying the provided `updates` results in no changes to a given lineage of trees in + /// the forest, then no new tree must be allocated in that lineage. + fn update_forest( + &mut self, + new_version: VersionId, + updates: SmtForestUpdateBatch, + ) -> Result>; +} + +// BACKEND ERROR +// ================================================================================================ + +/// The error type for use within Backends. +#[derive(Debug, Error)] +pub enum BackendError { + /// Raised when there is a conflict between a provided lineage ID and one already in the + /// forest. + #[error("Duplicate lineage ID {0} provided")] + DuplicateLineage(LineageId), + + /// Raised for arbitrary errors that are not derived from user-input. These should be considered + /// fatal by callers, but exist to forward the termination decision up to an appropriate level. + #[error(transparent)] + Internal(Box), + + /// Raised when there is an error with the merkle tree semantics within the backend. + #[error(transparent)] + Merkle(#[from] MerkleError), + + /// Raised for arbitrary other errors within the backend that are derived from user-input and + /// hence non-fatal. 
+ #[error(transparent)] + Other(Box), + + /// Raised when the backend is queried for a lineage it doesn't know about. + #[error("Lineage {0} is not known by the backend")] + UnknownLineage(LineageId), +} + +impl BackendError { + /// Constructs an internal error variant from the provided concrete error `e`. + fn internal_from(e: E) -> Self { + Self::Internal(Box::new(e)) + } +} + +/// The result type for use with backends. +pub type Result = core::result::Result; diff --git a/miden-crypto/src/merkle/smt/large_forest/config.rs b/miden-crypto/src/merkle/smt/large_forest/config.rs new file mode 100644 index 000000000..dd7c63d39 --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/config.rs @@ -0,0 +1,66 @@ +//! This module contains the configuration structure for the forest. + +// CONSTANTS +// ================================================================================================ + +/// The default number of historical versions of each tree to keep. +pub const DEFAULT_MAX_HISTORY_VERSIONS: usize = 10; + +/// The minimum number of historical versions per lineage that the forest can store. +pub const MIN_HISTORY_VERSIONS: usize = 1; + +// CONFIG +// ================================================================================================ + +/// The configuration for the forest's behavior. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Config { + /// The maximum number of historical versions that the forest will keep for any given lineage. + max_historical_versions: usize, +} + +/// This block contains the accessors for the configuration options. +impl Config { + /// The maximum number of historical versions that the forest will keep for any given lineage. + /// + /// If this field is set to `n`, the forest will implicitly store `n + 1` versions of a given + /// lineage once the latest version in that lineage is accounted for. + /// + /// Defaults to [`DEFAULT_MAX_HISTORY_VERSIONS`]. 
+ pub fn max_history_versions(&self) -> usize { + self.max_historical_versions + } +} + +// BUILDERS +// ================================================================================================ + +/// This impl block contains the builder functions for the configuration options. +impl Config { + /// Sets the maximum number of historical versions that the forest will store for any given + /// lineage, clamping to [`MIN_HISTORY_VERSIONS`] on the low end. + /// + /// If this field is set to `n`, the forest will implicitly store `n + 1` versions of a given + /// lineage once the latest version in that lineage is accounted for. + /// + /// This defaults to [`DEFAULT_MAX_HISTORY_VERSIONS`]. + pub fn with_max_history_versions(mut self, max_historical_versions: usize) -> Self { + self.max_historical_versions = if max_historical_versions < MIN_HISTORY_VERSIONS { + MIN_HISTORY_VERSIONS + } else { + max_historical_versions + }; + self + } +} + +// TRAIT IMPLS +// ================================================================================================ + +/// Please see individual methods on [`Config`] for the default value of each configuration option. +impl Default for Config { + fn default() -> Self { + let max_historical_versions = DEFAULT_MAX_HISTORY_VERSIONS; + Self { max_historical_versions } + } +} diff --git a/miden-crypto/src/merkle/smt/large_forest/error.rs b/miden-crypto/src/merkle/smt/large_forest/error.rs index 4624764ec..42ef1cfaf 100644 --- a/miden-crypto/src/merkle/smt/large_forest/error.rs +++ b/miden-crypto/src/merkle/smt/large_forest/error.rs @@ -8,7 +8,7 @@ use thiserror::Error; use crate::merkle::{ MerkleError, smt::{ - TreeId, VersionId, + SmtLeafError, SmtProofError, TreeId, VersionId, large_forest::{backend::BackendError, history::error::HistoryError, root::LineageId}, }, }; @@ -19,20 +19,44 @@ use crate::merkle::{ /// The type of errors returned by operations on the large SMT forest. 
#[derive(Debug, Error)] pub enum LargeSmtForestError { + /// Raised when the provided version for any update is older than the latest-known version for + /// the lineage being updated. + #[error("Version {provided} is not newer than latest-known {latest}")] + BadVersion { provided: VersionId, latest: VersionId }, + + /// Raised when there is a conflict between a provided lineage ID and one already in the + /// forest. + #[error("Duplicate lineage ID {0} provided")] + DuplicateLineage(LineageId), + + /// Raised for arbitrary errors that are not derived from user-input. These **must be considered + /// fatal by the caller**, but exist to provide the caller with control over process termination + /// (e.g. for improved diagnostics or error reporting) wherever possible. + #[error(transparent)] + Fatal(Box), + /// Errors in the history subsystem of the forest. #[error(transparent)] - HistoryError(#[from] HistoryError), + History(#[from] HistoryError), /// Errors with the merkle tree operations of the forest. #[error(transparent)] - MerkleError(#[from] MerkleError), + Merkle(#[from] MerkleError), + + /// Errors in working with leaves in the merkle trees. + #[error(transparent)] + SmtLeaf(#[from] SmtLeafError), + + /// Errors in the construction and manipulation of SMT proofs. + #[error(transparent)] + SmtProof(#[from] SmtProofError), /// Raised when an operation specifies a lineage that is not known. #[error("The lineage {0:?} is not in the forest")] UnknownLineage(LineageId), /// Raised when an operation specifies a tree that is not known. - #[error("The tree")] + #[error("The tree {0} is not in the forest")] UnknownTree(TreeId), /// Raised when an operation requests a version that is not known. @@ -44,13 +68,23 @@ pub enum LargeSmtForestError { Other(#[from] Box), } +impl LargeSmtForestError { + /// Constructs a fatal error variant from the provided concrete error `e`. 
+ pub fn fatal_from(e: E) -> Self { + Self::Fatal(Box::new(e)) + } +} + /// We want to forward backend errors specifically when we can, so we manually implement the /// conversion. impl From for LargeSmtForestError { fn from(value: BackendError) -> Self { match value { + BackendError::DuplicateLineage(l) => LargeSmtForestError::DuplicateLineage(l), + BackendError::Internal(e) => LargeSmtForestError::Fatal(e), BackendError::Merkle(e) => LargeSmtForestError::from(e), BackendError::Other(e) => LargeSmtForestError::from(e), + BackendError::UnknownLineage(t) => LargeSmtForestError::UnknownLineage(t), } } } diff --git a/miden-crypto/src/merkle/smt/large_forest/history/mod.rs b/miden-crypto/src/merkle/smt/large_forest/history/mod.rs index 4c1bf2f4a..7510594bc 100644 --- a/miden-crypto/src/merkle/smt/large_forest/history/mod.rs +++ b/miden-crypto/src/merkle/smt/large_forest/history/mod.rs @@ -30,10 +30,9 @@ pub mod error; -#[cfg(test)] mod tests; -use alloc::collections::VecDeque; +use alloc::collections::{BTreeMap, BTreeSet, VecDeque}; use core::fmt::Debug; use error::{HistoryError, Result}; @@ -41,10 +40,13 @@ use error::{HistoryError, Result}; use crate::{ Map, Word, merkle::{ - NodeIndex, + EmptySubtreeRoots, NodeIndex, smt::{ - LeafIndex, SMT_DEPTH, - large_forest::root::{RootValue, VersionId}, + LeafIndex, NodeMutation, SMT_DEPTH, + large_forest::{ + root::{RootValue, TreeEntry, VersionId}, + utils::MutationSet, + }, }, }, }; @@ -54,7 +56,9 @@ use crate::{ /// A compact leaf is a mapping from full word-length keys to word-length values, intended to be /// stored in the leaves of an otherwise shallower merkle tree. -pub type CompactLeaf = Map; +/// +/// We use a BTreeMap as we need a guaranteed iteration order over the keys. +pub type CompactLeaf = BTreeMap; /// A collection of changes to arbitrary non-leaf nodes in a merkle tree. /// @@ -69,8 +73,8 @@ pub type NodeChanges = Map; /// A collection of changes to arbitrary leaf nodes in a merkle tree. 
/// -/// This represents the state of the leaf wholesale, rather than as a delta from the newer version. -/// This massively simplifies querying leaves in the history. +/// While represented as a single leaf, it only contains the changes to the leaf as part of the +/// delta, and still needs to be combined with the actual leaf data for querying. /// /// Note that if in the version of the tree represented by these `LeafChanges` had the default value /// at the leaf, this default value must be made concrete in the map. Failure to do so will retain a @@ -86,7 +90,6 @@ pub type LeafChanges = Map, CompactLeaf>; /// The versions are _cumulative_, meaning that querying the history must account for changes from /// the current tree that take place in versions that are not the queried version or the current /// tree. -#[allow(dead_code)] // Temporary #[derive(Clone, Debug)] pub struct History { /// The maximum number of historical versions to be stored. @@ -110,7 +113,6 @@ pub struct History { deltas: VecDeque, } -#[allow(dead_code)] // Temporary impl History { /// Constructs a new history container, containing at most `max_count` historical versions for /// a tree. @@ -148,25 +150,6 @@ impl History { self.deltas.iter().rev().map(|d| d.root) } - /// Gets the version corresponding to the provided `root`, or returns [`None`] if the provided - /// `root` is not found within this history. - pub fn version(&self, root: RootValue) -> Option { - self.deltas - .iter() - .find_map(|d| if d.root == root { Some(d.version_id) } else { None }) - } - - /// Returns `true` if `root` is in the history and `false` otherwise. - /// - /// # Complexity - /// - /// Calling this method requires a traversal of all the versions and is hence linear in the - /// number of history versions. - #[must_use] - pub fn is_known_root(&self, root: RootValue) -> bool { - self.deltas.iter().any(|r| r.root == root) - } - /// Returns the root value that corresponds to the provided `version`. 
pub fn root_for_version(&self, version: VersionId) -> Result { let ix = self.find_latest_corresponding_version(version)?; @@ -222,6 +205,52 @@ impl History { } } + /// Adds a version to the history, represented by the changes from the current tree given by + /// `mutations`. + /// + /// If adding this version would result in exceeding `self.max_count` historical versions, then + /// the oldest of the versions is automatically removed. + /// + /// # Gotchas + /// + /// When constructing the `mutations`, keep in mind that the set must contain entries for the + /// **default value of a node or leaf** at any position where the tree was sparse in the state + /// represented by `root`. If this is not done, incorrect values may be returned. + /// + /// This is necessary because the changes are the _reverse_ from what one might expect. Namely, + /// the changes in a given version `v` must "_revert_" the changes made in the transition from + /// version `v` to version `v + 1`. + /// + /// # Errors + /// + /// - [`HistoryError::NonMonotonicVersions`] if the provided version is not greater than the + /// previously added version. + pub fn add_version_from_mutation_set( + &mut self, + version_id: VersionId, + mutations: MutationSet, + ) -> Result<()> { + // The leaf changes must be grouped by parent leaf when being inserted, so we do that here. + let mut leaf_changes = LeafChanges::default(); + for (key, val) in mutations.new_pairs { + leaf_changes.entry(LeafIndex::from(key)).or_default().insert(key, val); + } + + // The node changes are more complex, as we have to explicitly handle reversions to empty + // specially. + let node_changes: NodeChanges = mutations + .node_mutations + .into_iter() + .map(|(ix, m)| match m { + NodeMutation::Removal => (ix, *EmptySubtreeRoots::entry(SMT_DEPTH, ix.depth())), + NodeMutation::Addition(n) => (ix, n.hash()), + }) + .collect(); + + // Now we can simply delegate to the standard function. 
+ self.add_version(mutations.new_root, version_id, node_changes, leaf_changes) + } + /// Returns the index in the sequence of deltas of the version that corresponds to the provided /// `version_id`. /// @@ -286,8 +315,7 @@ impl History { /// coherent overlay for the provided `version_id` due to `version_id` being older than the /// oldest version stored. pub fn get_view_at(&self, version_id: VersionId) -> Result> { - let version_index = self.find_latest_corresponding_version(version_id)?; - Ok(HistoryView::new_of(version_index, self)) + HistoryView::new_of(version_id, self) } /// Removes all versions in the history that are older than the version denoted by the provided @@ -318,13 +346,26 @@ impl History { } } +/// The functions in this impl block are specifically used for testing and are not available for +/// general API usage. +#[cfg(test)] +impl History { + /// Returns `true` if `root` is in the history and `false` otherwise. + #[must_use] + pub fn is_known_root(&self, root: RootValue) -> bool { + self.deltas.iter().any(|r| r.root == root) + } +} + // HISTORY VIEW // ================================================================================================ /// A read-only view of the history overlay on the tree at a specified place in the history. -#[allow(dead_code)] // Temporary #[derive(Debug)] pub struct HistoryView<'history> { + /// The version of the history pointed to by the history view. + version: VersionId, + /// The index of the target version in the history. version_ix: usize, @@ -332,17 +373,22 @@ pub struct HistoryView<'history> { history: &'history History, } -#[allow(dead_code)] // Temporary impl<'history> HistoryView<'history> { /// Constructs a new history view that acts as a single overlay of the state represented by the - /// oldest delta for which `f` returns true. + /// history at the provided `version`. 
/// /// # Complexity /// /// The computational complexity of this method is linear in the number of versions stored in /// the history. - fn new_of(version_ix: usize, history: &'history History) -> Self { - Self { version_ix, history } + /// + /// # Errors + /// + /// - [`HistoryError::VersionTooOld`] if the history does not contain the data to provide a + /// coherent overlay for the provided `version`. + fn new_of(version: VersionId, history: &'history History) -> Result { + let version_ix = history.find_latest_corresponding_version(version)?; + Ok(Self { version, version_ix, history }) } /// Gets the value of the node in the history at the provided `index`, or returns `None` if the @@ -361,36 +407,53 @@ impl<'history> HistoryView<'history> { .find_map(|v| v.nodes.get(index)) } - /// Gets the value of the entire leaf in the history at the specified `index`, or returns `None` - /// if the version does not overlay the current tree at that leaf. + /// Gets a single leaf that represents the delta from the current version of the tree to the + /// point in the history at the specified `index`. + /// + /// If the specified version does not overlay the current tree at that leaf, it will return an + /// empty compact leaf. /// /// # Complexity /// /// The computational complexity of this method is linear in the number of versions due to the /// need to traverse to find the correct overlay value. #[must_use] - pub fn leaf_value(&self, index: &LeafIndex) -> Option<&CompactLeaf> { - self.history - .deltas - .iter() - .skip(self.version_ix) - .find_map(|v| v.leaves.get(index)) + pub fn leaf_delta(&self, index: &LeafIndex) -> CompactLeaf { + let mut leaf = CompactLeaf::default(); + + // We want to keep the _oldest_ change for any particular key in a leaf. 
+ for delta in self.history.deltas.iter().skip(self.version_ix) { + if let Some(leaf_delta) = delta.leaves.get(index) { + for (key, value) in leaf_delta { + leaf.entry(*key).or_insert(*value); + } + } + } + + leaf } - /// Queries the value of a specific key in a leaf in the overlay, returning: - /// - /// - `None` if the version does not overlay that leaf in the current tree, - /// - `Some(None)` if the version does overlay that leaf but the compact leaf does not contain - /// that value, - /// - and `Some(Some(v))` if the version does overlay the leaf and the key exists in that leaf. + /// Queries the value of a specific `key` in a leaf in the overlay, returning the value for that + /// `key` if it has been changed, and [`None`] otherwise. /// /// # Complexity /// /// The computational complexity of this method is linear in the number of versions due to the /// need to traverse to find the correct overlay value. #[must_use] - pub fn value(&self, key: &Word) -> Option> { - self.leaf_value(&LeafIndex::from(*key)).map(|leaf| leaf.get(key)) + pub fn value(&self, key: &Word) -> Option { + self.leaf_delta(&LeafIndex::from(*key)).get(key).copied() + } + + /// Returns an iterator which yields the entries that are changed by this view. + /// + /// This iterator yields entries in an order such that they are sorted by their leaf index, and + /// entries that share a leaf index are sorted by key. It includes key-value pairs where the + /// value is the empty word, as these are necessary for merging with entries in the full tree. + pub fn entries(&self) -> impl Iterator + 'history { + // It is safe to call this directly here as the construction of `HistoryView` has ensured + // that we have such a version. 
+ HistoricalEntriesIterator::new(self.history, self.version) } } @@ -420,22 +483,21 @@ impl<'history> HistoryView<'history> { struct Delta { /// The root of the tree in the `version` corresponding to the application of the reversions in /// this delta to the previous tree state. - pub root: RootValue, + root: RootValue, /// The version of the tree represented by the delta. - pub version_id: VersionId, + version_id: VersionId, /// Any changes to the non-leaf nodes in the tree for this delta. - pub nodes: NodeChanges, + nodes: NodeChanges, /// Any changes to the leaf nodes in the tree for this delta. /// /// Note that the leaf state is **not represented compactly**, and describes the entire state /// of the leaf in the corresponding version. - pub leaves: LeafChanges, + leaves: LeafChanges, } -#[allow(dead_code)] // Temporary impl Delta { /// Creates a new delta with the provided `root`, and representing the provided /// changes to `nodes` and `leaves` in the merkle tree. @@ -449,3 +511,130 @@ impl Delta { Self { root, version_id, nodes, leaves } } } + +// ENTRIES ITERATOR +// ================================================================================================ + +/// An iterator over the historical value for each changed entry at a given point in the history. +/// +/// This iterator yields entries in an order such that they are sorted by their leaf index, and +/// entries that share a leaf index are sorted by key. It includes key-value pairs where the value +/// is the empty word, as these are necessary for merging with entries in the full tree. +#[derive(Debug)] +pub struct HistoricalEntriesIterator<'history> { + /// The history over which the iterator is defined. + history: &'history History, + + /// The version in the history to be working from. + version: VersionId, + + /// The set of all changed leaves in the deltas that make up this iterator that have not yet + /// been visited by the iterator. 
+ /// + /// We use a BTreeSet specifically as we need sorted iteration behavior. + changed_leaves: BTreeSet>, + + /// The current state of the iterator's iteration behavior. + position: HistoricalEntriesIteratorState, +} + +impl<'history> HistoricalEntriesIterator<'history> { + /// Creates a new historical entries iterator that represents a coherent set of delta entries at + /// the position in the history given by `version`. + fn new(history: &'history History, version: VersionId) -> Self { + let changed_leaves = history + .deltas + .iter() + .skip( + history + .find_latest_corresponding_version(version) + .expect("Caller has guaranteed existence of a corresponding version"), + ) + .flat_map(|d| d.leaves.keys()) + .copied() + .collect(); + + // We want to start not pointing to any leaf as we can only advance when `next` is called. + let current_leaf_index = HistoricalEntriesIteratorState::NotInLeaf; + + Self { + history, + version, + changed_leaves, + position: current_leaf_index, + } + } +} + +impl<'history> Iterator for HistoricalEntriesIterator<'history> { + type Item = TreeEntry; + + fn next(&mut self) -> Option { + match &mut self.position { + HistoricalEntriesIteratorState::NotInLeaf => { + // If we are not inside a leaf we need to see if we can become so. + if let Some(ix) = self.changed_leaves.pop_first() { + // If we can move into a new leaf, we transition the state into that leaf and + // return the entry. + let leaf_delta = self + .history + .get_view_at(self.version) + .expect( + "Version was guaranteed to exist before construction of the iterator", + ) + .leaf_delta(&ix); + + // As we are querying based on `changed_leaves`, each of the `leaf_delta` + // results should contain at least one item. 
+ let (key, value) = leaf_delta + .first_key_value() + .expect("At least one item guaranteed by construction"); + let item = TreeEntry { key: *key, value: *value }; + + // At this point we now have the item, but we need to set up the state to point + // to this item as we return it. + self.position = HistoricalEntriesIteratorState::InLeaf { value: leaf_delta }; + + Some(item) + } else { + // If we cannot move to a new leaf index, the iterator is done. + None + } + }, + HistoricalEntriesIteratorState::InLeaf { value } => { + // If we are already inside a leaf, there are two cases that can occur when + // advancing. + value.pop_first().expect("InLeaf implies there is at least one entry in value"); + if let Some((k, v)) = value.first_key_value() { + // The first (and simplest) case is that we have another entry in the current + // leaf value. In this case, the item is just the front of the leaf value, and + // we re-write the key to point to it while leaving the leaf index the same. + let item = TreeEntry { key: *k, value: *v }; + + Some(item) + } else { + // Here, we have no further entries in the current leaf, so we have to check if + // there is another leaf to move to. In other words, we are implicitly in the + // `NotInLeaf` state, so we can just call `next` recursively. + // + // This is not a stack overflow risk as it should only ever recurse once. + self.position = HistoricalEntriesIteratorState::NotInLeaf; + self.next() + } + }, + } + } +} + +/// The state that tracks where the iterator is in the iteration process. +#[derive(Debug)] +enum HistoricalEntriesIteratorState { + /// It currently does not point to any underlying leaf index. + NotInLeaf, + + /// It is currently pointing to the specified key within the specified index. + InLeaf { + /// The combined full delta that represents the compact leaf. 
+ value: CompactLeaf, + }, +} diff --git a/miden-crypto/src/merkle/smt/large_forest/history/tests.rs b/miden-crypto/src/merkle/smt/large_forest/history/tests.rs index 3bb742cb6..25181b7d4 100644 --- a/miden-crypto/src/merkle/smt/large_forest/history/tests.rs +++ b/miden-crypto/src/merkle/smt/large_forest/history/tests.rs @@ -1,15 +1,20 @@ -#![cfg(feature = "std")] +#![cfg(test)] //! The functional tests for the history component. use alloc::vec::Vec; +use core::iter::once; +use itertools::Itertools; use p3_field::PrimeCharacteristicRing; use super::{CompactLeaf, History, LeafChanges, NodeChanges, error::Result}; use crate::{ - Felt, Word, - merkle::{NodeIndex, smt::LeafIndex}, - rand::test_utils::rand_value, + EMPTY_WORD, Felt, Word, + merkle::{ + NodeIndex, + smt::{LeafIndex, Smt, VersionId, large_forest::root::TreeEntry}, + }, + rand::test_utils::ContinuousRng, }; // TESTS @@ -24,12 +29,14 @@ fn empty() { #[test] fn roots() -> Result<()> { + let mut rng = ContinuousRng::new([0x12; 32]); + // Set up our test state let nodes = NodeChanges::default(); let leaves = LeafChanges::default(); let mut history = History::empty(2); - let root_1: Word = rand_value(); - let root_2: Word = rand_value(); + let root_1: Word = rng.value(); + let root_2: Word = rng.value(); history.add_version(root_1, 0, nodes.clone(), leaves.clone())?; history.add_version(root_2, 1, nodes.clone(), leaves.clone())?; @@ -42,29 +49,10 @@ fn roots() -> Result<()> { Ok(()) } -#[test] -fn is_known_root() -> Result<()> { - // Set up our test state - let nodes = NodeChanges::default(); - let leaves = LeafChanges::default(); - let mut history = History::empty(2); - let root_1: Word = rand_value(); - let root_2: Word = rand_value(); - history.add_version(root_1, 0, nodes.clone(), leaves.clone())?; - history.add_version(root_2, 1, nodes.clone(), leaves.clone())?; - - // We should be able to query for existing roots. 
- assert!(history.is_known_root(root_1)); - assert!(history.is_known_root(root_2)); - - // But not for nonexistent ones. - assert!(!history.is_known_root(rand_value())); - - Ok(()) -} - #[test] fn find_latest_corresponding_version() -> Result<()> { + let mut rng = ContinuousRng::new([0x14; 32]); + // Start by setting up our test data. let nodes = NodeChanges::default(); let leaves = LeafChanges::default(); @@ -76,11 +64,11 @@ fn find_latest_corresponding_version() -> Result<()> { let v4 = 31; let v5 = 45; - history.add_version(rand_value(), v1, nodes.clone(), leaves.clone())?; - history.add_version(rand_value(), v2, nodes.clone(), leaves.clone())?; - history.add_version(rand_value(), v3, nodes.clone(), leaves.clone())?; - history.add_version(rand_value(), v4, nodes.clone(), leaves.clone())?; - history.add_version(rand_value(), v5, nodes.clone(), leaves.clone())?; + history.add_version(rng.value(), v1, nodes.clone(), leaves.clone())?; + history.add_version(rng.value(), v2, nodes.clone(), leaves.clone())?; + history.add_version(rng.value(), v3, nodes.clone(), leaves.clone())?; + history.add_version(rng.value(), v4, nodes.clone(), leaves.clone())?; + history.add_version(rng.value(), v5, nodes.clone(), leaves.clone())?; // When we query for a version that is older than the oldest in the history we should get an // error. @@ -112,6 +100,7 @@ fn find_latest_corresponding_version() -> Result<()> { fn add_version() -> Result<()> { let nodes = NodeChanges::default(); let leaves = LeafChanges::default(); + let mut rng = ContinuousRng::new([0x15; 32]); // We start with an empty state, and we should be able to add deltas up until the limit we // set. 
@@ -119,18 +108,18 @@ fn add_version() -> Result<()> { assert_eq!(history.num_versions(), 0); assert_eq!(history.max_versions(), 2); - let root_1: Word = rand_value(); + let root_1: Word = rng.value(); let id_1 = 0; history.add_version(root_1, id_1, nodes.clone(), leaves.clone())?; assert_eq!(history.num_versions(), 1); - let root_2: Word = rand_value(); + let root_2: Word = rng.value(); let id_2 = 1; history.add_version(root_2, id_2, nodes.clone(), leaves.clone())?; assert_eq!(history.num_versions(), 2); // At this point, adding any version should remove the oldest. - let root_3: Word = rand_value(); + let root_3: Word = rng.value(); let id_3 = 2; history.add_version(root_3, id_3, nodes.clone(), leaves.clone())?; assert_eq!(history.num_versions(), 2); @@ -147,27 +136,71 @@ fn add_version() -> Result<()> { Ok(()) } +#[test] +fn add_version_from_mutation_set() -> Result<()> { + let mut rng = ContinuousRng::new([0x16; 32]); + + // We start by producing values. + let l1_k1: Word = rng.value(); + let leaf_1_ix = LeafIndex::from(l1_k1); + let l1_v1: Word = rng.value(); + let mut l1_k2: Word = rng.value(); + l1_k2[3] = Felt::from_u64(leaf_1_ix.position()); + let l1_v2: Word = rng.value(); + + let l2_k1: Word = rng.value(); + let leaf_2_ix = LeafIndex::from(l2_k1); + let l2_v1: Word = rng.value(); + let mut l2_k2: Word = rng.value(); + l2_k2[3] = Felt::from_u64(leaf_2_ix.position()); + let l2_v2: Word = rng.value(); + + // We produce a changeset by applying these changes to a merkle tree to put things back in the + // right state. + let tree = Smt::new(); + let mutations = tree + .compute_mutations([(l1_k1, l1_v1), (l1_k2, l1_v2), (l2_k1, l2_v1), (l2_k2, l2_v2)]) + .expect("Failed to compute mutations"); + + // We then set up our history and apply it. + let mut history = History::empty(2); + let version: VersionId = rng.value(); + + history.add_version_from_mutation_set(version, mutations)?; + + // Now we can check that it did things correctly. 
+ let view = history.get_view_at(version)?; + let expected_leaf_1 = CompactLeaf::from([(l1_k1, l1_v1), (l1_k2, l1_v2)]); + assert_eq!(view.leaf_delta(&leaf_1_ix), expected_leaf_1); + let expected_leaf_2 = CompactLeaf::from([(l2_k1, l2_v1), (l2_k2, l2_v2)]); + assert_eq!(view.leaf_delta(&leaf_2_ix), expected_leaf_2); + + Ok(()) +} + #[test] fn truncate() -> Result<()> { + let mut rng = ContinuousRng::new([0x17; 32]); + // Start by setting up the test data let mut history = History::empty(4); let nodes = NodeChanges::default(); let leaves = LeafChanges::default(); - let root_1: Word = rand_value(); + let root_1: Word = rng.value(); let id_1 = 5; history.add_version(root_1, id_1, nodes.clone(), leaves.clone())?; - let root_2: Word = rand_value(); + let root_2: Word = rng.value(); let id_2 = 10; history.add_version(root_2, id_2, nodes.clone(), leaves.clone())?; - let root_3: Word = rand_value(); + let root_3: Word = rng.value(); let id_3 = 15; history.add_version(root_3, id_3, nodes.clone(), leaves.clone())?; - let root_4: Word = rand_value(); + let root_4: Word = rng.value(); let id_4 = 20; history.add_version(root_4, id_4, nodes.clone(), leaves.clone())?; @@ -199,17 +232,19 @@ fn truncate() -> Result<()> { #[test] fn clear() -> Result<()> { + let mut rng = ContinuousRng::new([0x18; 32]); + // Start by setting up the test data let mut history = History::empty(4); let nodes = NodeChanges::default(); let leaves = LeafChanges::default(); - let root_1: Word = rand_value(); + let root_1: Word = rng.value(); let id_1 = 0; history.add_version(root_1, id_1, nodes.clone(), leaves.clone())?; - let root_2: Word = rand_value(); + let root_2: Word = rng.value(); let id_2 = 1; history.add_version(root_2, id_2, nodes.clone(), leaves.clone())?; @@ -226,31 +261,32 @@ fn clear() -> Result<()> { fn view_at() -> Result<()> { // Starting in an empty state we should be able to add deltas up until the limit we set. 
let mut history = History::empty(3); + let mut rng = ContinuousRng::new([0x19; 32]); assert_eq!(history.num_versions(), 0); assert_eq!(history.max_versions(), 3); // We can add an initial version with some changes in both nodes and leaves. - let root_1 = rand_value::(); + let root_1: Word = rng.value(); let id_1 = 3; let mut nodes_1 = NodeChanges::default(); - let n1_value: Word = rand_value(); - let n2_value: Word = rand_value(); + let n1_value: Word = rng.value(); + let n2_value: Word = rng.value(); nodes_1.insert(NodeIndex::new(2, 1).unwrap(), n1_value); nodes_1.insert(NodeIndex::new(8, 128).unwrap(), n2_value); let mut leaf_1 = CompactLeaf::new(); - let l1_e1_key: Word = rand_value(); - let l1_e1_value: Word = rand_value(); + let l1_e1_key: Word = rng.value(); + let l1_e1_value: Word = rng.value(); let leaf_1_ix = LeafIndex::from(l1_e1_key); leaf_1.insert(l1_e1_key, l1_e1_value); let mut leaf_2 = CompactLeaf::new(); - let l2_e1_key: Word = rand_value(); - let l2_e1_value: Word = rand_value(); + let l2_e1_key: Word = rng.value(); + let l2_e1_value: Word = rng.value(); let leaf_2_ix = LeafIndex::from(l2_e1_key); - let mut l2_e2_key: Word = rand_value(); - l2_e2_key[3] = Felt::from_u64(leaf_2_ix.value()); - let l2_e2_value: Word = rand_value(); + let mut l2_e2_key: Word = rng.value(); + l2_e2_key[3] = Felt::from_u64(leaf_2_ix.position()); + let l2_e2_value: Word = rng.value(); leaf_2.insert(l2_e1_key, l2_e1_value); leaf_2.insert(l2_e2_key, l2_e2_value); @@ -262,20 +298,20 @@ fn view_at() -> Result<()> { assert_eq!(history.num_versions(), 1); // We then add another version that overlaps with the older version. 
- let root_2 = rand_value::(); + let root_2: Word = rng.value(); let id_2 = 5; let mut nodes_2 = NodeChanges::default(); - let n3_value: Word = rand_value(); - let n4_value: Word = rand_value(); + let n3_value: Word = rng.value(); + let n4_value: Word = rng.value(); nodes_2.insert(NodeIndex::new(2, 1).unwrap(), n3_value); nodes_2.insert(NodeIndex::new(10, 256).unwrap(), n4_value); let mut leaf_3 = CompactLeaf::new(); let leaf_3_ix = leaf_2_ix; - let mut l3_e1_key: Word = rand_value(); - l3_e1_key[3] = Felt::from_u64(leaf_3_ix.value()); - let l3_e1_value: Word = rand_value(); + let mut l3_e1_key: Word = rng.value(); + l3_e1_key[3] = Felt::from_u64(leaf_3_ix.position()); + let l3_e1_value: Word = rng.value(); leaf_3.insert(l3_e1_key, l3_e1_value); let mut leaves_2 = LeafChanges::default(); @@ -284,21 +320,27 @@ fn view_at() -> Result<()> { assert_eq!(history.num_versions(), 2); // And another version for the sake of the test. - let root_3 = rand_value::(); + let root_3: Word = rng.value(); let id_3 = 6; let mut nodes_3 = NodeChanges::default(); - let n5_value: Word = rand_value(); + let n5_value: Word = rng.value(); nodes_3.insert(NodeIndex::new(30, 1).unwrap(), n5_value); let mut leaf_4 = CompactLeaf::new(); - let l4_e1_key: Word = rand_value(); - let l4_e1_value: Word = rand_value(); + let l4_e1_key: Word = rng.value(); + let l4_e1_value: Word = rng.value(); let leaf_4_ix = LeafIndex::from(l4_e1_key); leaf_4.insert(l4_e1_key, l4_e1_value); + let mut leaf_1n = CompactLeaf::new(); + let l1n_e1_key = l1_e1_key; + let l1n_e1_value: Word = rng.value(); + leaf_1n.insert(l1n_e1_key, l1n_e1_value); + let mut leaves_3 = LeafChanges::default(); leaves_3.insert(leaf_4_ix, leaf_4.clone()); + leaves_3.insert(leaf_1_ix, leaf_1n); history.add_version(root_3, id_3, nodes_3.clone(), leaves_3.clone())?; assert_eq!(history.num_versions(), 3); @@ -324,35 +366,43 @@ fn view_at() -> Result<()> { // Getting a node that doesn't exist in ANY versions should return none. 
assert!(view.node_value(&NodeIndex::new(45, 100).unwrap()).is_none()); - // Similarly, getting a leaf from the targeted version should just return it. - assert_eq!(view.leaf_value(&leaf_1_ix), Some(&leaf_1)); - assert_eq!(view.leaf_value(&leaf_2_ix), Some(&leaf_2)); + // Getting a leaf from the targeted version will compose with other (newer) deltas to yield the + // correct changes. The first test here checks that a value updated in a newer delta is + // nevertheless reverted to the correct value. + assert_eq!(view.leaf_delta(&leaf_1_ix), leaf_1); + + // This test checks that the delta for a single leaf correctly combines non-overlapping key + // reversions. + let leaf_2_delta: CompactLeaf = once((l3_e1_key, l3_e1_value)) + .chain(leaf_2.iter().map(|(k, v)| (*k, *v))) + .collect(); + assert_eq!(view.leaf_delta(&leaf_2_ix), leaf_2_delta); // But getting a leaf that is not in the target delta directly should result in the same // traversal. - assert_eq!(view.leaf_value(&leaf_4_ix), Some(&leaf_4)); + assert_eq!(view.leaf_delta(&leaf_4_ix), leaf_4); - // And getting a leaf that does not exist in any of the versions should return one. - assert!(view.leaf_value(&LeafIndex::new(1024).unwrap()).is_none()); + // And getting a leaf that does not exist in any of the versions should return an empty delta. + assert!(view.leaf_delta(&LeafIndex::new(1024).unwrap()).is_empty()); // Finally, getting a full value from a compact leaf should yield the value directly from // the target version if the target version overlays it AND contains it. 
- assert_eq!(view.value(&l1_e1_key), Some(Some(&l1_e1_value))); - assert_eq!(view.value(&l2_e1_key), Some(Some(&l2_e1_value))); - assert_eq!(view.value(&l2_e2_key), Some(Some(&l2_e2_value))); + assert_eq!(view.value(&l1_e1_key), Some(l1_e1_value)); + assert_eq!(view.value(&l2_e1_key), Some(l2_e1_value)); + assert_eq!(view.value(&l2_e2_key), Some(l2_e2_value)); // However, if the leaf exists but does not contain the provided word, it should return the // sentinel `Some(None)`. - let mut ne_key_in_existing_leaf: Word = rand_value(); - ne_key_in_existing_leaf[3] = Felt::from_u64(leaf_1_ix.value()); - assert_eq!(view.value(&ne_key_in_existing_leaf), Some(None)); + let mut ne_key_in_existing_leaf: Word = rng.value(); + ne_key_in_existing_leaf[3] = Felt::from_u64(leaf_1_ix.position()); + assert_eq!(view.value(&ne_key_in_existing_leaf), None); // If the leaf is not overlaid, then the lookup should go up the chain just as in the other // cases. - assert_eq!(view.value(&l4_e1_key), Some(Some(&l4_e1_value))); + assert_eq!(view.value(&l4_e1_key), Some(l4_e1_value)); // But if nothing is found, it should just return None; - let ne_key: Word = rand_value(); + let ne_key: Word = rng.value(); assert!(view.value(&ne_key).is_none()); // We can also get views for versions that are not directly contained, such as a version newer @@ -361,107 +411,83 @@ fn view_at() -> Result<()> { assert_eq!(view.node_value(&NodeIndex::new_unchecked(30, 1)), Some(&n5_value)); assert!(view.node_value(&NodeIndex::new_unchecked(30, 2)).is_none()); + // We can also get an iterator over the entries for a given view. This should yield all the + // correctly-collapsed key-value pairs in the overlay. We start with the most recent view. 
+ let view = history.get_view_at(id_3)?; + assert_eq!(view.entries().count(), 2); + assert!(view.entries().contains(&TreeEntry { key: l4_e1_key, value: l4_e1_value })); + assert!(view.entries().contains(&TreeEntry { key: l1n_e1_key, value: l1n_e1_value })); + assert!(view.entries().is_sorted_by(|l, r| { + if l.index() == r.index() { + l.key < r.key + } else { + l.index() < r.index() + } + })); + Ok(()) } // SMT INTEGRATION TESTS // ================================================================================================ -use crate::merkle::smt::{MutationSet, NodeMutation, SMT_DEPTH, Smt, SparseMerkleTree}; - -/// Converts a MutationSet into the format expected by History. -/// -/// This helper extracts node additions and leaf changes from an SMT mutation set, -/// transforming them into the format used by the History tracking mechanism. -fn mutation_set_to_history_changes( - mutations: &MutationSet, -) -> (NodeChanges, LeafChanges) { - let mut node_changes = NodeChanges::default(); - for (index, mutation) in mutations.node_mutations().iter() { - if let NodeMutation::Addition(inner_node) = mutation { - node_changes.insert(*index, inner_node.hash()); - } - } - - let mut leaf_changes = LeafChanges::default(); - for (key, value) in mutations.new_pairs().iter() { - let leaf_index = LeafIndex::new(Smt::key_to_leaf_index(key).value()).unwrap(); - leaf_changes - .entry(leaf_index) - .or_insert_with(CompactLeaf::new) - .insert(*key, *value); - } - - (node_changes, leaf_changes) -} - /// Tests History integration using real SMT mutations. /// /// This test creates an actual SMT, computes mutations via the SMT API, /// and verifies that History correctly tracks the resulting node and leaf changes. 
#[test] -fn smt_history_with_real_mutations() -> Result<()> { +fn history_from_smt_non_overlapping() -> Result<()> { + let mut rng = ContinuousRng::new([0x1a; 32]); + // Create an empty SMT let mut smt = Smt::new(); let initial_root = smt.root(); // Generate test key-value pairs - let key_1: Word = rand_value(); - let value_1: Word = rand_value(); - let key_2: Word = rand_value(); - let value_2: Word = rand_value(); + let key_1: Word = rng.value(); + let value_1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2: Word = rng.value(); // Create history to track versions let mut history = History::empty(3); - // Version 0: Insert first key-value pair using real SMT mutation + // Version 0: Insert first key-value pair using real SMT mutation while getting the reversion + // set for the history. let mutations_v0 = smt.compute_mutations(vec![(key_1, value_1)]).unwrap(); - let (node_changes_v0, leaf_changes_v0) = mutation_set_to_history_changes(&mutations_v0); - smt.apply_mutations(mutations_v0).unwrap(); + let reversion_set = smt.apply_mutations_with_reversion(mutations_v0).unwrap(); let root_v0 = smt.root(); - - // Verify stored node hashes match what the SMT computed - for (index, hash) in node_changes_v0.iter() { - assert_eq!(*hash, smt.get_node_hash(*index)); - } - - history.add_version(root_v0, 0, node_changes_v0.clone(), leaf_changes_v0.clone())?; + history.add_version_from_mutation_set(0, reversion_set)?; + assert_eq!(history.num_versions(), 1); // Version 1: Insert second key-value pair let mutations_v1 = smt.compute_mutations(vec![(key_2, value_2)]).unwrap(); - let (node_changes_v1, leaf_changes_v1) = mutation_set_to_history_changes(&mutations_v1); - smt.apply_mutations(mutations_v1).unwrap(); + let reversion_set = smt.apply_mutations_with_reversion(mutations_v1).unwrap(); let root_v1 = smt.root(); + history.add_version_from_mutation_set(1, reversion_set)?; - // Verify stored node hashes match what the SMT computed - for (index, hash) in 
node_changes_v1.iter() { - assert_eq!(*hash, smt.get_node_hash(*index)); - } - - history.add_version(root_v1, 1, node_changes_v1, leaf_changes_v1)?; - - // Verify roots are tracked correctly + // Verify the roots for older states are tracked correctly in the history. + assert!(history.is_known_root(initial_root)); assert!(history.is_known_root(root_v0)); - assert!(history.is_known_root(root_v1)); - assert!(!history.is_known_root(initial_root)); // Initial empty root not added - // Query version 0 and verify leaf data + // And that the latest root of the tree is not. + assert!(!history.is_known_root(root_v1)); + + // We can start by checking that version 0 performs the correct reversion operations, + // encompassing _both_ changes made to obtain the current version. let view_v0 = history.get_view_at(0)?; - let leaf_index_1 = LeafIndex::new(Smt::key_to_leaf_index(&key_1).value()).unwrap(); - assert!(view_v0.leaf_value(&leaf_index_1).is_some()); - assert_eq!(view_v0.value(&key_1), Some(Some(&value_1))); + assert_eq!(view_v0.value(&key_1), Some(EMPTY_WORD)); + assert_eq!(view_v0.value(&key_2), Some(EMPTY_WORD)); + assert_eq!(view_v0.leaf_delta(&key_1.into()).len(), 1); + assert_eq!(view_v0.leaf_delta(&key_2.into()).len(), 1); - // Query version 1 and verify both leaves accessible + // When we query version 1 it should only revert one change on top of the current tree. 
let view_v1 = history.get_view_at(1)?; - let leaf_index_2 = LeafIndex::new(Smt::key_to_leaf_index(&key_2).value()).unwrap(); - assert!(view_v1.leaf_value(&leaf_index_2).is_some()); - assert_eq!(view_v1.value(&key_2), Some(Some(&value_2))); - - // Verify node changes were captured (mutations produce inner node updates) - assert!(!node_changes_v0.is_empty(), "SMT insertion should produce node changes"); + assert_eq!(view_v0.value(&key_2), Some(EMPTY_WORD)); + assert_eq!(view_v0.leaf_delta(&key_2.into()).len(), 1); // Verify querying a non-existent key returns None - let nonexistent_key: Word = rand_value(); + let nonexistent_key: Word = rng.value(); assert!(view_v1.value(&nonexistent_key).is_none()); Ok(()) @@ -469,50 +495,33 @@ fn smt_history_with_real_mutations() -> Result<()> { /// Tests History with SMT value updates (replacing existing values). #[test] -fn smt_history_value_updates() -> Result<()> { +fn history_from_smt_overlapping() -> Result<()> { + let mut rng = ContinuousRng::new([0x1b; 32]); let mut smt = Smt::new(); - let key: Word = rand_value(); - let value_v0: Word = rand_value(); - let value_v1: Word = rand_value(); + let key: Word = rng.value(); + let value_v0: Word = rng.value(); + let value_v1: Word = rng.value(); let mut history = History::empty(2); // Version 0: Insert initial value let mutations_v0 = smt.compute_mutations(vec![(key, value_v0)]).unwrap(); - let (node_changes_v0, leaf_changes_v0) = mutation_set_to_history_changes(&mutations_v0); - smt.apply_mutations(mutations_v0).unwrap(); - - // Verify stored node hashes match what the SMT computed - for (index, hash) in node_changes_v0.iter() { - assert_eq!(*hash, smt.get_node_hash(*index)); - } - - history.add_version(smt.root(), 0, node_changes_v0, leaf_changes_v0)?; + let reversion_set = smt.apply_mutations_with_reversion(mutations_v0).unwrap(); + history.add_version_from_mutation_set(0, reversion_set)?; // Version 1: Update to new value let mutations_v1 = smt.compute_mutations(vec![(key, 
value_v1)]).unwrap(); - let (node_changes_v1, leaf_changes_v1) = mutation_set_to_history_changes(&mutations_v1); - smt.apply_mutations(mutations_v1).unwrap(); - - // Verify stored node hashes match what the SMT computed - for (index, hash) in node_changes_v1.iter() { - assert_eq!(*hash, smt.get_node_hash(*index)); - } + let reversion_set = smt.apply_mutations_with_reversion(mutations_v1).unwrap(); + history.add_version_from_mutation_set(1, reversion_set)?; - history.add_version(smt.root(), 1, node_changes_v1, leaf_changes_v1)?; - - // Verify version 0 has original value + // In version 0 we should have the correct (empty) value when reverted. let view_v0 = history.get_view_at(0)?; - assert_eq!(view_v0.value(&key), Some(Some(&value_v0))); + assert_eq!(view_v0.value(&key), Some(EMPTY_WORD)); - // Verify version 1 has updated value + // In version 1 we should have the value set in the transition to version 0. let view_v1 = history.get_view_at(1)?; - assert_eq!(view_v1.value(&key), Some(Some(&value_v1))); - - // Verify round-trip consistency: history view matches current SMT value - let current_smt_value = smt.get_value(&key); - assert_eq!(view_v1.value(&key), Some(Some(¤t_smt_value))); + assert_eq!(view_v1.value(&key), Some(value_v0)); Ok(()) } diff --git a/miden-crypto/src/merkle/smt/large_forest/iterator.rs b/miden-crypto/src/merkle/smt/large_forest/iterator.rs new file mode 100644 index 000000000..a1950f9b3 --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/iterator.rs @@ -0,0 +1,461 @@ +//! This module contains the implementation of the iterator over the entries of an arbitrary tree in +//! the forest. +//! +//! # Performance +//! +//! The performance of this iterator has a significant dependency on the tree that it is running +//! over. Due to the differing performance characteristics of backends, we cannot provide exact +//! performance bounds, but the following general rules apply. +//! +//! 
- Iterating over the entries of the **latest tree in a lineage** is going to be **the fastest +//! possible query**. This depends only on the direct iteration performance of the backend in +//! question. +//! - Iterating over the entries of **a historical tree is going to be slower**. This is because it +//! has to do work to merge the entries provided by the history with the entries of the full tree +//! in order to create a coherent picture of the historical tree. +//! +//! We highly recommend benchmarking the iteration behavior on the concrete workload(s) you are +//! concerned about, rather than trying to statically reason about performance of this iterator. + +use alloc::boxed::Box; +use core::iter::Peekable; + +use crate::{ + EMPTY_WORD, + merkle::smt::{LeafIndex, large_forest::root::TreeEntry}, +}; + +// ENTRIES ITERATOR +// ================================================================================================ + +/// An iterator over the entries of an arbitrary tree in the forest, yielding entries in an +/// arbitrary order. +/// +/// It is split into two variants for performance, as iterating over a full tree is significantly +/// simpler than iterating over a historical tree. While it would be nice to be able to return one +/// of two different iterators depending on the circumstances of construction, Rust's `impl Trait` +/// bounds do not allow for this. +/// +/// The iterator **must never transition between variants** during the process of iteration. +pub(super) enum EntriesIterator<'forest> { + /// An iterator over a tree in the forest that is formed from a merger of the full tree and a + /// historical overlay. + WithHistory { + /// The iterator over the entries in the full tree. + /// + /// This iterator should never yield any entries where `value == EMPTY_WORD`. + full_tree_iter: Peekable<Box<dyn Iterator<Item = TreeEntry> + 'forest>>, + + /// The iterator over the entries in the history. + /// + /// This iterator may yield entries with `value == EMPTY_WORD`.
These are explicit + /// reversions of entries newly-set in newer versions, and so should be used. While they + /// technically should only ever correspond to a case where they _are_ reverting a + /// newly-set entry, care must be taken to remove them regardless if they do not match up + /// for some reason. + history_entries_iter: Peekable<Box<dyn Iterator<Item = TreeEntry> + 'forest>>, + + /// The current state of the iteration state machine. + state: EntriesIteratorState, + }, + + /// An iterator over a tree in the forest that is simply an iterator over the full tree. + WithoutHistory { + /// The iterator over the entries in the full tree. + full_tree_iter: Box<dyn Iterator<Item = TreeEntry> + 'forest>, + }, +} + +impl<'forest> EntriesIterator<'forest> { + /// Constructs a new entries iterator pointing to the first item in the designated `tree` in the + /// `forest`, formed by combining a historical overlay with the current tree. + /// + /// Note that it _does not_ perform checks as to the correctness of the provided iterators. If + /// these are not an iterator over the full tree and the historical entries in turn, the results + /// the iterator yields will be invalid. + pub(super) fn new_with_history( + full_tree_iter: impl Iterator<Item = TreeEntry> + 'forest, + history_entries_iter: impl Iterator<Item = TreeEntry> + 'forest, + ) -> Self { + // This type gymnastics is unfortunately necessary to let us easily store the `Peekable` + // which we need to avoid carrying additional state in the state machine. + let full_tree_iter: Box<dyn Iterator<Item = TreeEntry>> = Box::new(full_tree_iter); + let history_entries_iter: Box<dyn Iterator<Item = TreeEntry>> = Box::new(history_entries_iter); + + // We begin in `NotInLeaf`. This is implicitly `Start -> NotInLeaf` + Self::WithHistory { + full_tree_iter: full_tree_iter.peekable(), + history_entries_iter: history_entries_iter.peekable(), + state: EntriesIteratorState::NotInLeaf, + } + } + + /// Constructs a new entries iterator pointing to the first item in the designated `tree` in the + /// `forest` without any associated history.
+ /// + /// Note that it _does not_ check whether `full_tree_iter` is actually an iterator over the + /// full tree. If it is not, the iterator will yield invalid results. + pub(super) fn new_without_history( + full_tree_iter: impl Iterator<Item = TreeEntry> + 'forest, + ) -> Self { + let full_tree_iter = Box::new(full_tree_iter); + Self::WithoutHistory { full_tree_iter } + } + + /// Advances the iterator and returns the next value in the case where it is iterating over a + /// historical tree version. + /// + /// For the details of the state machine that this implements, please see the documentation for + /// the [`EntriesIteratorState`]. It explains the valid state transitions and the conditions + /// under which they occur. This implementation does not match them directly in order to + /// simplify the logic, but matches the intended semantics. + /// + /// # Panics + /// + /// - If the method is called with a `self` that is not in the [`Self::WithHistory`] variant. + #[inline(always)] // To help the optimizer eliminate the redundant check in Iterator::next() + fn next_with_history(&mut self) -> Option<TreeEntry> { + let EntriesIterator::WithHistory { + full_tree_iter, + history_entries_iter, + state, + } = self + else { + panic!("EntriesIterator::next_with_history called without history") + }; + + match state { + EntriesIteratorState::NotInLeaf => { + // Here we are (semantically) not pointing to any specific leaf, so we need to work + // out which of our possible outgoing transitions take place. This state does not + // actually return anything except in the `-> End` case. + match (full_tree_iter.peek(), history_entries_iter.peek()) { + (None, None) => { + // No more entries exist in either of the iterators. `NotInLeaf -> End`. + None + }, + (Some(_), None) => { + // Entries only exist in the full tree iterator. `NotInLeaf -> TreeOnly` + *state = EntriesIteratorState::TreeOnly; + self.next_with_history() + }, + (None, Some(_)) => { + // Entries only exist in the history entries iterator.
`NotInLeaf -> HistOnly` + *state = EntriesIteratorState::HistOnly; + self.next_with_history() + }, + (Some(full), Some(hist)) => { + // Entries exist in both, but the exact state transition has not yet been + // determined. We have three other possible outgoing edges from `NotInLeaf`. + let full_idx = LeafIndex::from(full.key); + let hist_idx = LeafIndex::from(hist.key); + + if full_idx == hist_idx { + // We are in the same leaf. `NotInLeaf -> InLeafShared` + *state = EntriesIteratorState::InLeafShared; + } else if full_idx < hist_idx { + // We are in different leaves with full_idx coming first. `NotInLeaf -> + // InLeafTreeOnly` + *state = EntriesIteratorState::InLeafTreeOnly; + } else { + // We are in different leaves with hist_idx coming first. `NotInLeaf -> + // InLeafHistOnly`. + *state = EntriesIteratorState::InLeafHistOnly; + } + + self.next_with_history() + }, + } + }, + EntriesIteratorState::HistOnly => { + // In this state we simply can continue yielding the history entries iterator until + // it is empty. We just have to check that we're not yielding EMPTY_WORD entries + // directly as these should not be seen. + history_entries_iter.next().and_then(|e| { + if e.value == EMPTY_WORD { + self.next_with_history() + } else { + Some(e) + } + }) + }, + EntriesIteratorState::TreeOnly => { + // In this state we can simply continue yielding the tree entries iterator until it + // is empty. When it returns `None` we have `TreeOnly -> End` + full_tree_iter.next() + }, + EntriesIteratorState::InLeafHistOnly => { + // Here, we are in a leaf that is only in the history. We technically only want to + // transition out of this state once we have exhausted the leaf, but in actuality we + // can rely on the logic for `NotInLeaf` to do the right thing here. We only have to + // skip empty words as these should never be yielded.
+ *state = EntriesIteratorState::NotInLeaf; + history_entries_iter.next().and_then(|e| { + if e.value == EMPTY_WORD { + self.next_with_history() + } else { + Some(e) + } + }) + }, + EntriesIteratorState::InLeafTreeOnly => { + // Here we are in a leaf that is only in the full tree. We technically only want to + // transition out of this state once we have exhausted the leaf, but in actuality we + // can rely on the logic for `NotInLeaf` to do the right thing here. + *state = EntriesIteratorState::NotInLeaf; + full_tree_iter.next() + }, + EntriesIteratorState::InLeafShared => { + // Here we have both iterators in the same LEAF but that does not mean they have the + // same item. + let hist_item = + history_entries_iter.peek().expect("Entry already checked to exist"); + let tree_item = full_tree_iter.peek().expect("Entry already checked to exist"); + + if hist_item.key == tree_item.key { + *state = EntriesIteratorState::InLeafBothKeysEq; + } else if hist_item.key < tree_item.key { + *state = EntriesIteratorState::InLeafBothHistPrio; + } else { + *state = EntriesIteratorState::InLeafBothTreePrio; + } + + self.next_with_history() + }, + EntriesIteratorState::InLeafBothKeysEq => { + // If the keys are equal we want to pop both entries and only return the history's + // one. We can again rely on `NotInLeaf` to do our logic correctly. + *state = EntriesIteratorState::NotInLeaf; + + // We can discard this entry entirely as it has been overwritten. + full_tree_iter.next(); + + // But this one may or may not need to be returned. + let hist_item = + history_entries_iter.next().expect("Entry already checked to exist"); + if hist_item.value == EMPTY_WORD { + // We never want to yield empty items, so we skip them. + self.next_with_history() + } else { + // Otherwise the item is real and we want to yield it. + Some(hist_item) + } + }, + EntriesIteratorState::InLeafBothHistPrio => { + // Here we have a history item with a key < the full tree item, so we want to return + // that.
We can again rely on `NotInLeaf` to do our logic correctly. + *state = EntriesIteratorState::NotInLeaf; + history_entries_iter.next() + }, + EntriesIteratorState::InLeafBothTreePrio => { + // Here we have a full tree item with a key < the history item, so we want to return + // that. We can again rely on `NotInLeaf` to do our logic correctly. + *state = EntriesIteratorState::NotInLeaf; + full_tree_iter.next() + }, + } + } + + /// Advances the iterator and returns the next value in the case where it is iterating over the + /// current tree version. + /// + /// # Panics + /// + /// - If the method is called with a `self` that is not the [`Self::WithoutHistory`] variant. + #[inline(always)] // To help the optimizer eliminate the redundant check in Iterator::next() + fn next_without_history(&mut self) -> Option<TreeEntry> { + let EntriesIterator::WithoutHistory { full_tree_iter } = self else { + panic!("EntriesIterator::next_without_history called with history") + }; + + full_tree_iter.next() + } +} + +// ITERATOR TRAIT +// ================================================================================================ + +impl Iterator for EntriesIterator<'_> { + type Item = TreeEntry; + + fn next(&mut self) -> Option<Self::Item> { + match self { + EntriesIterator::WithHistory { .. } => self.next_with_history(), + EntriesIterator::WithoutHistory { .. } => self.next_without_history(), + } + } +} + +// ENTRIES ITERATOR STATE +// ================================================================================================ + +/// The state machine that is the entries iterator for the forest. +/// +/// We do not represent the ghost states of `Start` and `End`, so [`Self::NotInLeaf`] serves as the +/// initial state of the machine in practice. A full diagram of the state machine's allowable +/// transitions can be found below. See the individual variants for the conditions under which these +/// transitions take place.
+/// +/// ```text +/// ┌─────────┐ +/// │ Start │ +/// └─────────┘ +/// │ +/// │ +/// ▼ +/// ┌───────────┐ +/// ┌─────────────┬────────────│ │◀──────────────┬──────────────────┐ +/// │ │ │ NotInLeaf │ │ │ +/// │ │ ┌────│ │────────────┬──┼───────────────┐ │ +/// │ │ │ └───────────┘ │ │ │ │ +/// │ │ │ │ ▲ │ │ │ │ +/// │ │ │ │ │ │ │ │ │ +/// │ │ │ │ │ │ │ │ │ +/// │ │ │ │ │ │ │ │ │ +/// ▼ ▼ │ ▼ │ ▼ │ ▼ │ +/// ┌──────────┐ ┌──────────┐ │ ┌────────────────┐ ┌────────────────┐ ┌──────────────┐ +/// │ TreeOnly │ │ HistOnly │ │ │ InLeafHistOnly │ │ InLeafTreeOnly │ │ InLeafShared │◀─────────────┐ +/// └──────────┘ └──────────┘ │ └────────────────┘ └────────────────┘ └──────────────┘ │ +/// │ │ │ │ │ +/// │ │ │ │ │ +/// │ │ │ ┌──────────────────────┬──────────────┴────────┐ │ +/// │ │ │ │ │ │ │ +/// │ │ │ ▼ ▼ ▼ │ +/// │ │ │ ┌──────────────────┐ ┌────────────────────┐ ┌────────────────────┐ │ +/// └─────────────┴─────┐ │ │ InLeafBothKeysEq │ │ InLeafBothHistPrio │ │ InLeafBothTreePrio │ │ +/// │ │ └──────────────────┘ └────────────────────┘ └────────────────────┘ │ +/// │ │ │ │ │ │ +/// │ │ │ │ │ │ +/// │ │ └──────────────────────┴───────────────────────┴────────────┘ +/// ▼ ▼ +/// ┌─────────┐ +/// │ End │ +/// └─────────┘ +/// ``` +/// +/// Note that this describes the _semantics_ of the transitions between states, and may not directly +/// correspond to the implementation in [`EntriesIterator::next_with_history`] for reasons of +/// performance and maintainability. +pub(super) enum EntriesIteratorState { + /// The iterator is currently not in any leaf. + /// + /// This state should not advance the underlying iterators directly, and the iterator is not + /// intended to return a value for `next` while in this state. + /// + /// Incoming state transitions: + /// + /// - `Start -> NotInLeaf`: The state of the state machine. + /// - `InLeafHistOnly -> NotInLeaf`: Upon completing the leaf in the history. 
+ /// - `InLeafTreeOnly -> NotInLeaf`: Upon completing the leaf in the tree. + /// - `InLeafShared -> NotInLeaf`: Upon completing the leaf that exists in both. + /// + /// Outgoing state transitions: + /// + /// - `NotInLeaf -> End`: If neither iterator has remaining entries. + /// - `NotInLeaf -> HistOnly`: If the tree entries iterator is empty. + /// - `NotInLeaf -> TreeOnly`: If the history entries iterator is empty. + /// - `NotInLeaf -> InLeafHistOnly`: If the next leaf is only in the history. + /// - `NotInLeaf -> InLeafTreeOnly`: If the next leaf is only in the tree. + /// - `NotInLeaf -> InLeafShared`: If the leaf exists in both iterators. + NotInLeaf, + + /// The iterator over the full tree has no entries, so we can iterate only over the history + /// until completion. + /// + /// Incoming state transitions: + /// + /// - `NotInLeaf -> HistOnly`: The tree entries iterator is empty. + /// + /// Outgoing state transitions: + /// + /// - `HistOnly -> End`: The history entries iterator is empty. + HistOnly, + + /// The iterator over the history has no entries, so we can iterate only over the full tree + /// until completion. + /// + /// Incoming state transitions: + /// + /// - `NotInLeaf -> TreeOnly`: The history entries iterator is empty. + /// + /// Outgoing state transitions: + /// + /// - `TreeOnly -> End`: The tree entries iterator is empty. + TreeOnly, + + /// The iterator is operating over a leaf that only exists in the history iterator. + /// + /// Incoming state transitions: + /// + /// - `NotInLeaf -> InLeafHistOnly`: The tree entries iterator has items but the latest is not + /// in the same leaf as the history's latest. + /// + /// Outgoing state transitions: + /// + /// - `InLeafHistOnly -> NotInLeaf`: Upon completing iteration through the current leaf. + InLeafHistOnly, + + /// The iterator is operating over a leaf that only exists in the tree iterator. 
+ /// + /// Incoming state transitions: + /// + /// - `NotInLeaf -> InLeafTreeOnly`: The history entries iterator has items but the latest is + /// not in the same leaf as the tree's latest. + /// + /// Outgoing state transitions: + /// + /// - `InLeafTreeOnly -> NotInLeaf`: Upon completing iteration through the current leaf. + InLeafTreeOnly, + + /// The iterator is operating over a leaf that exists in both iterators. + /// + /// Incoming state transitions: + /// + /// - `NotInLeaf -> InLeafShared`: Both iterators have their latest entry in the same leaf. + /// + /// Outgoing state transitions: + /// + /// - `InLeafShared -> InLeafBothKeysEq`: If the two keys in the shared leaf are equal. + /// - `InLeafShared -> InLeafBothHistPrio`: If the key in the history < the key in the tree. + /// - `InLeafShared -> InLeafBothTreePrio`: If the key in the tree < the key in the history. + /// - `InLeafShared -> NotInLeaf`: Upon completing iteration through the current leaf. + InLeafShared, + + /// The iterator is operating over a leaf that exists in both iterators, and the current keys + /// are the same. + /// + /// Incoming state transitions: + /// + /// - `InLeafShared -> InLeafBothKeysEq`: If the key in each iterator is the same. + /// + /// Outgoing state transitions: + /// + /// - `InLeafBothKeysEq -> InLeafShared`: When needing to check the next element. + InLeafBothKeysEq, + + /// The iterator is operating over a leaf that exists in both iterators, and the current key + /// in the history is less than the current key in the tree. + /// + /// Incoming state transitions: + /// + /// - `InLeafShared -> InLeafBothHistPrio`: If the key in the history iterator < the key in the + /// tree iterator. + /// + /// Outgoing state transitions: + /// + /// - `InLeafBothHistPrio -> InLeafShared`: When needing to check the next element.
+ InLeafBothHistPrio, + + /// The iterator is operating over a leaf that exists in both iterators, and the current key in + /// the tree is less than the current key in the history. + /// + /// Incoming state transitions: + /// + /// - `InLeafShared -> InLeafBothTreePrio`: If the key in the tree iterator < the key in the + /// history iterator. + /// + /// Outgoing state transitions: + /// + /// - `InLeafBothTreePrio -> InLeafShared`: When needing to check the next element. + InLeafBothTreePrio, +} diff --git a/miden-crypto/src/merkle/smt/large_forest/lineage.rs b/miden-crypto/src/merkle/smt/large_forest/lineage.rs new file mode 100644 index 000000000..fc925a73c --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/lineage.rs @@ -0,0 +1,67 @@ +//! This module contains the data types used by the forest to store and manage the lineages that it +//! knows about. + +use core::iter::once; + +use crate::merkle::smt::{ + VersionId, + large_forest::{history::History, root::RootValue}, +}; + +// LINEAGE DATA +// ================================================================================================ + +/// The data that the forest stores in memory for each lineage of trees. +#[derive(Clone, Debug)] +pub(super) struct LineageData { + /// The history of changes made to the lineage, representing a contiguous set of historical + /// trees in the lineage up to the configured maximum number of versions. + pub history: History, + + /// The version of the latest tree in the lineage. + pub latest_version: VersionId, + + /// The value of the root for the latest tree in the lineage. + pub latest_root: RootValue, +} + +impl LineageData { + /// Gets an iterator that yields all roots in the lineage. + /// + /// The iteration order of the roots is guaranteed to move backward in time, with earlier items + /// in the iterator being roots from versions closer to the present. The current root of the + /// lineage will always be the first item that the iterator yields. 
+ pub(super) fn roots(&self) -> impl Iterator<Item = RootValue> { + once(self.latest_root).chain(self.history.roots()) + } + + /// Truncates the information on this tree to the provided `version`, returning `true` if the + /// history is empty after truncation, and `false` otherwise. + /// + /// If the latest version in the lineage is older than the specified `version`, this latest + /// version is always retained. In other words, the method cannot prune a lineage from the + /// forest entirely. + pub(super) fn truncate(&mut self, version: VersionId) -> bool { + if version >= self.latest_version { + // Truncation in the history is defined such that it never removes a version that could + // possibly serve as the latest delta for a newer version. This is because it cannot + // safely know if a version `v` is between the latest delta `d` and the current version + // `c`, as it has no knowledge of the current version. + // + // Thus, if we have a version `v` such that `d <= v < c`, we need to retain the + // reversion delta `d` in the history to correctly service queries for `v`. If, however, + // we have `d < c <= v` we need to explicitly remove the last delta as well. + // + // To that end, we handle the latter case first, by explicitly calling + // `History::clear()`. + self.history.clear(); + true + } else { + // The other case is `v < c`, which is handled simply by the truncation mechanism in the + // history as we want. In other words, it retains the necessary delta, and so we can + // just call it here. + self.history.truncate(version); + false + } + } +} diff --git a/miden-crypto/src/merkle/smt/large_forest/mod.rs b/miden-crypto/src/merkle/smt/large_forest/mod.rs index faa5cdf37..6016026cc 100644 --- a/miden-crypto/src/merkle/smt/large_forest/mod.rs +++ b/miden-crypto/src/merkle/smt/large_forest/mod.rs @@ -2,112 +2,340 @@ //! //! # Semantic Layout //! -//! Much like `SparseMerkleTree`, the forest stores trees of depth 64 that use the compact leaf -//!
optimization to uniquely store 256-bit elements. This reduces both the size of a merkle path, -//! and the computational work necessary to perform queries into the trees. +//! Much like the `SparseMerkleTree`, the forest stores its trees at depth [`SMT_DEPTH`] and then +//! relies on the compact leaf optimization to uniquely store the 256-bit elements that it contains. +//! This is done to both reduce the size of a merkle path, and to reduce the computational work +//! necessary to perform queries into the trees. //! -//! # Storing Trees and Versions -//! -//! The usage of an SMT forest is conceptually split into two parts: a collection that is able to -//! store **multiple, unrelated trees**, and a container for **multiple versions of those trees**. -//! Both of these use-cases are supported by the forest, but have an explicit delineation between -//! them in both the API and the implementation. This has two impacts that a client of the forest -//! must understand. +//! It also has the benefit of significantly reducing the memory usage for the forest. Even in cases +//! where it relies on a persistent backend, the other peripheral structures are able to be smaller +//! and thus use less memory. //! -//! - While, when using a [`Backend`] that can persist data, **only the current full tree state is -//! persisted**, while **the historical data will not be**. This is designed into the structure of -//! the forest, and does not depend on the choice of storage backend. -//! - It is more expensive to query a given tree at an older point in its history than it is to -//! query it at a newer point, and querying at the current tree will always take the least time. +//! # Backends //! -//! # Lineages +//! The forest is implemented to rely on the API and contract conformance of an arbitrary +//! [`Backend`] implementation. These backends provide the storage for full trees in the forest, and +//! are the main extension point for the way the forest functions. //! -//! 
We term a set of trees where each is derived from the previous version to be a **lineage**. A -//! single lineage semantically contains the **full information** on the current state of the tree, -//! alongside a set of deltas which describe how to change that full tree to return to a historical -//! state of that tree. +//! The [`InMemoryBackend`] provides simple, in-memory storage for the full trees in the forest. It +//! is _primarily_ intended to be used for testing purposes, but should nevertheless be correct and +//! functional for production use-cases if no persistence is required. //! //! While any given [`Backend`] may choose to share data between lineages, this behavior is not //! guaranteed, and must not be relied upon. //! +//! ## Performance +//! +//! Each [`Backend`] provides the same set of functionality to the forest, but may exhibit +//! significant variance in their performance characteristics. As a result, **any performance +//! analysis of the forest should be done in conjunction with a specific backend**. +//! +//! Take care to read the documentation of the specific [`Backend`] that you are planning to use in +//! order to understand its performance, potential gotchas, and other such details. +//! +//! # Storing Trees and Versions +//! +//! An SMT forest conceptually performs two roles. Firstly, it acts as a collection that is able to +//! store **multiple, unrelated trees**. Secondly, it is a container for **multiple versions of a +//! given tree**. In order to make it tractable to implement a performant forest with pluggable +//! backends, this type makes an explicit delineation between these use-cases in both the API and +//! the implementation. +//! +//! ## Lineages +//! +//! We term a set of trees, where each tree is derived from changing the previous version, to be a +//! **lineage** of trees. A single lineage contains the information necessary to reconstruct any +//! 
previous version of the tree, within the bounds of the history that the forest stores. +//! +//! Users must take care to ensure that each lineage identifier is unique, as reuse of these +//! identifiers can result in data corruption and hence queries that return incorrect results. +//! //! # Tree Identification //! -//! It is possible for multiple lineages to contain a tree with identical leaves and hence an -//! identical root. As we store lineages separately, we need some way to specify which instance of a -//! given root we mean. +//! It is possible for a tree with identical leaves (and hence an identical root) to exist in +//! multiple lineages in the forest. As lineages are stored separately, there needs to be a way to +//! specify the precise instance of a given tree. //! -//! This is done by identifying trees using the [`TreeId`], which combines the tree's root value -//! with a user-provided identifier that tags the tree with a 'domain'. This allows distinguishing -//! between otherwise identical trees. Users must take care to ensure that each domain is unique, as -//! reusing them will result in overwriting data in the wrong domain, and that queries may return -//! incorrect results. +//! Trees are thus identified using the [`TreeId`], which combines the **lineage** in which the tree +//! exists with the **version** in that lineage. //! -//! # Data Storage +//! ## Potential Gotchas //! -//! The SMT forest is parametrized over the [`Backend`] implementation that it uses. These backends -//! may have significantly varied performance characteristics, and hence any performance analysis of -//! the forest should be done in conjunction with a specific backend. The forest itself takes pains -//! to not make any assumptions about properties of the backend in use. +//! The separation of the forest into lineages of trees has a few impacts that a client of the +//! forest must understand: //! -//! 
Take care to read the documentation of the specific [`Backend`] that you are planning to use in -//! order to understand its performance, gotchas, and other such details. +//! - When using a [`Backend`] that offers data persistence, **only the state of the current version +//! of each lineage is persisted**, while **the historical data is not persisted**. This is part +//! of the way the forest is structured, and does not depend on the choice of backend. +//! - It is always going to be more expensive to query a given lineage at **an older point** in its +//! history than it is to query at a newer point. +//! - Querying **the latest tree in a lineage will take the least time**. +//! +//! # Batch Operations +//! +//! The [`LargeSmtForest::update_tree`] and [`LargeSmtForest::update_forest`] methods are what is +//! known as **batch operations**. In other words, they are performed in one go and only produce a +//! one-stage update to the forest, rather than a sequence of updates. +//! +//! These methods should be used wherever possible (especially preferring `update_forest` over a +//! sequence of `update_tree` calls) as this will allow the forest and its backend to exploit as +//! much parallelism as possible in the updates. +//! +//! # Examples +//! +//! The following section contains usage examples for the forest. They rely on the included +//! [`InMemoryBackend`] for simplicity, but will work with any conformant [`Backend`] +//! implementation. Each example is designed to build upon the last. +//! +//! ## Constructing a Forest +//! +//! A new forest can be constructed by calling either [`LargeSmtForest::new`], which will use a +//! default [`Config`], or by explicitly providing the config in [`LargeSmtForest::with_config`]. +//! +//! ``` +//! use miden_crypto::merkle::smt::{ForestInMemoryBackend, LargeSmtForest}; +//! # use miden_crypto::merkle::smt::LargeSmtForestError; +//! # +//! # fn main() -> Result<(), LargeSmtForestError> { +//! +//! 
let backend = ForestInMemoryBackend::new(); +//! let forest = LargeSmtForest::new(backend)?; +//! # +//! # Ok(()) +//! # } +//! ``` +//! +//! Upon startup, the forest has to read the lineages it knows from the provided storage. If it +//! cannot get this information, it cannot start up properly and the constructor may return an +//! error. +//! +//! ## Adding a Lineage +//! +//! Each tree in the forest belongs to a _lineage_, identified by a [`LineageId`]. In order to work +//! with a lineage in the forest, that lineage first has to be added to it! Adding a lineage can +//! either add the empty tree, or specify a set of modifications on the empty tree to create a +//! starting state. +//! +//! ``` +//! # use miden_crypto::merkle::smt::LargeSmtForestError; +//! # use miden_crypto::merkle::smt::{ForestInMemoryBackend, LargeSmtForest}; +//! use miden_crypto::{ +//! Word, +//! merkle::smt::{LineageId, SmtUpdateBatch}, +//! }; +//! +//! # fn main() -> Result<(), LargeSmtForestError> { +//! # let backend = ForestInMemoryBackend::new(); +//! # let mut forest = LargeSmtForest::new(backend)?; +//! # +//! // We can just make some arbitrary values here for demonstration. +//! let key_1 = Word::parse("0x42").unwrap(); +//! let value_1 = Word::parse("0x80").unwrap(); +//! let key_2 = Word::parse("0xAB").unwrap(); +//! let value_2 = Word::parse("0xCD").unwrap(); +//! +//! // Operations are most cleanly specified using a builder. +//! let mut operations = SmtUpdateBatch::empty(); +//! operations.add_insert(key_1, value_1); +//! operations.add_insert(key_2, value_2); +//! +//! // To add a new lineage we also need to give it a lineage ID, and a version. +//! let lineage = LineageId::new([0x42; 32]); +//! let version_1 = 1; +//! +//! // Now we can add the lineage to the forest! +//! assert!(forest.add_lineage(lineage, version_1, operations).is_ok()); +//! # +//! # Ok(()) +//! # } +//! ``` +//! +//! ## Modifying a Lineage +//! +//! 
A forest is not all that useful if we cannot update it! Modifying a lineage is much like adding +//! a new one, in that we specify operations to be performed on the latest tree in that lineage. +//! +//! ``` +//! # use miden_crypto::merkle::smt::LargeSmtForestError; +//! # use miden_crypto::{ +//! # Word, +//! # merkle::smt::{ForestInMemoryBackend, LargeSmtForest, LineageId, SmtUpdateBatch}, +//! # }; +//! # +//! # fn main() -> Result<(), LargeSmtForestError> { +//! # let backend = ForestInMemoryBackend::new(); +//! # let mut forest = LargeSmtForest::new(backend)?; +//! # +//! # // We can just make some arbitrary values here for demonstration. +//! # let key_1 = Word::parse("0x42").unwrap(); +//! # let value_1 = Word::parse("0x80").unwrap(); +//! # let key_2 = Word::parse("0xAB").unwrap(); +//! # let value_2 = Word::parse("0xCD").unwrap(); +//! # +//! # // Operations are most cleanly specified using a builder. +//! # let mut operations = SmtUpdateBatch::empty(); +//! # operations.add_insert(key_1, value_1); +//! # operations.add_insert(key_2, value_2); +//! # +//! # // To add a new lineage we also need to give it a lineage ID, and a version. +//! # let lineage = LineageId::new([0x42; 32]); +//! # let version_1 = 1; +//! # +//! # // Now we can add the lineage to the forest! +//! # forest.add_lineage(lineage, version_1, operations)?; +//! # +//! // Let's make another arbitrary value. +//! let key_3 = Word::parse("0x67").unwrap(); +//! let value_3 = Word::parse("0x96").unwrap(); +//! +//! // And build a batch of operations again. +//! let mut operations = SmtUpdateBatch::empty(); +//! operations.add_insert(key_3, value_3); +//! operations.add_remove(key_1); +//! +//! // Now we can simply update the tree all in one go with our changes. +//! let version_2 = version_1 + 1; +//! assert!(forest.update_tree(lineage, version_2, operations).is_ok()); +//! # +//! # Ok(()) +//! # } +//! ``` +//! +//! 
Multiple lineages can be modified at once using the [`LargeSmtForest::update_forest`] method, +//! which works very similarly to the [`LargeSmtForest::update_tree`] method shown above. +//! +//! ## Querying a Lineage +//! +//! Modification is just one part of the puzzle, however. It is just as important to be able to get +//! data _out_ of the forest too! +//! +//! ``` +//! # use miden_crypto::merkle::smt::LargeSmtForestError; +//! # use miden_crypto::{ +//! # Word, +//! # merkle::smt::{ForestInMemoryBackend, LargeSmtForest, LineageId, SmtUpdateBatch}, +//! # }; +//! use miden_crypto::merkle::smt::{TreeEntry, TreeId}; +//! +//! # fn main() -> Result<(), LargeSmtForestError> { +//! # let backend = ForestInMemoryBackend::new(); +//! # let mut forest = LargeSmtForest::new(backend)?; +//! # +//! # // We can just make some arbitrary values here for demonstration. +//! # let key_1 = Word::parse("0x42").unwrap(); +//! # let value_1 = Word::parse("0x80").unwrap(); +//! # let key_2 = Word::parse("0xAB").unwrap(); +//! # let value_2 = Word::parse("0xCD").unwrap(); +//! # +//! # // Operations are most cleanly specified using a builder. +//! # let mut operations = SmtUpdateBatch::empty(); +//! # operations.add_insert(key_1, value_1); +//! # operations.add_insert(key_2, value_2); +//! # +//! # // To add a new lineage we also need to give it a lineage ID, and a version. +//! # let lineage = LineageId::new([0x42; 32]); +//! # let version_1 = 1; +//! # +//! # // Now we can add the lineage to the forest! +//! # forest.add_lineage(lineage, version_1, operations)?; +//! # +//! # // Let's make another arbitrary value. +//! # let key_3 = Word::parse("0x67").unwrap(); +//! # let value_3 = Word::parse("0x96").unwrap(); +//! # +//! # // And build a batch of operations again. +//! # let mut operations = SmtUpdateBatch::empty(); +//! # operations.add_insert(key_3, value_3); +//! # operations.add_remove(key_1); +//! # +//! 
# // Now we can simply update the tree all in one go with our changes. +//! # let version_2 = version_1 + 1; +//! # forest.update_tree(lineage, version_2, operations)?; +//! # +//! // As discussed above, trees are identified by a combination of their lineage and version. +//! let old_tree = TreeId::new(lineage, version_1); +//! let current_tree = TreeId::new(lineage, version_2); +//! +//! // The first really useful query is `open`, which gets the opening for the specified key. We can +//! // get openings for the current tree AND the historical trees. +//! assert!(forest.open(old_tree, key_1).is_ok()); +//! assert!(forest.open(current_tree, key_3).is_ok()); +//! +//! // We can also just `get` the value associated with a key, which returns `None` if the key is +//! // not populated. +//! assert_eq!(forest.get(old_tree, key_1)?, Some(value_1)); +//! assert_eq!(forest.get(current_tree, key_3)?, Some(value_3)); +//! assert!(forest.get(current_tree, key_1)?.is_none()); +//! +//! // We can also get an iterator over all the entries in the tree. +//! let entries_old: Vec<_> = forest.entries(old_tree)?.collect(); +//! let entries_current: Vec<_> = forest.entries(current_tree)?.collect(); +//! assert!(entries_old.contains(&TreeEntry { key: key_1, value: value_1 })); +//! assert!(entries_old.contains(&TreeEntry { key: key_2, value: value_2 })); +//! assert!(!entries_old.contains(&TreeEntry { key: key_3, value: value_3 })); +//! assert!(!entries_current.contains(&TreeEntry { key: key_1, value: value_1 })); +//! assert!(entries_current.contains(&TreeEntry { key: key_2, value: value_2 })); +//! assert!(entries_current.contains(&TreeEntry { key: key_3, value: value_3 })); +//! # +//! # Ok(()) +//! # } +//! ``` +//! +//! There are many other kinds of queries of course, so taking a look at the methods available on +//! [`LargeSmtForest`] is a good starting point. 
mod backend; +mod config; mod error; mod history; +mod iterator; +mod lineage; mod operation; mod property_tests; mod root; mod tests; +mod utils; -use core::iter::once; +use alloc::vec::Vec; +use core::num::NonZeroU8; -pub use backend::{Backend, BackendError}; +pub use backend::{Backend, BackendError, memory::InMemoryBackend}; +pub use config::{Config, DEFAULT_MAX_HISTORY_VERSIONS, MIN_HISTORY_VERSIONS}; pub use error::{LargeSmtForestError, Result}; pub use operation::{ForestOperation, SmtForestUpdateBatch, SmtUpdateBatch}; -pub use root::{RootInfo, TreeId, VersionId}; +pub use root::{LineageId, RootInfo, TreeEntry, TreeId, TreeWithRoot, VersionId}; use crate::{ - Map, Set, Word, - merkle::smt::{ - SmtProof, - large_forest::{ - history::History, - root::{LineageId, RootValue, TreeEntry, TreeWithRoot, UniqueRoot}, + EMPTY_WORD, Map, Set, Word, + merkle::{ + NodeIndex, SparseMerklePath, + smt::{ + LeafIndex, SMT_DEPTH, SmtLeaf, SmtProof, + large_forest::{ + history::{CompactLeaf, History, HistoryView}, + iterator::EntriesIterator, + lineage::LineageData, + root::{RootValue, UniqueRoot}, + }, }, }, }; + // SPARSE MERKLE TREE FOREST // ================================================================================================ -/// A high-performance forest of sparse merkle trees with pluggable storage. -/// -/// # Current and Frozen Trees -/// -/// Trees in the forest fall into two categories: +/// A high-performance forest of sparse merkle trees with pluggable storage backends. /// -/// 1. **Current:** These trees represent the latest version of their 'tree lineage' and can be -/// modified to generate a new tree version in the forest. -/// 2. **Frozen:** These are historical versions of trees that are no longer current, and are -/// considered 'frozen' and hence cannot be modified to generate a new tree version in the -/// forest. 
This is because being able to do so would effectively create a "fork" in the history, -/// and hence allow the forest to yield potentially invalid responses with regard to the -/// blockchain history. -/// -/// The API is designed to avoid any possibility of modifying frozen trees in the forest, and hence -/// ensure the correctness of the history stored in the forest. -/// -/// # Performance -/// -/// The performance characteristics of this forest depend heavily on the choice of underlying -/// [`Backend`] implementation. Where something more specific can be said about a particular method -/// call, the documentation for that method will state it. -#[allow(dead_code)] // Temporarily +/// See the module documentation for more information. #[derive(Debug)] pub struct LargeSmtForest { - /// The backend for storing the full trees that exist as part of the forest. It makes no - /// guarantees as to where the tree data is stored, and **must not be exposed** in the API of - /// the forest for correctness. + /// The configuration for how the forest functions. + config: Config, + + /// The backend for storing the full trees that exist as part of the forest. + /// + /// It makes no guarantees as to where the tree data is stored, and **must not be exposed** in + /// the API of the forest to ensure that internal invariants are maintained. backend: B, /// The container for the in-memory data associated with each lineage in the forest. @@ -126,7 +354,7 @@ pub struct LargeSmtForest { // ================================================================================================ /// These functions deal with the creation of new forest instances, and hence rely on the ability to -/// query storage to do so. +/// query the backend to do so. /// /// # Performance /// @@ -137,20 +365,71 @@ pub struct LargeSmtForest { /// Where anything more specific can be said about performance, the method documentation will /// contain more detail. 
impl LargeSmtForest { - /// Constructs a new forest backed by the provided `backend`. + /// Constructs a new forest backed by the provided `backend` using the default [`Config`] for + /// the forest's behavior. /// - /// The constructor will treat whatever state is contained within the provided `backend` as the + /// This constructor will treat whatever state is contained within the provided `backend` as the /// starting state for the forest. This means that, if you pass a newly-initialized storage, the /// forest will start in an empty state. Similarly, if you pass a `backend` that already /// contains some data (loaded from disk, for example), then the forest will start in that state /// instead. /// + /// # Performance + /// + /// For performance notes on this method, see [`Self::with_config`] instead. + /// /// # Errors /// /// - [`LargeSmtForestError::Other`] if the forest cannot be started up correctly using the /// provided `backend`. - pub fn new(_backend: B) -> Result { - todo!("LargeSmtForest::new") + pub fn new(backend: B) -> Result { + Self::with_config(backend, Config::default()) + } + + /// Constructs a new forest backed by the provided `backend` and configuring behavior using the + /// provided `config`. + /// + /// This constructor will treat whatever state is contained within the provided `backend` as the + /// starting state for the forest. This means that, if you pass a newly-initialized storage, the + /// forest will start in an empty state. Similarly, if you pass a `backend` that already + /// contains some data (loaded from disk, for example), then the forest will start in that state + /// instead. + /// + /// # Performance + /// + /// This method is required to load the basic tree metadata from the backend during forest + /// construction. This metadata should be stored separately, and hence this method should take a + /// relatively small amount of time. 
+ /// + /// # Errors + /// + /// - [`LargeSmtForestError::Fatal`] if the forest cannot be started up correctly using the + /// provided `backend`. + pub fn with_config(backend: B, config: Config) -> Result { + // The lineages at initialization time are whichever ones the backend knows about. To that + // end, we read from the backend and construct the starting state for each known lineage. + let lineage_data = backend + .trees()? + .map(|t| { + let data = LineageData { + history: History::empty(config.max_history_versions()), + latest_version: t.version(), + latest_root: t.root(), + }; + (t.lineage(), data) + }) + .collect::>(); + + // As no backend is able to preserve history, we can unconditionally initialize the tracking + // for non-empty histories as empty. + let non_empty_histories = Set::default(); + + Ok(Self { + config, + backend, + lineage_data, + non_empty_histories, + }) } } @@ -185,9 +464,9 @@ impl LargeSmtForest { /// Returns an iterator that yields the root values for trees within the specified `lineage`, or /// [`None`] if the lineage is not known. /// - /// The iteration order of the roots is guaranteed to move backward in time, with earlier items - /// being roots from versions closer to the present. The current root of the lineage will always - /// be the first item yielded by the iterator. + /// The iteration order of the roots is guaranteed to move backward in time as the iterator + /// advances, with earlier items being roots from versions closer to the present. The current + /// root of the lineage will thus always be the first item yielded by the iterator. pub fn lineage_roots(&self, lineage: LineageId) -> Option> { self.lineage_data.get(&lineage).map(|d| d.roots()) } @@ -198,18 +477,6 @@ impl LargeSmtForest { self.lineage_data.get(&lineage).map(|d| d.latest_root) } - /// Returns an iterator that yields the historical root values for trees within the specified - /// `lineage`, or [`None`] if the lineage is not known. 
- /// - /// The iteration order of the roots is guaranteed to move backward in time, with earlier items - /// being roots from versions closer to the present. It does _not_ include the latest root in - /// the specified `lineage`. - pub fn historical_roots(&self, lineage: LineageId) -> Option> { - // We skip the first element as this is always guaranteed to be the current root for the - // lineage. - self.lineage_roots(lineage).map(|i| i.skip(1)) - } - /// Returns the number of trees in the forest that have unique identity. /// /// This is **not** the number of unique tree lineages in the forest, as it includes all @@ -228,17 +495,21 @@ impl LargeSmtForest { /// Returns data describing what information the forest knows about the provided `root`. pub fn root_info(&self, root: TreeId) -> RootInfo { - if let Some(d) = self.lineage_data.get(&root.lineage()) { - if d.latest_version == root.version() { - RootInfo::LatestVersion(d.latest_root) - } else { - match d.history.root_for_version(root.version()) { - Ok(r) => RootInfo::HistoricalVersion(r), - Err(_) => RootInfo::Missing, - } - } - } else { - RootInfo::Missing + let Some(d) = self.lineage_data.get(&root.lineage()) else { + return RootInfo::Missing; + }; + + if d.latest_version == root.version() { + return RootInfo::LatestVersion(d.latest_root); + } + + if root.version() > d.latest_version { + return RootInfo::Missing; + } + + match d.history.root_for_version(root.version()) { + Ok(r) => RootInfo::HistoricalVersion(r), + Err(_) => RootInfo::Missing, } } @@ -274,58 +545,208 @@ impl LargeSmtForest { /// Where anything more specific can be said about performance, the method documentation will /// contain more detail. impl LargeSmtForest { - /// Returns an opening for the specified `key` in the specified `tree`, or [`None`] if there is - /// no value corresponding to the provided `key` in that tree. 
+ /// Returns an opening for the specified `key` in the specified `tree`, regardless of whether + /// the `tree` has a value associated with `key` or not. /// /// # Errors /// + /// - [`LargeSmtForestError::Fatal`] if the backend fails to operate properly during the query. /// - [`LargeSmtForestError::UnknownLineage`] If the provided `tree` specifies a lineage that is /// not one known by the forest. - /// - [`LargeSmtForestError::UnknownTree`] If the provided `tree` refers to a tree that is not a + /// - [`LargeSmtForestError::UnknownTree`] if the provided `tree` refers to a tree that is not a /// member of the forest. - /// - [`LargeSmtForestError::MerkleError`] If there is insufficient data in the specified `tree` - /// to provide an opening for `key`. - pub fn open(&self, _tree: TreeId, _key: Word) -> Result> { - todo!("LargeSmtForest::open") + /// - [`LargeSmtForestError::Merkle`] if there is insufficient data in the specified `tree` to + /// provide an opening for `key`. + pub fn open(&self, tree: TreeId, key: Word) -> Result { + // We want to return an error if the lineage is unknown to comply with the stated contract + // for the function. + let lineage_data = self + .lineage_data + .get(&tree.lineage()) + .ok_or(LargeSmtForestError::UnknownLineage(tree.lineage()))?; + + // We then check if the version exists in the forest. We do this before fetching the full + // tree as to do so otherwise would represent a possible denial-of-service vector. + if tree.version() > lineage_data.latest_version { + // Here the tree is newer than we know about, and so we should error. + return Err(LargeSmtForestError::UnknownTree(tree)); + } + + if tree.version() == lineage_data.latest_version { + // In this case we can service the opening directly from the backend as the query is for + // the latest version of the tree. 
+ return self.backend.open(tree.lineage(), key).map_err(Into::into); + } + + let Ok(view) = lineage_data.history.get_view_at(tree.version()) else { + // Newer versions were rejected above, so reaching this branch means the requested + // historical version is not serviceable by the history. The specified tree is therefore + // unknown to the forest. + return Err(LargeSmtForestError::UnknownTree(tree)); + }; + + // We start by computing the relevant leaf index and getting the opening from the full + // tree to do our (potentially) most-expensive work up front. + let leaf_index = LeafIndex::from(key); + let opening = self + .backend + .open(tree.lineage(), key) + .map_err(Into::::into)?; + + // We compute the new leaf and new path by applying any reversions from the history on + // top of the current state. + let new_leaf = self.merge_leaves(opening.leaf(), &view.leaf_delta(&leaf_index))?; + let new_path = self.merge_paths(leaf_index, opening.path(), view)?; + + // Finally we can compose our combined opening. + Ok(SmtProof::new(new_path, new_leaf)?) } /// Returns the value associated with the provided `key` in the specified `tree`, or [`None`] if - /// there is no value corresponding to the provided `key` in that tree. + /// there is no non-default value corresponding to the provided `key` in that tree. /// /// # Errors /// - /// - [`LargeSmtForestError::UnknownLineage`] If the provided `tree` specifies a lineage that is + /// - [`LargeSmtForestError::Fatal`] if the backend fails to operate properly during the query. + /// - [`LargeSmtForestError::UnknownLineage`] if the provided `tree` specifies a lineage that is /// not one known by the forest. - /// - [`LargeSmtForestError::UnknownTree`] If the provided `tree` refers to a tree that is not a + /// - [`LargeSmtForestError::UnknownTree`] if the provided `tree` refers to a tree that is not a /// member of the forest.
- pub fn get(&self, _root: TreeId, _key: Word) -> Result> { - todo!("LargeSmtForest::get") + pub fn get(&self, tree: TreeId, key: Word) -> Result> { + // We want to return an error if the lineage is unknown to comply with the stated contract + // for the function. + let lineage_data = self + .lineage_data + .get(&tree.lineage()) + .ok_or(LargeSmtForestError::UnknownLineage(tree.lineage()))?; + + if tree.version() > lineage_data.latest_version { + // Here the tree is newer than we know about, and so we should error. + return Err(LargeSmtForestError::UnknownTree(tree)); + } + + if tree.version() == lineage_data.latest_version { + // In this case we can service the lookup directly from the backend as the query is for + // the latest version of the tree. + return self.backend.get(tree.lineage(), key).map_err(Into::into); + } + + let Ok(view) = lineage_data.history.get_view_at(tree.version()) else { + // Newer versions were rejected above, so reaching this branch means the requested + // historical version is not serviceable by the history. The specified tree is therefore + // unknown to the forest. + return Err(LargeSmtForestError::UnknownTree(tree)); + }; + + // We prioritize the value in the history if one exists, falling back to the full tree + // if none does. We don't use `or` here because we don't want to query the backend + // unless we have to, and we can't use `or_else` due to lack of support for `Result`. + let result = if let Some(value) = view.value(&key) { + // If the history value is an empty word, the value was unset in the historical tree + // version, so we have to conform to our interface by returning `None` here. + if value == EMPTY_WORD { None } else { Some(value) } + } else { + self.backend.get(tree.lineage(), key)? + }; + + // We can just return that directly. + Ok(result) } /// Returns the number of populated entries in the specified `tree`.
/// + /// # Performance + /// + /// Due to the way that tree data is stored, this method exhibits a split performance profile. + /// + /// - If querying for a `tree` that is the latest in its lineage, the time to return a result + /// should be constant. + /// - If querying for a `tree` that is a historical version, the time to return a result will be + /// linear in the number of entries in the tree. This is because an overlaid iterator has to + /// be created to yield the correct entries for the historical version, and then queried for + /// its length. + /// /// # Errors /// - /// - [`LargeSmtForestError::UnknownLineage`] If the provided `tree` specifies a lineage that is + /// - [`LargeSmtForestError::Fatal`] if the backend fails to operate properly during the query. + /// - [`LargeSmtForestError::UnknownLineage`] if the provided `tree` specifies a lineage that is /// not one known by the forest. - /// - [`LargeSmtForestError::UnknownTree`] If the provided `tree` refers to a tree that is not a + /// - [`LargeSmtForestError::UnknownTree`] if the provided `tree` refers to a tree that is not a /// member of the forest. - pub fn entry_count(&self, _tree: TreeId) -> Result { - todo!("LargeSmtForest::entry_count") + pub fn entry_count(&self, tree: TreeId) -> Result { + // We start by yielding an error if we cannot get the lineage data for the specified tree. + let Some(lineage_data) = self.lineage_data.get(&tree.lineage()) else { + return Err(LargeSmtForestError::UnknownLineage(tree.lineage())); + }; + + if tree.version() > lineage_data.latest_version { + // Here the tree is newer than we know about, and so we should error. + return Err(LargeSmtForestError::UnknownTree(tree)); + } + + if tree.version() == lineage_data.latest_version { + // We can fast-path the current tree using the backend. 
+ return Ok(self.backend.entry_count(tree.lineage())?); + } + + let Ok(view) = lineage_data.history.get_view_at(tree.version()) else { + // If neither of these are the case, we do not know the version and so fail out. + return Err(LargeSmtForestError::UnknownTree(tree)); + }; + + // In the general case there is no faster path than doing the iteration to merge the + // history with the full tree, so we just count the iterator. + Ok( + EntriesIterator::new_with_history( + self.backend.entries(tree.lineage())?, + view.entries(), + ) + .count(), + ) } /// Returns an iterator that yields the entries in the specified `tree`. /// + /// # Performance + /// + /// The performance of the iterator depends both on the choice of backend _and_ the type of tree + /// that is queried for. We cannot give exact performance figures, but in general querying over + /// **the current tree** in a lineage will be faster than querying over **a historical tree** in + /// a lineage. + /// /// # Errors /// - /// - [`LargeSmtForestError::UnknownLineage`] If the provided `tree` specifies a lineage that is + /// - [`LargeSmtForestError::Fatal`] if the backend fails to operate properly during the query. + /// - [`LargeSmtForestError::UnknownLineage`] if the provided `tree` specifies a lineage that is /// not one known by the forest. - /// - [`LargeSmtForestError::UnknownTree`] If the provided `tree` refers to a tree that is not a + /// - [`LargeSmtForestError::UnknownTree`] if the provided `tree` refers to a tree that is not a /// member of the forest. - pub fn entries>(&self, _tree: TreeId) -> Result { - // TODO Turn this signature back to an `impl Iterator<...>` once there is a body. `impl` - // generics are fussy alongside `todo!`s. - todo!("LargeSmtForest::entries") + pub fn entries(&self, tree: TreeId) -> Result> { + // We start by yielding an error if we cannot get the lineage data for the specified tree. 
+ let Some(lineage_data) = self.lineage_data.get(&tree.lineage()) else { + return Err(LargeSmtForestError::UnknownLineage(tree.lineage())); + }; + + if tree.version() > lineage_data.latest_version { + // Here the tree is newer than we know about, and so we should error. + return Err(LargeSmtForestError::UnknownTree(tree)); + } + + if tree.version() == lineage_data.latest_version { + // If we match the current version, we can construct the simple iterator variant. + return Ok(EntriesIterator::new_without_history(self.backend.entries(tree.lineage())?)); + } + + let Ok(view) = lineage_data.history.get_view_at(tree.version()) else { + // If neither of these are the case, we do not know the version and so fail out. + return Err(LargeSmtForestError::UnknownTree(tree)); + }; + + // If we can serve it from the history we need to instead construct the complex version. + Ok(EntriesIterator::new_with_history( + self.backend.entries(tree.lineage())?, + view.entries(), + )) } } @@ -343,26 +764,117 @@ impl LargeSmtForest { /// /// Where anything more specific can be said about performance, the method documentation will /// contain more detail. -#[allow(dead_code)] // Temporarily impl LargeSmtForest { + /// Adds a new `lineage` to the forest, creating an empty tree and modifying it as specified by + /// `updates`, with the result taking the provided `new_version`. + /// + /// If the provided `updates` batch is empty, then the **empty tree will be added** as the first + /// version in the lineage. + /// + /// # Errors + /// + /// - [`LargeSmtForestError::DuplicateLineage`] if the provided `lineage` is the same as an + /// already-known lineage. + /// - [`LargeSmtForestError::Fatal`] if the backend fails while being accessed. + /// - [`BackendError::Merkle`] if the provided `updates` cannot be applied to the empty tree.
+ pub fn add_lineage( + &mut self, + lineage: LineageId, + new_version: VersionId, + updates: SmtUpdateBatch, + ) -> Result { + // We can immediately add lineage in the backend, as by its contract it should return + // `DuplicateLineage` if the new lineage is a duplicate. We forward that, and any other + // errors, as this is the correct behavior for correctly-implemented backends. + let tree_info = self.backend.add_lineage(lineage, new_version, updates)?; + + // We then construct the lineage tracking data and shove it into the corresponding map. The + // history is guaranteed to be empty here, so we do not need to put an entry in the + // non-empty histories set. + let lineage_data = LineageData { + history: History::empty(self.config.max_history_versions()), + latest_version: tree_info.version(), + latest_root: tree_info.root(), + }; + self.lineage_data.insert(lineage, lineage_data); + + Ok(tree_info) + } + /// Performs the provided `updates` on the latest tree in the specified `lineage`, adding a /// single new root to the forest (corresponding to `new_version`) for the entire batch, and /// returning the data for the new root of the tree. /// /// If applying the provided `operations` results in no changes to the tree, then the root data - /// will be returned unchanged and no new tree will be allocated. + /// will be returned unchanged and no new tree will be allocated. It will retain its original + /// version, and not be returned with `new_version`. /// /// # Errors /// - /// - [`LargeSmtForestError::UnknownLineage`] If the provided `tree` specifies a lineage that is + /// - [`LargeSmtForestError::BadVersion`] if the `new_version` is not newer than the latest version + /// for the provided `lineage`. + /// - [`LargeSmtForestError::Fatal`] if the backend fails while being accessed. + /// - [`LargeSmtForestError::UnknownLineage`] if the provided `lineage` is /// not one known by the forest.
pub fn update_tree( &mut self, - _lineage: LineageId, - _new_version: VersionId, - _updates: SmtUpdateBatch, + lineage: LineageId, + new_version: VersionId, + updates: SmtUpdateBatch, ) -> Result { - todo!("LargeSmtForest::modify_tree") + // We initially check that the lineage is known and that the version is greater than the + // last known version for that lineage. + let lineage_data = if let Some(lineage_data) = self.lineage_data.get_mut(&lineage) { + if lineage_data.latest_version < new_version { + lineage_data + } else { + return Err(LargeSmtForestError::BadVersion { + provided: new_version, + latest: lineage_data.latest_version, + }); + } + } else { + return Err(LargeSmtForestError::UnknownLineage(lineage)); + }; + + // We now know that we have a valid lineage and a valid version, so we perform the update in + // the backend. + let reversion_set = self.backend.update_tree(lineage, new_version, updates)?; + + // We do not want to actually change anything if the tree would not change. + if reversion_set.is_empty() { + return Ok(TreeWithRoot::new( + lineage, + lineage_data.latest_version, + lineage_data.latest_root, + )); + } + + // The new root of the latest tree is actually given by the **old root** in our reverse + // mutation set. + let updated_root = reversion_set.old_root; + + // The call to `add_version_from_mutation_set` should only yield an error if the + // provided version does not pass the version check. This check has already been + // performed as a precondition for reaching this point of the tree update, and + // hence should only ever fail due to a programmer bug so we panic if it does fail. + lineage_data + .history + .add_version_from_mutation_set(lineage_data.latest_version, reversion_set) + .unwrap_or_else(|_| { + panic!("Unable to add valid version {} to history", lineage_data.latest_version) + }); + + // At this point we now have a historical version added, so we track that the lineage has a + // non-empty history. 
+ self.non_empty_histories.insert(lineage); + + // Now we just have to update the other portions of the lineage data in place... + lineage_data.latest_root = updated_root; + lineage_data.latest_version = new_version; + + // ...and return the correct value. + Ok(TreeWithRoot::new(lineage, new_version, updated_root)) } } @@ -393,67 +905,186 @@ impl LargeSmtForest { /// /// - [`LargeSmtForestError::UnknownLineage`] If any lineage in the batch of modifications is /// one that is not known by the forest. + /// - [`LargeSmtForestError::Fatal`] if any error occurs that leaves the forest in an inconsistent + /// state. + /// - [`LargeSmtForestError::BadVersion`] if the `new_version` is not newer than the latest + /// version of any lineage in the batch of modifications. pub fn update_forest( &mut self, - _new_version: VersionId, - _updates: SmtForestUpdateBatch, - ) -> Result> { - todo!("LargeSmtForest::modify_forest") + new_version: VersionId, + updates: SmtForestUpdateBatch, + ) -> Result> { + // We start by performing our precondition checks on the lineages and versions. We have to + // ensure both that all the lineages exist, and that the specified version transition is + // valid for all of those lineages. + updates + .lineages() + .map(|lineage| { + let Some(lineage_data) = self.lineage_data.get(lineage) else { + return Err(LargeSmtForestError::UnknownLineage(*lineage)); + }; + + if lineage_data.latest_version < new_version { + Ok(()) + } else { + Err(LargeSmtForestError::BadVersion { + provided: new_version, + latest: lineage_data.latest_version, + }) + } + }) + .collect::>>()?; + + // With the preconditions checked we can call into the backend to perform the updates, and + // we forward all errors as this will be correct for conformant backend implementations.
+ let reversion_sets = self.backend.update_forest(new_version, updates)?; + + // Now we have to update the lineage data (including the history) to ensure that the state + // remains consistent, and we build our return values while doing so. + reversion_sets + .into_iter() + .map(|(lineage, reversion)| { + let lineage_data = self + .lineage_data + .get_mut(&lineage) + .expect("Lineage has been checked to be present"); + + // If the operations change nothing we want to short-circuit for that tree. + if reversion.is_empty() { + return Ok(TreeWithRoot::new( + lineage, + lineage_data.latest_version, + lineage_data.latest_root, + )); + } + + let updated_root = reversion.old_root; + + // The call to `add_version_from_mutation_set` should only yield an error if the + // provided version does not pass the version check. This check has already been + // performed as a precondition for reaching this point of the forest update, and + // hence should only ever fail due to a programmer bug so we panic if it does fail. + lineage_data + .history + .add_version_from_mutation_set(lineage_data.latest_version, reversion) + .unwrap_or_else(|_| { + panic!( + "Unable to add valid version {} to history", + lineage_data.latest_version + ) + }); + + // At this point we know that we have a historical version for that tree, so we + // should track it as having a non-empty history. + self.non_empty_histories.insert(lineage); + + lineage_data.latest_root = updated_root; + lineage_data.latest_version = new_version; + + Ok(TreeWithRoot::new(lineage, new_version, updated_root)) + }) + .collect::>>() } } -// LINEAGE DATA +// INTERNAL UTILITY FUNCTIONS // ================================================================================================ -/// The data that the forest stores in memory for each lineage of trees. -#[derive(Clone, Debug)] -struct LineageData { - /// The historical overlays for the lineage. 
- pub history: History, +/// This block contains internal functions that exist to de-duplicate or modularize functionality +/// within the forest. These should not be exposed. +impl LargeSmtForest { + /// Applies the provided `historical_delta` on top of the provided `full_tree_leaf` to produce + /// the correct leaf for a historical opening. + fn merge_leaves( + &self, + full_tree_leaf: &SmtLeaf, + historical_delta: &CompactLeaf, + ) -> Result { + // We apply the historical delta on top of the existing entries to perform the reversion + // back to the previous state. + let mut leaf_entries = Map::new(); + leaf_entries.extend(full_tree_leaf.to_entries().map(|(k, v)| (*k, *v))); + leaf_entries.extend(historical_delta); - /// The version associated with the latest tree in the lineage. - pub latest_version: VersionId, + // Any entries that are still empty at this point should be removed. + let non_empties_only = leaf_entries.into_iter().filter(|(_, v)| *v != EMPTY_WORD).collect(); + Ok(SmtLeaf::new(non_empties_only, full_tree_leaf.index())?) + } + + /// Applies any historical changes contained in `history_view` on top of the merkle path + /// obtained from the full tree to produce the correct path for a historical opening. + fn merge_paths( + &self, + leaf_index: LeafIndex, + full_tree_path: &SparseMerklePath, + history_view: HistoryView, + ) -> Result { + let mut path_elems = [EMPTY_WORD; SMT_DEPTH as usize]; + let mut current_node_ix = NodeIndex::from(leaf_index); + for depth in (1..=SMT_DEPTH).rev() { + // This is the sibling node of the currently-tracked node. In other words, it is the + // node that needs to become part of the path. + let path_node_ix = current_node_ix.sibling(); + + if let Some(historical_value) = history_view.node_value(&path_node_ix) { + // If there is a historical value we need to use it, and so we write it to the + // correct slot in the path elements array. 
+ path_elems[depth as usize - 1] = *historical_value; + } else { + // If there isn't a historical value, we should delegate to the corresponding + // element in the path from the full-tree opening. + let bounded_depth = NonZeroU8::new(depth).expect("depth ∈ 1 ..= SMT_DEPTH]"); + path_elems[depth as usize - 1] = full_tree_path.at_depth(bounded_depth)? + } - /// The value of the root for the latest tree in the lineage. - pub latest_root: RootValue, + // We then need to move upward in the tree of the nodes we know. + current_node_ix = current_node_ix.parent(); + } + + // Now that we have filled in our `path_elems` we can use the construction of a sparse + // merkle path from a sized iterator, and thus not compute the mask ourselves. We + // reverse the iterator to make it go from deepest to shallowest as required. + Ok(SparseMerklePath::from_sized_iter(path_elems.into_iter().rev())?) + } } -impl LineageData { - /// Gets an iterator that yields each root in the lineage. +// TESTING FUNCTIONALITY +// ================================================================================================ + +/// This block contains functions that are exclusively for testing, providing some extra tools to +/// inspect the internal state of the forest that are unsafe to make part of the forest's public +/// API. +#[cfg(test)] +impl LargeSmtForest { + /// Gets an immutable reference to the underlying backend of the forest. + pub fn get_backend(&self) -> &B { + &self.backend + } + + /// Gets a mutable reference to the underlying backend of the forest. + pub fn get_backend_mut(&mut self) -> &mut B { + &mut self.backend + } + + /// Gets an immutable reference to the underlying configuration object for the forest. + pub fn get_config(&self) -> &Config { + &self.config + } + + /// Gets the history container corresponding to the provided `lineage`. 
+ /// + /// # Panics /// - /// The iteration order of the roots is guaranteed to move backward in time, with earlier items - /// being roots from versions closer to the present. The current root of the lineage will always - /// be the first item yielded by the iterator. - fn roots(&self) -> impl Iterator { - once(self.latest_root).chain(self.history.roots()) + /// - If the `lineage` is not one that the tree knows about. + pub fn get_history(&self, lineage: LineageId) -> &History { + self.lineage_data + .get(&lineage) + .map(|d| &d.history) + .unwrap_or_else(|| panic!("Lineage {lineage} had no data")) } - /// Truncates the information on this tree to the provided `version`, returning `true` if the - /// history is empty after truncation, and `false` otherwise. - /// - /// In the case that the version of the latest tree in the lineage is older than `version`, this - /// current version is always retained. - pub(super) fn truncate(&mut self, version: VersionId) -> bool { - if version >= self.latest_version { - // Truncation in the history is defined such that it never removes a version that could - // possibly serve as the latest delta for a newer version. This is because it cannot - // safely know if a version `v` is between the latest delta `d` and the current version - // `c`, as it has no knowledge of the current version. - // - // Thus, if we have a version `v` such that `d <= v < c`, we need to retain the - // reversion delta `d` in the history to correctly service queries for `v`. If, however, - // we have `d < c <= v` we need to explicitly remove the last delta as well. - // - // To that end, we handle the latter case first, by explicitly calling - // `History::clear()`. - self.history.clear(); - true - } else { - // The other case is `v < c`, which is handled simply by the truncation mechanism in the - // history as we want. In other words, it retains the necessary delta, and so we can - // just call it here. 
- self.history.truncate(version); - false - } + /// Gets an immutable reference to the set tracking the lineages that have non-empty histories. + pub fn get_non_empty_histories(&self) -> &Set { + &self.non_empty_histories } } diff --git a/miden-crypto/src/merkle/smt/large_forest/operation.rs b/miden-crypto/src/merkle/smt/large_forest/operation.rs index 909d7615a..c9912b485 100644 --- a/miden-crypto/src/merkle/smt/large_forest/operation.rs +++ b/miden-crypto/src/merkle/smt/large_forest/operation.rs @@ -5,7 +5,7 @@ use alloc::vec::Vec; -use crate::{Map, Set, Word, merkle::smt::large_forest::root::LineageId}; +use crate::{EMPTY_WORD, Map, Set, Word, merkle::smt::large_forest::root::LineageId}; // FOREST OPERATION // ================================================================================================ @@ -42,6 +42,15 @@ impl ForestOperation { } } +impl From for (Word, Word) { + fn from(value: ForestOperation) -> Self { + match value { + ForestOperation::Insert { key, value } => (key, value), + ForestOperation::Remove { key } => (key, EMPTY_WORD), + } + } +} + // TREE BATCH // ================================================================================================ @@ -101,6 +110,32 @@ impl SmtUpdateBatch { } } +impl IntoIterator for SmtUpdateBatch { + type Item = ForestOperation; + type IntoIter = alloc::vec::IntoIter; + + /// Consumes the batch as an iterator yielding operations while respecting the guarantees given + /// by [`Self::consume`]. + /// + /// The iteration order is unspecified. 
+ fn into_iter(self) -> Self::IntoIter { + self.consume().into_iter() + } +} + +impl From for Vec<(Word, Word)> { + fn from(value: SmtUpdateBatch) -> Self { + value + .consume() + .into_iter() + .map(|op| match op { + ForestOperation::Insert { key, value } => (key, value), + ForestOperation::Remove { key } => (key, EMPTY_WORD), + }) + .collect() + } +} + impl From for SmtUpdateBatch where I: Iterator, @@ -160,6 +195,11 @@ impl SmtForestUpdateBatch { self.operations.entry(lineage).or_insert_with(SmtUpdateBatch::empty) } + /// Gets an iterator over the lineages + pub fn lineages(&self) -> impl Iterator { + self.operations.keys() + } + /// Consumes the batch as a map of batches, with each individual batch guaranteed to be in /// sorted order and contain only the last operation in the batch for any given key. pub fn consume(self) -> Map> { @@ -167,28 +207,42 @@ impl SmtForestUpdateBatch { } } +impl IntoIterator for SmtForestUpdateBatch { + type Item = (LineageId, Vec); + type IntoIter = crate::MapIntoIter>; + + /// Consumes the batch as an iterator yielding pairs of `(lineage, operations)` while respecting + /// the guarantees given by [`Self::consume`]. + /// + /// The iteration order is unspecified. + fn into_iter(self) -> Self::IntoIter { + self.consume().into_iter() + } +} + // TESTS // ================================================================================================ -#[cfg(feature = "std")] #[cfg(test)] mod test { use itertools::Itertools; use super::*; - use crate::rand::test_utils::rand_value; + use crate::rand::test_utils::ContinuousRng; #[test] fn tree_batch() { + let mut rng = ContinuousRng::new([0x12; 32]); + // We start by creating an empty tree batch. let mut batch = SmtUpdateBatch::empty(); // Let's make three operations on different keys... 
- let o1_key: Word = rand_value(); - let o1_value: Word = rand_value(); - let o2_key: Word = rand_value(); - let o3_key: Word = rand_value(); - let o3_value: Word = rand_value(); + let o1_key: Word = rng.value(); + let o1_value: Word = rng.value(); + let o2_key: Word = rng.value(); + let o3_key: Word = rng.value(); + let o3_value: Word = rng.value(); let o1 = ForestOperation::insert(o1_key, o1_value); let o2 = ForestOperation::remove(o2_key); @@ -209,7 +263,7 @@ mod test { // Let's now make two additional operations with keys that overlay with keys from the first // three... let o4_key = o2_key; - let o4_value: Word = rand_value(); + let o4_value: Word = rng.value(); let o5_key = o1_key; let o4 = ForestOperation::insert(o4_key, o4_value); @@ -235,19 +289,21 @@ mod test { #[test] fn forest_batch() { + let mut rng = ContinuousRng::new([0x13; 32]); + // We can start by creating an empty forest batch. let mut batch = SmtForestUpdateBatch::empty(); // Let's start by adding a few operations to a tree. - let t1_lineage: LineageId = rand_value(); - let t1_o1 = ForestOperation::insert(rand_value(), rand_value()); - let t1_o2 = ForestOperation::remove(rand_value()); + let t1_lineage: LineageId = rng.value(); + let t1_o1 = ForestOperation::insert(rng.value(), rng.value()); + let t1_o2 = ForestOperation::remove(rng.value()); batch.add_operations(t1_lineage, vec![t1_o1, t1_o2].into_iter()); // We can also add them differently. - let t2_lineage: LineageId = rand_value(); - let t2_o1 = ForestOperation::remove(rand_value()); - let t2_o2 = ForestOperation::insert(rand_value(), rand_value()); + let t2_lineage: LineageId = rng.value(); + let t2_o1 = ForestOperation::remove(rng.value()); + let t2_o2 = ForestOperation::insert(rng.value(), rng.value()); batch.operations(t2_lineage).add_operations(vec![t2_o1, t2_o2].into_iter()); // When we consume the batch, each per-tree batch should be unique by key and sorted. 
diff --git a/miden-crypto/src/merkle/smt/large_forest/property_tests.rs b/miden-crypto/src/merkle/smt/large_forest/property_tests.rs index b59f0e435..2eb3064bd 100644 --- a/miden-crypto/src/merkle/smt/large_forest/property_tests.rs +++ b/miden-crypto/src/merkle/smt/large_forest/property_tests.rs @@ -1 +1,186 @@ +#![cfg(test)] //! This module contains the property tests for the SMT forest. + +use alloc::{collections::BTreeSet, string::ToString, vec::Vec}; +use core::error::Error; + +use itertools::Itertools; +use proptest::prelude::*; + +use crate::{ + EMPTY_WORD, Felt, Map, ONE, Word, ZERO, + merkle::smt::{ + ForestInMemoryBackend, ForestOperation, LargeSmtForest, LeafIndex, LineageId, + MAX_LEAF_ENTRIES, SMT_DEPTH, Smt, SmtUpdateBatch, TreeEntry, TreeId, VersionId, + }, +}; + +// CONSTANTS +// ================================================================================================ + +/// The minimum number of entries that can be included in a batch. +const MIN_BATCH_ENTRIES: usize = 0; + +/// The maximum number of entries that can be included in a batch. +const MAX_BATCH_ENTRIES: usize = 10_000; + +// GENERATORS +// ================================================================================================ + +/// Generates an arbitrary lineage id. +fn arbitrary_lineage() -> impl Strategy { + prop::array::uniform32(any::()).prop_map(LineageId::new) +} + +/// Generates an arbitrary version identifier. +fn arbitrary_version() -> impl Strategy { + any::() +} + +/// Generates an arbitrary valid felt value. +fn arbitrary_felt() -> impl Strategy { + prop_oneof![any::().prop_map(Felt::new), Just(ZERO), Just(ONE)] +} + +/// Generates an arbitrary valid word value. +fn arbitrary_word() -> impl Strategy { + prop_oneof![prop::array::uniform4(arbitrary_felt()).prop_map(Word::new), Just(Word::empty()),] +} + +/// Generates a random number of unique (non-overlapping) key-value pairs. +/// +/// Note that the generated pairs may well have the same leaf index. 
+fn arbitrary_entries() -> impl Strategy> { + prop::collection::vec( + (arbitrary_word(), arbitrary_word()), + MIN_BATCH_ENTRIES..=MAX_BATCH_ENTRIES, + ) + .prop_map(move |entries| { + // We want to avoid duplicate entries. It is well-defined, but it helps with test simplicity + // to avoid it here. + let mut used_keys = BTreeSet::new(); + let mut keys_in_leaf: Map, usize> = Map::default(); + + entries + .into_iter() + .flat_map(|(k, v)| { + let leaf_index = LeafIndex::from(k); + let count = keys_in_leaf.entry(leaf_index).or_default(); + + // We don't want to overfill a leaf. + if *count >= MAX_LEAF_ENTRIES { + return None; + } else { + *count += 1; + } + + used_keys.insert(k); + Some((k, v)) + }) + .collect() + }) +} + +/// Generates an arbitrary batch of updates to be performed on an arbitrary tree. +fn arbitrary_batch() -> impl Strategy { + arbitrary_entries().prop_map(|e| { + SmtUpdateBatch::new(e.into_iter().map(|(k, v)| { + if v == EMPTY_WORD { + ForestOperation::remove(k) + } else { + ForestOperation::insert(k, v) + } + })) + }) +} + +// ENTRIES +// ================================================================================================ + +proptest! { + #![proptest_config(ProptestConfig::with_cases(20))] + + /// This test ensures that the `entries` iterator for the forest always returns the exact same + /// values as the `entries` iterator over a basic SMT with the same state. + #[test] + fn entries_correct( + lineage in arbitrary_lineage(), + version in arbitrary_version(), + entries_v1 in arbitrary_batch(), + entries_v2 in arbitrary_batch(), + ) { + // We now create a forest and add the lineage to it using the first set of entries. 
+ let mut forest = LargeSmtForest::new(ForestInMemoryBackend::new()).map_err(to_fail)?; + forest.add_lineage(lineage, version, entries_v1.clone()).map_err(to_fail)?; + forest.update_tree(lineage, version + 1, entries_v2.clone()).map_err(to_fail)?; + + // We then create two auxiliary trees to work with, to compare our results against. + let mut tree_v1 = Smt::new(); + let tree_v1_mutations = + tree_v1.compute_mutations(Vec::from(entries_v1).into_iter()).map_err(to_fail)?; + tree_v1.apply_mutations(tree_v1_mutations).map_err(to_fail)?; + + let mut tree_v2 = tree_v1.clone(); + let tree_v2_mutations = + tree_v2.compute_mutations(Vec::from(entries_v2).into_iter()).map_err(to_fail)?; + tree_v2.apply_mutations(tree_v2_mutations).map_err(to_fail)?; + + // Iterating over the historical version of the lineage in the forest should produce exactly + // the same entries as iterating over V1 of our test tree. + let old_version = TreeId::new(lineage, version); + let forest_entries = forest.entries(old_version).map_err(to_fail)?.sorted().collect_vec(); + let tree_entries = tree_v1 + .entries() + .map(|(k, v)| TreeEntry { key: *k, value: *v }) + .sorted() + .collect_vec(); + assert_eq!(forest_entries, tree_entries); + + // Iterating over the newest version of the lineage in the forest should provide exactly the + // same entries as iterating over V2 of our test tree. + let current_version = TreeId::new(lineage, version + 1); + let forest_entries = forest.entries(current_version).map_err(to_fail)?.sorted().collect_vec(); + let tree_entries = tree_v2 + .entries() + .map(|(k, v)| TreeEntry { key: *k, value: *v }) + .sorted() + .collect_vec(); + assert_eq!(forest_entries, tree_entries); + } + + /// This test ensures that the `entries` iterator for the forest will never return entries where + /// the value is the empty word. 
+ #[test] + fn entries_never_yields_empty_values( + lineage in arbitrary_lineage(), + version in arbitrary_version(), + entries_v1 in arbitrary_batch(), + entries_v2 in arbitrary_batch(), + ) { + // We now create a forest and add the lineage to it using the first set of entries. + let mut forest = LargeSmtForest::new(ForestInMemoryBackend::new()).map_err(to_fail)?; + forest.add_lineage(lineage, version, entries_v1.clone()).map_err(to_fail)?; + forest.update_tree(lineage, version + 1, entries_v2.clone()).map_err(to_fail)?; + + // Iterating over the historical version of the lineage in the forest should never yield an + // entry whose value is the empty word. + let old_version = TreeId::new(lineage, version); + assert!(forest.entries(old_version).map_err(to_fail)?.all(|e| e.value != EMPTY_WORD)); + + // Iterating over the newest version of the lineage in the forest should likewise never + // yield an entry whose value is the empty word. + let current_version = TreeId::new(lineage, version + 1); + assert!(forest.entries(current_version).map_err(to_fail)?.all(|e| e.value != EMPTY_WORD)); + } +} + +// UTILS +// ================================================================================================ + +/// Converts the provided `error` into a test case failure. +/// +/// This is necessary because the `From` implementation is only available in builds with +/// `std` enabled, and we want error forwarding to not suck. +fn to_fail(error: impl Error) -> TestCaseError { + TestCaseError::fail(error.to_string()) +} diff --git a/miden-crypto/src/merkle/smt/large_forest/root.rs b/miden-crypto/src/merkle/smt/large_forest/root.rs index d871dea54..179282b68 100644 --- a/miden-crypto/src/merkle/smt/large_forest/root.rs +++ b/miden-crypto/src/merkle/smt/large_forest/root.rs @@ -1,8 +1,11 @@ //! This module contains utility types for working with roots and trees as part of the forest.
-use crate::Word; #[cfg(test)] use crate::rand::Randomizable; +use crate::{ + Word, + merkle::smt::{LeafIndex, SMT_DEPTH}, +}; // TYPES // ================================================================================================ @@ -191,8 +194,13 @@ pub enum RootInfo { // ================================================================================================ /// An entry in a given tree. -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] pub struct TreeEntry { pub key: Word, pub value: Word, } +impl TreeEntry { + pub fn index(&self) -> LeafIndex { + LeafIndex::from(self.key) + } +} diff --git a/miden-crypto/src/merkle/smt/large_forest/tests.rs b/miden-crypto/src/merkle/smt/large_forest/tests.rs index 49c284ebd..25ad0aa6b 100644 --- a/miden-crypto/src/merkle/smt/large_forest/tests.rs +++ b/miden-crypto/src/merkle/smt/large_forest/tests.rs @@ -1 +1,1114 @@ -//! This module contains the handwritten tests for the SMT forest. +#![cfg(test)] +//! This module contains the handwritten tests of the functionality for the SMT forest. These tests +//! are for the basic functionality. +//! +//! Wherever possible, these tests rely on the correctness of the existing [`Smt`] implementation. +//! It is used as a point of comparison to avoid the need to hard-code specific values and scenarios +//! for the trees, instead allowing us to compare things directly.
+ +use alloc::vec::Vec; + +use assert_matches::assert_matches; +use itertools::Itertools; + +use super::{Config, Result}; +use crate::{ + EMPTY_WORD, Word, + merkle::{ + EmptySubtreeRoots, + smt::{ + Backend, ForestInMemoryBackend, ForestOperation, LargeSmtForest, LargeSmtForestError, + LeafIndex, RootInfo, Smt, SmtForestUpdateBatch, SmtUpdateBatch, TreeId, VersionId, + large_forest::root::{LineageId, TreeEntry, TreeWithRoot}, + }, + }, + rand::test_utils::ContinuousRng, +}; + +// TYPE ALIASES +// ================================================================================================ + +/// We only care about testing with the in-memory backend here for correct functionality. +type Forest = LargeSmtForest; + +// CONSTRUCTION TESTS +// ================================================================================================ + +#[test] +fn new() -> Result<()> { + // Constructing a forest using the default constructor should yield the default configuration. + let backend = ForestInMemoryBackend::new(); + let forest = Forest::new(backend)?; + + // We can just sanity-check the configuration to ensure that things started up right. + let config = forest.get_config(); + + assert_eq!(config.max_history_versions(), 10); + + Ok(()) +} + +#[test] +fn with_config() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let forest = Forest::with_config(backend, Config::default().with_max_history_versions(30))?; + + // Let us sanity check using the config again. + let config = forest.get_config(); + + assert_eq!(config.max_history_versions(), 30); + + Ok(()) +} + +// BASIC QUERIES TESTS +// ================================================================================================ + +#[test] +fn roots() -> Result<()> { + // We start by constructing our forest. 
+ let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x96; 32]); + + // We add a number of lineages to the forest, some of which have the same _root_ value. + let version_1: VersionId = rng.value(); + let lineage_1: LineageId = rng.value(); + let lineage_2: LineageId = rng.value(); + let lineage_3: LineageId = rng.value(); + + let root_1 = forest.add_lineage(lineage_1, version_1, SmtUpdateBatch::default())?; + assert_eq!( + root_1, + TreeWithRoot::new(lineage_1, version_1, *EmptySubtreeRoots::entry(64, 0)) + ); + let root_2 = forest.add_lineage(lineage_2, version_1, SmtUpdateBatch::default())?; + assert_eq!( + root_2, + TreeWithRoot::new(lineage_2, version_1, *EmptySubtreeRoots::entry(64, 0)) + ); + let root_3 = forest.add_lineage(lineage_3, version_1, SmtUpdateBatch::default())?; + assert_eq!( + root_3, + TreeWithRoot::new(lineage_3, version_1, *EmptySubtreeRoots::entry(64, 0)) + ); + + // We then update one of them to make sure it ends up with a historical root as well. + let k1: Word = rng.value(); + let v1: Word = rng.value(); + let k2: Word = rng.value(); + let v2: Word = rng.value(); + + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(k1, v1); + operations.add_insert(k2, v2); + + let version_2: VersionId = version_1 + 1; + let root_4 = forest.update_tree(lineage_1, version_2, operations)?; + + // We can now check that the roots iterator contains the items we expect. + let roots = forest.roots().collect::>(); + assert_eq!(roots.len(), 4); + assert!(roots.contains(&root_1.into())); + assert!(roots.contains(&root_2.into())); + assert!(roots.contains(&root_3.into())); + assert!(roots.contains(&root_4.into())); + + Ok(()) +} + +#[test] +fn latest_version() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x69; 32]); + + // Let's add some trees to the forest. 
Two are empty and one is added with data. + let version_1: VersionId = rng.value(); + let version_2: VersionId = version_1 + 1; + let version_3: VersionId = version_2 + 1; + + let lineage_1: LineageId = rng.value(); + let lineage_2: LineageId = rng.value(); + let lineage_3: LineageId = rng.value(); + + let k1: Word = rng.value(); + let v1: Word = rng.value(); + let k2: Word = rng.value(); + let v2: Word = rng.value(); + + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(k1, v1); + operations.add_insert(k2, v2); + + forest.add_lineage(lineage_1, version_1, SmtUpdateBatch::default())?; + forest.add_lineage(lineage_2, version_1, SmtUpdateBatch::default())?; + forest.add_lineage(lineage_3, version_1, operations)?; + + // Now let's update one of the empty ones twice... + let k3: Word = rng.value(); + let v3: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(k3, v3); + forest.update_tree(lineage_1, version_2, operations)?; + + let k4: Word = rng.value(); + let v4: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(k4, v4); + forest.update_tree(lineage_1, version_3, operations)?; + + // ...and the non-empty one once with a non-contiguous version. + let k5: Word = rng.value(); + let v5: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(k5, v5); + forest.update_tree(lineage_3, version_3, operations)?; + + // Now let's query the latest version for all of them. + assert_eq!(forest.latest_version(lineage_1).unwrap(), version_3); + assert_eq!(forest.latest_version(lineage_2).unwrap(), version_1); + assert_eq!(forest.latest_version(lineage_3).unwrap(), version_3); + + // Finally, if we look for a lineage that doesn't exist, we should get `None` back. 
+ let ne_lineage: LineageId = rng.value(); + assert!(forest.latest_version(ne_lineage).is_none()); + + Ok(()) +} + +#[test] +fn lineage_roots() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x42; 32]); + + // Let's add a lineage to the forest and update it a few times. + let lineage: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let version_2 = version_1 + 1; + let version_3 = version_2 + 1; + let root_1 = forest.add_lineage(lineage, version_1, SmtUpdateBatch::default())?; + + let k1: Word = rng.value(); + let v1: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(k1, v1); + let root_2 = forest.update_tree(lineage, version_2, operations)?; + + let k2: Word = rng.value(); + let v2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(k2, v2); + let root_3 = forest.update_tree(lineage, version_3, operations)?; + + // Now we can query for the roots in this lineage. + let lineage_roots = forest + .lineage_roots(lineage) + .expect("Existing lineage should have roots") + .collect::>(); + assert_eq!(lineage_roots.len(), 3); + + // For this method, the contract insists that it is ordered from newer roots in the lineage to + // older roots. + assert_eq!(lineage_roots[0], root_3.root()); + assert_eq!(lineage_roots[1], root_2.root()); + assert_eq!(lineage_roots[2], root_1.root()); + + // If, however, we query for the roots of a non-existent lineage, we should get `None` back. + let ne_lineage: LineageId = rng.value(); + assert!(forest.lineage_roots(ne_lineage).is_none()); + + Ok(()) +} + +#[test] +fn latest_root() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x97; 32]); + + // Let's add a lineage to the forest. 
+ let lineage: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let version_2 = version_1 + 1; + let root_1 = forest.add_lineage(lineage, version_1, SmtUpdateBatch::default())?; + + // We can get its latest root. + assert_eq!(forest.latest_root(lineage), Some(root_1.root())); + + // And then update it... + let k1: Word = rng.value(); + let v1: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(k1, v1); + let root_2 = forest.update_tree(lineage, version_2, operations)?; + + // ...to check that we get the updated root. + assert_eq!(forest.latest_root(lineage), Some(root_2.root())); + + // However, if we query for a nonexistent lineage, we should get `None` back. + let ne_lineage: LineageId = rng.value(); + assert!(forest.latest_root(ne_lineage).is_none()); + + Ok(()) +} + +#[test] +fn tree_count() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x67; 32]); + + // A newly-initialized forest should know about only the trees that its backend knows about. + assert_eq!(forest.tree_count(), forest.get_backend().trees()?.count()); + + // Now let's add some trees. 
+ let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let version_2 = version_1 + 1; + let version_3 = version_2 + 1; + forest.add_lineage(lineage_1, version_1, SmtUpdateBatch::default())?; + + let k1: Word = rng.value(); + let v1: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(k1, v1); + forest.update_tree(lineage_1, version_2, operations)?; + + let k2: Word = rng.value(); + let v2: Word = rng.value(); + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(k2, v2); + forest.update_tree(lineage_1, version_3, operations)?; + + let lineage_2: LineageId = rng.value(); + forest.add_lineage(lineage_2, version_1, SmtUpdateBatch::default())?; + + // As there are two current trees and two historical versions, we should see four trees total. + assert_eq!(forest.tree_count(), 4); + + Ok(()) +} + +#[test] +fn lineage_count() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x64; 32]); + + // A newly-initialized forest should know about only the lineages that its backend knows about. + assert_eq!(forest.lineage_count(), forest.get_backend().lineages()?.count()); + + // So now let's add some lineages. + let version: VersionId = rng.value(); + let lineage_1: LineageId = rng.value(); + forest.add_lineage(lineage_1, version, SmtUpdateBatch::default())?; + let lineage_2: LineageId = rng.value(); + forest.add_lineage(lineage_2, version, SmtUpdateBatch::default())?; + let lineage_3: LineageId = rng.value(); + forest.add_lineage(lineage_3, version, SmtUpdateBatch::default())?; + + // We should see three lineages. + assert_eq!(forest.lineage_count(), 3); + + // This should stay the same if we update a tree. 
+ let operations = + SmtUpdateBatch::new([ForestOperation::insert(rng.value(), rng.value())].into_iter()); + forest.update_tree(lineage_1, version + 1, operations)?; + assert_eq!(forest.lineage_count(), 3); + + Ok(()) +} + +#[test] +fn root_info() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x32; 32]); + + // Let's start by adding a lineage and updating it. + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let operations = + SmtUpdateBatch::new([ForestOperation::insert(rng.value(), rng.value())].into_iter()); + let historical_root = forest.add_lineage(lineage_1, version_1, operations)?; + + let version_2 = version_1 + 1; + let operations = + SmtUpdateBatch::new([ForestOperation::insert(rng.value(), rng.value())].into_iter()); + let current_root = forest.update_tree(lineage_1, version_2, operations)?; + + // When we query for a root (lineage_1, version_1), we should get back HistoricalVersion. + assert_eq!( + forest.root_info(TreeId::new(lineage_1, version_1)), + RootInfo::HistoricalVersion(historical_root.root()) + ); + + // When we query for a root (lineage_1, version_2), we should get back LatestVersion. + assert_eq!( + forest.root_info(TreeId::new(lineage_1, version_2)), + RootInfo::LatestVersion(current_root.root()) + ); + + // When we query for a nonexistent version in an existing lineage we should get back Missing. + let version_3 = version_2 + 1; + assert_eq!(forest.root_info(TreeId::new(lineage_1, version_3)), RootInfo::Missing); + + // As we should also get back when the lineage doesn't exist. 
+ let lineage_2: LineageId = rng.value(); + assert_eq!(forest.root_info(TreeId::new(lineage_2, version_1)), RootInfo::Missing); + + Ok(()) +} + +// QUERIES TESTS +// ================================================================================================ + +#[test] +fn open() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x08; 32]); + + // When we query for a tree with a lineage that is not known by the forest, we should get an + // error back. + let missing_lineage: LineageId = rng.value(); + let missing_version: VersionId = rng.value(); + let missing_key: Word = rng.value(); + + let result = forest.open(TreeId::new(missing_lineage, missing_version), missing_key); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), LargeSmtForestError::UnknownLineage(l) if l == missing_lineage); + + // Now let's add a lineage with two entries to the forest. + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let key_1: Word = rng.value(); + let value_1_v1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2_v1: Word = rng.value(); + forest.add_lineage( + lineage_1, + version_1, + SmtUpdateBatch::new( + [ + ForestOperation::insert(key_1, value_1_v1), + ForestOperation::insert(key_2, value_2_v1), + ] + .into_iter(), + ), + )?; + + // If we query for a tree with a known lineage but unknown version, we should also get an error + // back. + let missing_tree = TreeId::new(lineage_1, missing_version); + let result = forest.open(missing_tree, missing_key); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), LargeSmtForestError::UnknownTree(t) if t == missing_tree); + + // We should also get an error back if we query for a version that is NEWER than the + // latest-known version.
+ let too_new_version = version_1 + 1; + let too_new_tree = TreeId::new(lineage_1, too_new_version); + let result = forest.open(too_new_tree, missing_key); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), LargeSmtForestError::UnknownTree(t) if t == too_new_tree); + + // Let's set up a basic SMT to compare the forest's openings against for correctness. + let mut tree_v1 = Smt::new(); + tree_v1.insert(key_1, value_1_v1)?; + tree_v1.insert(key_2, value_2_v1)?; + + // And get a random opening on the initial tree. + let random_key: Word = rng.value(); + let forest_opening = forest.open(TreeId::new(lineage_1, version_1), random_key)?; + let tree_v1_opening = tree_v1.open(&random_key); + assert_eq!(forest_opening, tree_v1_opening); + + // Now let's make some modifications to the tree. + let version_2: VersionId = rng.value(); + let value_1_v2: Word = rng.value(); + let key_3: Word = rng.value(); + let value_3_v1: Word = rng.value(); + forest.update_tree( + lineage_1, + version_2, + SmtUpdateBatch::new( + [ + ForestOperation::insert(key_1, value_1_v2), + ForestOperation::insert(key_3, value_3_v1), + ForestOperation::remove(key_2), + ] + .into_iter(), + ), + )?; + + // And mirror it on our tree. + let mut tree_v2 = tree_v1.clone(); + tree_v2.insert(key_1, value_1_v2)?; + tree_v2.insert(key_3, value_3_v1)?; + tree_v2.insert(key_2, EMPTY_WORD)?; + + // These two should again produce the same opening when we query for the latest version. + let random_key: Word = rng.value(); + let forest_opening = forest.open(TreeId::new(lineage_1, version_2), random_key)?; + let tree_v2_opening = tree_v2.open(&random_key); + assert_eq!(forest_opening, tree_v2_opening); + + // Most importantly, however, we should get the same opening from the forest when querying a + // historical tree version as we do from the actual tree.
+ let forest_opening = forest.open(TreeId::new(lineage_1, version_1), random_key)?; + let tree_v1_opening = tree_v1.open(&random_key); + assert_eq!(forest_opening, tree_v1_opening); + + Ok(()) +} + +#[test] +fn get() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x12; 32]); + + // When we query for a tree with a lineage that is not known by the forest, we should get an + // error back. + let missing_lineage: LineageId = rng.value(); + let missing_version: VersionId = rng.value(); + let missing_key: Word = rng.value(); + + let result = forest.get(TreeId::new(missing_lineage, missing_version), missing_key); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), LargeSmtForestError::UnknownLineage(l) if l == missing_lineage); + + // Now let's add a lineage with two entries to the forest. + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let key_1: Word = rng.value(); + let value_1_v1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2_v1: Word = rng.value(); + forest.add_lineage( + lineage_1, + version_1, + SmtUpdateBatch::new( + [ + ForestOperation::insert(key_1, value_1_v1), + ForestOperation::insert(key_2, value_2_v1), + ] + .into_iter(), + ), + )?; + + // If we query for a tree with a known lineage but unknown version, we should also get an error + // back. + let missing_tree = TreeId::new(lineage_1, missing_version); + let result = forest.get(missing_tree, missing_key); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), LargeSmtForestError::UnknownTree(t) if t == missing_tree); + + // We should also get an error back if we query for a version that is NEWER than the + // latest-known version.
+ let too_new_version = version_1 + 1; + let too_new_tree = TreeId::new(lineage_1, too_new_version); + let result = forest.get(too_new_tree, missing_key); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), LargeSmtForestError::UnknownTree(t) if t == too_new_tree); + + // If we query for a key that has never been inserted we want to get back `None`. + let tree_v1 = TreeId::new(lineage_1, version_1); + let non_inserted_key: Word = rng.value(); + assert!(forest.get(tree_v1, non_inserted_key)?.is_none()); + + // But if we query for a key that has been, we should get back the corresponding value. + assert_eq!(forest.get(tree_v1, key_1)?, Some(value_1_v1)); + assert_eq!(forest.get(tree_v1, key_2)?, Some(value_2_v1)); + + // Now let's add another version. + let version_2: VersionId = version_1 + 1; + let value_1_v2: Word = rng.value(); + let key_3: Word = rng.value(); + let value_3_v1: Word = rng.value(); + forest.update_tree( + lineage_1, + version_2, + SmtUpdateBatch::new( + [ + ForestOperation::insert(key_1, value_1_v2), + ForestOperation::insert(key_3, value_3_v1), + ] + .into_iter(), + ), + )?; + + // When we query at the new version we should see the updated values for all extant keys. + let tree_v2 = TreeId::new(lineage_1, version_2); + assert_eq!(forest.get(tree_v2, key_1)?, Some(value_1_v2)); + assert_eq!(forest.get(tree_v2, key_2)?, Some(value_2_v1)); + assert_eq!(forest.get(tree_v2, key_3)?, Some(value_3_v1)); + + // But if we query for the older version we should still see the older values. + assert_eq!(forest.get(tree_v1, key_1)?, Some(value_1_v1)); + assert_eq!(forest.get(tree_v1, key_2)?, Some(value_2_v1)); + assert!(forest.get(tree_v1, key_3)?.is_none()); + + Ok(()) +} + +#[test] +fn entry_count() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x22; 32]); + + // Let's start by adding a lineage with some values. 
+ let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let key_1: Word = rng.value(); + let value_1_v1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2_v1: Word = rng.value(); + let mut key_3: Word = rng.value(); + key_3[3] = key_1[3]; + let value_3_v1: Word = rng.value(); + + let mut operations = SmtUpdateBatch::empty(); + operations.add_insert(key_1, value_1_v1); + operations.add_insert(key_2, value_2_v1); + operations.add_insert(key_3, value_3_v1); + + forest.add_lineage(lineage_1, version_1, operations)?; + + // We'll also update this so we have a historical version in play to be sure things work. + let version_2: VersionId = version_1 + 1; + let value_1_v2: Word = rng.value(); + let mut key_4: Word = rng.value(); + key_4[3] = key_2[3]; + let value_4_v1: Word = rng.value(); + + let mut operations = SmtUpdateBatch::empty(); + operations.add_remove(key_3); + operations.add_insert(key_1, value_1_v2); + operations.add_insert(key_4, value_4_v1); + + forest.update_tree(lineage_1, version_2, operations)?; + + // If we try and get the entry count over a lineage that does not exist we should see an error. + let ne_lineage: LineageId = rng.value(); + match forest.entry_count(TreeId::new(ne_lineage, version_1)) { + Err(e) => assert_matches!(e, LargeSmtForestError::UnknownLineage(l) if l == ne_lineage), + Ok(_) => panic!("Result was not an error"), + }; + + // Similarly, if we try and get the entry count for a nonexistent version in an existing lineage + // we should also see an error. + let tree = TreeId::new(lineage_1, version_1 - 1); + match forest.entry_count(tree) { + Err(e) => assert_matches!(e, LargeSmtForestError::UnknownTree(t) if t == tree), + Ok(_) => panic!("Result was not an error"), + }; + + // We should also get an error back if we query for a version that is NEWER than the + // latest-known version. 
+ let too_new_version = version_2 + 1; + let too_new_tree = TreeId::new(lineage_1, too_new_version); + let result = forest.entry_count(too_new_tree); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), LargeSmtForestError::UnknownTree(t) if t == too_new_tree); + + // If we query for extant trees we should see the correct count regardless of whether it is the + // current tree or a historical tree. + assert_eq!(forest.entry_count(TreeId::new(lineage_1, version_1))?, 3); + assert_eq!(forest.entry_count(TreeId::new(lineage_1, version_2))?, 3); + + Ok(()) +} + +#[test] +fn entries() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x47; 32]); + + // Let's start by adding a lineage with some values. + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let key_1: Word = rng.value(); + let value_1_v1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2_v1: Word = rng.value(); + let mut key_3: Word = rng.value(); + key_3[3] = key_1[3]; + let value_3_v1: Word = rng.value(); + + let mut operations = SmtUpdateBatch::empty(); + operations.add_insert(key_1, value_1_v1); + operations.add_insert(key_2, value_2_v1); + operations.add_insert(key_3, value_3_v1); + + forest.add_lineage(lineage_1, version_1, operations)?; + + // We'll also update this so we have a historical version in play to be sure things work. + let version_2: VersionId = version_1 + 1; + let value_1_v2: Word = rng.value(); + let mut key_4: Word = rng.value(); + key_4[3] = key_2[3]; + let value_4_v1: Word = rng.value(); + + let mut operations = SmtUpdateBatch::empty(); + operations.add_remove(key_3); + operations.add_insert(key_1, value_1_v2); + operations.add_insert(key_4, value_4_v1); + + forest.update_tree(lineage_1, version_2, operations)?; + + // If we try and get entries over a lineage that does not exist we should see an error. 
+ let ne_lineage: LineageId = rng.value(); + match forest.entries(TreeId::new(ne_lineage, version_1)) { + Err(e) => assert_matches!(e, LargeSmtForestError::UnknownLineage(l) if l == ne_lineage), + Ok(_) => panic!("Result was not an error"), + }; + + // Similarly, if we try and get entries for a nonexistent version in an existing lineage we + // should also see an error. + let tree = TreeId::new(lineage_1, version_1 - 1); + match forest.entries(tree) { + Err(e) => assert_matches!(e, LargeSmtForestError::UnknownTree(t) if t == tree), + Ok(_) => panic!("Result was not an error"), + }; + + // We should also get an error back if we query for a version that is NEWER than the + // latest-known version. + let too_new_version = version_2 + 1; + let too_new_tree = TreeId::new(lineage_1, too_new_version); + match forest.entries(too_new_tree) { + Err(e) => assert_matches!(e, LargeSmtForestError::UnknownTree(t) if t == too_new_tree), + Ok(_) => panic!("Result was not an error"), + } + + // Grabbing the entries for the latest version in a lineage should do the right thing. + let current_tree = TreeId::new(lineage_1, version_2); + assert_eq!(forest.entries(current_tree)?.count(), 3); + assert!( + forest + .entries(current_tree)? + .contains(&TreeEntry { key: key_1, value: value_1_v2 }) + ); + assert!( + forest + .entries(current_tree)? + .contains(&TreeEntry { key: key_2, value: value_2_v1 }) + ); + assert!( + forest + .entries(current_tree)? + .contains(&TreeEntry { key: key_4, value: value_4_v1 }) + ); + assert!( + !forest + .entries(current_tree)? + .contains(&TreeEntry { key: key_3, value: value_3_v1 }) + ); + + // If we ask for a historical version, things are more complex but should still work. + let historical_tree = TreeId::new(lineage_1, version_1); + assert_eq!(forest.entries(historical_tree)?.count(), 3); + assert!( + forest + .entries(historical_tree)? + .contains(&TreeEntry { key: key_1, value: value_1_v1 }) + ); + assert!( + forest + .entries(historical_tree)? 
+ .contains(&TreeEntry { key: key_2, value: value_2_v1 }) + ); + assert!( + forest + .entries(historical_tree)? + .contains(&TreeEntry { key: key_3, value: value_3_v1 }) + ); + assert!( + !forest + .entries(historical_tree)? + .contains(&TreeEntry { key: key_4, value: value_4_v1 }) + ); + + Ok(()) +} + +#[test] +fn entries_never_returns_empty_entry() -> Result<()> { + // We risk yielding empty entries in a few situations, but all of those situations involve + // iterating over the history on its own. Let's go through them one by one. + // + // For more detailed testing of this behavior, see the `property_tests`. + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x44; 32]); + + // The FIRST such situation is when the iterator contains _only_ historical entries in its + // remaining tail. We can produce such a state by adding an empty lineage and then setting + // values in that lineage. + let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + forest.add_lineage(lineage_1, version_1, SmtUpdateBatch::empty())?; + + // We now set values in that lineage. + let version_2 = version_1 + 1; + let key_1: Word = rng.value(); + let value_1: Word = rng.value(); + let key_2: Word = rng.value(); + let value_2: Word = rng.value(); + let operations = SmtUpdateBatch::new( + [ForestOperation::insert(key_1, value_1), ForestOperation::insert(key_2, value_2)] + .into_iter(), + ); + forest.update_tree(lineage_1, version_2, operations)?; + + // At this point, we should see an empty iterator for entries if we query in the history. + let historical_tree = TreeId::new(lineage_1, version_1); + assert_eq!(forest.entries(historical_tree)?.count(), 0); + + // The SECOND scenario is where only some entries are added, so we end up with entire leaves + // that are history only and contain empty values. 
+ let lineage_2: LineageId = rng.value(); + let key_1 = Word::from([1u32, 0, 0, 42]); + let value_1: Word = rng.value(); + forest.add_lineage( + lineage_2, + version_1, + SmtUpdateBatch::new([ForestOperation::insert(key_1, value_1)].into_iter()), + )?; + + // Now we add an update to a different leaf. + let key_2 = Word::from([2u32, 0, 0, 43]); + let value_2: Word = rng.value(); + forest.update_tree( + lineage_2, + version_2, + SmtUpdateBatch::new([ForestOperation::insert(key_2, value_2)].into_iter()), + )?; + + // Now, when we query for entries on the historical version, we should only see one entry, and + // no entries should be the empty word. + let historical_tree = TreeId::new(lineage_2, version_1); + assert_eq!(forest.entries(historical_tree)?.count(), 1); + assert!(forest.entries(historical_tree)?.all(|e| e.value != EMPTY_WORD)); + + // The third scenario is where entries are added within a shared leaf, where we should only see + // the historical leaf entries and not their reversions. + let lineage_3: LineageId = rng.value(); + let key_1 = Word::from([1u32, 0, 0, 42]); + let value_1: Word = rng.value(); + forest.add_lineage( + lineage_3, + version_1, + SmtUpdateBatch::new([ForestOperation::insert(key_1, value_1)].into_iter()), + )?; + + // We now add an update in the same leaf. + let key_2 = Word::from([2u32, 0, 0, 42]); + let value_2: Word = rng.value(); + forest.update_tree( + lineage_3, + version_2, + SmtUpdateBatch::new([ForestOperation::insert(key_2, value_2)].into_iter()), + )?; + + // Now when we query the historical version, we should only see one entry, and no reversions. 
+ let historical_tree = TreeId::new(lineage_3, version_1); + assert_eq!(forest.entries(historical_tree)?.count(), 1); + assert!(forest.entries(historical_tree)?.all(|e| e.value != EMPTY_WORD)); + + Ok(()) +} + +// SINGLE-TREE MODIFIER TESTS +// ================================================================================================ + +#[test] +fn add_lineage() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x42; 32]); + + // We can add an initial lineage to the forest, starting with no changes from the default tree. + let lineage: LineageId = rng.value(); + let version: VersionId = rng.value(); + let result = forest.add_lineage(lineage, version, SmtUpdateBatch::default()); + assert!(result.is_ok()); + + // This should yield the correct value, which we'll check using a Smt. + let tree = Smt::new(); + + let result = result?; + assert_eq!(result.root(), tree.root()); + assert_eq!(result.lineage(), lineage); + assert_eq!(result.version(), version); + + // The newly-added lineage should also not be listed as having a non-empty history. + assert!(!forest.get_non_empty_histories().contains(&lineage)); + + // If we try and add a duplicated lineage again, we should get an error. + let result = forest.add_lineage(lineage, version, SmtUpdateBatch::default()); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), LargeSmtForestError::DuplicateLineage(l) if l == lineage); + + Ok(()) +} + +#[test] +fn update_tree() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x69; 32]); + + // Let's start by adding a lineage to the forest... 
+ let lineage_1: LineageId = rng.value(); + let version_1: VersionId = rng.value(); + let key_1: Word = rng.value(); + let value_1: Word = rng.value(); + + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_1, value_1); + + let result = forest.add_lineage(lineage_1, version_1, operations)?; + + // ... and creating an auxiliary tree with the same value to check consistency. + let mut tree = Smt::new(); + tree.insert(key_1, value_1)?; + + assert_eq!(result.root(), tree.root()); + + // Initially, this new lineage should not be listed as having a non-empty history. + assert!(!forest.get_non_empty_histories().contains(&lineage_1)); + + // If we try and update a lineage that is unknown, we should see an error. + let unknown_lineage: LineageId = rng.value(); + let result = forest.update_tree(unknown_lineage, version_1, SmtUpdateBatch::default()); + assert!(result.is_err()); + assert_matches!( + result.unwrap_err(), + LargeSmtForestError::UnknownLineage(l) if l == unknown_lineage + ); + + // If we add a version that is older than the latest known version for that lineage, we should + // see an error. + let older_version = version_1 - 1; + let result = forest.update_tree(lineage_1, older_version, SmtUpdateBatch::default()); + assert!(result.is_err()); + assert_matches!( + result.unwrap_err(), + LargeSmtForestError::BadVersion { provided, latest } + if provided == older_version && latest == version_1 + ); + + // Let's create some data and actually add it. + let key_2: Word = rng.value(); + let value_2: Word = rng.value(); + let key_3: Word = rng.value(); + let value_3: Word = rng.value(); + + let mut operations = SmtUpdateBatch::default(); + operations.add_insert(key_2, value_2); + operations.add_insert(key_3, value_3); + operations.add_remove(key_1); + + let version_2: VersionId = rng.value(); + let result = forest.update_tree(lineage_1, version_2, operations)?; + + // And we can check this against the tree. 
+ let mutations = + tree.compute_mutations(vec![(key_1, EMPTY_WORD), (key_2, value_2), (key_3, value_3)])?; + tree.apply_mutations(mutations)?; + + assert_eq!(result.root(), tree.root()); + + // And we should also now have a history version that corresponds to the previous version, which + // we are going to get at via some test helpers. + let history = forest.get_history(lineage_1); + assert_eq!(history.num_versions(), 1); + + // If we query for each value, we should see the correct reversions. + let view = history.get_view_at(version_1)?; + + assert_eq!(view.leaf_delta(&LeafIndex::from(key_1)).get(&key_1), Some(&value_1)); + assert_eq!(view.leaf_delta(&LeafIndex::from(key_2)).get(&key_2), Some(&EMPTY_WORD)); + assert_eq!(view.leaf_delta(&LeafIndex::from(key_3)).get(&key_3), Some(&EMPTY_WORD)); + + // We should also now see this lineage listed as having a non-empty history. + assert!(forest.get_non_empty_histories().contains(&lineage_1)); + + // Finally, if we provide an update that does not change the tree, the method should succeed but + // not result in any state changes. + assert_eq!(forest.lineage_roots(lineage_1).unwrap().count(), 2); + let empty_ops = SmtUpdateBatch::default(); + let version_3 = version_2 + 1; + forest.update_tree(lineage_1, version_3, empty_ops)?; + assert_eq!(forest.lineage_roots(lineage_1).unwrap().count(), 2); + let history = forest.get_history(lineage_1); + assert_eq!(history.num_versions(), 1); + + Ok(()) +} + +// MULTI-TREE MODIFIER TESTS +// ================================================================================================ + +#[test] +fn update_forest() -> Result<()> { + let backend = ForestInMemoryBackend::new(); + let mut forest = Forest::new(backend)?; + let mut rng = ContinuousRng::new([0x69; 32]); + + // Let's start by adding a few empty lineages to the forest, just so we have a starting point. + // Adding all of these should succeed as they are disjoint lineages. 
+ let version_1: VersionId = rng.value(); + let lineage_1: LineageId = rng.value(); + let lineage_2: LineageId = rng.value(); + let lineage_3: LineageId = rng.value(); + let lineage_4: LineageId = rng.value(); + + let l1_r1 = forest.add_lineage(lineage_1, version_1, SmtUpdateBatch::default())?; + let l2_r1 = forest.add_lineage(lineage_2, version_1, SmtUpdateBatch::default())?; + let l3_r1 = forest.add_lineage(lineage_3, version_1, SmtUpdateBatch::default())?; + let l4_r1 = forest.add_lineage(lineage_4, version_1, SmtUpdateBatch::default())?; + + // Let's compose some updates. + let l1_key_1: Word = rng.value(); + let l1_value_1: Word = rng.value(); + let l2_key_1: Word = rng.value(); + let l2_value_1: Word = rng.value(); + let l3_key_1: Word = rng.value(); + let l3_value_1: Word = rng.value(); + let l4_key_1: Word = rng.value(); + let l4_value_1: Word = rng.value(); + + // First we want to test the case where we refer to a lineage that doesn't exist. In this case, + // we should get an error. + let ne_lineage: LineageId = rng.value(); + let version_bad = version_1 - 1; + let version_2 = version_1 + 1; + let mut operations_ne_lineage = SmtForestUpdateBatch::empty(); + operations_ne_lineage.operations(lineage_1).add_insert(l1_key_1, l1_value_1); + operations_ne_lineage.operations(lineage_2).add_insert(l2_key_1, l2_value_1); + operations_ne_lineage.operations(lineage_3).add_insert(l3_key_1, l3_value_1); + operations_ne_lineage.operations(lineage_4).add_insert(l4_key_1, l4_value_1); + let operations_basic = operations_ne_lineage.clone(); + operations_ne_lineage.operations(ne_lineage); + + let result = forest.update_forest(version_2, operations_ne_lineage); + assert!(result.is_err()); + assert_matches!(result.unwrap_err(), LargeSmtForestError::UnknownLineage(l) if l == ne_lineage); + + // When a precondition check like this fails, we should also have unchanged state. 
+ assert_eq!( + forest.root_info(TreeId::new(lineage_1, version_1)), + RootInfo::LatestVersion(l1_r1.root()) + ); + assert_eq!( + forest.root_info(TreeId::new(lineage_2, version_1)), + RootInfo::LatestVersion(l2_r1.root()) + ); + assert_eq!( + forest.root_info(TreeId::new(lineage_3, version_1)), + RootInfo::LatestVersion(l3_r1.root()) + ); + assert_eq!( + forest.root_info(TreeId::new(lineage_4, version_1)), + RootInfo::LatestVersion(l4_r1.root()) + ); + + // We also want to test that we get an error when we ask for a bad version transition. + let result = forest.update_forest(version_bad, operations_basic.clone()); + assert!(result.is_err()); + assert_matches!( + result.unwrap_err(), + LargeSmtForestError::BadVersion { provided, latest } + if provided == version_bad && latest == version_1 + ); + + // This should also leave the internal state unchanged. + assert_eq!( + forest.root_info(TreeId::new(lineage_1, version_1)), + RootInfo::LatestVersion(l1_r1.root()) + ); + assert_eq!( + forest.root_info(TreeId::new(lineage_2, version_1)), + RootInfo::LatestVersion(l2_r1.root()) + ); + assert_eq!( + forest.root_info(TreeId::new(lineage_3, version_1)), + RootInfo::LatestVersion(l3_r1.root()) + ); + assert_eq!( + forest.root_info(TreeId::new(lineage_4, version_1)), + RootInfo::LatestVersion(l4_r1.root()) + ); + + // When a batch goes ahead successfully we should just get back the new roots to the trees, + // which can be associated by their lineages. + let roots = forest.update_forest(version_2, operations_basic)?; + assert_eq!(roots.len(), 4); + + // We can check that the updates went correctly by using auxiliary trees, and checking the + // values in the returned roots. 
+ let mut tree_1 = Smt::new(); + tree_1.insert(l1_key_1, l1_value_1)?; + let mut tree_2 = Smt::new(); + tree_2.insert(l2_key_1, l2_value_1)?; + let mut tree_3 = Smt::new(); + tree_3.insert(l3_key_1, l3_value_1)?; + let mut tree_4 = Smt::new(); + tree_4.insert(l4_key_1, l4_value_1)?; + + assert!(roots.iter().any(|e| e.root() == tree_1.root() + && e.version() == version_2 + && e.lineage() == lineage_1)); + assert!(roots.iter().any(|e| e.root() == tree_2.root() + && e.version() == version_2 + && e.lineage() == lineage_2)); + assert!(roots.iter().any(|e| e.root() == tree_3.root() + && e.version() == version_2 + && e.lineage() == lineage_3)); + assert!(roots.iter().any(|e| e.root() == tree_4.root() + && e.version() == version_2 + && e.lineage() == lineage_4)); + + // We also want to see that each of the updated lineages is now listed as having a non-empty + // history. + assert!(forest.get_non_empty_histories().contains(&lineage_1)); + assert!(forest.get_non_empty_histories().contains(&lineage_2)); + assert!(forest.get_non_empty_histories().contains(&lineage_3)); + assert!(forest.get_non_empty_histories().contains(&lineage_4)); + + // We also want to see that if a batch is processed that does not result in changes for a given + // tree, no state changes are made to that lineage. We check both the case where there are + // operations that result in no changes, and where no operations are specified. + let version_3 = version_2 + 1; + let key_5: Word = rng.value(); + let value_5: Word = rng.value(); + let mut operations_with_nop = SmtForestUpdateBatch::empty(); + operations_with_nop.operations(lineage_1).add_insert(l1_key_1, l1_value_1); + operations_with_nop.operations(lineage_2); + operations_with_nop.operations(lineage_3).add_insert(key_5, value_5); + + // Before we make these batches happen, let's check where things stand. 
+ assert_eq!(forest.lineage_roots(lineage_1).unwrap().count(), 2); + assert_eq!(forest.lineage_roots(lineage_2).unwrap().count(), 2); + assert_eq!(forest.lineage_roots(lineage_3).unwrap().count(), 2); + assert_eq!(forest.lineage_roots(lineage_4).unwrap().count(), 2); + + // Then we should apply the batch. + let roots = forest.update_forest(version_3, operations_with_nop)?; + assert_eq!(roots.len(), 3); + + // And for the no-op or unchanged cases we should not have new roots. + assert_eq!(forest.lineage_roots(lineage_1).unwrap().count(), 2); + assert_eq!(forest.lineage_roots(lineage_2).unwrap().count(), 2); + assert_eq!(forest.lineage_roots(lineage_3).unwrap().count(), 3); + assert_eq!(forest.lineage_roots(lineage_4).unwrap().count(), 2); + + Ok(()) +} diff --git a/miden-crypto/src/merkle/smt/large_forest/utils.rs b/miden-crypto/src/merkle/smt/large_forest/utils.rs new file mode 100644 index 000000000..ff0a2f4bf --- /dev/null +++ b/miden-crypto/src/merkle/smt/large_forest/utils.rs @@ -0,0 +1,10 @@ +//! Contains utility type aliases and functions for use as part of the SMT forest. + +use crate::{Word, merkle::smt::full::SMT_DEPTH}; + +// TYPE ALIASES +// ================================================================================================ + +/// The mutation set used by the forest backends to provide reverse mutations that describe the +/// changes necessary to revert the tree to its previous state. 
+pub type MutationSet = crate::merkle::smt::MutationSet; diff --git a/miden-crypto/src/merkle/smt/mod.rs b/miden-crypto/src/merkle/smt/mod.rs index 0965c658e..076810e08 100644 --- a/miden-crypto/src/merkle/smt/mod.rs +++ b/miden-crypto/src/merkle/smt/mod.rs @@ -30,8 +30,11 @@ pub use large::{RocksDbConfig, RocksDbStorage}; mod large_forest; pub use large_forest::{ - Backend, BackendError, ForestOperation, LargeSmtForest, LargeSmtForestError, RootInfo, - SmtForestUpdateBatch, SmtUpdateBatch, TreeId, VersionId, + Backend, BackendError, Config as ForestConfig, + DEFAULT_MAX_HISTORY_VERSIONS as FOREST_DEFAULT_MAX_HISTORY_VERSIONS, ForestOperation, + InMemoryBackend as ForestInMemoryBackend, LargeSmtForest, LargeSmtForestError, LineageId, + MIN_HISTORY_VERSIONS as FOREST_MIN_HISTORY_VERSIONS, RootInfo, SmtForestUpdateBatch, + SmtUpdateBatch, TreeEntry, TreeId, TreeWithRoot, VersionId, }; mod simple; @@ -121,7 +124,7 @@ pub(crate) trait SparseMerkleTree { let InnerNode { left, right } = self.get_inner_node(index.parent()); - let index_is_right = index.is_value_odd(); + let index_is_right = index.is_position_odd(); if index_is_right { right } else { left } } @@ -168,7 +171,7 @@ pub(crate) trait SparseMerkleTree { ) { let mut node_hash = node_hash_at_index; for node_depth in (0..index.depth()).rev() { - let is_right = index.is_value_odd(); + let is_right = index.is_position_odd(); index.move_up(); let InnerNode { left, right } = self.get_inner_node(index); let (left, right) = if is_right { @@ -261,7 +264,7 @@ pub(crate) trait SparseMerkleTree { for node_depth in (0..node_index.depth()).rev() { // Whether the node we're replacing is the right child or the left child. - let is_right = node_index.is_value_odd(); + let is_right = node_index.is_position_odd(); node_index.move_up(); let old_node = node_mutations @@ -553,9 +556,9 @@ impl LeafIndex { Ok(LeafIndex { index: NodeIndex::new(DEPTH, value)? }) } - /// Returns the numeric value of this leaf index. 
- pub fn value(&self) -> u64 { - self.index.value() + /// Returns the position of this leaf index within its depth layer. + pub fn position(&self) -> u64 { + self.index.position() } } @@ -585,7 +588,7 @@ impl TryFrom for LeafIndex { }); } - Self::new(node_index.value()) + Self::new(node_index.position()) } } @@ -603,7 +606,7 @@ impl Deserializable for LeafIndex { impl Display for LeafIndex { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "DEPTH={}, value={}", DEPTH, self.value()) + write!(f, "DEPTH={}, position={}", DEPTH, self.position()) } } @@ -668,6 +671,13 @@ impl MutationSet { pub fn new_pairs(&self) -> &Map { &self.new_pairs } + + /// Returns `true` if the mutation set represents no changes to the tree, and `false` otherwise. + pub fn is_empty(&self) -> bool { + self.node_mutations.is_empty() + && self.new_pairs.is_empty() + && self.old_root == self.new_root + } } // SERIALIZATION diff --git a/miden-crypto/src/merkle/smt/partial/mod.rs b/miden-crypto/src/merkle/smt/partial/mod.rs index 2a6c1d75a..14698325b 100644 --- a/miden-crypto/src/merkle/smt/partial/mod.rs +++ b/miden-crypto/src/merkle/smt/partial/mod.rs @@ -229,7 +229,7 @@ impl PartialSmt { let leaf = self .leaves - .entry(leaf_index.value()) + .entry(leaf_index.position()) .or_insert_with(|| SmtLeaf::new_empty(leaf_index)); if value != EMPTY_WORD { @@ -248,7 +248,7 @@ impl PartialSmt { // Remove empty leaf if current_entries == 0 { - self.leaves.remove(&leaf_index.value()); + self.leaves.remove(&leaf_index.position()); } // Recompute the path from leaf to root @@ -307,15 +307,15 @@ impl PartialSmt { let prev_entries = self .leaves - .get(¤t_index.value()) + .get(¤t_index.position()) .map(|leaf| leaf.num_entries()) .unwrap_or(0); let current_entries = leaf.num_entries(); // Only store non-empty leaves if current_entries > 0 { - self.leaves.insert(current_index.value(), leaf); + self.leaves.insert(current_index.position(), leaf); } else { - self.leaves.remove(¤t_index.value()); + 
self.leaves.remove(¤t_index.position()); } // Guaranteed not to over/underflow. All variables are <= MAX_LEAF_ENTRIES and result > 0. @@ -323,7 +323,7 @@ impl PartialSmt { for sibling_hash in path { // Find the index of the sibling node and compute whether it is a left or right child. - let is_sibling_right = current_index.sibling().is_value_odd(); + let is_sibling_right = current_index.sibling().is_position_odd(); // Move the index up so it points to the parent of the current index and the sibling. current_index.move_up(); @@ -362,7 +362,7 @@ impl PartialSmt { let leaf_index = Self::key_to_leaf_index(key); // Explicitly stored leaves are always trackable - if let Some(leaf) = self.leaves.get(&leaf_index.value()) { + if let Some(leaf) = self.leaves.get(&leaf_index.position()) { return Some(leaf.clone()); } @@ -446,7 +446,7 @@ impl PartialSmt { let InnerNode { left, right } = self.get_inner_node_or_empty(index.parent()); - if index.is_value_odd() { right } else { left } + if index.is_position_odd() { right } else { left } } /// Recomputes all inner nodes from a leaf up to the root after a leaf value change. 
@@ -461,7 +461,7 @@ impl PartialSmt { let mut node_hash = leaf_hash; for _ in (0..index.depth()).rev() { - let is_right = index.is_value_odd(); + let is_right = index.is_position_odd(); index.move_up(); let InnerNode { left, right } = self.get_inner_node_or_empty(index); let (left, right) = if is_right { @@ -530,7 +530,7 @@ impl From for PartialSmt { Self { root: smt.root(), num_entries: smt.num_entries(), - leaves: smt.leaves().map(|(idx, leaf)| (idx.value(), leaf.clone())).collect(), + leaves: smt.leaves().map(|(idx, leaf)| (idx.position(), leaf.clone())).collect(), inner_nodes: smt.inner_node_indices().collect(), } } diff --git a/miden-crypto/src/merkle/smt/simple/mod.rs b/miden-crypto/src/merkle/smt/simple/mod.rs index e310269f9..be9c8833a 100644 --- a/miden-crypto/src/merkle/smt/simple/mod.rs +++ b/miden-crypto/src/merkle/smt/simple/mod.rs @@ -330,7 +330,7 @@ impl SimpleSmt { let new_branch_idx = { let new_depth = subtree_root_insertion_depth + branch_idx.depth(); let new_value = subtree_insertion_index * 2_u64.pow(branch_idx.depth().into()) - + branch_idx.value(); + + branch_idx.position(); NodeIndex::new(new_depth, new_value).expect("index guaranteed to be valid") }; @@ -401,9 +401,9 @@ impl SparseMerkleTree for SimpleSmt { value: Word, ) -> Result, MerkleError> { let result = if value == Self::EMPTY_VALUE { - self.leaves.remove(&key.value()) + self.leaves.remove(&key.position()) } else { - self.leaves.insert(key.value(), value) + self.leaves.insert(key.position(), value) }; Ok(result) } @@ -413,7 +413,7 @@ impl SparseMerkleTree for SimpleSmt { } fn get_leaf(&self, key: &LeafIndex) -> Word { - let leaf_pos = key.value(); + let leaf_pos = key.position(); match self.leaves.get(&leaf_pos) { Some(word) => *word, None => Self::EMPTY_VALUE, diff --git a/miden-crypto/src/merkle/sparse_path.rs b/miden-crypto/src/merkle/sparse_path.rs index 46b62b755..2a155b93f 100644 --- a/miden-crypto/src/merkle/sparse_path.rs +++ b/miden-crypto/src/merkle/sparse_path.rs @@ -1025,7 
+1025,7 @@ mod tests { let (merkle_path, leaf) = tree.open(key).into_parts(); let sparse_path = SparseMerklePath::from_sized_iter(merkle_path.clone().into_iter()).unwrap(); - let leaf_index = Smt::key_to_leaf_index(key).value(); + let leaf_index = Smt::key_to_leaf_index(key).position(); let actual_value = leaf.hash(); // Use the actual leaf hash // Verify both paths have same depth diff --git a/miden-crypto/src/merkle/store/mod.rs b/miden-crypto/src/merkle/store/mod.rs index c36f8ab51..8200c6599 100644 --- a/miden-crypto/src/merkle/store/mod.rs +++ b/miden-crypto/src/merkle/store/mod.rs @@ -466,7 +466,7 @@ impl MerkleStore { // performs the update only if the node value differs from the opening if node != value { - root = self.add_merkle_path(index.value(), node, path.clone())?; + root = self.add_merkle_path(index.position(), node, path.clone())?; } Ok(RootPath { root, path }) diff --git a/miden-crypto/src/merkle/store/tests.rs b/miden-crypto/src/merkle/store/tests.rs index adbbf1f92..5446c7c8a 100644 --- a/miden-crypto/src/merkle/store/tests.rs +++ b/miden-crypto/src/merkle/store/tests.rs @@ -613,7 +613,7 @@ fn test_constructors() -> Result<(), MerkleError> { assert!( store.has_path(mtree.root(), index), "path for index {} at depth {} must exist", - index.value(), + index.position(), depth ); } @@ -693,7 +693,7 @@ fn node_path_should_be_truncated_by_midtier_insert() { let path = store.get_path(root, index).unwrap().path; assert_eq!(node, result); assert_eq!(path.depth(), depth); - assert!(path.verify(index.value(), result, &root).is_ok()); + assert!(path.verify(index.position(), result, &root).is_ok()); assert!(store.has_path(root, index), "path for first inserted node must exist"); // flip the first bit of the key and insert the second node on a different depth @@ -707,7 +707,7 @@ fn node_path_should_be_truncated_by_midtier_insert() { let path = store.get_path(root, index).unwrap().path; assert_eq!(node, result); assert_eq!(path.depth(), depth); - 
assert!(path.verify(index.value(), result, &root).is_ok()); + assert!(path.verify(index.position(), result, &root).is_ok()); assert!(store.has_path(root, index), "path for second inserted node must exist"); // attempt to fetch a path of the second node to depth 64 diff --git a/miden-crypto/src/rand/test_utils.rs b/miden-crypto/src/rand/test_utils.rs index 137e1f002..296a3e178 100644 --- a/miden-crypto/src/rand/test_utils.rs +++ b/miden-crypto/src/rand/test_utils.rs @@ -124,3 +124,23 @@ pub fn prng_vector(seed: [u8; 32], length: usize) -> Vec { let mut rng = seeded_rng(seed); (0..length).map(|_| rng_value(&mut rng)).collect() } + +// CONTINUOUS RNG +// ================================================================================================ + +/// A continuous random number generator that works in `no-std` contexts. +#[derive(Debug)] +pub struct ContinuousRng { + rng: ChaCha20Rng, +} +impl ContinuousRng { + /// Creates a new instance of the random number generator from the seed. + pub fn new(seed: [u8; 32]) -> ContinuousRng { + ContinuousRng { rng: ChaCha20Rng::from_seed(seed) } + } + + /// Generates a random value of the [`Randomizable`] type `T`. + pub fn value(&mut self) -> T { + rng_value(&mut self.rng) + } +} diff --git a/miden-serde-utils/fuzz/Cargo.toml b/miden-serde-utils/fuzz/Cargo.toml index 0214e841b..406ba583f 100644 --- a/miden-serde-utils/fuzz/Cargo.toml +++ b/miden-serde-utils/fuzz/Cargo.toml @@ -10,8 +10,8 @@ version = "0.0.0" cargo-fuzz = true [dependencies] -libfuzzer-sys = "0.4" -p3-miden-goldilocks = "0.4" +libfuzzer-sys = "0.4" +p3-goldilocks = { default-features = false, version = "0.4.2" } [dependencies.miden-serde-utils] path = ".." 
diff --git a/miden-serde-utils/fuzz/fuzz_targets/goldilocks.rs b/miden-serde-utils/fuzz/fuzz_targets/goldilocks.rs index 8a410835d..2ebf9fc98 100644 --- a/miden-serde-utils/fuzz/fuzz_targets/goldilocks.rs +++ b/miden-serde-utils/fuzz/fuzz_targets/goldilocks.rs @@ -6,8 +6,8 @@ use miden_serde_utils::Deserializable; fuzz_target!(|data: &[u8]| { // Test Goldilocks field element deserialization // The key validation: value must be < modulus (2^64 - 2^32 + 1) - // This is defined in p3_miden_goldilocks::Goldilocks::MODULUS - use p3_miden_goldilocks::Goldilocks; + // This is defined in p3_goldilocks::Goldilocks::MODULUS + use p3_goldilocks::Goldilocks; let _ = Goldilocks::read_from_bytes(data); diff --git a/miden-serde-utils/src/byte_reader.rs b/miden-serde-utils/src/byte_reader.rs index fecb608cb..4dfd538b0 100644 --- a/miden-serde-utils/src/byte_reader.rs +++ b/miden-serde-utils/src/byte_reader.rs @@ -1351,4 +1351,42 @@ mod tests { let collect_result: Result>, _> = result.unwrap().collect(); assert!(collect_result.is_err()); } + + /// Tuples should use sum of element min_serialized_size, not size_of (which includes padding). + /// + /// This test verifies that (u8, u64) has min_serialized_size = 9 (1 + 8) not 16 (in-memory size + /// with 7 bytes of alignment padding). 
+ #[test] + fn tuple_min_serialized_size_excludes_padding() { + // Serialized: 1 byte for u8 + 8 bytes for u64 = 9 bytes + // In-memory: 8 bytes for u8 (with 7 bytes padding) + 8 bytes for u64 = 16 bytes + assert_eq!(<(u8, u64)>::min_serialized_size(), 9); + assert_eq!(core::mem::size_of::<(u8, u64)>(), 16); + + // Verify budget calculation uses 9, not 16 + let mut data = Vec::new(); + data.push(0); // 9-byte vint64 marker + data.extend_from_slice(&4u64.to_le_bytes()); // claim 4 tuples + // Provide exactly 4 tuples worth of data: 4 * 9 = 36 bytes + for i in 0u8..4 { + data.push(i); // u8 + data.extend_from_slice(&(i as u64).to_le_bytes()); // u64 + } + + let inner = SliceReader::new(&data); + // Budget: 9 (length prefix) + 36 (data) = 45 bytes + let mut reader = BudgetedReader::new(inner, 45); + + let _len = reader.read_usize().unwrap(); + // With min_serialized_size = 9: remaining = 45 - 9 = 36, max_elements = 36 / 9 = 4 + // This should succeed (4 <= 4) + let result = reader.read_many_iter::<(u8, u64)>(4); + assert!(result.is_ok()); + + // With min_serialized_size = 16 (wrong): max_elements = 36 / 16 = 2 + // This would fail (4 > 2) + let collect_result: Result, _> = result.unwrap().collect(); + assert!(collect_result.is_ok()); + assert_eq!(collect_result.unwrap().len(), 4); + } } diff --git a/miden-serde-utils/src/lib.rs b/miden-serde-utils/src/lib.rs index 11b6cca76..bcd484092 100644 --- a/miden-serde-utils/src/lib.rs +++ b/miden-serde-utils/src/lib.rs @@ -483,6 +483,10 @@ where let v2 = T2::read_from(source)?; Ok((v1, v2)) } + + fn min_serialized_size() -> usize { + T1::min_serialized_size().saturating_add(T2::min_serialized_size()) + } } impl Deserializable for (T1, T2, T3) @@ -497,6 +501,12 @@ where let v3 = T3::read_from(source)?; Ok((v1, v2, v3)) } + + fn min_serialized_size() -> usize { + T1::min_serialized_size() + .saturating_add(T2::min_serialized_size()) + .saturating_add(T3::min_serialized_size()) + } } impl Deserializable for (T1, T2, T3, T4) @@ 
-513,6 +523,13 @@ where let v4 = T4::read_from(source)?; Ok((v1, v2, v3, v4)) } + + fn min_serialized_size() -> usize { + T1::min_serialized_size() + .saturating_add(T2::min_serialized_size()) + .saturating_add(T3::min_serialized_size()) + .saturating_add(T4::min_serialized_size()) + } } impl Deserializable for (T1, T2, T3, T4, T5) @@ -531,6 +548,14 @@ where let v5 = T5::read_from(source)?; Ok((v1, v2, v3, v4, v5)) } + + fn min_serialized_size() -> usize { + T1::min_serialized_size() + .saturating_add(T2::min_serialized_size()) + .saturating_add(T3::min_serialized_size()) + .saturating_add(T4::min_serialized_size()) + .saturating_add(T5::min_serialized_size()) + } } impl Deserializable for (T1, T2, T3, T4, T5, T6) @@ -551,6 +576,15 @@ where let v6 = T6::read_from(source)?; Ok((v1, v2, v3, v4, v5, v6)) } + + fn min_serialized_size() -> usize { + T1::min_serialized_size() + .saturating_add(T2::min_serialized_size()) + .saturating_add(T3::min_serialized_size()) + .saturating_add(T4::min_serialized_size()) + .saturating_add(T5::min_serialized_size()) + .saturating_add(T6::min_serialized_size()) + } } impl Deserializable for u8 {