diff --git a/stacks-common/src/codec/mod.rs b/stacks-common/src/codec/mod.rs index a01dab97eb6..96972c5526f 100644 --- a/stacks-common/src/codec/mod.rs +++ b/stacks-common/src/codec/mod.rs @@ -95,6 +95,18 @@ impl_stacks_message_codec_for_int!(u32; [0; 4]); impl_stacks_message_codec_for_int!(u64; [0; 8]); impl_stacks_message_codec_for_int!(i64; [0; 8]); +impl StacksMessageCodec for [u8; 4] { + fn consensus_serialize(&self, fd: &mut W) -> Result<(), Error> { + fd.write_all(self).map_err(Error::WriteError) + } + + fn consensus_deserialize(fd: &mut R) -> Result<[u8; 4], Error> { + let mut buf = [0u8; 4]; + fd.read_exact(&mut buf).map_err(Error::ReadError)?; + Ok(buf) + } +} + impl StacksMessageCodec for [u8; 20] { fn consensus_serialize(&self, fd: &mut W) -> Result<(), Error> { fd.write_all(self).map_err(Error::WriteError) diff --git a/stackslib/src/chainstate/stacks/index/bits.rs b/stackslib/src/chainstate/stacks/index/bits.rs index 12a9fa68b94..738e7ebd508 100644 --- a/stackslib/src/chainstate/stacks/index/bits.rs +++ b/stackslib/src/chainstate/stacks/index/bits.rs @@ -18,15 +18,17 @@ use std::io::{ErrorKind, Read, Seek, SeekFrom, Write}; use sha2::{Digest, Sha512_256 as TrieHasher}; -use stacks_common::types::chainstate::{TrieHash, TRIEHASH_ENCODED_SIZE}; -use stacks_common::util::hash::to_hex; use crate::chainstate::stacks::index::node::{ - clear_backptr, ConsensusSerializable, TrieNode, TrieNode16, TrieNode256, TrieNode4, TrieNode48, - TrieNodeID, TrieNodeType, TriePtr, TRIEPTR_SIZE, + clear_compressed, clear_ctrl_bits, is_compressed, ptrs_fmt, ConsensusSerializable, TrieNode, + TrieNode16, TrieNode256, TrieNode4, TrieNode48, TrieNodeID, TrieNodePatch, TrieNodeType, + TriePtr, TRIEPTR_SIZE, }; use crate::chainstate::stacks::index::storage::TrieStorageConnection; use crate::chainstate::stacks::index::{BlockMap, Error, MarfTrieId, TrieLeaf}; +use crate::codec::StacksMessageCodec; +use crate::types::chainstate::{TrieHash, TRIEHASH_ENCODED_SIZE}; +use 
crate::util::hash::to_hex; /// Get the size of a Trie path (note that a Trie path is 32 bytes long, and can definitely _not_ /// be over 255 bytes). @@ -37,14 +39,18 @@ pub fn get_path_byte_len(p: &[u8]) -> usize { } /// Decode a trie path from a Readable object. -/// Returns Error::CorruptionError if the path doesn't decode. +/// This is up to 32 bytes, and must be prefixed by a 1-byte length. +/// +/// Returns Ok(path-bytes) on success +/// Returns Err(CorruptionError) if the path doesn't decode, or if the length prefix is invalid +/// Returns Err(IOError) on disk I/O failure pub fn path_from_bytes(r: &mut R) -> Result, Error> { let mut lenbuf = [0u8; 1]; r.read_exact(&mut lenbuf).map_err(|e| { if e.kind() == ErrorKind::UnexpectedEof { Error::CorruptionError("Failed to read len buf".to_string()) } else { - eprintln!("failed: {:?}", &e); + error!("failed: {e:?}"); Error::IOError(e) } })?; @@ -66,7 +72,7 @@ pub fn path_from_bytes(r: &mut R) -> Result, Error> { if e.kind() == ErrorKind::UnexpectedEof { Error::CorruptionError(format!("Failed to read {} bytes of path", lenbuf[0])) } else { - eprintln!("failed: {:?}", &e); + error!("failed: {e:?}"); Error::IOError(e) } })?; @@ -74,15 +80,10 @@ pub fn path_from_bytes(r: &mut R) -> Result, Error> { Ok(retbuf) } -/// Helper to verify that a Trie node's ID byte is valid. -pub fn check_node_id(nid: u8) -> bool { - let node_id = clear_backptr(nid); - TrieNodeID::from_u8(node_id).is_some() -} - -/// Helper to return the number of children in a Trie, given its ID. 
-pub fn node_id_to_ptr_count(node_id: u8) -> usize { - match TrieNodeID::from_u8(clear_backptr(node_id)) +/// Helper to return the number of children in a Trie, given its numeric ID +/// Panics if `node_id` is not a valid trie node ID value +fn node_id_to_ptr_count(node_id: u8) -> usize { + match TrieNodeID::from_u8(clear_ctrl_bits(node_id)) .unwrap_or_else(|| panic!("Unknown node ID {}", node_id)) { TrieNodeID::Leaf => 1, @@ -90,51 +91,204 @@ pub fn node_id_to_ptr_count(node_id: u8) -> usize { TrieNodeID::Node16 => 16, TrieNodeID::Node48 => 48, TrieNodeID::Node256 => 256, - TrieNodeID::Empty => panic!("node_id_to_ptr_count: tried getting empty node pointer count"), + TrieNodeID::Empty | TrieNodeID::Patch => { + panic!("node_id_to_ptr_count: tried getting empty node pointer count") + } } } -/// Helper to determine how many bytes a Trie node's child pointers will take to encode. +/// Helper to determine the maximum number of bytes a Trie node's child pointers will take to encode. pub fn get_ptrs_byte_len(ptrs: &[TriePtr]) -> usize { let node_id_len = 1; node_id_len + TRIEPTR_SIZE * ptrs.len() } -/// Read a Trie node's children from a Readable object, and write them to the given ptrs_buf slice. -/// Returns the Trie node ID detected. -pub fn ptrs_from_bytes( +/// Helper to determine a sparse TriePtr list's bitmap size, given the node ID's numeric value. +/// Returns Some(size) if the identified node type has ptrs +/// Returns None if `id` is a `Leaf`, `Patch`, or `Empty` node, or is unrecognized. +pub fn get_sparse_ptrs_bitmap_size(id: u8) -> Option { + match TrieNodeID::from_u8(clear_ctrl_bits(id))? { + TrieNodeID::Leaf => None, + TrieNodeID::Node4 => Some(1), + TrieNodeID::Node16 => Some(2), + TrieNodeID::Node48 => Some(6), + TrieNodeID::Node256 => Some(32), + TrieNodeID::Empty => None, + TrieNodeID::Patch => None, + } +} + +/// Helper to determine what the compressed size of a ptrs list will be, depending on whether or +/// not it's sparse or dense. 
+/// +/// Returns Some((size, is-sparse?)) on success +/// Returns None if the node doesn't have ptrs +pub fn get_compressed_ptrs_size(id: u8, ptrs: &[TriePtr]) -> Option<(usize, bool)> { + let bitmap_size = get_sparse_ptrs_bitmap_size(id)?; + + // compute stored ptrs size + let mut sparse_ptrs_size = 0; + let mut ptrs_size = 0; + for ptr in ptrs.iter() { + if ptr.id() != TrieNodeID::Empty as u8 { + sparse_ptrs_size += ptr.compressed_size(); + } + ptrs_size += ptr.compressed_size(); + } + + // +1 is for the 0xff bitmap marker + let sparse_size = usize::try_from(1 + bitmap_size + sparse_ptrs_size).expect("infallible"); + if sparse_size < ptrs_size { + return Some((sparse_size, true)); + } else { + return Some((ptrs_size, false)); + } +} + +/// Helper to determine how many bytes a Trie node's child pointers will take to encode. +/// Size is id + ptrs encoding +pub fn get_ptrs_byte_len_compressed(id: u8, ptrs: &[TriePtr]) -> usize { + 1 + get_compressed_ptrs_size(id, ptrs) + .map(|(sz, _)| sz) + .unwrap_or(0) +} + +/// Read a trie node's children pointers from a Read object, and write them to the given `ptrs_buf` slice. +/// The `node_id` will indicate whether or not the pointers list is compressed (via its compressed +/// bit). +/// +/// An uncompressed list of `TriePtr`s is simply a sequence of uncompressed `TriePtr`s. They are +/// read verbatim into the `ptrs_buf` slice. +/// +/// A compressed list of `TriePtr`s has either a sparse form or a dense form, and is comprised of +/// compressed `TriePtr`s (which have variable length). In the sparse form, the byte encoding is +/// as follows: +/// +/// 0 1 1+B 1+B+N +/// |---|-----------|---------------------------------------| +/// 0xff bitmap list of compressed `TriePtr`s +/// +/// Where +/// * 0xff is a marker byte that cannot be the first byte of a `TriePtr`, and indicates that a +/// bitmap follows +/// * `bitmap` is a bit field in which the ith bit is set if the ith `TriePtr` is not empty. 
All +/// other `TriePtr`s in `ptrs_buf` will be considered empty, and initialized as such. +/// +/// The remaining bytes 1+B through 1+B+N contain the list of compressed `TriePtr`s -- one for each +/// set bit in `bitmap`. +/// +/// If the dense form is used, then the byte encoding is as follows: +/// +/// 0 N +/// |-------------------------------------| +/// list of compressed `TriePtr`s +/// +/// The dense form includes empty `TriePtr`s. The dense form is used if the size of using the +/// sparse form (with the bitmap) exceeds the size of using the dense form. The dense form is used +/// for tries that are full or nearly full. +/// +/// This code path is not guaranteed to read a node's `TriePtr` list; it may instead read a +/// `TrieNodePatch`, which contains the _delta_ between two successive copies of the same node +/// across a copy-on-write operation. If a `TrieNodePatch` is found, then it is returned as an +/// Err(..) result, so the caller can apply it atop its targeted trie node. +/// +/// Returns Ok(node-id) on success, where the compressed bit in `node-id` is NOT set. However, the +/// backptr bit MAY be set (it is preserved). +/// +/// Returns Err(Patch(..)) if the code encountered a TrieNodePatch instead of the expected trie +/// node. In this case, the patch will be decoded and returned, so that it can be applied by the +/// caller on top of a base node. +/// +/// Returns Err(CorruptionError(..)) if the node ID is invalid, the read node ID is missing, the +/// read node ID does not match the given node ID, or the byte encoding is invalid given the +/// expected pointers encoding. +/// +/// Returns Err(IOError(..)) on read failure. +/// +/// Returns Err(OverflowError) on integer or potential buffer overflow, which should never happen. 
+pub fn ptrs_from_bytes( node_id: u8, r: &mut R, ptrs_buf: &mut [TriePtr], ) -> Result { - if !check_node_id(node_id) { - trace!("Bad node ID {:x}", node_id); + if TrieNodeID::from_u8(clear_ctrl_bits(node_id)).is_none() { + error!("Bad node ID {:x}", node_id); return Err(Error::CorruptionError(format!( "Bad node ID: {:x}", node_id ))); - } + }; let num_ptrs = node_id_to_ptr_count(node_id); + + // NOTE: this may overshoot the length of the readable object, since this is the maximum possible size of the + // concatenated ptr bytes. As such, treat EOF as a non-error + let ptrs_start_disk_ptr = r + .stream_position() + .inspect_err(|e| error!("Failed to ftell the read handle: {e:?}"))?; + + trace!( + "Read ptrs for node {} at offset {}", + node_id, + ptrs_start_disk_ptr + ); + let mut bytes = vec![0u8; 1 + num_ptrs * TRIEPTR_SIZE]; - r.read_exact(&mut bytes).map_err(|e| { - if e.kind() == ErrorKind::UnexpectedEof { - Error::CorruptionError(format!( - "Failed to read 1 + {} bytes of ptrs", - num_ptrs * TRIEPTR_SIZE - )) - } else { - eprintln!("failed: {:?}", &e); - Error::IOError(e) + let mut offset = 0; + loop { + let nr = match r.read( + bytes + .get_mut(offset..) 
+ .ok_or_else(|| Error::OverflowError)?, + ) { + Ok(nr) => nr, + Err(e) => match e.kind() { + ErrorKind::UnexpectedEof => { + // done + 0 + } + ErrorKind::Interrupted => { + // try again + continue; + } + _ => { + error!("Failed to read trie ptrs: {e:?}"); + return Err(Error::IOError(e)); + } + }, + }; + if nr == 0 { + // EOF + break; } - })?; + offset = offset.checked_add(nr).ok_or_else(|| Error::OverflowError)?; + } + + trace!("Read bytes ({}) {}", bytes.len(), &to_hex(&bytes)); // verify the id is correct let nid = bytes .first() - .ok_or_else(|| Error::CorruptionError("Failed to read 1 byte from bytes array".into()))?; - if clear_backptr(*nid) != clear_backptr(node_id) { - trace!("Bad idbuf: {:x} != {:x}", nid, node_id); + .ok_or_else(|| Error::CorruptionError("Failed to read 1st byte from bytes array".into()))?; + + if clear_ctrl_bits(*nid) != clear_ctrl_bits(node_id) { + let Some(nid_node_id) = TrieNodeID::from_u8(clear_ctrl_bits(*nid)) else { + return Err(Error::CorruptionError( + "Failed to read expected node ID -- not a valid ID".to_string(), + )); + }; + if nid_node_id == TrieNodeID::Patch { + trace!("Encountered a patch node at offset {}", ptrs_start_disk_ptr); + // this is really a node that patches the target node. + // try and read the patch node instead + let patch_node = TrieNodePatch::consensus_deserialize(&mut &bytes[..]) + .map_err(|e| Error::CorruptionError(format!("Failed to read patch node: {e:?}")))?; + + // the caller should read the node that this node patches + return Err(Error::Patch(None, patch_node)); + } + + error!("Bad idbuf: {:x} != {:x}", nid, node_id); return Err(Error::CorruptionError( "Failed to read expected node ID".to_string(), )); @@ -143,18 +297,156 @@ pub fn ptrs_from_bytes( let ptr_bytes = bytes .get(1..) 
.ok_or_else(|| Error::CorruptionError("Failed to read >1 bytes from bytes array".into()))?; - // iterate over the read-in bytes in chunks of TRIEPTR_SIZE and store them - // to `ptrs_buf` - let reading_ptrs = ptr_bytes - .chunks_exact(TRIEPTR_SIZE) - .zip(ptrs_buf.iter_mut()); - for (next_ptr_bytes, ptr_slot) in reading_ptrs { - *ptr_slot = TriePtr::from_bytes(next_ptr_bytes); + + if is_compressed(*nid) { + trace!("Node {} has compressed ptrs", clear_ctrl_bits(*nid)); + let sparse_flag = ptr_bytes.get(0).ok_or_else(|| { + Error::CorruptionError("Failed to read 2nd byte from bytes array".into()) + })?; + + if *sparse_flag == 0xff { + trace!("Node {} has sparse compressed ptrs", clear_ctrl_bits(*nid)); + + // this is a sparse ptrs list + let ptr_bytes = ptr_bytes.get(1..).ok_or_else(|| { + Error::CorruptionError("Failed to read >2 bytes from bytes array".into()) + })?; + + let bitmap_size = + get_sparse_ptrs_bitmap_size(clear_ctrl_bits(*nid)).ok_or_else(|| { + Error::CorruptionError(format!( + "Unable to determine bitmap size for node type {}", + clear_ctrl_bits(*nid) + )) + })?; + + if ptr_bytes.len() < bitmap_size { + return Err(Error::CorruptionError( + "Tried to read a bitmap but not enough bytes".to_string(), + )); + } + let bitmap = &ptr_bytes.get(0..bitmap_size).ok_or_else(|| { + Error::CorruptionError("Tried to read a bitmap but not enough bytes".to_string()) + })?; + + trace!( + "Node {} has sparse compressed ptrs bitmap {}", + clear_ctrl_bits(*nid), + to_hex(&bitmap) + ); + + let ptr_bytes = &ptr_bytes.get(bitmap_size..).ok_or_else(|| { + Error::CorruptionError("Failed to read bitmap_size bytes from bytes array".into()) + })?; + + let mut nextptr = 0; + let mut cursor = 0; + for i in 0..(8 * bitmap_size) { + if nextptr >= ptrs_buf.len() { + break; + } + let bi = i / 8; + let bt = i % 8; + let mask = 1u8 << bt; + let next_ptrs_buf = ptrs_buf.get_mut(nextptr).ok_or_else(|| { + Error::CorruptionError("infallible: nextptr < ptrs_buf.len()".into()) + })?; + let 
byte = *bitmap.get(bi).ok_or_else(|| { + Error::CorruptionError("infallible: i / 8 < bitmap.len()".into()) + })?; + if byte & mask == 0 { + // empty + *next_ptrs_buf = TriePtr::default(); + } else { + trace!( + "read sparse ptr {} at {}", + &to_hex( + &ptr_bytes + .get(cursor..(cursor + TRIEPTR_SIZE).min(ptr_bytes.len())) + .unwrap_or(&[]) + ), + cursor + ); + *next_ptrs_buf = + TriePtr::from_bytes_compressed(ptr_bytes.get(cursor..).ok_or_else( + || Error::CorruptionError("ptr_bytes runs short".into()), + )?); + cursor = cursor + .checked_add(next_ptrs_buf.compressed_size()) + .ok_or_else(|| Error::OverflowError)?; + } + nextptr += 1; + } + trace!( + "Node {} sparse compressed ptrs ({} bytes): {}", + clear_ctrl_bits(*nid), + cursor, + &ptrs_fmt(&ptrs_buf) + ); + + // seek to the end of the decoded ptrs + // the +2 is for the nid and bitmap marker + r.seek(SeekFrom::Start( + ptrs_start_disk_ptr + .checked_add(u64::try_from(cursor + 2 + bitmap_size).expect("infallible")) + .expect("FATAL: read far too many bytes"), + )) + .inspect_err(|e| { + error!("Failed to seek to the end of the sparse compressed ptrs: {e:?}") + })?; + } else { + trace!("Node {} has dense compressed ptrs", clear_ctrl_bits(*nid)); + // this is a nearly-full ptrs list + // ptrs list is compressed, meaning each ptr might be a different size + let mut cursor = 0; + for nextptr in 0..num_ptrs { + let next_ptrs_buf = ptrs_buf + .get_mut(nextptr) + .ok_or_else(|| Error::CorruptionError("ptrs_buf runs short".into()))?; + *next_ptrs_buf = TriePtr::from_bytes_compressed( + ptr_bytes + .get(cursor..) 
+ .ok_or_else(|| Error::CorruptionError("ptr_bytes runs short".into()))?, + ); + cursor = cursor + .checked_add(next_ptrs_buf.compressed_size()) + .ok_or_else(|| Error::OverflowError)?; + } + trace!( + "Node {} dense compressed ptrs: {}", + clear_ctrl_bits(*nid), + &ptrs_fmt(&ptrs_buf) + ); + + // seek to the end of the decoded ptrs + // the +1 is for the nid + r.seek(SeekFrom::Start( + ptrs_start_disk_ptr + .checked_add(u64::try_from(cursor + 1).expect("infallible")) + .expect("FATAL: read far too many bytes"), + )) + .inspect_err(|e| { + error!("Failed to seek to the end of the dense compressed ptrs: {e:?}") + })?; + } + } else { + // ptrs list is not compressed + // iterate over the read-in bytes in chunks of TRIEPTR_SIZE and store them + // to `ptrs_buf` + trace!("Node {} has uncompressed ptrs", clear_ctrl_bits(*nid)); + let reading_ptrs = ptr_bytes + .chunks_exact(TRIEPTR_SIZE) + .zip(ptrs_buf.iter_mut()); + for (next_ptr_bytes, ptr_slot) in reading_ptrs { + *ptr_slot = TriePtr::from_bytes(next_ptr_bytes); + } } - Ok(*nid) + + Ok(clear_compressed(*nid)) } /// Calculate the hash of a TrieNode, given its childrens' hashes. +/// Returns the TrieHash pub fn get_node_hash + std::fmt::Debug>( node: &T, child_hashes: &[TrieHash], @@ -182,6 +474,7 @@ pub fn get_node_hash + std::fmt::Debug>( } /// Calculate the hash of a TrieLeaf +/// Returns the TrieHash pub fn get_leaf_hash(node: &TrieLeaf) -> TrieHash { let mut hasher = TrieHasher::new(); node.write_bytes(&mut hasher) @@ -194,6 +487,8 @@ pub fn get_leaf_hash(node: &TrieLeaf) -> TrieHash { ret } +/// Given a `TrieNodeType`, a slice of `TrieHash`, and a `BlockMap` for converting back-block +/// pointers to block hashes, compute the hash of the node. pub fn get_nodetype_hash_bytes( node: &TrieNodeType, child_hash_bytes: &[TrieHash], @@ -227,13 +522,19 @@ pub fn read_hash_bytes(f: &mut F) -> Result<[u8; TRIEHASH_ENCODED_SIZE] Ok(hashbytes) } +/// Low-level method for reading a block ID from a Read+Seek object. 
The block ID is +/// little-endian. +/// +/// Returns Ok(block-id) on success +/// Returns Err(CorruptionError(..)) if we run out of bytes to read (EOF) +/// Returns Err(IOError(..)) if we encounter a disk I/O error pub fn read_block_identifier(f: &mut F) -> Result { let mut bytes = [0u8; 4]; f.read_exact(&mut bytes).map_err(|e| { if e.kind() == ErrorKind::UnexpectedEof { Error::CorruptionError(format!( "Failed to read hash in full from {}", - f.seek(SeekFrom::Current(0)).unwrap() + f.stream_position().unwrap() )) } else { eprintln!("failed: {:?}", &e); @@ -245,7 +546,10 @@ pub fn read_block_identifier(f: &mut F) -> Result { } /// Low-level method for reading a node's hash bytes into a buffer from a Read-able and Seek-able struct. -/// The byte buffer must have sufficient space to hold the hash, or this program panics. +/// This function is only concerned with getting the bytes, not casting it to a TrieHash. +/// +/// Returns Ok(32-byte hash) on success. +/// Returns Err(IOError(..)) on seek error or disk I/O error pub fn read_node_hash_bytes( f: &mut F, ptr: &TriePtr, @@ -255,13 +559,18 @@ pub fn read_node_hash_bytes( read_hash_bytes(f) } -/// Read the root hash from a TrieFileStorage instance +/// Read the root hash from a TrieFileStorage instance. +/// This is always at the same location (s.root_trieptr()) +/// Returns Ok(root hash) on success +/// Returns Err(NotFoundError) if, for some reason, the storage medium doesn't have the root node +/// (should never happen) +/// Returns Err(IOError(..)) on storage I/O failure pub fn read_root_hash(s: &mut TrieStorageConnection) -> Result { let ptr = s.root_trieptr(); Ok(s.read_node_hash_bytes(&ptr)?) } -/// count the number of allocated children in a list of a node's children pointers. +/// Count the number of allocated children in a list of a node's children pointers. 
pub fn count_children(children: &[TriePtr]) -> usize { let mut cnt = 0; for child in children.iter() { @@ -272,7 +581,8 @@ pub fn count_children(children: &[TriePtr]) -> usize { cnt } -/// Read a node and its hash +/// Read a node and its hash. +/// Convenience wrapper around `inner_read_nodetype_at_head` pub fn read_nodetype( f: &mut F, ptr: &TriePtr, @@ -283,7 +593,10 @@ pub fn read_nodetype( read_nodetype_at_head(f, ptr.id()) } -/// Read a node +/// Read a node, but ignore its hash. +/// A hash of all 0's will be returned instead. +/// +/// Convenience wrapper around `inner_read_nodetype_at_head` pub fn read_nodetype_nohash( f: &mut F, ptr: &TriePtr, @@ -294,7 +607,8 @@ pub fn read_nodetype_nohash( read_nodetype_at_head_nohash(f, ptr.id()) } -/// Read a node and hash at the stream's current position +/// Read a node and hash at the stream's current position. +/// Convenience wrapper around `inner_read_nodetype_at_head` pub fn read_nodetype_at_head( f: &mut F, ptr_id: u8, @@ -307,7 +621,10 @@ pub fn read_nodetype_at_head( }) } -/// Read a node at the stream's current position +/// Read a node at the stream's current position. +/// Does not read the hash, and instead just returns the `TrieNodeType` +/// +/// Convenience wrapper around `inner_read_nodetype_at_head` pub fn read_nodetype_at_head_nohash( f: &mut F, ptr_id: u8, @@ -315,16 +632,27 @@ pub fn read_nodetype_at_head_nohash( inner_read_nodetype_at_head(f, ptr_id, false).map(|(node, _)| node) } -/// Deserialize a node. -/// Node wire format: +/// Deserialize a TrieNodeType and optionally its hash from the given Read+Seek object. +/// The given `ptr_id` identifies the expected node type. +/// +/// Node wire format for non-patch ("normal") nodes: +/// /// 0 32 33 33+X 33+X+Y /// |---------------|--|------------------|-----------| /// node hash id ptrs & ptr data path /// -/// X is fixed and determined by the TrieNodeType variant. -/// Y is variable, but no more than TrieHash::len(). 
+/// Node wire format for patch nodes: +/// +/// 0 32 33 33+X +/// |---------------|--|------------------| +/// base node hash id compressed ptrs /// -/// If `read_hash` is false, then the contents of the node hash are undefined. +/// Returns Ok(node, Some(hash)) if the node is found, and `read_hash` is true +/// Returns Ok(node, None) if the node is found, and `read_hash` is false +/// Returns Err(Patch(..)) if a `TrieNodePatch` is found instead of the targeted node +/// Returns Err(CorruptionError(..)) if the given `ptr_id` is not recognized, or the data read does +/// not decode to a valid node or patch. +/// Returns Err(IOError(..)) on disk I/O error fn inner_read_nodetype_at_head( f: &mut F, ptr_id: u8, @@ -339,47 +667,100 @@ fn inner_read_nodetype_at_head( }; let node = match TrieNodeID::from_u8(ptr_id).ok_or_else(|| { - Error::CorruptionError(format!("read_node_type: Unknown trie node type {}", ptr_id)) + Error::CorruptionError(format!( + "inner_read_nodetype_at_head: Unknown trie node type {}", + ptr_id + )) })? 
{ TrieNodeID::Node4 => { - let node = TrieNode4::from_bytes(f)?; + let node = TrieNode4::from_bytes(f).map_err(|e| { + if let Error::Patch(_, patch) = e { + Error::Patch(h, patch) + } else { + e + } + })?; TrieNodeType::Node4(node) } TrieNodeID::Node16 => { - let node = TrieNode16::from_bytes(f)?; + let node = TrieNode16::from_bytes(f).map_err(|e| { + if let Error::Patch(_, patch) = e { + Error::Patch(h, patch) + } else { + e + } + })?; TrieNodeType::Node16(node) } TrieNodeID::Node48 => { - let node = TrieNode48::from_bytes(f)?; + let node = TrieNode48::from_bytes(f).map_err(|e| { + if let Error::Patch(_, patch) = e { + Error::Patch(h, patch) + } else { + e + } + })?; TrieNodeType::Node48(Box::new(node)) } TrieNodeID::Node256 => { - let node = TrieNode256::from_bytes(f)?; + let node = TrieNode256::from_bytes(f).map_err(|e| { + if let Error::Patch(_, patch) = e { + Error::Patch(h, patch) + } else { + e + } + })?; TrieNodeType::Node256(Box::new(node)) } TrieNodeID::Leaf => { - let node = TrieLeaf::from_bytes(f)?; + let node = TrieLeaf::from_bytes(f).map_err(|e| { + if let Error::Patch(_, patch) = e { + Error::Patch(h, patch) + } else { + e + } + })?; TrieNodeType::Leaf(node) } TrieNodeID::Empty => { return Err(Error::CorruptionError( - "read_node_type: stored empty node type".to_string(), + "inner_read_nodetype_at_head: stored empty node type".to_string(), )) } + TrieNodeID::Patch => { + let patch = TrieNodePatch::consensus_deserialize(f).map_err(|e| { + Error::CorruptionError(format!( + "inner_read_nodetype_at_head: failed to read patch node: {e:?}" + )) + })?; + return Err(Error::Patch(h, patch)); + } }; Ok((node, h)) } -/// calculate how many bytes a node will be when serialized, including its hash. +/// Calculate how many bytes a node will be when serialized, including its hash. 
+/// This assumes that none of the trie nodes will be compressed pub fn get_node_byte_len(node: &TrieNodeType) -> usize { let hash_len = TRIEHASH_ENCODED_SIZE; let node_byte_len = node.byte_len(); hash_len + node_byte_len } -/// write all the bytes for a node, including its hash, to the given Writeable object. -/// Returns the number of bytes written. +/// calculate how many bytes a node will be when serialized, including its hash, using a compressed +/// representation. This includes considering whether or not the compressed representation will be +/// dense or sparse. +pub fn get_node_byte_len_compressed(node: &TrieNodeType) -> usize { + let hash_len = TRIEHASH_ENCODED_SIZE; + let node_byte_len = node.byte_len_compressed(); + hash_len + node_byte_len +} + +/// Write all the bytes for a node, including its hash, to the given Writeable object. +/// The list of child pointers will NOT be compressed. +/// Returns Ok(nw) on success, where `nw` is the number of bytes written. +/// Returns Err(IOError(..)) on disk I/O error pub fn write_nodetype_bytes( f: &mut F, node: &TrieNodeType, @@ -389,8 +770,25 @@ pub fn write_nodetype_bytes( f.write_all(hash.as_bytes())?; node.write_bytes(f)?; let end = f.stream_position().map_err(Error::IOError)?; + trace!("write_nodetype_bytes: {node:?} {hash:?} at {start}-{end}"); + Ok(end - start) +} + +/// Write all of the bytes for a node, including its hash, to the given Writable object. +/// The list of child pointers will be compressed as best as possible. +/// Returns Ok(nw) on success, where `nw` is the number of bytes written. 
+/// Returns Err(IOError(..)) on disk I/O error +pub fn write_nodetype_bytes_compressed( + f: &mut F, + node: &TrieNodeType, + hash: TrieHash, +) -> Result { + let start = f.stream_position().map_err(Error::IOError)?; + f.write_all(hash.as_bytes())?; + node.write_bytes_compressed(f)?; + let end = f.stream_position().map_err(Error::IOError)?; trace!( - "write_nodetype: {:?} {:?} at {}-{}", + "write_nodetype_bytes_compressed: {:?} {:?} at {}-{}", node, &hash, start, @@ -400,7 +798,18 @@ pub fn write_nodetype_bytes( Ok(end - start) } +/// Write out the path to the given writable object. +/// This includes the length prefix and path bytes +/// +/// Returns Ok(()) on success +/// Returns Err(CorruptionError(..)) if `path.len()` is greater than 32. +/// Returns Err(IOError(..)) on disk I/O error pub fn write_path_to_bytes(path: &[u8], w: &mut W) -> Result<(), Error> { + if path.len() > 32 { + return Err(Error::CorruptionError( + "Invali path -- greater than 32 bytes".into(), + )); + } w.write_all(&[path.len() as u8])?; w.write_all(path)?; Ok(()) diff --git a/stackslib/src/chainstate/stacks/index/cache.rs b/stackslib/src/chainstate/stacks/index/cache.rs index 8e4569e8de4..4b4ee9de4be 100644 --- a/stackslib/src/chainstate/stacks/index/cache.rs +++ b/stackslib/src/chainstate/stacks/index/cache.rs @@ -18,10 +18,9 @@ use std::collections::hash_map::Entry; use std::collections::HashMap; use std::hash::Hash; -use stacks_common::types::chainstate::TrieHash; - use crate::chainstate::stacks::index::node::{is_backptr, TrieNodeID, TrieNodeType, TriePtr}; use crate::chainstate::stacks::index::MarfTrieId; +use crate::types::chainstate::TrieHash; /// Fully-qualified address of a Trie node. Includes both the block's blob rowid and the pointer within the /// block's blob as to where it is stored. 
@@ -329,530 +328,3 @@ impl TrieCache { self.state_ref().load_block_id(block_hash) } } - -#[cfg(test)] -pub mod test { - use std::time::SystemTime; - use std::{cmp, fs}; - - use clarity::util::hash::to_hex; - use stacks_common::util::hash::Sha512Trunc256Sum; - - use super::*; - use crate::chainstate::stacks::index::marf::*; - use crate::chainstate::stacks::index::storage::*; - use crate::chainstate::stacks::index::*; - - /// Deterministic random keys to insert - pub fn make_test_insert_data( - num_inserts_per_block: u64, - num_blocks: u64, - ) -> Vec> { - let mut data = vec![0u8; 32]; - let mut ret = vec![]; - - for blk in 0..num_blocks { - let mut block_data = vec![]; - test_debug!("Make block {}", blk); - for val in 0..num_inserts_per_block { - let path_bytes = Sha512Trunc256Sum::from_data(&data).as_bytes().to_vec(); - data.copy_from_slice(&path_bytes[0..32]); - - let path = to_hex(&path_bytes); - - let value_bytes = Sha512Trunc256Sum::from_data(&data).as_bytes().to_vec(); - data.copy_from_slice(&value_bytes[0..32]); - - let mut value_bytes_slice = [0u8; 40]; - value_bytes_slice[0..32].copy_from_slice(&value_bytes); - - let value = MARFValue(value_bytes_slice); - block_data.push((path, value)); - } - ret.push(block_data); - } - ret - } - - fn test_marf_with_cache( - test_name: &str, - cache_strategy: &str, - hash_strategy: TrieHashCalculationMode, - data: &[Vec<(String, MARFValue)>], - batch_size: Option, - ) -> TrieHash { - let test_file = if test_name == ":memory:" { - test_name.to_string() - } else { - let test_dir = format!("/tmp/stacks-marf-tests/{}", test_name); - if fs::metadata(&test_dir).is_ok() { - fs::remove_dir_all(&test_dir).unwrap(); - } - fs::create_dir_all(&test_dir).unwrap(); - - let test_file = format!( - "{}/marf-cache-{}-{:?}.sqlite", - &test_dir, cache_strategy, hash_strategy - ); - test_file - }; - - let marf_opts = MARFOpenOpts::new(hash_strategy, cache_strategy, true); - let f = TrieFileStorage::open(&test_file, marf_opts).unwrap(); - let 
mut marf = MARF::from_storage(f); - let mut last_block_header = BlockHeaderHash::sentinel(); - let batch_size = batch_size.unwrap_or(0); - - for (i, block_data) in data.iter().enumerate() { - test_debug!("Write block {}", i); - let mut block_hash_bytes = [0u8; 32]; - block_hash_bytes[0..8].copy_from_slice(&(i as u64).to_be_bytes()); - - let block_header = BlockHeaderHash(block_hash_bytes); - marf.begin(&last_block_header, &block_header).unwrap(); - - if batch_size > 0 { - for b in (0..block_data.len()).step_by(batch_size) { - let batch = &block_data[b..cmp::min(block_data.len(), b + batch_size)]; - let keys: Vec<_> = batch.iter().map(|(k, _)| k.clone()).collect(); - let values = batch.iter().map(|(_, v)| v.clone()).collect(); - marf.insert_batch(&keys, values).unwrap(); - } - } else { - for (key, value) in block_data.iter() { - let path = TrieHash::from_key(key); - let leaf = TrieLeaf::from_value(&[], value.clone()); - marf.insert_raw(path, leaf).unwrap(); - } - } - - marf.commit().unwrap(); - last_block_header = block_header; - } - - let write_bench = marf.borrow_storage_backend().get_benchmarks(); - marf.borrow_storage_backend().reset_benchmarks(); - eprintln!("MARF bench writes: {:#?}", &write_bench); - - debug!("---------"); - debug!("MARF gets"); - debug!("---------"); - - let mut total_read_time = 0; - let mut root_hash = TrieHash([0u8; 32]); - for (i, block_data) in data.iter().enumerate() { - test_debug!("Read block {}", i); - for (key, value) in block_data.iter() { - let path = TrieHash::from_key(key); - let marf_leaf = TrieLeaf::from_value(&[], value.clone()); - - let read_time = SystemTime::now(); - let leaf = MARF::get_path( - &mut marf.borrow_storage_backend(), - &last_block_header, - &path, - ) - .unwrap() - .unwrap(); - - let read_time = read_time.elapsed().unwrap().as_nanos(); - total_read_time += read_time; - - assert_eq!(leaf.data.to_vec(), marf_leaf.data.to_vec()); - } - } - - let read_bench = marf.borrow_storage_backend().get_benchmarks(); - 
eprintln!( - "MARF bench reads ({} total): {:#?}", - total_read_time, &read_bench - ); - - let mut bench = write_bench; - bench.add(&read_bench); - - eprintln!("MARF bench total: {:#?}", &bench); - - root_hash = marf.get_root_hash_at(&last_block_header).unwrap(); - eprintln!("root hash at {:?}: {:?}", &last_block_header, &root_hash); - root_hash - } - - #[test] - fn test_marf_node_cache_noop() { - let test_data = make_test_insert_data(128, 128); - let root_hash = test_marf_with_cache( - "test_marf_node_cache_noop", - "noop", - TrieHashCalculationMode::Immediate, - &test_data, - None, - ); - eprintln!("Final root hash is {}", root_hash); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_noop", - "noop", - TrieHashCalculationMode::Immediate, - &test_data, - Some(64), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_noop", - "noop", - TrieHashCalculationMode::Immediate, - &test_data, - Some(128), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_noop", - "noop", - TrieHashCalculationMode::Immediate, - &test_data, - Some(67), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_noop", - "noop", - TrieHashCalculationMode::Immediate, - &test_data, - Some(13), - ); - assert_eq!(root_hash, root_hash_batched); - } - - #[test] - fn test_marf_node_cache_noop_deferred() { - let test_data = make_test_insert_data(128, 128); - let root_hash = test_marf_with_cache( - "test_marf_node_cache_noop_deferred", - "noop", - TrieHashCalculationMode::Immediate, - &test_data, - None, - ); - eprintln!("Final root hash is {}", root_hash); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_noop_deferred", - "noop", - TrieHashCalculationMode::Deferred, - &test_data, - None, - ); - assert_eq!(root_hash, root_hash_batched); - - let 
root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_noop_deferred", - "noop", - TrieHashCalculationMode::Deferred, - &test_data, - Some(64), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_noop_deferred", - "noop", - TrieHashCalculationMode::Deferred, - &test_data, - Some(128), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_noop_deferred", - "noop", - TrieHashCalculationMode::Deferred, - &test_data, - Some(67), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_noop_deferred", - "noop", - TrieHashCalculationMode::Deferred, - &test_data, - Some(13), - ); - assert_eq!(root_hash, root_hash_batched); - } - - /* - #[test] - fn test_marf_node_cache_ram_noop_deferred() { - let test_data = make_test_insert_data(16384, 32); - test_marf_with_cache( - ":memory:", - "noop", - TrieHashCalculationMode::Deferred, - &test_data, - None, - ); - } - - #[test] - fn test_marf_node_cache_big_noop_deferred() { - let test_data = make_test_insert_data(16384, 32); - test_marf_with_cache( - "test_marf_node_cache_big_noop_deferred", - "noop", - TrieHashCalculationMode::Deferred, - &test_data, - None, - ); - } - */ - - #[test] - fn test_marf_node_cache_everything() { - let test_data = make_test_insert_data(128, 128); - let root_hash = test_marf_with_cache( - "test_marf_node_cache_everything", - "noop", - TrieHashCalculationMode::Immediate, - &test_data, - None, - ); - eprintln!("Final root hash is {}", root_hash); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_everything", - "everything", - TrieHashCalculationMode::Immediate, - &test_data, - Some(64), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_everything", - "everything", - 
TrieHashCalculationMode::Immediate, - &test_data, - Some(128), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_everything", - "everything", - TrieHashCalculationMode::Immediate, - &test_data, - Some(67), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_everything", - "everything", - TrieHashCalculationMode::Immediate, - &test_data, - Some(13), - ); - assert_eq!(root_hash, root_hash_batched); - } - - #[test] - fn test_marf_node_cache_everything_deferred() { - let test_data = make_test_insert_data(128, 128); - let root_hash = test_marf_with_cache( - "test_marf_node_cache_everything_deferred", - "noop", - TrieHashCalculationMode::Immediate, - &test_data, - None, - ); - eprintln!("Final root hash is {}", root_hash); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_everything_deferred", - "everything", - TrieHashCalculationMode::Deferred, - &test_data, - Some(64), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_everything_deferred", - "everything", - TrieHashCalculationMode::Deferred, - &test_data, - Some(128), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_everything_deferred", - "everything", - TrieHashCalculationMode::Deferred, - &test_data, - Some(67), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_everything_deferred", - "everything", - TrieHashCalculationMode::Deferred, - &test_data, - Some(13), - ); - assert_eq!(root_hash, root_hash_batched); - } - - #[test] - fn test_marf_node_cache_node256() { - let test_data = make_test_insert_data(128, 128); - let root_hash = test_marf_with_cache( - "test_marf_node_cache_node256", - "noop", - TrieHashCalculationMode::Immediate, - 
&test_data, - None, - ); - eprintln!("Final root hash is {}", root_hash); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_node256", - "node256", - TrieHashCalculationMode::Immediate, - &test_data, - Some(64), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_node256", - "node256", - TrieHashCalculationMode::Immediate, - &test_data, - Some(128), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_node256", - "node256", - TrieHashCalculationMode::Immediate, - &test_data, - Some(67), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_node256", - "node256", - TrieHashCalculationMode::Immediate, - &test_data, - Some(13), - ); - assert_eq!(root_hash, root_hash_batched); - } - - #[test] - fn test_marf_node_cache_node256_deferred() { - let test_data = make_test_insert_data(128, 128); - let root_hash = test_marf_with_cache( - "test_marf_node_cache_node256_deferred", - "noop", - TrieHashCalculationMode::Immediate, - &test_data, - None, - ); - eprintln!("Final root hash is {}", root_hash); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_node256_deferred", - "node256", - TrieHashCalculationMode::Deferred, - &test_data, - Some(64), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_node256_deferred", - "node256", - TrieHashCalculationMode::Deferred, - &test_data, - Some(128), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_node256_deferred", - "node256", - TrieHashCalculationMode::Deferred, - &test_data, - Some(67), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_node256_deferred", - "node256", - 
TrieHashCalculationMode::Deferred, - &test_data, - Some(13), - ); - assert_eq!(root_hash, root_hash_batched); - } - - #[test] - fn test_marf_node_cache_node256_deferred_15500() { - let test_data = make_test_insert_data(15500, 10); - let root_hash = test_marf_with_cache( - "test_marf_node_cache_node256_deferred_15500", - "noop", - TrieHashCalculationMode::Immediate, - &test_data, - None, - ); - eprintln!("Final root hash is {}", root_hash); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_node256_deferred_15500", - "node256", - TrieHashCalculationMode::Deferred, - &test_data, - Some(64), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_node256_deferred_15500", - "node256", - TrieHashCalculationMode::Deferred, - &test_data, - Some(128), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_node256_deferred_15500", - "node256", - TrieHashCalculationMode::Deferred, - &test_data, - Some(67), - ); - assert_eq!(root_hash, root_hash_batched); - - let root_hash_batched = test_marf_with_cache( - "test_marf_node_cache_node256_deferred_15500", - "node256", - TrieHashCalculationMode::Deferred, - &test_data, - Some(13), - ); - assert_eq!(root_hash, root_hash_batched); - } -} diff --git a/stackslib/src/chainstate/stacks/index/file.rs b/stackslib/src/chainstate/stacks/index/file.rs index e835e7563b6..4d4bf703706 100644 --- a/stackslib/src/chainstate/stacks/index/file.rs +++ b/stackslib/src/chainstate/stacks/index/file.rs @@ -21,9 +21,8 @@ use std::path::Path; use std::{env, fs, io}; #[cfg(test)] -use clarity::types::sqlite::NO_PARAMS; +use rusqlite::params; use rusqlite::Connection; -use stacks_common::types::chainstate::TrieHash; use crate::chainstate::stacks::index::bits::{ read_hash_bytes, read_nodetype_at_head, read_nodetype_at_head_nohash, @@ -33,6 +32,7 @@ use 
crate::chainstate::stacks::index::storage::NodeHashReader; #[cfg(test)] use crate::chainstate::stacks::index::storage::TrieStorageConnection; use crate::chainstate::stacks::index::{trie_sql, Error, MarfTrieId}; +use crate::types::chainstate::TrieHash; use crate::util_lib::db::sql_vacuum; /// Mapping between block IDs and trie offsets @@ -145,7 +145,8 @@ impl TrieFile { let trie_blob = { let mut fd = trie_sql::open_trie_blob_readonly(db, block_id)?; let mut trie_blob = vec![]; - fd.read_to_end(&mut trie_blob)?; + fd.read_to_end(&mut trie_blob) + .inspect_err(|e| error!("Failed to read trie blob {block_id} from DB: {e:}"))?; trie_blob }; Ok(trie_blob) @@ -158,7 +159,8 @@ impl TrieFile { self.seek(SeekFrom::Start(offset))?; let mut buf = vec![0u8; length as usize]; - self.read_exact(&mut buf)?; + self.read_exact(&mut buf) + .inspect_err(|e| error!("Failed to read trie blob {block_id}: {e:}"))?; Ok(buf) } @@ -412,7 +414,7 @@ impl TrieFile { ) -> Result, Error> { let mut s = db.prepare("SELECT block_hash, external_offset FROM marf_data WHERE unconfirmed = 0 ORDER BY block_hash")?; - let rows = s.query_and_then(NO_PARAMS, |row| { + let rows = s.query_and_then(params![], |row| { let block_hash: T = row.get_unwrap("block_hash"); let offset_i64: i64 = row.get_unwrap("external_offset"); let offset = offset_i64 as u64; diff --git a/stackslib/src/chainstate/stacks/index/marf.rs b/stackslib/src/chainstate/stacks/index/marf.rs index 5a2d5f2e67c..31c61c02ab5 100644 --- a/stackslib/src/chainstate/stacks/index/marf.rs +++ b/stackslib/src/chainstate/stacks/index/marf.rs @@ -16,20 +16,20 @@ use std::ops::DerefMut; use rusqlite::{Connection, Transaction}; -use stacks_common::types::chainstate::TrieHash; -use stacks_common::util::hash::Sha512Trunc256Sum; use super::storage::ReopenedTrieStorageConnection; use crate::chainstate::stacks::index::bits::{get_leaf_hash, get_node_hash}; use crate::chainstate::stacks::index::node::{ - clear_backptr, is_backptr, set_backptr, CursorError, 
TrieCursor, TrieNode256, TrieNodeID, - TrieNodeType, TriePtr, + clear_backptr, node_copy_update_ptrs, set_backptr, CursorError, TrieCowPtr, TrieCursor, + TrieNode256, TrieNodeID, TrieNodeType, TriePtr, }; use crate::chainstate::stacks::index::storage::{ TrieFileStorage, TrieHashCalculationMode, TrieStorageConnection, TrieStorageTransaction, }; use crate::chainstate::stacks::index::trie::Trie; use crate::chainstate::stacks::index::{Error, MARFValue, MarfTrieId, TrieLeaf, TrieMerkleProof}; +use crate::types::chainstate::TrieHash; +use crate::util::hash::Sha512Trunc256Sum; use crate::util_lib::db::Error as db_error; pub const BLOCK_HASH_TO_HEIGHT_MAPPING_KEY: &str = "__MARF_BLOCK_HASH_TO_HEIGHT"; @@ -64,6 +64,8 @@ pub struct MARFOpenOpts { pub external_blobs: bool, /// unconditionally do a DB migration (used for testing) pub force_db_migrate: bool, + /// compress the MARF + pub compress: bool, } impl MARFOpenOpts { @@ -73,6 +75,7 @@ impl MARFOpenOpts { cache_strategy: "noop".to_string(), external_blobs: false, force_db_migrate: false, + compress: false, } } @@ -86,20 +89,32 @@ impl MARFOpenOpts { cache_strategy: cache_strategy.to_string(), external_blobs, force_db_migrate: false, + compress: false, } } + pub fn with_compression(mut self, compression: bool) -> Self { + self.compress = compression; + self + } + #[cfg(test)] pub fn all() -> Vec { vec![ + /* MARFOpenOpts::new(TrieHashCalculationMode::Immediate, "noop", false), MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", false), MARFOpenOpts::new(TrieHashCalculationMode::Immediate, "noop", true), MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true), - MARFOpenOpts::new(TrieHashCalculationMode::Immediate, "everything", false), - MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "everything", false), - MARFOpenOpts::new(TrieHashCalculationMode::Immediate, "everything", true), - MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "everything", true), + 
MARFOpenOpts::new(TrieHashCalculationMode::Immediate, "noop", false) + .with_compression(true), + MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", false) + .with_compression(true), + MARFOpenOpts::new(TrieHashCalculationMode::Immediate, "noop", true) + .with_compression(true), + */ + MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true) + .with_compression(true), ] } } @@ -687,30 +702,17 @@ impl MARF { ); // this node had a child for this chr at one point - let (node, node_hash, node_ptr) = Trie::walk_backptr(storage, &ptr, cursor)?; + let (_, node, node_hash, node_ptr) = Trie::walk_backptr(storage, &ptr, cursor)?; Ok((node, node_hash, node_ptr, ptr.back_block)) } } } - fn node_copy_update_ptrs(ptrs: &mut [TriePtr], child_block_id: u32) { - for pointer in ptrs.iter_mut() { - // if the node is empty, do nothing, if it's a back pointer, - if pointer.id() == TrieNodeID::Empty as u8 || is_backptr(pointer.id()) { - continue; - } else { - // make backptr - pointer.back_block = child_block_id; - pointer.id = set_backptr(pointer.id()); - } - } - } - fn node_copy_update(node: &mut TrieNodeType, child_block_id: u32) -> TrieHash { let hash = match node { TrieNodeType::Leaf(leaf) => get_leaf_hash(leaf), _ => { - MARF::::node_copy_update_ptrs(node.ptrs_mut(), child_block_id); + node_copy_update_ptrs(node.ptrs_mut(), child_block_id); TrieHash::from_data(&[]) } }; @@ -735,10 +737,15 @@ impl MARF { ); let (cur_block_hash, cur_block_id) = storage.get_cur_block_and_id(); + let child_backptr = node.walk(chr).ok_or_else(|| Error::NotFoundError)?; + let (mut child_node, _, child_ptr, _) = MARF::walk_backptr(storage, node, chr, cursor)?; + let child_block_hash = storage.get_cur_block(); let child_block_identifier = storage.get_cur_block_identifier()?; + child_node.set_cow_ptr(TrieCowPtr::new(child_block_hash.clone(), child_backptr)); + // update child_node with new ptrs and hashes storage.open_block_maybe_id(&cur_block_hash, cur_block_id)?; let child_hash = 
MARF::::node_copy_update(&mut child_node, child_block_identifier); @@ -772,6 +779,15 @@ impl MARF { }); let (mut prev_root, _) = Trie::read_root(storage)?; + if prev_block_hash != &T::sentinel() { + let mut prev_root_backptr = TriePtr::new( + set_backptr(TrieNodeID::Node256 as u8), + 0, + storage.root_ptr(), + ); + prev_root_backptr.back_block = prev_block_identifier; + prev_root.set_cow_ptr(TrieCowPtr::new(prev_block_hash.clone(), prev_root_backptr)); + } let new_root_hash = MARF::::node_copy_update(&mut prev_root, prev_block_identifier); storage.open_block_maybe_id(&cur_block_hash, cur_block_id)?; @@ -860,6 +876,11 @@ impl MARF { if !node.is_leaf() || clear_backptr(node_ptr.id()) != TrieNodeID::Leaf as u8 { + trace!( + "Out-of-path but encountered at {:?}: {:?}", + &node_ptr, + &node + ); error!("Out-of-path but encountered a non-leaf"); return Err(Error::CorruptionError( "Non-leaf encountered at end of path".to_string(), @@ -963,6 +984,7 @@ impl MARF { None => { // end of path. Must be at a leaf. 
if clear_backptr(cursor.ptr().id()) != TrieNodeID::Leaf as u8 { + trace!("Out-of-path but encountered at {:?}", &cursor.ptr()); return Err(Error::CorruptionError( "Non-leaf encountered at end of path".to_string(), )); @@ -1055,8 +1077,8 @@ impl MARF { })?; // a NotFoundError _here_ means that the key doesn't exist in this view - let (cursor, node) = MARF::walk(storage, block_hash, path).inspect_err(|e| { - trace!("Failed to look up key {block_hash:?} {path:?}: {e:?}"); + let (cursor, node) = MARF::walk(storage, block_hash, path).inspect_err(|_e| { + trace!("Failed to look up key {block_hash:?} {path:?}: {_e:?}"); })?; // both of these get caught by get_by_key and turned into Ok(None) diff --git a/stackslib/src/chainstate/stacks/index/mod.rs b/stackslib/src/chainstate/stacks/index/mod.rs index 5b920784560..d5b6592b5c3 100644 --- a/stackslib/src/chainstate/stacks/index/mod.rs +++ b/stackslib/src/chainstate/stacks/index/mod.rs @@ -40,6 +40,8 @@ pub mod trie_sql; #[cfg(test)] pub mod test; +use crate::chainstate::stacks::index::node::TrieNodePatch; + #[derive(Debug)] pub struct TrieMerkleProof(pub Vec>); @@ -147,6 +149,8 @@ impl MarfTrieId for BurnchainHeaderHash {} #[cfg(test)] impl MarfTrieId for BlockHeaderHash {} +pub const MAX_PATCH_DEPTH: u32 = 16; + /// Structure that holds the actual data in a MARF leaf node. /// It only stores the hash of some value string, but we add 8 extra bytes for future extensions. /// If not used (the rule today), then they should all be 0. 
@@ -257,6 +261,9 @@ pub enum Error { CursorError(node::CursorError), RestoreMarfBlockError(Box), NonMatchingForks([u8; 32], [u8; 32]), + OverflowError, + Patch(Option, TrieNodePatch), + NodeTooDeep, } impl From for Error { @@ -322,6 +329,11 @@ impl fmt::Display for Error { Error::RequestedIdentifierForExtensionTrie => { write!(f, "BUG: MARF requested the identifier for a RAM trie") } + Error::OverflowError => write!(f, "Overflow"), + Error::Patch(ref _h, ref p) => { + write!(f, "Read patch node instead of expected node: {p:?}") + } + Error::NodeTooDeep => write!(f, "Node is too deeply buried under patches"), } } } diff --git a/stackslib/src/chainstate/stacks/index/node.rs b/stackslib/src/chainstate/stacks/index/node.rs index 1d0332dd1e0..555cb2d321b 100644 --- a/stackslib/src/chainstate/stacks/index/node.rs +++ b/stackslib/src/chainstate/stacks/index/node.rs @@ -14,19 +14,19 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . 
-use std::io::{Read, Write}; +use std::io::{Read, Seek, Write}; use std::{error, fmt}; -use stacks_common::codec::{read_next, Error as codec_error, StacksMessageCodec}; -use stacks_common::types::chainstate::{TrieHash, BLOCK_HEADER_HASH_ENCODED_SIZE}; -use stacks_common::util::hash::to_hex; - use crate::chainstate::stacks::index::bits::{ - get_path_byte_len, get_ptrs_byte_len, path_from_bytes, ptrs_from_bytes, write_path_to_bytes, + get_compressed_ptrs_size, get_path_byte_len, get_ptrs_byte_len, get_ptrs_byte_len_compressed, + get_sparse_ptrs_bitmap_size, path_from_bytes, ptrs_from_bytes, write_path_to_bytes, }; use crate::chainstate::stacks::index::{ BlockMap, ClarityMarfTrieId, Error, MARFValue, MarfTrieId, TrieLeaf, MARF_VALUE_ENCODED_SIZE, }; +use crate::codec::{read_next, write_next, Error as codec_error, StacksMessageCodec}; +use crate::types::chainstate::{TrieHash, BLOCK_HEADER_HASH_ENCODED_SIZE}; +use crate::util::hash::to_hex; #[derive(Debug, Clone, PartialEq)] pub enum CursorError { @@ -52,15 +52,19 @@ impl error::Error for CursorError { } // All numeric values of a Trie node when encoded. -// They are all 7-bit numbers -- the 8th bit is used to indicate whether or not the value +// They are all 6-bit numbers +// * the 8th bit is used to indicate whether or not the value // identifies a back-pointer to be followed. +// * the 7th bit is used to indicate whether or not the ptrs +// are compressed. This bit is cleared on read. define_u8_enum!(TrieNodeID { Empty = 0, Leaf = 1, Node4 = 2, Node16 = 3, Node48 = 4, - Node256 = 5 + Node256 = 5, + Patch = 6 }); /// A node ID encodes a back-pointer if its high bit is set @@ -78,8 +82,32 @@ pub fn clear_backptr(id: u8) -> u8 { id & 0x7f } +/// Is this node compressed? 
+pub fn is_compressed(id: u8) -> bool { + id & 0x40 != 0 +} + +/// Set the compressed bit +pub fn set_compressed(id: u8) -> u8 { + id | 0x40 +} + +/// Clear the compressed bit +pub fn clear_compressed(id: u8) -> u8 { + id & 0xbf +} + +/// Clear all control bits (backptr and compressed) +pub fn clear_ctrl_bits(id: u8) -> u8 { + id & 0x3f +} + // Byte writing operations for pointer lists, paths. +/// Write out the list of TriePtrs to the given Write object. +/// The written pointers will NOT be compressed. +/// Returns Ok(()) on success +/// Returns Err(IOError(..)) on disk I/O error fn write_ptrs_to_bytes(ptrs: &[TriePtr], w: &mut W) -> Result<(), Error> { for ptr in ptrs.iter() { ptr.write_bytes(w)?; @@ -87,6 +115,96 @@ fn write_ptrs_to_bytes(ptrs: &[TriePtr], w: &mut W) -> Result<(), Erro Ok(()) } +/// Write the list of TriePtrs to the given Write object. +/// The given `id` is a node ID with some control bits set -- in particular, the compressed bit. +/// If the compressed bit is set, then the TriePtr list will be compressed as best as possible +/// before written. See `bits::ptrs_to_bytes()` for details. 
+/// +/// Returns Ok(()) on success +/// Returns Err(CorruptionError(..)) if the id does not correspond to a valid node ID or is a patch +/// node ID +/// Returns Err(IOError(..)) on disk I/O error +fn write_ptrs_to_bytes_compressed( + id: u8, + ptrs: &[TriePtr], + w: &mut W, +) -> Result<(), Error> { + let Some(node_id) = TrieNodeID::from_u8(id) else { + return Err(Error::CorruptionError( + "Tried to store invalid trie node ID".to_string(), + )); + }; + if node_id == TrieNodeID::Patch { + // NB the only proper way to store a patch node is to have it dumped as part of a TrieRAM + return Err(Error::CorruptionError( + "Tried to store patch node's ptrs improperly".to_string(), + )); + } + + let Some((ptrs_size, is_sparse)) = get_compressed_ptrs_size(id, ptrs) else { + // doesn't apply -- this node has no ptrs + return Ok(()); + }; + + if is_sparse { + // do a sparse write -- just write the bitmap and the non-empty trieptrs. + // the first byte is 0xff to indicate that this is a sparse list, since 0xff cannot be a + // valid trie node ID + w.write_all(&[0xff])?; + + // compute the bitmap + let bitmap_size = get_sparse_ptrs_bitmap_size(id).ok_or_else(|| { + Error::CorruptionError(format!("No bitmap size defined for node id {id}")) + })?; + + let mut bitmap = vec![0u8; bitmap_size]; + for (i, ptr) in ptrs.iter().enumerate() { + if ptr.id() != TrieNodeID::Empty as u8 { + // SAFETY: have checked ptrs.len() against bitmap size + let bi = i / 8; + let bt = i % 8; + let mask = 1u8 << bt; + let byte_mut = bitmap + .get_mut(bi) + .ok_or_else(|| Error::CorruptionError("bitmap not long enough".into()))?; + *byte_mut |= mask; + } + } + trace!( + "Write sparse compressed ptrs list ({} bytes) for node {}; bitmap {}", + ptrs_size, + id, + to_hex(&bitmap) + ); + + // write out bitmap + w.write_all(&bitmap)?; + + // write out non-empty ptrs + for ptr in ptrs.iter() { + if ptr.id() != TrieNodeID::Empty as u8 { + let mut byte_buffer = vec![]; + ptr.write_bytes_compressed(&mut 
byte_buffer)?; + trace!("write sparse ptr {}", &to_hex(&byte_buffer)); + ptr.write_bytes_compressed(w)?; + } + } + return Ok(()); + } + + // ptrs are not sparse enough. + // compute a bitmap of which ptrs are non-empty + trace!( + "Write dense compressed ptrs list ({} bytes) for node {}", + ptrs_size, + id + ); + for ptr in ptrs.iter() { + ptr.write_bytes_compressed(w)?; + } + Ok(()) +} + fn ptrs_consensus_hash( ptrs: &[TriePtr], map: &mut M, @@ -98,6 +216,43 @@ fn ptrs_consensus_hash( Ok(()) } +/// Copy-on-write pointer to a node. When the MARF writes a new key/value pair, it copies +/// intermediate nodes from the parent trie into the new trie being built. This struct is a +/// pointer stored in the new trie's nodes which point back to the node it was copied from. +/// +/// This data is not stored anywhere. It is used instead to compute TrieNodePatch nodes to write +/// to disk as a space-efficient alternative to copying over the same lightly-modified node over +/// and over again. +/// +/// Fields are (trie block hash holding the node, pointer to the node in the trie) +#[derive(Clone, PartialEq, Copy)] +pub struct TrieCowPtr([u8; 32], TriePtr); + +impl TrieCowPtr { + pub fn new(trie_id: T, ptr: TriePtr) -> Self { + Self(trie_id.to_bytes(), ptr) + } + + pub fn block_id(&self) -> T { + T::from_bytes(self.0) + } + + pub fn ptr(&self) -> &TriePtr { + &self.1 + } +} + +impl fmt::Debug for TrieCowPtr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "TrieCowPtr({},{})", + &to_hex(&self.0), + &ptrs_fmt(&[self.1]) + ) + } +} + /// All Trie nodes implement the following methods: pub trait TrieNode { /// Node ID for encoding/decoding @@ -118,9 +273,9 @@ pub trait TrieNode { fn replace(&mut self, ptr: &TriePtr) -> bool; /// Read an encoded instance of this node from a byte stream and instantiate it. 
- fn from_bytes(r: &mut R) -> Result + fn from_bytes(r: &mut R) -> Result where - Self: std::marker::Sized; + Self: Sized; /// Get a reference to the children of this node. fn ptrs(&self) -> &[TriePtr]; @@ -131,13 +286,37 @@ pub trait TrieNode { /// Construct a TrieNodeType from a TrieNode fn as_trie_node_type(&self) -> TrieNodeType; + /// Get the ptr to the node we were copied from (on COW) + fn get_cow_ptr(&self) -> Option<&TrieCowPtr>; + + /// Set the ptr to the node we were copied from (on COW) + fn set_cow_ptr(&mut self, cowptr: TrieCowPtr); + + /// Apply a list of TrieNodePatches to produce this node + fn apply_patches( + self, + patches: &[(u32, TriePtr, TrieNodePatch)], + cur_block_id: u32, + ) -> Option + where + Self: Sized; + /// Encode this node instance into a byte stream and write it to w. + /// The TriePtrs willl NOT be compressed fn write_bytes(&self, w: &mut W) -> Result<(), Error> { w.write_all(&[self.id()])?; write_ptrs_to_bytes(self.ptrs(), w)?; write_path_to_bytes(self.path().as_slice(), w) } + /// Encode this node instance into a byte stream and write it to w. + /// The TriePtrs will be compressed to the smallest possible size. + fn write_bytes_compressed(&self, w: &mut W) -> Result<(), Error> { + w.write_all(&[set_compressed(self.id())])?; + write_ptrs_to_bytes_compressed(self.id(), self.ptrs(), w)?; + write_path_to_bytes(self.path().as_slice(), w) + } + #[cfg(test)] fn to_bytes(&self) -> Vec { let mut r = Vec::new(); @@ -150,6 +329,11 @@ pub trait TrieNode { fn byte_len(&self) -> usize { get_ptrs_byte_len(self.ptrs()) + get_path_byte_len(self.path()) } + + /// Calculate how many bytes this node will take to encode. 
+ fn byte_len_compressed(&self) -> usize { + get_ptrs_byte_len_compressed(self.id(), self.ptrs()) + get_path_byte_len(self.path()) + } } /// Trait for types that can serialize to consensus bytes @@ -193,13 +377,14 @@ pub struct TriePtr { } pub const TRIEPTR_SIZE: usize = 10; // full size of a TriePtr +pub const TRIEPTR_SIZE_COMPRESSED: usize = 6; // full size of a compressed TriePtr pub fn ptrs_fmt(ptrs: &[TriePtr]) -> String { let mut strs = vec![]; for ptr in ptrs.iter() { if ptr.id != TrieNodeID::Empty as u8 { strs.push(format!( - "id{}chr{:02x}ptr{}bblk{}", + "id({})chr({:02x})ptr({})bblk({})", ptr.id, ptr.chr, ptr.ptr, ptr.back_block )) } @@ -274,6 +459,16 @@ impl TriePtr { Ok(()) } + #[inline] + pub fn write_bytes_compressed(&self, w: &mut W) -> Result<(), Error> { + w.write_all(&[set_compressed(self.id()), self.chr()])?; + w.write_all(&self.ptr().to_be_bytes())?; + if is_backptr(self.id()) { + w.write_all(&self.back_block().to_be_bytes())?; + } + Ok(()) + } + /// The parts of a child pointer that are relevant for consensus are only its ID, path /// character, and referred-to block hash. The software doesn't care about the details of how/where /// nodes are stored. @@ -313,6 +508,67 @@ impl TriePtr { back_block, } } + + /// Load up this TriePtr from a slice of bytes, assuming that they represent a compressed + /// TriePtr. A TriePtr that is compressed will not have a stored `back_block` field if the + /// node ID does not have the backptr bit set. 
+ #[inline] + #[allow(clippy::indexing_slicing)] + pub fn from_bytes_compressed(bytes: &[u8]) -> TriePtr { + assert!(bytes.len() >= TRIEPTR_SIZE_COMPRESSED); + let id = clear_compressed(bytes[0]); + let chr = bytes[1]; + let ptr = u32::from_be_bytes([bytes[2], bytes[3], bytes[4], bytes[5]]); + + let back_block = if is_backptr(id) { + assert!(bytes.len() >= TRIEPTR_SIZE); + u32::from_be_bytes([bytes[6], bytes[7], bytes[8], bytes[9]]) + } else { + 0 + }; + + TriePtr { + id, + chr, + ptr, + back_block, + } + } + + /// Load up a compressed TriePtr from a Read object. + /// Returns Ok(ptr) on success + /// Returns Err(codec_error::*) on disk I/O failure, or failure to decode the requested bytes + #[inline] + pub fn read_bytes_compressed(fd: &mut R) -> Result { + let id_bits: u8 = read_next(fd)?; + let id = clear_compressed(id_bits); + let chr: u8 = read_next(fd)?; + let ptr_be_bytes: [u8; 4] = read_next(fd)?; + let ptr = u32::from_be_bytes(ptr_be_bytes); + let back_block = if is_backptr(id) { + let bytes: [u8; 4] = read_next(fd)?; + u32::from_be_bytes(bytes) + } else { + 0 + }; + + Ok(TriePtr { + id, + chr, + ptr, + back_block, + }) + } + + /// Size of this TriePtr on disk, if compression is to be used. + #[inline] + pub fn compressed_size(&self) -> usize { + if !is_backptr(self.id) { + TRIEPTR_SIZE_COMPRESSED + } else { + TRIEPTR_SIZE + } + } } /// Cursor structure for walking down one or more Tries. This structure helps other parts of the @@ -627,6 +883,10 @@ impl StacksMessageCodec for TrieLeaf { pub struct TrieNode4 { pub path: Vec, pub ptrs: [TriePtr; 4], + /// If this node was created by copy-on-write, then this points to the node it was copied from. + pub cowptr: Option, + /// List of patches applied to this node. 
Fields are (node block ID, pointer to node, patch itself) + pub patches: Vec<(u32, TriePtr, TrieNodePatch)>, } impl fmt::Debug for TrieNode4 { @@ -645,6 +905,8 @@ impl TrieNode4 { TrieNode4 { path: path.to_owned(), ptrs: [TriePtr::default(); 4], + cowptr: None, + patches: vec![], } } } @@ -654,6 +916,10 @@ impl TrieNode4 { pub struct TrieNode16 { pub path: Vec, pub ptrs: [TriePtr; 16], + /// If this node was created by copy-on-write, then this points to the node it was copied from. + pub cowptr: Option, + /// List of patches applied to this node. Fields are (node block ID, pointer to node, patch itself) + pub patches: Vec<(u32, TriePtr, TrieNodePatch)>, } impl fmt::Debug for TrieNode16 { @@ -672,6 +938,8 @@ impl TrieNode16 { TrieNode16 { path: path.to_owned(), ptrs: [TriePtr::default(); 16], + cowptr: None, + patches: vec![], } } @@ -682,6 +950,8 @@ impl TrieNode16 { TrieNode16 { path: node4.path.clone(), ptrs, + cowptr: None, + patches: vec![], } } } @@ -692,6 +962,10 @@ pub struct TrieNode48 { pub path: Vec, indexes: [i8; 256], // indexes[i], if non-negative, is an index into ptrs. pub ptrs: [TriePtr; 48], + /// If this node was created by copy-on-write, then this points to the node it was copied from. + pub cowptr: Option, + /// List of patches applied to this node. Fields are (node block ID, pointer to node, patch itself) + pub patches: Vec<(u32, TriePtr, TrieNodePatch)>, } impl fmt::Debug for TrieNode48 { @@ -717,6 +991,8 @@ impl TrieNode48 { path: path.to_owned(), indexes: [-1; 256], ptrs: [TriePtr::default(); 48], + cowptr: None, + patches: vec![], } } @@ -735,6 +1011,8 @@ impl TrieNode48 { path: node16.path.clone(), indexes, ptrs, + cowptr: None, + patches: vec![], } } } @@ -744,6 +1022,10 @@ impl TrieNode48 { pub struct TrieNode256 { pub path: Vec, pub ptrs: [TriePtr; 256], + /// If this node was created by copy-on-write, then this points to the node it was copied from. + pub cowptr: Option, + /// List of patches applied to this node. 
Fields are (node block ID, pointer to node, patch itself) + pub patches: Vec<(u32, TriePtr, TrieNodePatch)>, } impl fmt::Debug for TrieNode256 { @@ -768,6 +1050,8 @@ impl TrieNode256 { TrieNode256 { path: path.to_owned(), ptrs: [TriePtr::default(); 256], + cowptr: None, + patches: vec![], } } @@ -783,6 +1067,8 @@ impl TrieNode256 { TrieNode256 { path: node4.path.clone(), ptrs, + cowptr: None, + patches: vec![], } } @@ -799,8 +1085,378 @@ impl TrieNode256 { TrieNode256 { path: node48.path.clone(), ptrs, + cowptr: None, + patches: vec![], + } + } +} + +/// This is a non-consensus "patch node" that applies a diff atop a base node. There can be up to +/// MAX_PATCH_DEPTH patch nodes applied atop the base node. +#[derive(Clone, PartialEq)] +pub struct TrieNodePatch { + /// Pointer to the node we're patching (will always be a back-block ptr) + pub ptr: TriePtr, + /// Field of ptrs to insert atop the base node + pub ptr_diff: Vec, +} + +impl fmt::Debug for TrieNodePatch { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "TrieNodePatch(ptr={} ptr_diff={})", + &ptrs_fmt(&[self.ptr]), + ptrs_fmt(&self.ptr_diff) + ) + } +} + +impl StacksMessageCodec for TrieNodePatch { + fn consensus_serialize(&self, fd: &mut W) -> Result<(), codec_error> { + write_next(fd, &(TrieNodeID::Patch as u8))?; + self.ptr + .write_bytes_compressed(fd) + .map_err(|e| codec_error::SerializeError(format!("Failed to serialize .ptr: {e:?}")))?; + + let num_ptrs = self.ptr_diff.len(); + if num_ptrs >= 256 { + return Err(codec_error::SerializeError( + "Cannot serialize TrieNodePatch with more than 256 ptrs".to_string(), + )); + } + // SAFETY: checked that num_ptrs < 256 + let num_ptrs_u8 = u8::try_from(num_ptrs).expect("infallible"); + write_next(fd, &num_ptrs_u8).map_err(|e| { + codec_error::SerializeError(format!("Failed to serialize .ptr_diff.len(): {e:?}")) + })?; + + for ptr in self.ptr_diff.iter() { + ptr.write_bytes_compressed(fd).map_err(|e| { + 
codec_error::SerializeError(format!("Failed to serialize ptr in .ptr_diff: {e:?}")) + })?; + } + Ok(()) + } + + fn consensus_deserialize(fd: &mut R) -> Result { + let id: u8 = read_next(fd)?; + if id != TrieNodeID::Patch as u8 { + return Err(codec_error::DeserializeError( + "Did not read a TrieNodeID::Patch".to_string(), + )); + } + + let ptr = TriePtr::read_bytes_compressed(fd)?; + let num_ptrs: u8 = read_next(fd)?; + let num_ptrs = usize::try_from(num_ptrs).expect("infallible"); + let mut ptr_diff: Vec = Vec::with_capacity(num_ptrs); + for _ in 0..num_ptrs { + ptr_diff.push(TriePtr::read_bytes_compressed(fd)?); + } + Ok(Self { ptr, ptr_diff }) + } +} + +/// Turn each non-empty, non-backptr in `ptrs` into a backptr pointing at `child_block_id` +pub(crate) fn node_copy_update_ptrs(ptrs: &mut [TriePtr], child_block_id: u32) { + for pointer in ptrs.iter_mut() { + // if the node is empty or is already a back pointer, do nothing + if pointer.id() == TrieNodeID::Empty as u8 || is_backptr(pointer.id()) { + continue; + } else { + // make backptr + pointer.back_block = child_block_id; + pointer.id = set_backptr(pointer.id()); + } + } +} + +/// Given the current block ID, convert every backptr pointer whose back_block is equal to +/// `cur_block_id` to a normal pointer. This is used when applying patches. +fn node_normalize_ptrs(ptrs: &mut [TriePtr], cur_block_id: u32) { + for ptr in ptrs.iter_mut() { + if is_backptr(ptr.id) && ptr.back_block == cur_block_id { + // normalize + ptr.id = clear_backptr(ptr.id); + ptr.back_block = 0; + } + } +} + +impl TrieNodePatch { + /// Compute the difference between `old_ptrs` and `new_ptrs`. In particular, if a pointer in + /// `new_ptrs` is in the same block as indicated by `old_node_ptr`, this code will first need to + /// normalize it (i.e.
convert it into a non-backpointer) in order to compare it against the + /// corresponding pointer in `old_ptrs` (which might have that very same pointer, but not yet + /// made into a backptr by a COW) + fn make_ptr_diff( + old_node_ptr: &TriePtr, + old_ptrs: &[TriePtr], + new_ptrs: &[TriePtr], + ) -> Vec { + let mut ret = Vec::with_capacity(new_ptrs.len()); + let mut mapped: [Option<&TriePtr>; 256] = [None; 256]; + for old_ptr in old_ptrs.iter() { + // SAFETY: chr() is a u8, so it's in range [0, 256) + if !old_ptr.is_empty() { + let mapped_ptr = mapped + .get_mut(old_ptr.chr() as usize) + .expect("infallible: mapped has 256 elements and .chr() is a u8"); + *mapped_ptr = Some(old_ptr); + } + } + + for new_ptr in new_ptrs.iter() { + if new_ptr.is_empty() { + continue; + } + // SAFETY: chr() is a u8, so it's in range [0, 256) + if let Some(old_ptr) = *mapped + .get(new_ptr.chr() as usize) + .expect("infallible: mapped has 256 elements and .chr() is a u8") + { + if !is_backptr(old_ptr.id()) + && is_backptr(new_ptr.id()) + && new_ptr.back_block == old_node_ptr.back_block + { + // new_ptr may be the backptr-ified version of old_ptr + let mut normalized_new_ptr = + TriePtr::new(clear_ctrl_bits(new_ptr.id()), new_ptr.chr(), new_ptr.ptr()); + normalized_new_ptr.back_block = 0; + if *old_ptr != normalized_new_ptr { + trace!( + "new overritten ptr: {:?} != {:?}", + &normalized_new_ptr, + old_ptr + ); + ret.push(*new_ptr); + } + } else { + if old_ptr != new_ptr { + trace!("new overritten ptr: {:?} != {:?}", &new_ptr, old_ptr); + ret.push(*new_ptr); + } + } + } else { + ret.push(*new_ptr); + } + } + ret + } + + /// Create a patch from one node4 to another + pub fn from_node4(old_node_ptr: TriePtr, old_node: &TrieNode4, new_node: &TrieNode4) -> Self { + let ptr_diff = Self::make_ptr_diff(&old_node_ptr, old_node.ptrs(), new_node.ptrs()); + Self { + ptr: old_node_ptr, + ptr_diff: ptr_diff, + } + } + + /// Create a patch from one node16 to another + pub fn from_node16( + 
old_node_ptr: TriePtr, + old_node: &TrieNode16, + new_node: &TrieNode16, + ) -> Self { + let ptr_diff = Self::make_ptr_diff(&old_node_ptr, old_node.ptrs(), new_node.ptrs()); + Self { + ptr: old_node_ptr, + ptr_diff: ptr_diff, } } + + /// Create a patch from one node48 to another + pub fn from_node48( + old_node_ptr: TriePtr, + old_node: &TrieNode48, + new_node: &TrieNode48, + ) -> Self { + let ptr_diff = Self::make_ptr_diff(&old_node_ptr, old_node.ptrs(), new_node.ptrs()); + Self { + ptr: old_node_ptr, + ptr_diff: ptr_diff, + } + } + + /// Create a patch from one node256 to another + pub fn from_node256( + old_node_ptr: TriePtr, + old_node: &TrieNode256, + new_node: &TrieNode256, + ) -> Self { + let ptr_diff = Self::make_ptr_diff(&old_node_ptr, old_node.ptrs(), new_node.ptrs()); + Self { + ptr: old_node_ptr, + ptr_diff: ptr_diff, + } + } + + /// Create a patch from one nodetype to another. If they're not the same nodetype, or if the + /// resulting diff is empty, then this function returns None. + pub fn try_from_nodetype( + old_node_ptr: TriePtr, + old_node: &TrieNodeType, + new_node: &TrieNodeType, + ) -> Option { + if clear_ctrl_bits(old_node.id()) != clear_ctrl_bits(new_node.id()) { + return None; + } + + let patch_opt = match (old_node, new_node) { + (TrieNodeType::Node4(old_data), TrieNodeType::Node4(new_data)) => { + Some(Self::from_node4(old_node_ptr, old_data, new_data)) + } + (TrieNodeType::Node16(old_data), TrieNodeType::Node16(new_data)) => { + Some(Self::from_node16(old_node_ptr, old_data, new_data)) + } + (TrieNodeType::Node48(old_data), TrieNodeType::Node48(new_data)) => { + Some(Self::from_node48(old_node_ptr, old_data, new_data)) + } + (TrieNodeType::Node256(old_data), TrieNodeType::Node256(new_data)) => { + Some(Self::from_node256(old_node_ptr, old_data, new_data)) + } + (_, _) => None, + }; + let Some(patch) = patch_opt else { + return None; + }; + if patch.ptr_diff.len() == 0 { + return None; + } + Some(patch) + } + + /// Create a patch from one patch to a node + pub fn
try_from_patch( + old_patch_ptr: TriePtr, + old_patch: &TrieNodePatch, + new_node: &TrieNodeType, + ) -> Option { + if clear_ctrl_bits(old_patch.ptr.id) != clear_ctrl_bits(new_node.id()) { + return None; + } + + let ptr_diff = Self::make_ptr_diff(&old_patch_ptr, &old_patch.ptr_diff, new_node.ptrs()); + let patch = Self { + ptr: old_patch_ptr, + ptr_diff, + }; + if patch.ptr_diff.len() == 0 { + return None; + } + return Some(patch); + } + + /// Apply this patch to a node4, given the node, block ID where the patch was found, and block + /// ID where the node was written. + pub fn apply_node4( + &self, + mut old_node: TrieNode4, + patch_block_id: u32, + cur_block_id: u32, + ) -> Option { + trace!("Apply patch {self:?} read from block ID {patch_block_id} to {old_node:?}"); + node_copy_update_ptrs(&mut old_node.ptrs, self.ptr.back_block); + for ptr in self.ptr_diff.iter() { + if !old_node.insert(ptr) { + return None; + } + } + node_copy_update_ptrs(&mut old_node.ptrs, patch_block_id); + node_normalize_ptrs(&mut old_node.ptrs, cur_block_id); + trace!("Patched up to {old_node:?}"); + Some(old_node) + } + + /// Apply this patch to a node16, given the node, block ID where the patch was found, and block + /// ID where the node was written. + pub fn apply_node16( + &self, + mut old_node: TrieNode16, + patch_block_id: u32, + cur_block_id: u32, + ) -> Option { + trace!("Apply patch {self:?} read from block ID {patch_block_id} to {old_node:?}"); + node_copy_update_ptrs(&mut old_node.ptrs, self.ptr.back_block); + for ptr in self.ptr_diff.iter() { + if !old_node.insert(ptr) { + return None; + } + } + node_copy_update_ptrs(&mut old_node.ptrs, patch_block_id); + node_normalize_ptrs(&mut old_node.ptrs, cur_block_id); + trace!("Patched up to {old_node:?}"); + Some(old_node) + } + + /// Apply this patch to a node48, given the node, block ID where the patch was found, and block + /// ID where the node was written. 
+ pub fn apply_node48( + &self, + mut old_node: TrieNode48, + patch_block_id: u32, + cur_block_id: u32, + ) -> Option { + trace!("Apply patch {self:?} read from block ID {patch_block_id} to {old_node:?}"); + node_copy_update_ptrs(&mut old_node.ptrs, self.ptr.back_block); + for ptr in self.ptr_diff.iter() { + if !old_node.insert(ptr) { + return None; + } + } + node_copy_update_ptrs(&mut old_node.ptrs, patch_block_id); + node_normalize_ptrs(&mut old_node.ptrs, cur_block_id); + trace!("Patched up to {old_node:?}"); + Some(old_node) + } + + /// Apply this patch to a node256, given the node, block ID where the patch was found, and block + /// ID where the node was written. + pub fn apply_node256( + &self, + mut old_node: TrieNode256, + patch_block_id: u32, + cur_block_id: u32, + ) -> Option { + trace!("Apply patch {self:?} read from block ID {patch_block_id} to {old_node:?}"); + node_copy_update_ptrs(&mut old_node.ptrs, self.ptr.back_block); + for ptr in self.ptr_diff.iter() { + if !old_node.insert(ptr) { + return None; + } + } + node_copy_update_ptrs(&mut old_node.ptrs, patch_block_id); + node_normalize_ptrs(&mut old_node.ptrs, cur_block_id); + trace!("Patched up to {old_node:?}"); + Some(old_node) + } + + /// Compute the size of the TrieNodePatch. Its pointers are always compressed.
+ #[inline] + pub fn size(&self) -> usize { + // ID + let mut sz = 1; + // previous node ptr + sz += self.ptr.compressed_size(); + // length prefix + sz += 1; + // ptr_diff + for ptr in self.ptr_diff.iter() { + sz += ptr.compressed_size(); + } + sz + } + + /// Load a TrieNodePatch from a Read object + /// Returns Ok(Self) on success + /// Returns Err(Error::CorruptionError(..)) on any failure: both decode errors and I/O errors + /// reported by the codec are wrapped in a CorruptionError + pub fn from_bytes(f: &mut R) -> Result { + Self::consensus_deserialize(f) + .map_err(|e| Error::CorruptionError(format!("Codec error: {e:?}"))) + } } impl TrieNode for TrieNode4 { @@ -812,6 +1468,8 @@ impl TrieNode for TrieNode4 { TrieNode4 { path: vec![], ptrs: [TriePtr::default(); 4], + cowptr: None, + patches: vec![], } } @@ -824,7 +1482,7 @@ impl TrieNode for TrieNode4 { None } - fn from_bytes(r: &mut R) -> Result { + fn from_bytes(r: &mut R) -> Result { let mut ptrs_slice = [TriePtr::default(); 4]; ptrs_from_bytes(TrieNodeID::Node4 as u8, r, &mut ptrs_slice)?; let path = path_from_bytes(r)?; @@ -832,6 +1490,8 @@ impl TrieNode for TrieNode4 { Ok(TrieNode4 { path, ptrs: ptrs_slice, + cowptr: None, + patches: vec![], }) } @@ -870,6 +1530,30 @@ impl TrieNode for TrieNode4 { fn as_trie_node_type(&self) -> TrieNodeType { TrieNodeType::Node4(self.clone()) } + + fn get_cow_ptr(&self) -> Option<&TrieCowPtr> { + self.cowptr.as_ref() + } + + fn set_cow_ptr(&mut self, cowptr: TrieCowPtr) { + self.cowptr.replace(cowptr); + } + + fn apply_patches( + self, + patches: &[(u32, TriePtr, TrieNodePatch)], + cur_block_id: u32, + ) -> Option { + let mut node = self; + for (patch_block_id, _, patch) in patches.iter() { + let Some(next_node) = patch.apply_node4(node, *patch_block_id, cur_block_id) else { + return None; + }; + node = next_node; + } + node.patches.extend_from_slice(patches); + Some(node) + } } impl TrieNode for TrieNode16 { @@ -881,6 +1565,8 @@ impl TrieNode for TrieNode16 { TrieNode16 { path: vec![], ptrs:
[TriePtr::default(); 16], + cowptr: None, + patches: vec![], } } @@ -893,7 +1579,7 @@ impl TrieNode for TrieNode16 { None } - fn from_bytes(r: &mut R) -> Result { + fn from_bytes(r: &mut R) -> Result { let mut ptrs_slice = [TriePtr::default(); 16]; ptrs_from_bytes(TrieNodeID::Node16 as u8, r, &mut ptrs_slice)?; @@ -902,6 +1588,8 @@ impl TrieNode for TrieNode16 { Ok(TrieNode16 { path, ptrs: ptrs_slice, + cowptr: None, + patches: vec![], }) } @@ -940,6 +1628,30 @@ impl TrieNode for TrieNode16 { fn as_trie_node_type(&self) -> TrieNodeType { TrieNodeType::Node16(self.clone()) } + + fn get_cow_ptr(&self) -> Option<&TrieCowPtr> { + self.cowptr.as_ref() + } + + fn set_cow_ptr(&mut self, cowptr: TrieCowPtr) { + self.cowptr.replace(cowptr); + } + + fn apply_patches( + self, + patches: &[(u32, TriePtr, TrieNodePatch)], + cur_block_id: u32, + ) -> Option { + let mut node = self; + for (patch_block_id, _, patch) in patches.iter() { + let Some(next_node) = patch.apply_node16(node, *patch_block_id, cur_block_id) else { + return None; + }; + node = next_node; + } + node.patches.extend_from_slice(patches); + Some(node) + } } impl TrieNode for TrieNode48 { @@ -952,6 +1664,8 @@ impl TrieNode for TrieNode48 { path: vec![], indexes: [-1; 256], ptrs: [TriePtr::default(); 48], + cowptr: None, + patches: vec![], } } @@ -978,23 +1692,34 @@ impl TrieNode for TrieNode48 { write_path_to_bytes(self.path().as_slice(), w) } + fn write_bytes_compressed(&self, w: &mut W) -> Result<(), Error> { + w.write_all(&[set_compressed(self.id())])?; + write_ptrs_to_bytes_compressed(self.id(), self.ptrs(), w)?; + + for i in self.indexes.iter() { + w.write_all(&[*i as u8])?; + } + + write_path_to_bytes(self.path().as_slice(), w) + } + fn byte_len(&self) -> usize { get_ptrs_byte_len(&self.ptrs) + 256 + get_path_byte_len(&self.path) } + fn byte_len_compressed(&self) -> usize { + get_ptrs_byte_len_compressed(self.id(), &self.ptrs) + 256 + get_path_byte_len(&self.path) + } + #[allow(clippy::indexing_slicing)] - 
fn from_bytes(r: &mut R) -> Result { + fn from_bytes(r: &mut R) -> Result { let mut ptrs_slice = [TriePtr::default(); 48]; ptrs_from_bytes(TrieNodeID::Node48 as u8, r, &mut ptrs_slice)?; let mut indexes = [0u8; 256]; - let l_indexes = r.read(&mut indexes).map_err(Error::IOError)?; - - if l_indexes != 256 { - return Err(Error::CorruptionError( - "Node48: Failed to read 256 indexes".to_string(), - )); - } + r.read_exact(&mut indexes).inspect_err(|e| { + error!("I/O error reading TrieNode48 indexes: {e:?}"); + })?; let path = path_from_bytes(r)?; @@ -1034,6 +1759,8 @@ impl TrieNode for TrieNode48 { path, indexes: indexes_slice, ptrs: ptrs_slice, + cowptr: None, + patches: vec![], }) } @@ -1076,6 +1803,30 @@ impl TrieNode for TrieNode48 { fn as_trie_node_type(&self) -> TrieNodeType { TrieNodeType::Node48(Box::new(self.clone())) } + + fn get_cow_ptr(&self) -> Option<&TrieCowPtr> { + self.cowptr.as_ref() + } + + fn set_cow_ptr(&mut self, cowptr: TrieCowPtr) { + self.cowptr.replace(cowptr); + } + + fn apply_patches( + self, + patches: &[(u32, TriePtr, TrieNodePatch)], + cur_block_id: u32, + ) -> Option { + let mut node = self; + for (patch_block_id, _, patch) in patches.iter() { + let Some(next_node) = patch.apply_node48(node, *patch_block_id, cur_block_id) else { + return None; + }; + node = next_node; + } + node.patches.extend_from_slice(patches); + Some(node) + } } impl TrieNode for TrieNode256 { @@ -1087,6 +1838,8 @@ impl TrieNode for TrieNode256 { TrieNode256 { path: vec![], ptrs: [TriePtr::default(); 256], + cowptr: None, + patches: vec![], } } @@ -1099,7 +1852,7 @@ impl TrieNode for TrieNode256 { Some(*ptr) } - fn from_bytes(r: &mut R) -> Result { + fn from_bytes(r: &mut R) -> Result { let mut ptrs_slice = [TriePtr::default(); 256]; ptrs_from_bytes(TrieNodeID::Node256 as u8, r, &mut ptrs_slice)?; @@ -1108,6 +1861,8 @@ impl TrieNode for TrieNode256 { Ok(TrieNode256 { path, ptrs: ptrs_slice, + cowptr: None, + patches: vec![], }) } @@ -1143,6 +1898,30 @@ impl TrieNode 
for TrieNode256 { fn as_trie_node_type(&self) -> TrieNodeType { TrieNodeType::Node256(Box::new(self.clone())) } + + fn get_cow_ptr(&self) -> Option<&TrieCowPtr> { + self.cowptr.as_ref() + } + + fn set_cow_ptr(&mut self, cowptr: TrieCowPtr) { + self.cowptr.replace(cowptr); + } + + fn apply_patches( + self, + patches: &[(u32, TriePtr, TrieNodePatch)], + cur_block_id: u32, + ) -> Option { + let mut node = self; + for (patch_block_id, _, patch) in patches.iter() { + let Some(next_node) = patch.apply_node256(node, *patch_block_id, cur_block_id) else { + return None; + }; + node = next_node; + } + node.patches.extend_from_slice(patches); + Some(node) + } } impl TrieNode for TrieLeaf { @@ -1165,36 +1944,44 @@ impl TrieNode for TrieLeaf { Ok(()) } + fn write_bytes_compressed(&self, w: &mut W) -> Result<(), Error> { + w.write_all(&[self.id()])?; + write_path_to_bytes(&self.path, w)?; + w.write_all(&self.data.0[..])?; + Ok(()) + } + fn byte_len(&self) -> usize { 1 + get_path_byte_len(&self.path) + self.data.len() } - fn from_bytes(r: &mut R) -> Result { - let mut idbuf = [0u8; 1]; - let l_idbuf = r.read(&mut idbuf).map_err(Error::IOError)?; + fn byte_len_compressed(&self) -> usize { + 1 + get_path_byte_len(&self.path) + self.data.len() + } - if l_idbuf != 1 { - return Err(Error::CorruptionError( - "Leaf: failed to read ID".to_string(), - )); - } + fn from_bytes(r: &mut R) -> Result { + let mut idbuf = [0u8; 1]; + r.read_exact(&mut idbuf).inspect_err(|e| { + error!("I/O error reading TrieLeaf ID: {e:?}"); + })?; - if clear_backptr(idbuf[0]) != TrieNodeID::Leaf as u8 { + if clear_ctrl_bits(idbuf[0]) != TrieNodeID::Leaf as u8 { return Err(Error::CorruptionError(format!( - "Leaf: bad ID {:x}", + "Leaf: bad ID 0x{:02x}", idbuf[0] ))); } let path = path_from_bytes(r)?; let mut leaf_data = [0u8; MARF_VALUE_ENCODED_SIZE as usize]; - let l_leaf_data = r.read(&mut leaf_data).map_err(Error::IOError)?; - if l_leaf_data != (MARF_VALUE_ENCODED_SIZE as usize) { - return 
Err(Error::CorruptionError(format!( - "Leaf: read only {l_leaf_data} out of {MARF_VALUE_ENCODED_SIZE} bytes" - ))); - } + r.read_exact(&mut leaf_data).inspect_err(|e| { + error!( + "I/O error reading TrieLeaf data: {e:?}. Got idbuf = {:02x}, path = {}", + &idbuf[0], + &to_hex(&path) + ); + })?; Ok(TrieLeaf { path, @@ -1221,6 +2008,23 @@ impl TrieNode for TrieLeaf { fn as_trie_node_type(&self) -> TrieNodeType { TrieNodeType::Leaf(self.clone()) } + + fn get_cow_ptr(&self) -> Option<&TrieCowPtr> { + // no-op + None + } + + fn set_cow_ptr(&mut self, _cowptr: TrieCowPtr) { + // no-op + } + + fn apply_patches( + self, + _patches: &[(u32, TriePtr, TrieNodePatch)], + _cur_block_id: u32, + ) -> Option { + Some(self) + } } #[derive(Debug, Clone, PartialEq)] @@ -1277,6 +2081,10 @@ impl TrieNodeType { with_node!(self, ref data, data.write_bytes(w)) } + pub fn write_bytes_compressed(&self, w: &mut W) -> Result<(), Error> { + with_node!(self, ref data, data.write_bytes_compressed(w)) + } + pub fn write_consensus_bytes( &self, map: &mut M, @@ -1289,6 +2097,10 @@ impl TrieNodeType { with_node!(self, ref data, data.byte_len()) } + pub fn byte_len_compressed(&self) -> usize { + with_node!(self, ref data, data.byte_len_compressed()) + } + pub fn insert(&mut self, ptr: &TriePtr) -> bool { with_node!(self, ref mut data, data.insert(ptr)) } @@ -1328,4 +2140,56 @@ impl TrieNodeType { pub fn set_path(&mut self, new_path: Vec) { with_node!(self, ref mut data, data.path = new_path) } + + pub fn get_cow_ptr(&self) -> Option<&TrieCowPtr> { + with_node!(self, ref data, data.get_cow_ptr()) + } + + pub fn set_cow_ptr(&mut self, cowptr: TrieCowPtr) { + with_node!(self, ref mut data, data.set_cow_ptr(cowptr)) + } + + pub fn apply_patches( + self, + patches: &[(u32, TriePtr, TrieNodePatch)], + cur_block_id: u32, + ) -> Option { + match self { + TrieNodeType::Node4(data) => { + let Some(new_data) = data.apply_patches(patches, cur_block_id) else { + return None; + }; + 
Some(TrieNodeType::Node4(new_data)) + } + TrieNodeType::Node16(data) => { + let Some(new_data) = data.apply_patches(patches, cur_block_id) else { + return None; + }; + Some(TrieNodeType::Node16(new_data)) + } + TrieNodeType::Node48(data) => { + let Some(new_data) = data.apply_patches(patches, cur_block_id) else { + return None; + }; + Some(TrieNodeType::Node48(Box::new(new_data))) + } + TrieNodeType::Node256(data) => { + let Some(new_data) = data.apply_patches(patches, cur_block_id) else { + return None; + }; + Some(TrieNodeType::Node256(Box::new(new_data))) + } + TrieNodeType::Leaf(data) => Some(TrieNodeType::Leaf(data)), + } + } + + pub fn get_patches(&self) -> &[(u32, TriePtr, TrieNodePatch)] { + match self { + TrieNodeType::Node4(ref data) => &data.patches, + TrieNodeType::Node16(ref data) => &data.patches, + TrieNodeType::Node48(ref data) => &data.patches, + TrieNodeType::Node256(ref data) => &data.patches, + TrieNodeType::Leaf(_) => panic!("Leaf has no patches"), + } + } } diff --git a/stackslib/src/chainstate/stacks/index/storage.rs b/stackslib/src/chainstate/stacks/index/storage.rs index 9213f5bb448..4b283c18d8f 100644 --- a/stackslib/src/chainstate/stacks/index/storage.rs +++ b/stackslib/src/chainstate/stacks/index/storage.rs @@ -24,27 +24,25 @@ use std::{fmt, fs, io}; use rusqlite::{Connection, OpenFlags, Transaction}; use sha2::Digest; -use stacks_common::types::chainstate::{ - TrieHash, BLOCK_HEADER_HASH_ENCODED_SIZE, TRIEHASH_ENCODED_SIZE, -}; -use stacks_common::util::hash::to_hex; use crate::chainstate::stacks::index::bits::{ - get_node_byte_len, read_hash_bytes, read_nodetype, read_root_hash, write_nodetype_bytes, + get_node_byte_len, get_node_byte_len_compressed, read_hash_bytes, read_nodetype, + read_root_hash, write_nodetype_bytes, write_nodetype_bytes_compressed, }; use crate::chainstate::stacks::index::cache::*; use crate::chainstate::stacks::index::file::{TrieFile, TrieFileNodeHashReader}; use 
crate::chainstate::stacks::index::marf::MARFOpenOpts; -#[cfg(test)] -use crate::chainstate::stacks::index::node::set_backptr; use crate::chainstate::stacks::index::node::{ - is_backptr, TrieNode, TrieNodeID, TrieNodeType, TriePtr, + is_backptr, set_backptr, TrieCowPtr, TrieNode, TrieNodeID, TrieNodePatch, TrieNodeType, TriePtr, }; use crate::chainstate::stacks::index::profile::TrieBenchmark; use crate::chainstate::stacks::index::trie::Trie; use crate::chainstate::stacks::index::{ - trie_sql, BlockMap, ClarityMarfTrieId, Error, MarfTrieId, TrieHasher, + trie_sql, BlockMap, ClarityMarfTrieId, Error, MarfTrieId, TrieHasher, MAX_PATCH_DEPTH, }; +use crate::codec::StacksMessageCodec; +use crate::types::chainstate::{TrieHash, BLOCK_HEADER_HASH_ENCODED_SIZE, TRIEHASH_ENCODED_SIZE}; +use crate::util::hash::to_hex; use crate::util_lib::db::{ sql_pragma, sqlite_open, tx_begin_immediate, Error as db_error, SQLITE_MARF_PAGE_SIZE, SQLITE_MMAP_SIZE, @@ -340,6 +338,39 @@ impl UncommittedState { } } + /// Dump the TrieRAM to the given writeable `f`. If the TrieRAM is not sealed yet, then seal + /// it first and then dump it. The nodes in the trie will be compressed before writing. 
+ fn dump_compressed( + self, + storage_tx: &mut TrieStorageTransaction, + f: &mut F, + bhh: &T, + ) -> Result<(), Error> { + if self.trie_ram_ref().block_header != *bhh { + error!("Failed to dump {:?}: not the current block", bhh); + return Err(Error::NotFoundError); + } + + match self { + UncommittedState::RW(mut trie_ram) => { + // seal it first, then dump it + debug!("Seal and dump trie for {}", bhh); + trie_ram.inner_seal_dump(storage_tx)?; + trie_ram.dump_compressed_consume(storage_tx, f)?; + Ok(()) + } + UncommittedState::Sealed(trie_ram, _rh) => { + // already sealed + debug!( + "Dump already-sealed trie for {} (root hash was {})", + bhh, _rh + ); + trie_ram.dump_compressed_consume(storage_tx, f)?; + Ok(()) + } + } + } + #[cfg(test)] pub fn print_to_stderr(&self) { self.trie_ram_ref().print_to_stderr() @@ -369,6 +400,51 @@ pub struct TrieRAM { is_moved: bool, parent: T, + + /// whether or not to compress the trie on dump + compress: bool, +} + +pub enum DumpPtr { + Normal(u32), + Patch(u32, [u8; 32], TrieNodePatch), +} + +impl DumpPtr { + pub fn ptr(&self) -> u32 { + match self { + Self::Normal(ptr) => *ptr, + Self::Patch(ptr, ..) => *ptr, + } + } + + pub fn hash_bytes(&self) -> Option<&[u8; 32]> { + match self { + Self::Normal(..) => None, + Self::Patch(_, bytes, _) => Some(bytes), + } + } + + pub fn patch(&self) -> Option<&TrieNodePatch> { + match self { + Self::Normal(..) => None, + Self::Patch(_, _, patch) => Some(patch), + } + } + + pub fn hash_and_patch(&self) -> Option<(&[u8; 32], &TrieNodePatch)> { + match self { + Self::Normal(..) => None, + Self::Patch(_, hash_bytes, patch) => Some((hash_bytes, patch)), + } + } + + pub fn patch_mut(&mut self) -> Option<&mut TrieNodePatch> { + match self { + Self::Normal(..) 
=> None, + Self::Patch(_, _, patch) => Some(patch), + } + } } /// Trie in RAM without the serialization overhead @@ -393,9 +469,16 @@ impl TrieRAM { is_moved: false, parent: parent.clone(), + compress: false, } } + /// Iterative constructor to set compression + pub fn with_compression(mut self, compression: bool) -> Self { + self.compress = compression; + self + } + /// Inner method to instantiate a TrieRAM from existing Trie data. fn from_data(block_header: T, data: Vec<(TrieNodeType, TrieHash)>, parent: T) -> TrieRAM { TrieRAM { @@ -417,6 +500,7 @@ impl TrieRAM { is_moved: false, parent, + compress: false, } } @@ -449,6 +533,7 @@ impl TrieRAM { is_moved: true, parent: self.parent.clone(), + compress: self.compress, } } @@ -559,9 +644,10 @@ impl TrieRAM { assert_eq!(node_data_order.len(), offsets.len()); // write parent block ptr - f.seek(SeekFrom::Start(0))?; + f.rewind()?; f.write_all(parent_hash.as_bytes()) .map_err(Error::IOError)?; + // write zero-identifier (TODO: this is a convenience hack for now, we should remove the // identifier from the trie data blob) f.seek(SeekFrom::Start(BLOCK_HEADER_HASH_ENCODED_SIZE as u64))?; @@ -584,6 +670,65 @@ impl TrieRAM { Ok(()) } + /// write the trie data to f, using node_data_order to + /// iterate over node_data + /// Compression improvements: + /// * Do not store backptr 0's if the node isn't a backptr + /// * Store a compact representation for sparse child pointer lists + /// * If a node was copied from another, then only store the difference in ptrs (TrieNodePatch) + pub fn write_trie_indirect_compressed( + f: &mut F, + node_data_order: &[DumpPtr], + node_data: &[(TrieNodeType, TrieHash)], + offsets: &[u32], + parent_hash: &T, + ) -> Result<(), Error> { + assert_eq!(node_data_order.len(), offsets.len()); + + // write parent block ptr + f.rewind()?; + f.write_all(parent_hash.as_bytes()) + .map_err(Error::IOError)?; + + // write zero-identifier (TODO: this is a convenience hack for now, we should remove the + // 
identifier from the trie data blob) + f.seek(SeekFrom::Start(BLOCK_HEADER_HASH_ENCODED_SIZE as u64))?; + f.write_all(&0u32.to_le_bytes()).map_err(Error::IOError)?; + + for (ix, indirect) in node_data_order.iter().enumerate() { + if let Some((hash_bytes, patch)) = indirect.hash_and_patch() { + let f_pos_before = f.stream_position()?; + f.write_all(hash_bytes)?; + patch.consensus_serialize(f).map_err(|e| { + Error::CorruptionError(format!("Failed to serialize patch: {e:?}")) + })?; + + let f_pos_after = f.stream_position()?; + trace!( + "write {:?} {} at {}-{}", + &patch, + &to_hex(hash_bytes), + f_pos_before, + f_pos_after + ); + } else { + // dump the node to storage + let node = node_data.get(indirect.ptr() as usize).ok_or_else(|| { + Error::CorruptionError("node_data_order pointer invalid".into()) + })?; + + write_nodetype_bytes_compressed(f, &node.0, node.1)?; + } + // next node + let next_offset = *offsets.get(ix).ok_or_else(|| { + Error::CorruptionError("node_data_order.len() != offsets.len()".into()) + })?; + f.seek(SeekFrom::Start(u64::from(next_offset)))?; + } + + Ok(()) + } + /// Calculate the MARF root hash from a trie root hash. /// This hashes the trie root hash with a geometric series of prior trie hashes. 
fn calculate_marf_root_hash( @@ -865,6 +1010,256 @@ impl TrieRAM { Ok(ptr) } + fn make_node_patch( + storage_tx: &mut TrieStorageTransaction, + base_ptr: TrieCowPtr, + node: &TrieNodeType, + ) -> Result, Error> { + let cur_block = storage_tx.get_cur_block(); + let old_node_res: Result = (|| { + storage_tx.open_block(&base_ptr.block_id())?; + let node = storage_tx.read_nodetype_nohash(base_ptr.ptr())?; + Ok(node) + })(); + + if old_node_res.is_err() { + // restore + storage_tx.open_block(&cur_block)?; + } + + match old_node_res { + Ok(old_node) => { + if old_node.path_bytes() != node.path_bytes() { + return Ok(None); + } + + trace!( + "Make patch from old node from block {:?} to new node {:?}", + &old_node, + node + ); + return Ok(TrieNodePatch::try_from_nodetype( + *base_ptr.ptr(), + &old_node, + &node, + )); + } + Err(Error::Patch(_, old_patch)) => { + // building atop an existing patch. + // Make sure that the base node's path isn't different from this node + match storage_tx.inner_read_patched_persisted_nodetype( + base_ptr.ptr().back_block(), + *base_ptr.ptr(), + false, + ) { + Ok((base_node, _)) => { + if base_node.path_bytes() != node.path_bytes() { + return Ok(None); + } + trace!( + "Make patch from old patch {:?} to new node {:?}", + &old_patch, + node + ); + return Ok(TrieNodePatch::try_from_patch( + *base_ptr.ptr(), + &old_patch, + &node, + )); + } + Err(e) => { + storage_tx.open_block(&cur_block)?; + return Err(e); + } + } + } + Err(e) => { + return Err(e); + } + } + } + + /// Walk through the buffered TrieNodes and dump them to f, compressing the trie. + /// This consumes this TrieRAM instance. + /// The trie will already have been sealed. 
+ /// + /// Space improvements: + /// * Do not store backptr 0's if the node isn't a backptr + /// * Store a compact representation for sparse child pointer lists + /// * If a node was copied from another, then only store the difference in ptrs (TrieNodePatch) + /// + /// Returns Ok(len) to report number of bytes written + /// Returns Err(..) if we fail to write + fn dump_compressed_consume( + mut self, + storage_tx: &mut TrieStorageTransaction, + f: &mut F, + ) -> Result { + // step 1: write out each node in breadth-first order to get their ptr offsets + let mut frontier: VecDeque = VecDeque::new(); + + let mut node_data = vec![]; + let mut offsets = vec![]; + + let start = TriePtr::new(TrieNodeID::Node256 as u8, 0, 0).ptr(); + frontier.push_back(start); + + // first 32 bytes is reserved for the parent block hash + // next 4 bytes is the local block identifier + let mut ptr = BLOCK_HEADER_HASH_ENCODED_SIZE as u64 + 4; + + while let Some(pointer) = frontier.pop_front() { + let (node, node_hash) = self.get_nodetype(pointer)?; + + // IMPROVEMENT: if we can, store a patch node instead of the whole node. + // Only applies to non-leaf nodes, and only if doing so results in a stack of patches + // that's less than MAX_PATCH_DEPTH. Also, only patch a node if the path is the same. + let mut patch_node_opt = if !node.is_leaf() + && node.get_patches().len() + 1 < MAX_PATCH_DEPTH as usize + { + if let Some((last_patch_block_id, last_patch_ptr, _)) = node.get_patches().last() { + // this node is a patch to a node in a previous trie. Try to amend a patch + // atop it. 
+ let block_hash = storage_tx.get_block_hash_caching(*last_patch_block_id)?; + + // construct a COW pointer to this patch node + let mut patch_ptr = TriePtr::new( + set_backptr(TrieNodeID::Patch as u8), + last_patch_ptr.chr(), + last_patch_ptr.ptr(), + ); + patch_ptr.back_block = *last_patch_block_id; + + let base_ptr = TrieCowPtr::new(block_hash.clone(), patch_ptr); + let patch_node_opt = Self::make_node_patch(storage_tx, base_ptr, &node)?; + if let Some(patch_node) = patch_node_opt { + trace!( + "Create amendment patch for node at {:?}: {:?}", + &base_ptr, + &node + ); + Some((node_hash.to_bytes(), patch_node)) + } else { + None + } + } else if let Some(cowptr) = node.get_cow_ptr() { + // this node was a COW node for this trie + let patch_node_opt = Self::make_node_patch(storage_tx, *cowptr, &node)?; + if let Some(patch_node) = patch_node_opt { + trace!("Create COW patch for node at {:?}: {:?}", &cowptr, &node); + Some((node_hash.to_bytes(), patch_node)) + } else { + None + } + } else { + None + } + } else { + None + }; + + // calculate size + if let Some((_, patch_node)) = patch_node_opt.as_ref() { + // IMPROVEMENT: don't store a copy of a node that was copied forward via + // MARF::walk_cow(). Instead, store only the new ptrs in the copied node, and store + // a pointer to the original node in the ancestral trie. 
+ // +32 is for the hash + trace!( + "Patch node {:?} for {:?} to be written at {}", + &patch_node, + &node, + ptr + ); + let num_written = 32 + patch_node.size(); + ptr += num_written as u64; + + let mut num_new_nodes = 0; + if !node.is_leaf() { + for ptr in node.ptrs().iter() { + if !ptr.is_empty() && !is_backptr(ptr.id) { + num_new_nodes += 1; + } + } + } + assert_eq!(num_new_nodes, patch_node.ptr_diff.len()); + } else { + // IMPROVEMENT: don't store backptr block ID if it's 0 + trace!("Normal node {:?} to be written at {}", &node, ptr); + let num_written = get_node_byte_len_compressed(node); + ptr += num_written as u64; + } + + // queue each child + if !node.is_leaf() { + for ptr in node.ptrs().iter() { + if !ptr.is_empty() && !is_backptr(ptr.id) { + frontier.push_back(ptr.ptr()); + } + } + } + + if let Some((hash_bytes, patch)) = patch_node_opt.take() { + node_data.push(DumpPtr::Patch(pointer, hash_bytes, patch)); + } else { + node_data.push(DumpPtr::Normal(pointer)); + } + offsets.push(ptr as u32); + } + + assert_eq!(offsets.len(), node_data.len()); + + // step 2: update ptrs in all nodes + let mut i = 0; + for node_data_ptr in node_data.iter_mut() { + if let Some(patch) = node_data_ptr.patch_mut() { + for ptr in patch.ptr_diff.iter_mut() { + if !ptr.is_empty() && !is_backptr(ptr.id) { + ptr.ptr = *offsets.get(i).ok_or_else(|| { + Error::CorruptionError( + "Miscalculated dump_compressed_consume offsets".into(), + ) + })?; + i += 1; + } + } + } else { + let next_node = &mut self + .data + .get_mut(node_data_ptr.ptr() as usize) + .ok_or_else(|| { + Error::CorruptionError( + "Miscalculated dump_compressed_consume pointer".into(), + ) + })? 
+ .0; + if !next_node.is_leaf() { + let ptrs = next_node.ptrs_mut(); + for ptr in ptrs.iter_mut() { + if !ptr.is_empty() && !is_backptr(ptr.id) { + ptr.ptr = *offsets.get(i).ok_or_else(|| { + Error::CorruptionError( + "Miscalculated dump_compressed_consume offsets".into(), + ) + })?; + i += 1; + } + } + } + } + } + + // step 3: write out each node (now that they have the write ptrs) + TrieRAM::write_trie_indirect_compressed( + f, + &node_data, + self.data.as_slice(), + offsets.as_slice(), + &self.parent, + )?; + + Ok(ptr) + } + /// load the trie from F. /// The trie will have the same structure as the on-disk trie, but it may have nodes in a /// different order. @@ -873,7 +1268,7 @@ impl TrieRAM { let mut frontier = VecDeque::new(); // read parent - f.seek(SeekFrom::Start(0))?; + f.rewind()?; let parent_hash_bytes = read_hash_bytes(f)?; let parent_hash = T::from_bytes(parent_hash_bytes); @@ -987,6 +1382,8 @@ impl TrieRAM { /// Get an owned instance of a node and its hash from the TrieRAM. ptr.ptr() is an array /// index. + /// Note that this will never return a patch node, since we only ever store patch nodes to + /// persistent media. pub fn read_nodetype(&mut self, ptr: &TriePtr) -> Result<(TrieNodeType, TrieHash), Error> { trace!( "TrieRAM: read_nodetype({:?}): at {:?}", @@ -1196,6 +1593,7 @@ pub struct TrieStorageConnection<'a, T: MarfTrieId> { cache: &'a mut TrieCache, bench: &'a mut TrieBenchmark, pub hash_calculation_mode: TrieHashCalculationMode, + compress: bool, /// row ID of a trie that represents unconfirmed state (i.e. trie state that will never become /// part of the MARF, but nevertheless represents a persistent scratch space). 
If this field @@ -1262,6 +1660,7 @@ pub struct TrieFileStorage { cache: TrieCache, bench: TrieBenchmark, hash_calculation_mode: TrieHashCalculationMode, + compress: bool, // used in testing in order to short-circuit block-height lookups // when the trie struct is tested outside of marf.rs usage @@ -1306,6 +1705,7 @@ pub struct ReopenedTrieStorageConnection<'a, T: MarfTrieId> { cache: TrieCache, bench: TrieBenchmark, pub hash_calculation_mode: TrieHashCalculationMode, + compress: bool, /// row ID of a trie that represents unconfirmed state (i.e. trie state that will never become /// part of the MARF, but nevertheless represents a persistent scratch space). If this field @@ -1335,6 +1735,7 @@ impl<'a, T: MarfTrieId> ReopenedTrieStorageConnection<'a, T> { bench: &mut self.bench, hash_calculation_mode: self.hash_calculation_mode, unconfirmed_block_id: None, + compress: self.compress, #[cfg(test)] test_genesis_block: &mut self.test_genesis_block, @@ -1353,6 +1754,7 @@ impl TrieFileStorage { bench: &mut self.bench, hash_calculation_mode: self.hash_calculation_mode, unconfirmed_block_id: None, + compress: self.compress, #[cfg(test)] test_genesis_block: &mut self.test_genesis_block, @@ -1401,6 +1803,7 @@ impl TrieFileStorage { cache, bench, hash_calculation_mode, + compress: self.compress, unconfirmed_block_id, #[cfg(test)] test_genesis_block: self.test_genesis_block.clone(), @@ -1421,6 +1824,7 @@ impl TrieFileStorage { cache: &mut self.cache, bench: &mut self.bench, hash_calculation_mode: self.hash_calculation_mode, + compress: self.compress, unconfirmed_block_id: None, #[cfg(test)] @@ -1521,6 +1925,7 @@ impl TrieFileStorage { blobs, bench: TrieBenchmark::new(), hash_calculation_mode: marf_opts.hash_calculation_mode, + compress: marf_opts.compress, data: TrieStorageTransientData { uncommitted_writes: None, @@ -1610,6 +2015,7 @@ impl TrieFileStorage { cache, bench: TrieBenchmark::new(), hash_calculation_mode: self.hash_calculation_mode, + compress: self.compress, data: 
TrieStorageTransientData { uncommitted_writes: self.data.uncommitted_writes.clone(), @@ -1679,6 +2085,7 @@ impl<'a, T: MarfTrieId> TrieStorageTransaction<'a, T> { cache, bench: TrieBenchmark::new(), hash_calculation_mode: self.hash_calculation_mode, + compress: self.compress, data: TrieStorageTransientData { uncommitted_writes: None, @@ -1733,13 +2140,26 @@ impl<'a, T: MarfTrieId> TrieStorageTransaction<'a, T> { } if let Some((bhh, trie_ram)) = self.data.uncommitted_writes.take() { trace!("Buffering block flush started."); - let mut buffer = Cursor::new(Vec::new()); - trie_ram.dump(self, &mut buffer, &bhh)?; - // consume the cursor, get the buffer - let buffer = buffer.into_inner(); - trace!("Buffering block flush finished."); + // Enable MARF compression only when: + // - Compression is explicitly requested, and + // - The flush option is *not* `FlushOptions::UnconfirmedTable`, which is used when + // writing an unconfirmed trie for Stacks 2.x. + // + // Compression is intentionally disabled for unconfirmed tries to avoid regressions + // in `TrieRAM::load`, which is responsible for loading these unconfirmed structures. + let marf_compression_enabled = + self.compress && !matches!(flush_options, FlushOptions::UnconfirmedTable); + + let mut cursor = Cursor::new(Vec::new()); + if marf_compression_enabled { + trie_ram.dump_compressed(self, &mut cursor, &bhh)?; + } else { + trie_ram.dump(self, &mut cursor, &bhh)?; + } + let buffer = cursor.into_inner(); + trace!("Buffering block flush finished."); debug!("Flush: {} to {}", &bhh, flush_options); let block_id = match flush_options { @@ -1768,17 +2188,22 @@ impl<'a, T: MarfTrieId> TrieStorageTransaction<'a, T> { if real_bhh != &bhh { // note: this was moved from the block_retarget function // to avoid stepping on the borrow checker. - debug!("Retarget block {} to {}", bhh, real_bhh); + debug!( + "Retarget block {} to {}. 
Current block ID is {:?}", + bhh, real_bhh, &self.data.cur_block_id + ); // switch over state self.data.retarget_block(real_bhh.clone()); } - self.with_trie_blobs(|db, blobs| match blobs { + let new_block_id = self.with_trie_blobs(|db, blobs| match blobs { Some(blobs) => blobs.store_trie_blob(db, real_bhh, &buffer), None => { test_debug!("Stored trie blob {} to db", real_bhh); trie_sql::write_trie_blob(db, real_bhh, &buffer) } - })? + })?; + self.data.set_block(real_bhh.clone(), Some(new_block_id)); + new_block_id } FlushOptions::MinedTable(real_bhh) => { if self.unconfirmed() { @@ -2198,11 +2623,14 @@ impl TrieStorageConnection<'_, T> { /// when following a backptr, which stores the block identifier directly. pub fn open_block_known_id(&mut self, bhh: &T, id: u32) -> Result<(), Error> { trace!( - "open_block_known_id({},{}) (unconfirmed={:?},{})", + "open_block_known_id({},{}) (unconfirmed={:?},{}) from {},{:?} in {}", bhh, id, &self.unconfirmed_block_id, - self.unconfirmed() + self.unconfirmed(), + &self.data.cur_block, + &self.data.cur_block_id, + self.db_path, ); if *bhh == self.data.cur_block && self.data.cur_block_id.is_some() { // no-op @@ -2224,10 +2652,11 @@ impl TrieStorageConnection<'_, T> { /// that all node reads will occur relative to it. pub fn open_block(&mut self, bhh: &T) -> Result<(), Error> { trace!( - "open_block({}) (unconfirmed={:?},{})", + "open_block({}) (unconfirmed={:?},{}) in {}", bhh, &self.unconfirmed_block_id, - self.unconfirmed() + self.unconfirmed(), + self.db_path ); self.bench.open_block_start(); @@ -2587,9 +3016,7 @@ impl TrieStorageConnection<'_, T> { .map(|(node, _)| node) } - /// Inner method for reading a node, and optionally its hash as well. - /// Uses either the DB or the .blobs file, depending on which is configured. - /// If `read_hash` is `false`, then the returned hash is just the empty hash of all 0's. 
+ /// Inner loop of [`TrieStorageConnection::inner_read_patched_persisted_nodetype`] fn inner_read_persisted_nodetype( &mut self, block_id: u32, @@ -2634,10 +3061,65 @@ impl TrieStorageConnection<'_, T> { Ok((node_inst, node_hash)) } + /// Inner method for reading a node, and optionally its hash as well. + /// Uses either the DB or the .blobs file, depending on which is configured. + /// If `read_hash` is `false`, then the returned hash is just the empty hash of all 0's. + fn inner_read_patched_persisted_nodetype( + &mut self, + mut block_id: u32, + mut ptr: TriePtr, + read_hash: bool, + ) -> Result<(TrieNodeType, TrieHash), Error> { + trace!( + "inner_read_patched_persisted_nodetype({block_id}): {ptr:?} (unconfirmed={:?},{})", + &self.unconfirmed_block_id, + self.unconfirmed() + ); + + let (saved_block_hash, saved_block_id) = self.get_cur_block_and_id(); + + let cur_block_id = block_id; + let mut node_hash_opt = None; + let mut patches: Vec<(u32, TriePtr, TrieNodePatch)> = vec![]; + for _ in 0..MAX_PATCH_DEPTH { + match self.inner_read_persisted_nodetype(block_id, &ptr, read_hash) { + Ok((node, hash)) => { + patches.reverse(); + let node = node.apply_patches(&patches, cur_block_id).ok_or_else(|| { + Error::CorruptionError("Failed to apply patches to node".to_string()) + })?; + self.open_block_maybe_id(&saved_block_hash, saved_block_id)?; + return Ok((node, node_hash_opt.unwrap_or(hash))); + } + Err(Error::Patch(hash_opt, node_patch)) => { + trace!("inner_read_patched_persisted_nodetype({block_id}): at {ptr:?} read patch {node_patch:?} (original hash is {hash_opt:?})"); + let new_ptr = node_patch.ptr.from_backptr(); + let new_block_id = node_patch.ptr.back_block(); + + patches.push((block_id, ptr, node_patch)); + + ptr = new_ptr; + block_id = new_block_id; + if node_hash_opt.is_none() { + node_hash_opt = hash_opt; + } + } + Err(e) => { + self.open_block_maybe_id(&saved_block_hash, saved_block_id)?; + return Err(e); + } + } + } + 
self.open_block_maybe_id(&saved_block_hash, saved_block_id)?; + return Err(Error::NodeTooDeep); + } + /// Read a node and optionally its hash. If `read_hash` is false, then an empty hash will be /// returned /// NOTE: ptr will not be treated as a backptr -- the node returned will be from the - /// currently-open trie. + /// currently-open trie. However, if ptr refers to a patch node, then the base node and the + /// one or more patch nodes written atop it will be loaded and used to reconstruct the new + /// node. fn read_nodetype_maybe_hash( &mut self, ptr: &TriePtr, @@ -2672,19 +3154,23 @@ impl TrieStorageConnection<'_, T> { if let Some((node_inst, node_hash)) = self.cache.load_node_and_hash(id, &clear_ptr) { + trace!("Cache hit: {:?} {} {:?}", ptr, node_hash, node_inst); (node_inst, node_hash) } else { + trace!("Cache miss: {:?}", ptr); let (node_inst, node_hash) = - self.inner_read_persisted_nodetype(id, &clear_ptr, read_hash)?; + self.inner_read_patched_persisted_nodetype(id, clear_ptr, read_hash)?; self.cache .store_node_and_hash(id, clear_ptr, node_inst.clone(), node_hash); (node_inst, node_hash) } } else if let Some(node_inst) = self.cache.load_node(id, &clear_ptr) { + trace!("Cache hit: {:?}", ptr); (node_inst, TrieHash([0u8; TRIEHASH_ENCODED_SIZE])) } else { + trace!("Cache miss: {:?}", ptr); let (node_inst, _) = - self.inner_read_persisted_nodetype(id, &clear_ptr, read_hash)?; + self.inner_read_patched_persisted_nodetype(id, clear_ptr, read_hash)?; self.cache.store_node(id, clear_ptr, node_inst.clone()); (node_inst, TrieHash([0u8; TRIEHASH_ENCODED_SIZE])) }; diff --git a/stackslib/src/chainstate/stacks/index/test/cache.rs b/stackslib/src/chainstate/stacks/index/test/cache.rs index 288eff6a1da..157924e3fa9 100644 --- a/stackslib/src/chainstate/stacks/index/test/cache.rs +++ b/stackslib/src/chainstate/stacks/index/test/cache.rs @@ -59,6 +59,41 @@ fn test_marf_with_cache( hash_strategy: TrieHashCalculationMode, data: &[Vec<(String, MARFValue)>], batch_size: 
Option, +) -> TrieHash { + inner_test_marf_with_cache( + test_name, + cache_strategy, + hash_strategy, + data, + batch_size, + false, + ) +} + +fn test_marf_with_cache_compressed( + test_name: &str, + cache_strategy: &str, + hash_strategy: TrieHashCalculationMode, + data: &[Vec<(String, MARFValue)>], + batch_size: Option, +) -> TrieHash { + inner_test_marf_with_cache( + &format!("{}.compressed", test_name), + cache_strategy, + hash_strategy, + data, + batch_size, + true, + ) +} + +fn inner_test_marf_with_cache( + test_name: &str, + cache_strategy: &str, + hash_strategy: TrieHashCalculationMode, + data: &[Vec<(String, MARFValue)>], + batch_size: Option, + compress: bool, ) -> TrieHash { let test_file = if test_name == ":memory:" { test_name.to_string() @@ -76,14 +111,16 @@ fn test_marf_with_cache( test_file }; - let marf_opts = MARFOpenOpts::new(hash_strategy, cache_strategy, true); + let mut marf_opts = MARFOpenOpts::new(hash_strategy, cache_strategy, true); + marf_opts.compress = compress; + let f = TrieFileStorage::open(&test_file, marf_opts).unwrap(); let mut marf = MARF::from_storage(f); let mut last_block_header = BlockHeaderHash::sentinel(); let batch_size = batch_size.unwrap_or(0); for (i, block_data) in data.iter().enumerate() { - test_debug!("Write block {}", i); + info!("Write block {}", i); let mut block_hash_bytes = [0u8; 32]; block_hash_bytes[0..8].copy_from_slice(&(i as u64).to_be_bytes()); @@ -106,7 +143,21 @@ fn test_marf_with_cache( } marf.commit().unwrap(); - last_block_header = block_header; + last_block_header = block_header.clone(); + + let proof_block_data = data.get(i / 2).unwrap(); + info!("Prove block {}", i / 2); + for (key, value) in proof_block_data.iter() { + let path = TrieHash::from_key(key); + info!("Prove {} = {}", &key, &to_hex(value.as_bytes())); + merkle_test_marf( + &mut marf.borrow_storage_backend(), + &block_header, + TrieHash::from_key(key).as_bytes(), + value.as_bytes(), + None, + ); + } } let write_bench = 
marf.borrow_storage_backend().get_benchmarks(); @@ -157,6 +208,211 @@ fn test_marf_with_cache( root_hash } +#[test] +fn test_marf_node_compressed_1_insert() { + let test_data = make_test_insert_data(1, 256); + let compressed_root_hash = test_marf_with_cache_compressed( + "test_marf_node_compressed_1_insert", + "noop", + TrieHashCalculationMode::Immediate, + &test_data, + Some(8), + ); + eprintln!("Final root hash is {}", compressed_root_hash); + + let root_hash = test_marf_with_cache( + "test_marf_node_compressed_1_insert", + "noop", + TrieHashCalculationMode::Immediate, + &test_data, + Some(8), + ); + eprintln!("Final root hash is {}", root_hash); + + assert_eq!(root_hash, compressed_root_hash); +} + +#[test] +fn test_marf_node_compressed_1_trie() { + let test_data = make_test_insert_data(2048, 1); + let root_hash = test_marf_with_cache( + "test_marf_node_compressed_1_trie", + "noop", + TrieHashCalculationMode::Immediate, + &test_data, + Some(8), + ); + eprintln!("Final root hash is {}", root_hash); + + let compressed_root_hash = test_marf_with_cache_compressed( + "test_marf_node_compressed_1_trie", + "noop", + TrieHashCalculationMode::Immediate, + &test_data, + Some(8), + ); + + eprintln!("Final compressed root hash is {}", compressed_root_hash); + + assert_eq!(root_hash, compressed_root_hash); +} + +#[test] +fn test_marf_node_compressed_8_inserts() { + let test_data = make_test_insert_data(8, 256); + let root_hash = test_marf_with_cache( + "test_marf_node_compressed_1_insert", + "noop", + TrieHashCalculationMode::Immediate, + &test_data, + Some(8), + ); + eprintln!("Final root hash is {}", root_hash); + + let compressed_root_hash = test_marf_with_cache_compressed( + "test_marf_node_compressed_1_insert", + "noop", + TrieHashCalculationMode::Immediate, + &test_data, + Some(8), + ); + eprintln!("Final root hash is {}", compressed_root_hash); + + assert_eq!(root_hash, compressed_root_hash); +} + +#[test] +fn test_marf_node_compressed_8_inserts_different_batches() { 
+ let test_data = make_test_insert_data(8, 256); + let root_hash = test_marf_with_cache( + "test_marf_node_compressed_1_insert", + "noop", + TrieHashCalculationMode::Immediate, + &test_data, + Some(8), + ); + eprintln!("Final root hash is {}", root_hash); + + let compressed_root_hash = test_marf_with_cache_compressed( + "test_marf_node_compressed_1_insert", + "noop", + TrieHashCalculationMode::Immediate, + &test_data, + Some(5), + ); + eprintln!("Final root hash is {}", compressed_root_hash); + + assert_eq!(root_hash, compressed_root_hash); +} + +/// Test that expanding a path into a leaf, node4, node16, node48, and then node256 repeatedly +/// will produce patch nodes which can be read +#[test] +fn test_marf_patch_expansion() { + let hash_strategy = TrieHashCalculationMode::Deferred; + let cache_strategy = "noop"; + let test_name = "test_marf_patch_expansion"; + + let data: Vec<_> = (0u8..=255u8) + .map(|i| { + let mut path = [0u8; 32]; + path[31] = i; + vec![(TrieHash(path), MARFValue::from(u32::from(i)))] + }) + .collect(); + + let test_dir = format!("/tmp/stacks-marf-tests/{}", test_name); + if fs::metadata(&test_dir).is_ok() { + fs::remove_dir_all(&test_dir).unwrap(); + } + fs::create_dir_all(&test_dir).unwrap(); + + let test_file = format!( + "{}/marf-cache-{}-{:?}.sqlite", + &test_dir, cache_strategy, hash_strategy + ); + + let marf_opts = MARFOpenOpts::new(hash_strategy, cache_strategy, true); + let f = TrieFileStorage::open(&test_file, marf_opts).unwrap(); + let mut marf = MARF::from_storage(f); + let mut last_block_header = BlockHeaderHash::sentinel(); + + for (i, block_data) in data.iter().enumerate() { + test_debug!("Write block {}", i); + let mut block_hash_bytes = [0u8; 32]; + block_hash_bytes[0..8].copy_from_slice(&(i as u64).to_be_bytes()); + + let block_header = BlockHeaderHash(block_hash_bytes); + marf.begin(&last_block_header, &block_header).unwrap(); + + for (path, value) in block_data.iter() { + let leaf = TrieLeaf::from_value(&[], 
value.clone()); + marf.insert_raw(path.clone(), leaf).unwrap(); + } + + marf.commit().unwrap(); + last_block_header = block_header; + } + + let write_bench = marf.borrow_storage_backend().get_benchmarks(); + marf.borrow_storage_backend().reset_benchmarks(); + eprintln!("MARF bench writes: {:#?}", &write_bench); + + debug!("---------"); + debug!("MARF gets"); + debug!("---------"); + + let mut total_read_time = 0; + let mut root_hash = TrieHash([0u8; 32]); + for (i, block_data) in data.iter().enumerate() { + test_debug!("Read block {}", i); + for (path, value) in block_data.iter() { + let marf_leaf = TrieLeaf::from_value(&[], value.clone()); + + let read_time = SystemTime::now(); + let leaf = MARF::get_path( + &mut marf.borrow_storage_backend(), + &last_block_header, + &path, + ) + .unwrap() + .unwrap(); + + let read_time = read_time.elapsed().unwrap().as_nanos(); + total_read_time += read_time; + + assert_eq!(leaf.data.to_vec(), marf_leaf.data.to_vec()); + } + } + + let read_bench = marf.borrow_storage_backend().get_benchmarks(); + eprintln!( + "MARF bench reads ({} total): {:#?}", + total_read_time, &read_bench + ); + + let mut bench = write_bench; + bench.add(&read_bench); + + eprintln!("MARF bench total: {:#?}", &bench); + + root_hash = marf.get_root_hash_at(&last_block_header).unwrap(); + eprintln!("root hash at {:?}: {:?}", &last_block_header, &root_hash); +} + +#[test] +fn test_marf_node_compressed() { + let test_data = make_test_insert_data(8, 256); + let root_hash = test_marf_with_cache( + "test_marf_node_compressed", + "noop", + TrieHashCalculationMode::Immediate, + &test_data, + Some(8), + ); + eprintln!("Final root hash is {}", root_hash); +} + #[test] fn test_marf_node_cache_noop() { let test_data = make_test_insert_data(128, 128); diff --git a/stackslib/src/chainstate/stacks/index/test/file.rs b/stackslib/src/chainstate/stacks/index/test/file.rs index 101593b2679..639c3ab50f2 100644 --- a/stackslib/src/chainstate/stacks/index/test/file.rs +++ 
b/stackslib/src/chainstate/stacks/index/test/file.rs @@ -19,8 +19,8 @@ use std::fs; use rusqlite::{Connection, OpenFlags}; use super::*; -use crate::chainstate::stacks::index::cache::test::make_test_insert_data; use crate::chainstate::stacks::index::file::*; +use crate::chainstate::stacks::index::test::cache::make_test_insert_data; use crate::chainstate::stacks::index::*; use crate::util_lib::db::*; diff --git a/stackslib/src/chainstate/stacks/index/test/marf.rs b/stackslib/src/chainstate/stacks/index/test/marf.rs index 16a0e7bdeee..dbe66183896 100644 --- a/stackslib/src/chainstate/stacks/index/test/marf.rs +++ b/stackslib/src/chainstate/stacks/index/test/marf.rs @@ -1609,8 +1609,8 @@ fn marf_read_random_1048576_4096_file_storage() { } } -// insert a range of 4096 consecutive keys (forcing node promotions) by varying the low-order bits. -// every 128 keys, make a new trie. +// insert a range of 128 consecutive keys (forcing node promotions) by varying the low-order bits. +// every 32 keys, make a new trie. 
#[test] fn marf_insert_128_32() { marf_insert( @@ -2226,3 +2226,114 @@ fn test_marf_unconfirmed() { .unwrap_err(); assert!(matches!(e, Error::NotFoundError)); } + +#[test] +fn test_marf_commit_to_same_block_hash() { + let sentinel: StacksBlockId = StacksBlockId::sentinel(); + let block_0 = StacksBlockId::from_bytes(&[0u8; 32]).unwrap(); + + let marf_opts = MARFOpenOpts::default(); + let mut marf: MARF = MARF::from_path(":memory:", marf_opts).unwrap(); + + marf.begin(&sentinel, &block_0).unwrap(); + marf.insert("key1", MARFValue::from_value("value2")) + .unwrap(); + marf.seal().unwrap(); + + marf.with_conn(|conn| { + let (cur_block, cur_opt_id) = conn.get_cur_block_and_id(); + // Before the commit the `TrieStorageConnection` knows just the next_chain_tip block + assert_eq!(block_0, cur_block, "Current block before commit"); + assert_eq!(None, cur_opt_id, "Current id before commit"); + }); + + // ensure that before a commit open chain tip is properly set + assert_eq!( + Some(&block_0), + marf.get_open_chain_tip(), + "Open tip block before commit" + ); + assert_eq!( + Some(0), + marf.get_open_chain_tip_height(), + "Open tip block height before commit" + ); + + // commit to the same block used in begin as next_chain_tip + marf.commit_to(&block_0).unwrap(); + + marf.with_conn(|conn| { + let (cur_block, cur_opt_id) = conn.get_cur_block_and_id(); + // After the commit the `TrieStorageConnection` must know both block and id (related to the block specified in `commit_to`) + assert_eq!(block_0, cur_block, "Current block after commit"); + assert_eq!(Some(1), cur_opt_id, "Current id after commit"); + }); + + // ensure that after a commit open chain tip gets reset + assert_eq!( + None, + marf.get_open_chain_tip(), + "Open tip block after commit" + ); + assert_eq!( + None, + marf.get_open_chain_tip_height(), + "Open tip block height after commit" + ); +} + +#[test] +fn test_marf_commit_to_other_block_hash() { + let sentinel: StacksBlockId = StacksBlockId::sentinel(); + let
block_0 = StacksBlockId::from_bytes(&[0u8; 32]).unwrap(); + let block_1 = StacksBlockId::from_bytes(&[1u8; 32]).unwrap(); + + let marf_opts = MARFOpenOpts::default(); + let mut marf: MARF = MARF::from_path(":memory:", marf_opts).unwrap(); + + marf.begin(&sentinel, &block_0).unwrap(); + marf.insert("key1", MARFValue::from_value("value2")) + .unwrap(); + marf.seal().unwrap(); + + marf.with_conn(|conn| { + let (cur_block, cur_opt_id) = conn.get_cur_block_and_id(); + // Before the commit the `TrieStorageConnection` knows just the next_chain_tip block + assert_eq!(block_0, cur_block, "Current block before commit"); + assert_eq!(None, cur_opt_id, "Current id before commit"); + }); + + // ensure that before a commit open chain tip is properly set + assert_eq!( + Some(&block_0), + marf.get_open_chain_tip(), + "Open tip block before commit" + ); + assert_eq!( + Some(0), + marf.get_open_chain_tip_height(), + "Open tip block height before commit" + ); + + // commit to a different block than the one used in begin as next_chain_tip + marf.commit_to(&block_1).unwrap(); + + marf.with_conn(|conn| { + let (cur_block, cur_opt_id) = conn.get_cur_block_and_id(); + // After the commit the `TrieStorageConnection` must know both block and id (related to the block specified in `commit_to`) + assert_eq!(block_1, cur_block, "Current block after commit"); + assert_eq!(Some(1), cur_opt_id, "Current id after commit"); + }); + + // ensure that after a commit open chain tip gets reset + assert_eq!( + None, + marf.get_open_chain_tip(), + "Open tip block after commit" + ); + assert_eq!( + None, + marf.get_open_chain_tip_height(), + "Open tip block height after commit" + ); +} diff --git a/stackslib/src/chainstate/stacks/index/test/mod.rs b/stackslib/src/chainstate/stacks/index/test/mod.rs index d558ff2a11c..5443731c937 100644 --- a/stackslib/src/chainstate/stacks/index/test/mod.rs +++ b/stackslib/src/chainstate/stacks/index/test/mod.rs @@ -125,6 +125,13 @@ pub fn merkle_test_marf( value: &[u8], root_to_block:
Option>, ) -> HashMap { + s.open_block(header).unwrap(); + let (_, root_hash) = Trie::read_root(s).unwrap(); + let triepath = TrieHash::from_bytes(path).unwrap(); + + let mut marf_value = [0u8; 40]; + marf_value.copy_from_slice(&value[0..40]); + test_debug!("---------"); test_debug!( "MARF merkle prove: merkle_test_marf({:?}, {:?}, {:?})?", @@ -132,25 +139,20 @@ pub fn merkle_test_marf( path, value ); + test_debug!("MARF merkle verify target root hash: {:?}", &root_hash); + test_debug!("MARF merkle verify source block: {:?}", header); test_debug!("---------"); - s.open_block(header).unwrap(); - let (_, root_hash) = Trie::read_root(s).unwrap(); - let triepath = TrieHash::from_bytes(path).unwrap(); - - let mut marf_value = [0u8; 40]; - marf_value.copy_from_slice(&value[0..40]); - let proof = TrieMerkleProof::from_path(s, &triepath, &MARFValue(marf_value), header).unwrap(); + let root_to_block = root_to_block.unwrap_or_else(|| s.read_root_to_block_table().unwrap()); test_debug!("---------"); test_debug!("MARF merkle verify: {:?}", &proof); test_debug!("MARF merkle verify target root hash: {:?}", &root_hash); test_debug!("MARF merkle verify source block: {:?}", header); + test_debug!("MARF root-to-block: {:?}", &root_to_block); test_debug!("---------"); - let root_to_block = root_to_block.unwrap_or_else(|| s.read_root_to_block_table().unwrap()); - assert!(proof.verify( &triepath, &MARFValue(marf_value), diff --git a/stackslib/src/chainstate/stacks/index/trie.rs b/stackslib/src/chainstate/stacks/index/trie.rs index 3bc53819801..85860ff0d1c 100644 --- a/stackslib/src/chainstate/stacks/index/trie.rs +++ b/stackslib/src/chainstate/stacks/index/trie.rs @@ -16,8 +16,6 @@ /// This module defines the methods for reading and inserting into a Trie use sha2::Digest; -use stacks_common::types::chainstate::{TrieHash, TRIEHASH_ENCODED_SIZE}; -use stacks_common::util::macros::is_trace; use crate::chainstate::stacks::index::bits::{get_leaf_hash, get_node_hash}; use 
crate::chainstate::stacks::index::marf::MARF; @@ -27,6 +25,8 @@ use crate::chainstate::stacks::index::node::{ }; use crate::chainstate::stacks::index::storage::{TrieHashCalculationMode, TrieStorageConnection}; use crate::chainstate::stacks::index::{Error, MarfTrieId, TrieHasher, TrieLeaf}; +use crate::types::chainstate::{TrieHash, TRIEHASH_ENCODED_SIZE}; +use crate::util::macros::is_trace; /// We don't actually instantiate a Trie, but we still need to pass a type parameter for the /// storage implementation. @@ -190,7 +190,7 @@ impl Trie { storage: &mut TrieStorageConnection, ptr: &TriePtr, cursor: &mut TrieCursor, - ) -> Result<(TrieNodeType, TrieHash, TriePtr), Error> { + ) -> Result<(T, TrieNodeType, TrieHash, TriePtr), Error> { if !is_backptr(ptr.id()) { // child is in this block if ptr.id() == (TrieNodeID::Empty as u8) { @@ -198,7 +198,7 @@ impl Trie { return Err(Error::CorruptionError("ptr is empty".to_string())); } let (node, node_hash) = storage.read_nodetype(ptr)?; - Ok((node, node_hash, *ptr)) + Ok((storage.get_cur_block(), node, node_hash, *ptr)) } else { storage.bench_mut().marf_find_backptr_node_start(); // ptr is a backptr -- find the block @@ -224,7 +224,7 @@ impl Trie { let (node, node_hash) = storage.read_nodetype(&backptr)?; cursor.repair_backptr_step_backptr(&node, &backptr, storage.get_cur_block()); - Ok((node, node_hash, backptr)) + Ok((back_block_hash, node, node_hash, backptr)) } } diff --git a/stackslib/src/chainstate/stacks/index/trie_sql.rs b/stackslib/src/chainstate/stacks/index/trie_sql.rs index 009f2b68886..389a5d65da0 100644 --- a/stackslib/src/chainstate/stacks/index/trie_sql.rs +++ b/stackslib/src/chainstate/stacks/index/trie_sql.rs @@ -21,8 +21,6 @@ use std::io::Write; use rusqlite::blob::Blob; use rusqlite::{params, Connection, DatabaseName, OptionalExtension, Transaction}; -use stacks_common::types::chainstate::TrieHash; -use stacks_common::types::sqlite::NO_PARAMS; #[cfg(test)] use 
crate::chainstate::stacks::index::bits::read_hash_bytes; @@ -33,6 +31,8 @@ use crate::chainstate::stacks::index::node::{TrieNodeType, TriePtr}; #[cfg(test)] use crate::chainstate::stacks::index::storage::TrieStorageConnection; use crate::chainstate::stacks::index::{trie_sql, Error, MarfTrieId}; +use crate::types::chainstate::TrieHash; +use crate::types::sqlite::NO_PARAMS; use crate::util_lib::db::{query_count, query_row, tx_begin_immediate, u64_to_sql}; static SQL_MARF_DATA_TABLE: &str = " @@ -498,7 +498,7 @@ pub fn get_external_trie_offset_length( ) -> Result<(u64, u64), Error> { let qry = "SELECT external_offset, external_length FROM marf_data WHERE block_id = ?1"; let args = params![block_id]; - let (offset, length) = query_row(conn, qry, args)?.ok_or(Error::NotFoundError)?; + let (offset, length): (u64, u64) = query_row(conn, qry, args)?.ok_or(Error::NotFoundError)?; Ok((offset, length)) } @@ -509,7 +509,7 @@ pub fn get_external_trie_offset_length_by_bhh( ) -> Result<(u64, u64), Error> { let qry = "SELECT external_offset, external_length FROM marf_data WHERE block_hash = ?1"; let args = params![bhh]; - let (offset, length) = query_row(conn, qry, args)?.ok_or(Error::NotFoundError)?; + let (offset, length): (u64, u64) = query_row(conn, qry, args)?.ok_or(Error::NotFoundError)?; Ok((offset, length)) } @@ -517,7 +517,7 @@ pub fn get_external_trie_offset_length_by_bhh( /// which the next trie will be appended. pub fn get_external_blobs_length(conn: &Connection) -> Result { let qry = "SELECT (external_offset + external_length) AS blobs_length FROM marf_data ORDER BY external_offset DESC LIMIT 1"; - let max_len = query_row(conn, qry, NO_PARAMS)?.unwrap_or(0); + let max_len: u64 = query_row(conn, qry, NO_PARAMS)?.unwrap_or(0); Ok(max_len) }