From 8a047ed6e437a87454c3d6ab5f19f5029b6af9de Mon Sep 17 00:00:00 2001 From: Matthias Seitz Date: Mon, 2 Dec 2024 17:02:19 +0100 Subject: [PATCH] chore: move Integerlist to db-api (#13062) --- Cargo.lock | 3 +- crates/net/ecies/Cargo.toml | 2 +- crates/primitives-traits/Cargo.toml | 2 - crates/primitives-traits/src/integer_list.rs | 196 ------------------ crates/primitives-traits/src/lib.rs | 3 - crates/storage/db-api/Cargo.toml | 1 + .../storage/db-api/src/models/integer_list.rs | 181 +++++++++++++++- crates/storage/db-api/src/models/mod.rs | 6 +- crates/storage/db-common/src/init.rs | 3 +- .../storage/db/src/implementation/mdbx/mod.rs | 5 +- .../db/src/tables/codecs/fuzz/inputs.rs | 2 +- .../storage/db/src/tables/codecs/fuzz/mod.rs | 3 - crates/storage/db/src/tables/mod.rs | 8 +- crates/storage/libmdbx-rs/Cargo.toml | 2 +- 14 files changed, 195 insertions(+), 222 deletions(-) delete mode 100644 crates/primitives-traits/src/integer_list.rs diff --git a/Cargo.lock b/Cargo.lock index 2b02dc2e3e25..b39cd76e09be 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6953,6 +6953,7 @@ dependencies = [ "reth-stages-types", "reth-storage-errors", "reth-trie-common", + "roaring", "serde", "test-fuzz", ] @@ -8670,7 +8671,6 @@ dependencies = [ "rand 0.8.5", "reth-codecs", "revm-primitives", - "roaring", "serde", "serde_json", "serde_with", @@ -9720,7 +9720,6 @@ checksum = "f81dc953b2244ddd5e7860cb0bb2a790494b898ef321d4aff8e260efab60cc88" dependencies = [ "bytemuck", "byteorder", - "serde", ] [[package]] diff --git a/crates/net/ecies/Cargo.toml b/crates/net/ecies/Cargo.toml index eb2a0b023b3f..ec34e3e7a323 100644 --- a/crates/net/ecies/Cargo.toml +++ b/crates/net/ecies/Cargo.toml @@ -28,7 +28,7 @@ tracing.workspace = true # HeaderBytes generic-array.workspace = true typenum = "1.15.0" -byteorder = "1.4.3" +byteorder.workspace = true # crypto rand.workspace = true diff --git a/crates/primitives-traits/Cargo.toml b/crates/primitives-traits/Cargo.toml index ceee1e26cecd..9265c878d6cb 100644 --- a/crates/primitives-traits/Cargo.toml +++ b/crates/primitives-traits/Cargo.toml @@ -30,7 +30,6 @@ op-alloy-consensus = { workspace = true, optional = true } byteorder = { workspace = true, optional = true } bytes.workspace = true derive_more.workspace = true -roaring = "0.10.2" serde_with = { workspace = true, optional = true } auto_impl.workspace = true @@ -100,7 +99,6 @@ serde = [ "rand/serde", "reth-codecs?/serde", "revm-primitives/serde", - "roaring/serde", "revm-primitives/serde", "op-alloy-consensus?/serde" ] diff --git a/crates/primitives-traits/src/integer_list.rs b/crates/primitives-traits/src/integer_list.rs deleted file mode 100644 index 6fc6d75899ce..000000000000 --- a/crates/primitives-traits/src/integer_list.rs +++ /dev/null @@ -1,196 +0,0 @@ -use alloc::vec::Vec; -use core::fmt; - -use bytes::BufMut; -use derive_more::Deref; -use roaring::RoaringTreemap; - -/// A data structure that uses Roaring Bitmaps to efficiently store a list of integers. -/// -/// This structure provides excellent compression while allowing direct access to individual -/// elements without the need for full decompression. -/// -/// Key features: -/// - Efficient compression: the underlying Roaring Bitmaps significantly reduce memory usage. -/// - Direct access: elements can be accessed or queried without needing to decode the entire list. -/// - [`RoaringTreemap`] backing: internally backed by [`RoaringTreemap`], which supports 64-bit -/// integers. -#[derive(Clone, PartialEq, Default, Deref)] -pub struct IntegerList(pub RoaringTreemap); - -impl fmt::Debug for IntegerList { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str("IntegerList")?; - f.debug_list().entries(self.0.iter()).finish() - } -} - -impl IntegerList { - /// Creates a new empty [`IntegerList`]. - pub fn empty() -> Self { - Self(RoaringTreemap::new()) - } - - /// Creates an [`IntegerList`] from a list of integers. - /// - /// Returns an error if the list is not pre-sorted. - pub fn new(list: impl IntoIterator) -> Result { - RoaringTreemap::from_sorted_iter(list) - .map(Self) - .map_err(|_| IntegerListError::UnsortedInput) - } - - /// Creates an [`IntegerList`] from a pre-sorted list of integers. - /// - /// # Panics - /// - /// Panics if the list is not pre-sorted. - #[inline] - #[track_caller] - pub fn new_pre_sorted(list: impl IntoIterator) -> Self { - Self::new(list).expect("IntegerList must be pre-sorted and non-empty") - } - - /// Appends a list of integers to the current list. - pub fn append(&mut self, list: impl IntoIterator) -> Result { - self.0.append(list).map_err(|_| IntegerListError::UnsortedInput) - } - - /// Pushes a new integer to the list. - pub fn push(&mut self, value: u64) -> Result<(), IntegerListError> { - self.0.push(value).then_some(()).ok_or(IntegerListError::UnsortedInput) - } - - /// Clears the list. - pub fn clear(&mut self) { - self.0.clear(); - } - - /// Serializes a [`IntegerList`] into a sequence of bytes. - pub fn to_bytes(&self) -> Vec { - let mut vec = Vec::with_capacity(self.0.serialized_size()); - self.0.serialize_into(&mut vec).expect("not able to encode IntegerList"); - vec - } - - /// Serializes a [`IntegerList`] into a sequence of bytes. - pub fn to_mut_bytes(&self, buf: &mut B) { - self.0.serialize_into(buf.writer()).unwrap(); - } - - /// Deserializes a sequence of bytes into a proper [`IntegerList`]. - pub fn from_bytes(data: &[u8]) -> Result { - RoaringTreemap::deserialize_from(data) - .map(Self) - .map_err(|_| IntegerListError::FailedToDeserialize) - } -} - -#[cfg(feature = "serde")] -impl serde::Serialize for IntegerList { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - use serde::ser::SerializeSeq; - - let mut seq = serializer.serialize_seq(Some(self.len() as usize))?; - for e in &self.0 { - seq.serialize_element(&e)?; - } - seq.end() - } -} - -#[cfg(feature = "serde")] -struct IntegerListVisitor; - -#[cfg(feature = "serde")] -impl<'de> serde::de::Visitor<'de> for IntegerListVisitor { - type Value = IntegerList; - - fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str("a usize array") - } - - fn visit_seq(self, mut seq: E) -> Result - where - E: serde::de::SeqAccess<'de>, - { - let mut list = IntegerList::empty(); - while let Some(item) = seq.next_element()? { - list.push(item).map_err(serde::de::Error::custom)?; - } - Ok(list) - } -} - -#[cfg(feature = "serde")] -impl<'de> serde::Deserialize<'de> for IntegerList { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - deserializer.deserialize_byte_buf(IntegerListVisitor) - } -} - -#[cfg(any(test, feature = "arbitrary"))] -use arbitrary::{Arbitrary, Unstructured}; - -#[cfg(any(test, feature = "arbitrary"))] -impl<'a> Arbitrary<'a> for IntegerList { - fn arbitrary(u: &mut Unstructured<'a>) -> Result { - let mut nums: Vec = Vec::arbitrary(u)?; - nums.sort_unstable(); - Self::new(nums).map_err(|_| arbitrary::Error::IncorrectFormat) - } -} - -/// Primitives error type. -#[derive(Debug, derive_more::Display, derive_more::Error)] -pub enum IntegerListError { - /// The provided input is unsorted. - #[display("the provided input is unsorted")] - UnsortedInput, - /// Failed to deserialize data into type. - #[display("failed to deserialize data into type")] - FailedToDeserialize, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn empty_list() { - assert_eq!(IntegerList::empty().len(), 0); - assert_eq!(IntegerList::new_pre_sorted(std::iter::empty()).len(), 0); - } - - #[test] - fn test_integer_list() { - let original_list = [1, 2, 3]; - let ef_list = IntegerList::new(original_list).unwrap(); - assert_eq!(ef_list.iter().collect::>(), original_list); - } - - #[test] - fn test_integer_list_serialization() { - let original_list = [1, 2, 3]; - let ef_list = IntegerList::new(original_list).unwrap(); - - let blist = ef_list.to_bytes(); - assert_eq!(IntegerList::from_bytes(&blist).unwrap(), ef_list) - } - - #[test] - fn serde_serialize_deserialize() { - let original_list = [1, 2, 3]; - let ef_list = IntegerList::new(original_list).unwrap(); - - let serde_out = serde_json::to_string(&ef_list).unwrap(); - let serde_ef_list = serde_json::from_str::(&serde_out).unwrap(); - assert_eq!(serde_ef_list, ef_list); - } -} diff --git a/crates/primitives-traits/src/lib.rs b/crates/primitives-traits/src/lib.rs index c88da5ad7a7b..04d02be0b7dd 100644 --- a/crates/primitives-traits/src/lib.rs +++ b/crates/primitives-traits/src/lib.rs @@ -31,9 +31,6 @@ pub use transaction::{ FullTransaction, Transaction, }; -mod integer_list; -pub use integer_list::{IntegerList, IntegerListError}; - pub mod block; pub use block::{ body::{BlockBody, FullBlockBody}, diff --git a/crates/storage/db-api/Cargo.toml b/crates/storage/db-api/Cargo.toml index 3aa908a60093..05581b9725d7 100644 --- a/crates/storage/db-api/Cargo.toml +++ b/crates/storage/db-api/Cargo.toml @@ -29,6 +29,7 @@ alloy-consensus.workspace = true # codecs modular-bitfield.workspace = true +roaring = "0.10.2" parity-scale-codec = { version = "3.2.1", features = ["bytes"] } serde = { workspace = true, default-features = false } diff --git a/crates/storage/db-api/src/models/integer_list.rs b/crates/storage/db-api/src/models/integer_list.rs index 480b52a9e2c0..5301ec303e50 100644 --- a/crates/storage/db-api/src/models/integer_list.rs +++ b/crates/storage/db-api/src/models/integer_list.rs @@ -4,7 +4,159 @@ use crate::{ table::{Compress, Decompress}, DatabaseError, }; -use reth_primitives_traits::IntegerList; +use bytes::BufMut; +use core::fmt; +use derive_more::Deref; +use roaring::RoaringTreemap; + +/// A data structure that uses Roaring Bitmaps to efficiently store a list of integers. +/// +/// This structure provides excellent compression while allowing direct access to individual +/// elements without the need for full decompression. +/// +/// Key features: +/// - Efficient compression: the underlying Roaring Bitmaps significantly reduce memory usage. +/// - Direct access: elements can be accessed or queried without needing to decode the entire list. +/// - [`RoaringTreemap`] backing: internally backed by [`RoaringTreemap`], which supports 64-bit +/// integers. +#[derive(Clone, PartialEq, Default, Deref)] +pub struct IntegerList(pub RoaringTreemap); + +impl fmt::Debug for IntegerList { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("IntegerList")?; + f.debug_list().entries(self.0.iter()).finish() + } +} + +impl IntegerList { + /// Creates a new empty [`IntegerList`]. + pub fn empty() -> Self { + Self(RoaringTreemap::new()) + } + + /// Creates an [`IntegerList`] from a list of integers. + /// + /// Returns an error if the list is not pre-sorted. + pub fn new(list: impl IntoIterator) -> Result { + RoaringTreemap::from_sorted_iter(list) + .map(Self) + .map_err(|_| IntegerListError::UnsortedInput) + } + + /// Creates an [`IntegerList`] from a pre-sorted list of integers. + /// + /// # Panics + /// + /// Panics if the list is not pre-sorted. + #[inline] + #[track_caller] + pub fn new_pre_sorted(list: impl IntoIterator) -> Self { + Self::new(list).expect("IntegerList must be pre-sorted and non-empty") + } + + /// Appends a list of integers to the current list. + pub fn append(&mut self, list: impl IntoIterator) -> Result { + self.0.append(list).map_err(|_| IntegerListError::UnsortedInput) + } + + /// Pushes a new integer to the list. + pub fn push(&mut self, value: u64) -> Result<(), IntegerListError> { + self.0.push(value).then_some(()).ok_or(IntegerListError::UnsortedInput) + } + + /// Clears the list. + pub fn clear(&mut self) { + self.0.clear(); + } + + /// Serializes a [`IntegerList`] into a sequence of bytes. + pub fn to_bytes(&self) -> Vec { + let mut vec = Vec::with_capacity(self.0.serialized_size()); + self.0.serialize_into(&mut vec).expect("not able to encode IntegerList"); + vec + } + + /// Serializes a [`IntegerList`] into a sequence of bytes. + pub fn to_mut_bytes(&self, buf: &mut B) { + self.0.serialize_into(buf.writer()).unwrap(); + } + + /// Deserializes a sequence of bytes into a proper [`IntegerList`]. + pub fn from_bytes(data: &[u8]) -> Result { + RoaringTreemap::deserialize_from(data) + .map(Self) + .map_err(|_| IntegerListError::FailedToDeserialize) + } +} + +impl serde::Serialize for IntegerList { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + use serde::ser::SerializeSeq; + + let mut seq = serializer.serialize_seq(Some(self.len() as usize))?; + for e in &self.0 { + seq.serialize_element(&e)?; + } + seq.end() + } +} + +struct IntegerListVisitor; + +impl<'de> serde::de::Visitor<'de> for IntegerListVisitor { + type Value = IntegerList; + + fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("a usize array") + } + + fn visit_seq(self, mut seq: E) -> Result + where + E: serde::de::SeqAccess<'de>, + { + let mut list = IntegerList::empty(); + while let Some(item) = seq.next_element()? { + list.push(item).map_err(serde::de::Error::custom)?; + } + Ok(list) + } +} + +impl<'de> serde::Deserialize<'de> for IntegerList { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + deserializer.deserialize_byte_buf(IntegerListVisitor) + } +} + +#[cfg(any(test, feature = "arbitrary"))] +use arbitrary::{Arbitrary, Unstructured}; + +#[cfg(any(test, feature = "arbitrary"))] +impl<'a> Arbitrary<'a> for IntegerList { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + let mut nums: Vec = Vec::arbitrary(u)?; + nums.sort_unstable(); + Self::new(nums).map_err(|_| arbitrary::Error::IncorrectFormat) + } +} + +/// Primitives error type. +#[derive(Debug, derive_more::Display, derive_more::Error)] +pub enum IntegerListError { + /// The provided input is unsorted. + #[display("the provided input is unsorted")] + UnsortedInput, + /// Failed to deserialize data into type. + #[display("failed to deserialize data into type")] + FailedToDeserialize, +} impl Compress for IntegerList { type Compressed = Vec; @@ -23,3 +175,30 @@ impl Decompress for IntegerList { Self::from_bytes(value).map_err(|_| DatabaseError::Decode) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty_list() { + assert_eq!(IntegerList::empty().len(), 0); + assert_eq!(IntegerList::new_pre_sorted(std::iter::empty()).len(), 0); + } + + #[test] + fn test_integer_list() { + let original_list = [1, 2, 3]; + let ef_list = IntegerList::new(original_list).unwrap(); + assert_eq!(ef_list.iter().collect::>(), original_list); + } + + #[test] + fn test_integer_list_serialization() { + let original_list = [1, 2, 3]; + let ef_list = IntegerList::new(original_list).unwrap(); + + let blist = ef_list.to_bytes(); + assert_eq!(IntegerList::from_bytes(&blist).unwrap(), ef_list) + } +} diff --git a/crates/storage/db-api/src/models/mod.rs b/crates/storage/db-api/src/models/mod.rs index 614dc598bdb7..0a008bb88a5f 100644 --- a/crates/storage/db-api/src/models/mod.rs +++ b/crates/storage/db-api/src/models/mod.rs @@ -8,9 +8,8 @@ use alloy_consensus::Header; use alloy_genesis::GenesisAccount; use alloy_primitives::{Address, Bytes, Log, B256, U256}; use reth_codecs::{add_arbitrary_tests, Compact}; -use reth_primitives::{ - Account, Bytecode, Receipt, StorageEntry, TransactionSigned, TransactionSignedNoHash, TxType, -}; +use reth_primitives::{Receipt, StorageEntry, TransactionSigned, TransactionSignedNoHash, TxType}; +use reth_primitives_traits::{Account, Bytecode}; use reth_prune_types::{PruneCheckpoint, PruneSegment}; use reth_stages_types::StageCheckpoint; use reth_trie_common::{StoredNibbles, StoredNibblesSubKey, *}; @@ -24,6 +23,7 @@ pub mod storage_sharded_key; pub use accounts::*; pub use blocks::*; +pub use integer_list::IntegerList; pub use reth_db_models::{ AccountBeforeTx, ClientVersion, StoredBlockBodyIndices, StoredBlockWithdrawals, }; diff --git a/crates/storage/db-common/src/init.rs b/crates/storage/db-common/src/init.rs index d738aaec4399..493b27be7808 100644 --- a/crates/storage/db-common/src/init.rs +++ b/crates/storage/db-common/src/init.rs @@ -609,12 +609,11 @@ mod tests { use reth_db::DatabaseEnv; use reth_db_api::{ cursor::DbCursorRO, - models::{storage_sharded_key::StorageShardedKey, ShardedKey}, + models::{storage_sharded_key::StorageShardedKey, IntegerList, ShardedKey}, table::{Table, TableRow}, transaction::DbTx, Database, }; - use reth_primitives_traits::IntegerList; use reth_provider::{ test_utils::{create_test_provider_factory_with_chain_spec, MockNodeTypesWithDB}, ProviderFactory, diff --git a/crates/storage/db/src/implementation/mdbx/mod.rs b/crates/storage/db/src/implementation/mdbx/mod.rs index 006213e4cb91..8a6811b15391 100644 --- a/crates/storage/db/src/implementation/mdbx/mod.rs +++ b/crates/storage/db/src/implementation/mdbx/mod.rs @@ -507,12 +507,11 @@ mod tests { use alloy_primitives::{Address, B256, U256}; use reth_db_api::{ cursor::{DbDupCursorRO, DbDupCursorRW, ReverseWalker, Walker}, - models::{AccountBeforeTx, ShardedKey}, + models::{AccountBeforeTx, IntegerList, ShardedKey}, table::{Encode, Table}, }; use reth_libmdbx::Error; - use reth_primitives::{Account, StorageEntry}; - use reth_primitives_traits::IntegerList; + use reth_primitives_traits::{Account, StorageEntry}; use reth_storage_errors::db::{DatabaseWriteError, DatabaseWriteOperation}; use std::str::FromStr; use tempfile::TempDir; diff --git a/crates/storage/db/src/tables/codecs/fuzz/inputs.rs b/crates/storage/db/src/tables/codecs/fuzz/inputs.rs index bb26e8b9e217..da15c112e628 100644 --- a/crates/storage/db/src/tables/codecs/fuzz/inputs.rs +++ b/crates/storage/db/src/tables/codecs/fuzz/inputs.rs @@ -1,6 +1,6 @@ //! Curates the input coming from the fuzzer for certain types. -use reth_primitives_traits::IntegerList; +use reth_db_api::models::IntegerList; use serde::{Deserialize, Serialize}; /// Makes sure that the list provided by the fuzzer is not empty and pre-sorted diff --git a/crates/storage/db/src/tables/codecs/fuzz/mod.rs b/crates/storage/db/src/tables/codecs/fuzz/mod.rs index e64a3841df49..f6b68897e349 100644 --- a/crates/storage/db/src/tables/codecs/fuzz/mod.rs +++ b/crates/storage/db/src/tables/codecs/fuzz/mod.rs @@ -16,9 +16,6 @@ macro_rules! impl_fuzzer_with_input { pub mod $name { use reth_db_api::table; - #[allow(unused_imports)] - - #[allow(unused_imports)] use reth_primitives_traits::*; diff --git a/crates/storage/db/src/tables/mod.rs b/crates/storage/db/src/tables/mod.rs index 940bb3aa2596..9ff21261eee5 100644 --- a/crates/storage/db/src/tables/mod.rs +++ b/crates/storage/db/src/tables/mod.rs @@ -26,13 +26,13 @@ use reth_db_api::{ accounts::BlockNumberAddress, blocks::{HeaderHash, StoredBlockOmmers}, storage_sharded_key::StorageShardedKey, - AccountBeforeTx, ClientVersion, CompactU256, ShardedKey, StoredBlockBodyIndices, - StoredBlockWithdrawals, + AccountBeforeTx, ClientVersion, CompactU256, IntegerList, ShardedKey, + StoredBlockBodyIndices, StoredBlockWithdrawals, }, table::{Decode, DupSort, Encode, Table}, }; -use reth_primitives::{Account, Bytecode, Receipt, StorageEntry, TransactionSignedNoHash}; -use reth_primitives_traits::IntegerList; +use reth_primitives::{Receipt, StorageEntry, TransactionSignedNoHash}; +use reth_primitives_traits::{Account, Bytecode}; use reth_prune_types::{PruneCheckpoint, PruneSegment}; use reth_stages_types::StageCheckpoint; use reth_trie_common::{BranchNodeCompact, StorageTrieEntry, StoredNibbles, StoredNibblesSubKey}; diff --git a/crates/storage/libmdbx-rs/Cargo.toml b/crates/storage/libmdbx-rs/Cargo.toml index fa10a73cb330..4679f4fe9149 100644 --- a/crates/storage/libmdbx-rs/Cargo.toml +++ b/crates/storage/libmdbx-rs/Cargo.toml @@ -15,7 +15,7 @@ workspace = true reth-mdbx-sys.workspace = true bitflags.workspace = true -byteorder = "1" +byteorder.workspace = true derive_more.workspace = true indexmap = "2" parking_lot.workspace = true