diff --git a/Cargo.lock b/Cargo.lock index c5541df21f..a07e25ee42 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -18,6 +18,18 @@ dependencies = [ "generic-array", ] +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -1462,6 +1474,7 @@ dependencies = [ name = "commonware-runtime" version = "0.0.64" dependencies = [ + "ahash", "async-lock", "axum", "bytes", @@ -1474,6 +1487,7 @@ dependencies = [ "futures", "getrandom 0.2.16", "governor", + "hashbrown 0.16.1", "io-uring", "libc", "opentelemetry", diff --git a/Cargo.toml b/Cargo.toml index 5f4d0b35f9..1b36d9dfc0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -62,6 +62,7 @@ undocumented_unsafe_blocks = "deny" suspicious_op_assign_impl = "allow" [workspace.dependencies] +ahash = { version = "0.8.12", default-features = false } anyhow = { version = "1.0.99", default-features = false } arbitrary = "1.4.1" async-lock = "3.4.0" diff --git a/p2p/src/simulated/network.rs b/p2p/src/simulated/network.rs index e65b15b5de..f518e517e1 100644 --- a/p2p/src/simulated/network.rs +++ b/p2p/src/simulated/network.rs @@ -15,8 +15,8 @@ use commonware_codec::{DecodeExt, FixedSize}; use commonware_cryptography::PublicKey; use commonware_macros::{select, select_loop}; use commonware_runtime::{ - spawn_cell, Clock, ContextCell, Handle, Listener as _, Metrics, Network as RNetwork, Quota, - Spawner, + spawn_cell, Clock, ContextCell, Handle, HashMap, HashSet, Listener as _, Metrics, + Network as RNetwork, Quota, Spawner, }; use commonware_stream::utils::codec::{recv_frame, send_frame}; use commonware_utils::{channels::ring, ordered::Set, NZUsize, TryCollect}; @@ -29,7 +29,7 @@ use prometheus_client::metrics::{counter::Counter, family::Family}; use rand::Rng; use 
rand_distr::{Distribution, Normal}; use std::{ - collections::{BTreeMap, HashMap, HashSet}, + collections::BTreeMap, fmt::Debug, net::{IpAddr, Ipv4Addr, SocketAddr}, time::{Duration, SystemTime}, @@ -133,13 +133,13 @@ pub struct Network links: HashMap<(P, P), Link>, // A map from a public key to a peer - peers: BTreeMap>, + peers: HashMap>, // Peer sets indexed by their ID peer_sets: BTreeMap>, // Reference count for each peer (number of peer sets they belong to) - peer_refs: BTreeMap, + peer_refs: HashMap, // Maximum number of peer sets to track tracked_peer_sets: Option, @@ -193,11 +193,11 @@ impl Network oracle_sender: oracle_sender.clone(), sender, receiver, - links: HashMap::new(), - peers: BTreeMap::new(), - peer_sets: BTreeMap::new(), - peer_refs: BTreeMap::new(), - blocks: HashSet::new(), + links: HashMap::default(), + peers: HashMap::default(), + peer_sets: BTreeMap::default(), + peer_refs: HashMap::default(), + blocks: HashSet::default(), transmitter: transmitter::State::new(), subscribers: Vec::new(), peer_subscribers: Vec::new(), @@ -363,7 +363,7 @@ impl Network .keys() .cloned() .try_collect() - .expect("BTreeMap keys are unique"), + .expect("HashMap keys are unique"), )); } else { // Return the peer set at the given index @@ -525,13 +525,13 @@ impl Network .keys() .cloned() .try_collect() - .expect("BTreeMap keys are unique") + .expect("HashMap keys are unique") } else { self.peer_refs .keys() .cloned() .try_collect() - .expect("BTreeMap keys are unique") + .expect("HashMap keys are unique") } } } @@ -1096,7 +1096,7 @@ impl Peer

{ // Spawn router context.with_label("router").spawn(|context| async move { // Map of channels to mailboxes (senders to particular channels) - let mut mailboxes = HashMap::new(); + let mut mailboxes = HashMap::default(); // Continually listen for control messages and outbound messages select_loop! { diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index 46177ca4bd..08de9c25d2 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -14,6 +14,7 @@ documentation = "https://docs.rs/commonware-runtime" workspace = true [dependencies] +ahash.workspace = true async-lock.workspace = true bytes.workspace = true cfg-if.workspace = true @@ -22,6 +23,7 @@ commonware-macros.workspace = true commonware-utils = { workspace = true, features = ["std"] } futures.workspace = true governor.workspace = true +hashbrown.workspace = true io-uring = { workspace = true, optional = true } libc.workspace = true opentelemetry.workspace = true diff --git a/runtime/src/deterministic.rs b/runtime/src/deterministic.rs index 3f9908ee4d..8c79b3a00f 100644 --- a/runtime/src/deterministic.rs +++ b/runtime/src/deterministic.rs @@ -37,6 +37,7 @@ //! 
``` use crate::{ + hash, network::{ audited::Network as AuditedNetwork, deterministic::Network as DeterministicNetwork, metered::Network as MeteredNetwork, @@ -365,6 +366,7 @@ pub struct Checkpoint { storage: Arc, dns: Mutex>>, catch_panics: bool, + hash_seed: u64, } impl Checkpoint { @@ -393,6 +395,9 @@ impl From for Runner { impl From for Runner { fn from(checkpoint: Checkpoint) -> Self { + // Restore TLS hash seed for deterministic HashMap/HashSet iteration order + hash::set_seed(checkpoint.hash_seed); + Self { state: State::Checkpoint(checkpoint), } @@ -404,6 +409,10 @@ impl Runner { pub fn new(cfg: Config) -> Self { // Ensure config is valid cfg.assert(); + + // Set TLS hash seed for deterministic HashMap/HashSet iteration order + hash::set_seed(cfg.seed); + Self { state: State::Config(cfg), } @@ -606,6 +615,7 @@ impl Runner { storage, dns: executor.dns, catch_panics: executor.panicker.catch(), + hash_seed: hash::get_seed().expect("hash seed not set"), }; (output, checkpoint) @@ -1811,4 +1821,69 @@ mod tests { assert!(iterations > 500); }); } + + #[test] + fn test_hashmap_deterministic_iteration() { + use crate::HashMap; + + // Helper to create a HashMap and collect iteration order + fn collect_iteration_order(seed: u64) -> Vec { + let executor = deterministic::Runner::seeded(seed); + executor.start(|_context| async move { + let mut map: HashMap = HashMap::default(); + for i in 0..100 { + map.insert(i, "value"); + } + map.keys().copied().collect() + }) + } + + // Same seed should produce same iteration order + let order1 = collect_iteration_order(42); + let order2 = collect_iteration_order(42); + assert_eq!( + order1, order2, + "same seed should produce same iteration order" + ); + + // Different seeds should produce different iteration order + let order3 = collect_iteration_order(12345); + assert_ne!( + order1, order3, + "different seeds should produce different iteration order" + ); + } + + #[test] + fn test_hashmap_deterministic_after_checkpoint() { + use 
crate::HashMap; + + // Create a HashMap, checkpoint, and verify iteration order is preserved + let seed = 42u64; + let executor = deterministic::Runner::seeded(seed); + + let (order_before, checkpoint) = executor.start_and_recover(|_context| async move { + let mut map: HashMap = HashMap::default(); + for i in 0..50 { + map.insert(i, "value"); + } + map.keys().copied().collect::>() + }); + + // Recover from checkpoint and create another HashMap + let executor = deterministic::Runner::from(checkpoint); + let order_after = executor.start(|_context| async move { + let mut map: HashMap = HashMap::default(); + for i in 0..50 { + map.insert(i, "value"); + } + map.keys().copied().collect::>() + }); + + // Both should have the same iteration order (same seed preserved through checkpoint) + assert_eq!( + order_before, order_after, + "iteration order should be preserved through checkpoint" + ); + } } diff --git a/runtime/src/hash.rs b/runtime/src/hash.rs new file mode 100644 index 0000000000..53c9bc78cd --- /dev/null +++ b/runtime/src/hash.rs @@ -0,0 +1,174 @@ +//! Runtime-aware HashMap and HashSet collections. +//! +//! These types should be used instead of `std::collections::{HashMap, HashSet}` throughout +//! the codebase. They automatically use the appropriate hashing strategy based on the runtime: +//! +//! - **Deterministic runtime**: Fixed seed for reproducible iteration order +//! - **Tokio runtime**: Random seed for DoS resistance (same as std) +//! +//! # Usage +//! +//! ```ignore +//! use commonware_runtime::{HashMap, HashSet}; +//! ``` + +use std::{cell::Cell, hash::BuildHasher}; + +thread_local! { + /// Thread-local hash seed. + /// - `None` = production mode (use random seed for DoS resistance) + /// - `Some(seed)` = deterministic mode (use fixed seed for reproducibility) + static HASH_SEED: Cell> = const { Cell::new(None) }; +} + +/// Set the hash seed for the current thread. 
+///
+/// Called by the deterministic runtime when a runner is created to enable reproducible hashing.
+/// All HashMaps/HashSets created on this thread after this call will use the specified seed.
+pub fn set_seed(seed: u64) {
+    HASH_SEED.with(|s| s.set(Some(seed)));
+}
+
+/// Get the current hash seed for the current thread.
+///
+/// Returns `None` if no seed has been set (production mode).
+/// Used when creating checkpoints to preserve the seed for recovery.
+pub fn get_seed() -> Option<u64> {
+    HASH_SEED.with(|s| s.get())
+}
+
+/// Clear the hash seed for the current thread.
+///
+/// After this call, new maps/sets on this thread are keyed by `ahash::RandomState::new` again.
+pub fn clear_seed() {
+    HASH_SEED.with(|s| s.set(None));
+}
+
+/// A BuildHasher keyed from the thread-local seed if one is set, otherwise by aHash itself.
+///
+/// The seed is sampled once, at construction; later seed changes do not affect existing maps.
+#[derive(Clone)]
+pub struct RandomState(ahash::RandomState);
+
+impl Default for RandomState {
+    fn default() -> Self {
+        let state = HASH_SEED.with(|s| {
+            s.get().map_or_else(
+                // Production mode: let aHash choose its own keys
+                ahash::RandomState::new,
+                // Deterministic mode: explicit keys, so the full u64 seed is used on every target
+                |seed| ahash::RandomState::with_seeds(seed, 0, 0, 0),
+            )
+        });
+        Self(state)
+    }
+}
+
+impl BuildHasher for RandomState {
+    type Hasher = ahash::AHasher;
+
+    fn build_hasher(&self) -> Self::Hasher {
+        self.0.build_hasher()
+    }
+}
+
+/// A HashMap that uses deterministic hashing when a seed is set via TLS.
+///
+/// In production mode (no seed set), each map is keyed by `ahash::RandomState::new`
+/// (hashing with aHash rather than std's SipHash).
+///
+/// Uses hashbrown under the hood (same implementation as std::collections::HashMap).
+pub type HashMap<K, V> = hashbrown::HashMap<K, V, RandomState>;
+
+/// A HashSet that uses deterministic hashing when a seed is set via TLS.
+///
+/// In production mode (no seed set), each set is keyed by `ahash::RandomState::new`
+/// (hashing with aHash rather than std's SipHash).
+///
+/// Uses hashbrown under the hood (same implementation as std::collections::HashSet).
+pub type HashSet = hashbrown::HashSet; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_deterministic_hashmap_iteration_order() { + // Set a seed + set_seed(12345); + + // Create and populate a HashMap + let mut map1: HashMap = HashMap::default(); + for i in 0..100 { + map1.insert(i, "value"); + } + + // Collect iteration order + let order1: Vec = map1.keys().copied().collect(); + + // Create another HashMap with the same seed + clear_seed(); + set_seed(12345); + + let mut map2: HashMap = HashMap::default(); + for i in 0..100 { + map2.insert(i, "value"); + } + + let order2: Vec = map2.keys().copied().collect(); + + // Iteration order should be identical + assert_eq!(order1, order2); + + // Clean up + clear_seed(); + } + + #[test] + fn test_different_seeds_different_order() { + set_seed(11111); + let mut map1: HashMap = HashMap::default(); + for i in 0..100 { + map1.insert(i, "value"); + } + let order1: Vec = map1.keys().copied().collect(); + + clear_seed(); + set_seed(22222); + let mut map2: HashMap = HashMap::default(); + for i in 0..100 { + map2.insert(i, "value"); + } + let order2: Vec = map2.keys().copied().collect(); + + // Different seeds should produce different iteration orders + // (with very high probability for 100 elements) + assert_ne!(order1, order2); + + clear_seed(); + } + + #[test] + fn test_no_seed_uses_random() { + clear_seed(); + + // Without a seed, each HashMap should get random keys + // We can't easily test randomness, but we can verify it doesn't panic + let mut map: HashMap = HashMap::default(); + map.insert(1, "one"); + map.insert(2, "two"); + assert_eq!(map.len(), 2); + } + + #[test] + fn test_get_seed() { + clear_seed(); + assert_eq!(get_seed(), None); + + set_seed(42); + assert_eq!(get_seed(), Some(42)); + + clear_seed(); + assert_eq!(get_seed(), None); + } +} diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 2720ee5455..cbbf4c26b4 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -37,6 +37,8 @@ 
use thiserror::Error; mod macros; pub mod deterministic; +pub mod hash; +pub use hash::{HashMap, HashSet}; pub mod mocks; cfg_if::cfg_if! { if #[cfg(not(target_arch = "wasm32"))] {