diff --git a/Cargo.lock b/Cargo.lock index f5d0c6a1a4..9b7834128d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1180,6 +1180,8 @@ dependencies = [ "ecdsa", "ed25519-consensus", "getrandom 0.2.16", + "num-rational", + "num-traits", "p256", "proptest", "rand 0.8.5", @@ -1204,8 +1206,10 @@ dependencies = [ "commonware-cryptography", "commonware-math", "commonware-parallel", + "commonware-utils", "ed25519-zebra", "libfuzzer-sys", + "num-rational", "p256", "rand 0.8.5", "sha2 0.10.9", diff --git a/cryptography/Cargo.toml b/cryptography/Cargo.toml index 6a73caa153..d31c09118a 100644 --- a/cryptography/Cargo.toml +++ b/cryptography/Cargo.toml @@ -27,6 +27,8 @@ commonware-parallel.workspace = true commonware-utils.workspace = true ecdsa.workspace = true ed25519-consensus = { workspace = true, default-features = false } +num-rational = { workspace = true, optional = true } +num-traits = { workspace = true, optional = true } p256 = { workspace = true, features = ["ecdsa"] } rand.workspace = true rand_chacha.workspace = true @@ -83,6 +85,8 @@ std = [ "ecdsa/std", "ed25519-consensus/std", "getrandom/std", + "num-rational", + "num-traits", "p256/std", "rand/std", "rand/std_rng", diff --git a/cryptography/conformance.toml b/cryptography/conformance.toml index 92365ae60f..016d43564b 100644 --- a/cryptography/conformance.toml +++ b/cryptography/conformance.toml @@ -6,9 +6,9 @@ hash = "c0501d4a691d1fccec7c5906e8608228569d24164150edd215838593e3b77512" n_cases = 65536 hash = "698dac6cf35fcd1557a962e7215da4251d7e42f68a2331b70ff0dd542c4116dd" -["commonware_cryptography::bloomfilter::conformance::FpRateBuckets"] +["commonware_cryptography::bloomfilter::conformance::RationalOptimalBits"] n_cases = 1024 -hash = "723f3a5fd8b147f722d5fa990f39b3ffd6a2f0df46d00dcb1e294647b565eb85" +hash = "62ed87a0f03d0ea9bc55b97389d288ad1dc8b43d01ce3fc3ee946457107423f3" ["commonware_cryptography::bls12381::certificate::multisig::tests::conformance::CodecConformance>"] n_cases = 65536 diff --git 
a/cryptography/fuzz/Cargo.toml b/cryptography/fuzz/Cargo.toml index a2eaa5f412..f5f594f2b7 100644 --- a/cryptography/fuzz/Cargo.toml +++ b/cryptography/fuzz/Cargo.toml @@ -16,8 +16,10 @@ commonware-codec = { workspace = true, features = ["std"] } commonware-cryptography = { workspace = true, features = ["std", "arbitrary"] } commonware-math.workspace = true commonware-parallel = { workspace = true, features = ["std"] } +commonware-utils = { workspace = true, features = ["std"] } ed25519-zebra.workspace = true libfuzzer-sys.workspace = true +num-rational.workspace = true p256 = { workspace = true, features = ["ecdsa"] } rand.workspace = true sha2.workspace = true diff --git a/cryptography/fuzz/fuzz_targets/bloomfilter.rs b/cryptography/fuzz/fuzz_targets/bloomfilter.rs index 7503663aa5..c355c80057 100644 --- a/cryptography/fuzz/fuzz_targets/bloomfilter.rs +++ b/cryptography/fuzz/fuzz_targets/bloomfilter.rs @@ -3,7 +3,9 @@ use arbitrary::Arbitrary; use commonware_codec::{Decode, Encode, EncodeSize}; use commonware_cryptography::{sha256::Sha256, BloomFilter}; +use commonware_utils::rational::BigRationalExt; use libfuzzer_sys::fuzz_target; +use num_rational::BigRational; use std::{ collections::HashSet, num::{NonZeroU16, NonZeroU8, NonZeroUsize}, @@ -26,7 +28,8 @@ enum Constructor { }, WithRate { expected_items: NonZeroU16, - fp_rate: f64, + fp_numerator: u32, + fp_denominator: u32, }, } @@ -43,11 +46,13 @@ impl<'a> Arbitrary<'a> for Constructor { Ok(Constructor::New { hashers, bits }) } else { let expected_items = u.arbitrary::()?; - // Generate f64 in range (0.0, 1.0) exclusive - let fp_rate = u.int_in_range(1u32..=u32::MAX - 1)? 
as f64 / u32::MAX as f64; + // Generate FP rate as rational: numerator in [1, denominator-1] to ensure (0, 1) + let fp_denominator = u.int_in_range(2u32..=10_000)?; + let fp_numerator = u.int_in_range(1u32..=fp_denominator - 1)?; Ok(Constructor::WithRate { expected_items, - fp_rate, + fp_numerator, + fp_denominator, }) } } @@ -77,11 +82,15 @@ fn fuzz(input: FuzzInput) { Constructor::New { hashers, bits } => BloomFilter::::new(hashers, bits.into()), Constructor::WithRate { expected_items, - fp_rate, - } => BloomFilter::::with_rate( - NonZeroUsize::new(expected_items.get() as usize).unwrap(), - fp_rate, - ), + fp_numerator, + fp_denominator, + } => { + let fp_rate = BigRational::from_frac_u64(fp_numerator as u64, fp_denominator as u64); + BloomFilter::::with_rate( + NonZeroUsize::new(expected_items.get() as usize).unwrap(), + fp_rate, + ) + } }; let cfg = (bf.hashers(), bf.bits().try_into().unwrap()); diff --git a/cryptography/src/bloomfilter/benches/contains.rs b/cryptography/src/bloomfilter/benches/contains.rs index c3af820a63..2ab80e0e6c 100644 --- a/cryptography/src/bloomfilter/benches/contains.rs +++ b/cryptography/src/bloomfilter/benches/contains.rs @@ -1,11 +1,19 @@ use commonware_cryptography::{blake3::Blake3, sha256::Sha256, BloomFilter, Hasher}; +use commonware_utils::rational::BigRationalExt; use criterion::{criterion_group, Criterion}; +use num_rational::BigRational; use rand::{rngs::StdRng, RngCore, SeedableRng}; use std::{collections::HashSet, hint::black_box, num::NonZeroUsize}; const ITEM_SIZES: [usize; 3] = [32, 2048, 4096]; const NUM_ITEMS: usize = 10000; -const FP_RATES: [f64; 2] = [0.1, 0.001]; + +fn fp_rates() -> [(BigRational, &'static str); 2] { + [ + (BigRational::from_frac_u64(1, 10), "10%"), + (BigRational::from_frac_u64(1, 1000), "0.1%"), + ] +} fn run_contains_bench(c: &mut Criterion, hasher: &str, query_inserted: bool) { let query_type = if query_inserted { @@ -14,7 +22,7 @@ fn run_contains_bench(c: &mut Criterion, hasher: &str, 
query_inserted "negative" }; for item_size in ITEM_SIZES { - for fp_rate in FP_RATES { + for (fp_rate, fp_label) in fp_rates() { // Create and populate the bloom filter let mut rng = StdRng::seed_from_u64(42); let mut bf = @@ -52,7 +60,7 @@ fn run_contains_bench(c: &mut Criterion, hasher: &str, query_inserted module_path!(), hasher, item_size, - fp_rate, + fp_label, query_type ), |b| { diff --git a/cryptography/src/bloomfilter/benches/insert.rs b/cryptography/src/bloomfilter/benches/insert.rs index 361f81425f..124bd78779 100644 --- a/cryptography/src/bloomfilter/benches/insert.rs +++ b/cryptography/src/bloomfilter/benches/insert.rs @@ -1,15 +1,23 @@ use commonware_cryptography::{blake3::Blake3, sha256::Sha256, BloomFilter, Hasher}; +use commonware_utils::rational::BigRationalExt; use criterion::{criterion_group, BatchSize, Criterion}; +use num_rational::BigRational; use rand::{rngs::StdRng, RngCore, SeedableRng}; use std::num::NonZeroUsize; const ITEM_SIZES: [usize; 3] = [32, 2048, 4096]; const NUM_ITEMS: usize = 10000; -const FP_RATES: [f64; 2] = [0.1, 0.001]; + +fn fp_rates() -> [(BigRational, &'static str); 2] { + [ + (BigRational::from_frac_u64(1, 10), "10%"), + (BigRational::from_frac_u64(1, 1000), "0.1%"), + ] +} fn run_insert_bench(c: &mut Criterion, hasher: &str) { for item_size in ITEM_SIZES { - for fp_rate in FP_RATES { + for (fp_rate, fp_label) in fp_rates() { // Pre-generate items to insert let mut rng = StdRng::seed_from_u64(42); let items: Vec> = (0..NUM_ITEMS) @@ -26,7 +34,7 @@ fn run_insert_bench(c: &mut Criterion, hasher: &str) { module_path!(), hasher, item_size, - fp_rate + fp_label ), |b| { let mut idx = 0; @@ -34,7 +42,7 @@ fn run_insert_bench(c: &mut Criterion, hasher: &str) { || { BloomFilter::::with_rate( NonZeroUsize::new(NUM_ITEMS).unwrap(), - fp_rate, + fp_rate.clone(), ) }, |mut bf| { diff --git a/cryptography/src/bloomfilter/conformance.rs b/cryptography/src/bloomfilter/conformance.rs index 7245e692fe..d6a1df9eb0 100644 --- 
a/cryptography/src/bloomfilter/conformance.rs +++ b/cryptography/src/bloomfilter/conformance.rs @@ -3,63 +3,62 @@ use super::{BloomFilter, Sha256}; use commonware_codec::conformance::CodecConformance; use commonware_conformance::Conformance; +use commonware_utils::rational::BigRationalExt; use core::num::NonZeroUsize; +use num_rational::BigRational; commonware_conformance::conformance_tests! { CodecConformance, - FpRateBuckets => 1024, + RationalOptimalBits => 1024, } -/// Conformance test for FP rate bucket calculations and with_rate constructor. +/// Conformance test for rational-based optimal_bits and with_rate. /// Verifies that optimal_bits, optimal_hashers, and with_rate produce stable -/// outputs across all four FP rate buckets for various expected_items values. -struct FpRateBuckets; +/// outputs for various expected_items values and FP rates expressed as rationals. +struct RationalOptimalBits; -impl Conformance for FpRateBuckets { +impl Conformance for RationalOptimalBits { async fn commit(seed: u64) -> Vec { let mut log = Vec::new(); // Use seed to vary expected_items (1 to 1M range) let expected_items = ((seed % 1_000_000) + 1) as usize; - // Test all four FP rate buckets with representative values + // Test FP rates as rationals: 1/10000, 1/1000, 1/100, 1/10 let fp_rates = [ - 0.0001, // FP_1E4 bucket (~0.01%) - 0.001, // FP_1E3 bucket (~0.1%) - 0.01, // FP_1E2 bucket (~1%) - 0.1, // FP_1E1 bucket (~10%) + BigRational::from_frac_u64(1, 10_000), // 0.01% + BigRational::from_frac_u64(1, 1_000), // 0.1% + BigRational::from_frac_u64(1, 100), // 1% + BigRational::from_frac_u64(1, 10), // 10% ]; - for &fp_rate in &fp_rates { + for fp_rate in &fp_rates { // Test individual functions let bits = BloomFilter::::optimal_bits(expected_items, fp_rate); let hashers = BloomFilter::::optimal_hashers(expected_items, bits); log.extend((expected_items as u64).to_le_bytes()); - log.extend(fp_rate.to_le_bytes()); log.extend((bits as u64).to_le_bytes()); 
log.extend(hashers.to_le_bytes()); // Test with_rate constructor produces same results let filter = BloomFilter::::with_rate( NonZeroUsize::new(expected_items).unwrap(), - fp_rate, + fp_rate.clone(), ); log.extend((filter.bits().get() as u64).to_le_bytes()); log.extend(filter.hashers().get().to_le_bytes()); } - // Also test bucket boundaries to catch rounding changes + // Also test rates that fall between the powers of ten above let boundary_rates = [ - 0.00014, // Just below FP_1E4/FP_1E3 boundary - 0.00016, // Just above FP_1E4/FP_1E3 boundary - 0.00104, // Just below FP_1E3/FP_1E2 boundary - 0.00106, // Just above FP_1E3/FP_1E2 boundary - 0.01004, // Just below FP_1E2/FP_1E1 boundary - 0.01006, // Just above FP_1E2/FP_1E1 boundary + BigRational::from_frac_u64(1, 7_000), // Between 0.01% and 0.1% + BigRational::from_frac_u64(1, 500), // Between 0.1% and 1% + BigRational::from_frac_u64(1, 50), // Between 1% and 10% + BigRational::from_frac_u64(3, 100), // 3% ]; - for &fp_rate in &boundary_rates { + for fp_rate in &boundary_rates { let bits = BloomFilter::::optimal_bits(expected_items, fp_rate); log.extend((bits as u64).to_le_bytes()); } diff --git a/cryptography/src/bloomfilter/mod.rs b/cryptography/src/bloomfilter/mod.rs index fe9f2a6eac..e47e7accdb 100644 --- a/cryptography/src/bloomfilter/mod.rs +++ b/cryptography/src/bloomfilter/mod.rs @@ -15,64 +15,17 @@ use core::{ marker::PhantomData, num::{NonZeroU64, NonZeroU8, NonZeroUsize}, }; +#[cfg(feature = "std")] +use { + commonware_utils::rational::BigRationalExt, + num_rational::BigRational, + num_traits::{One, Zero}, +}; /// ln(2) in Q16.16 fixed-point format. /// Used for computing optimal number of hash functions. const LN2_Q16: u64 = 45_426; -/// Scale factor for false positive rate normalization (4 decimal places). -const FP_RATE_SCALE: u64 = 10_000; - -/// Bits-per-element constants in Q16.16 fixed-point format. -/// Pre-computed as `bpe = round(-ln(p) / (ln(2))^2 * 65536)` for common false positive -/// rates. 
See `test_q16_constants` for verification. -mod bpe { - pub const FP_1E1: u64 = 314_083; // ~10% FP rate (4.79 bits/element) - pub const FP_1E2: u64 = 628_166; // ~1% FP rate (9.59 bits/element) - pub const FP_1E3: u64 = 942_250; // ~0.1% FP rate (14.38 bits/element) - pub const FP_1E4: u64 = 1_256_333; // ~0.01% FP rate (19.17 bits/element) -} - -/// Convert false positive rate to normalized integer (4 decimal places). -/// Uses explicit round-half-up for determinism. -/// -/// # Determinism -/// -/// This function is deterministic across platforms because: -/// - Clamping to 0.0001-0.9999 keeps values in a safe range where f64 arithmetic -/// is well-behaved (no subnormals, no precision loss near 0 or 1) -/// - After scaling by 10000, results are in range 1-9999 which are all exactly -/// representable as f64 (integers up to 2^53 are exact) -/// - All operations (*, +, floor) are IEEE 754 basic operations, which are -/// required to be correctly rounded to the same result on all compliant systems -/// - We use round-half-up (`+ 0.5` then `floor`) instead of `f64::round()` which -/// uses "round half to even" (banker's rounding) that could theoretically vary -/// -/// # Panics -/// -/// Panics if `p` is not between 0.0 and 1.0 (exclusive). -#[inline] -#[cfg(feature = "std")] -fn normalize_fp_rate(p: f64) -> u64 { - assert!(p > 0.0 && p < 1.0); - // Clamp to supported bucket range - let p = p.clamp(0.0001, 0.9999); - // Round-half-up: add 0.5 then floor - ((p * FP_RATE_SCALE as f64) + 0.5).floor() as u64 -} - -/// Map normalized false positive rate to bits-per-element bucket. -/// Uses discrete buckets for simplicity and robustness. -#[inline] -const fn bpe_for_rate(p_normalized: u64) -> u64 { - match p_normalized { - 0..=1 => bpe::FP_1E4, // <= 0.0001 - 2..=10 => bpe::FP_1E3, // <= 0.001 - 11..=100 => bpe::FP_1E2, // <= 0.01 - _ => bpe::FP_1E1, // > 0.01 (loose filter) - } -} - /// A [Bloom Filter](https://en.wikipedia.org/wiki/Bloom_filter). 
/// /// This implementation uses the Kirsch-Mitzenmacher optimization to derive `k` hash functions @@ -140,15 +93,19 @@ impl BloomFilter { /// Creates a new [BloomFilter] with optimal parameters for the expected number /// of items and desired false positive rate. /// - /// The false positive rate is quantized to one of four buckets (~10%, ~1%, ~0.1%, ~0.01%) - /// using integer math for determinism across all platforms. + /// Uses rational arithmetic (see [`Self::optimal_bits`]) for determinism across all platforms. + /// + /// # Arguments + /// + /// * `expected_items` - Number of items expected to be inserted + /// * `fp_rate` - False positive rate as a rational (e.g., `BigRational::from_frac_u64(1, 100)` for 1%) /// /// # Panics /// - /// Panics if `false_positive_rate` is not between 0.0 and 1.0 (exclusive). + /// Panics if `fp_rate` is not in (0, 1). #[cfg(feature = "std")] - pub fn with_rate(expected_items: NonZeroUsize, false_positive_rate: f64) -> Self { - let bits = Self::optimal_bits(expected_items.get(), false_positive_rate); + pub fn with_rate(expected_items: NonZeroUsize, fp_rate: BigRational) -> Self { + let bits = Self::optimal_bits(expected_items.get(), &fp_rate); let hashers = Self::optimal_hashers(expected_items.get(), bits); Self { hashers, @@ -263,24 +220,36 @@ impl BloomFilter { /// Calculates the optimal number of bits for a given capacity and false positive rate. /// - /// Uses pre-computed bits-per-element values and integer math for determinism. The - /// false positive rate is quantized to one of four buckets (~10%, ~1%, ~0.1%, ~0.01%). + /// Uses rational arithmetic on the supplied `fp_rate` so the result is identical on all platforms. /// The result is rounded up to the next power of 2. If that would overflow, the maximum /// power of 2 for the platform (2^63 on 64-bit) is used. /// + /// Formula: m = -n * log2(p) / ln(2), using rational approximations of log2(p) and 1/ln(2) + /// /// # Panics /// - /// Panics if `false_positive_rate` is not between 0.0 and 1.0 (exclusive). + /// Panics if `fp_rate` is not in (0, 1). 
#[cfg(feature = "std")] - pub fn optimal_bits(expected_items: usize, false_positive_rate: f64) -> usize { - let fp_normalized = normalize_fp_rate(false_positive_rate); - let bpe = bpe_for_rate(fp_normalized); - // ceil(n * bpe) using Q16.16 fixed-point, with overflow protection - let raw = (expected_items as u64) - .saturating_mul(bpe) - .saturating_add(0xFFFF) - >> 16; - (raw as usize) + pub fn optimal_bits(expected_items: usize, fp_rate: &BigRational) -> usize { + assert!( + fp_rate > &BigRational::zero() && fp_rate < &BigRational::one(), + "false positive rate must be in (0, 1)" + ); + + let n = BigRational::from_usize(expected_items); + + // log2(p) is negative for p < 1, use 16 bits of precision + let log2_p = fp_rate.log2_ceil(16); + + // 1/ln(2) approximation: 29145/20201 = 1.44275 (true 1.44270, so ~4 digits; errs slightly high) + let ln2_inv = BigRational::from_frac_u64(29145, 20201); + + // m = -n * log2(p) / ln(2) = -n * log2(p) * (1/ln(2)) + // Since log2(p) < 0 for p < 1, -log2(p) > 0 + let bits_rational = -(&n * &log2_p * &ln2_inv); + // Clamp before narrowing: a bare `as usize` wraps instead of saturating + let raw = bits_rational.ceil_to_u128().unwrap_or(1).min(usize::MAX as u128) as usize; + raw.max(1) .checked_next_power_of_two() .unwrap_or(1 << (usize::BITS - 1)) } @@ -521,10 +490,11 @@ mod tests { #[test] fn test_with_rate() { // Create a filter for 1000 items with 1% false positive rate - let mut bf = BloomFilter::::with_rate(NZUsize!(1000), 0.01); + let fp_rate = BigRational::from_frac_u64(1, 100); + let mut bf = BloomFilter::::with_rate(NZUsize!(1000), fp_rate.clone()); // Verify getters return expected values - let expected_bits = BloomFilter::::optimal_bits(1000, 0.01); + let expected_bits = BloomFilter::::optimal_bits(1000, &fp_rate); let expected_hashers = BloomFilter::::optimal_hashers(1000, expected_bits); assert_eq!(bf.bits().get(), expected_bits); assert_eq!(bf.hashers().get(), expected_hashers); @@ -589,56 +559,64 @@ mod tests { // For 1000 items with 1% FP rate // Formula: m = -n * ln(p) / (ln(2))^2 = -1000 * ln(0.01) / 0.4804 = 9585 // Rounded to next power of 2 = 
16384 - let bits = BloomFilter::::optimal_bits(1000, 0.01); + let fp_1pct = BigRational::from_frac_u64(1, 100); + let bits = BloomFilter::::optimal_bits(1000, &fp_1pct); assert_eq!(bits, 16384); assert!(bits.is_power_of_two()); // For 10000 items with 0.001% FP rate (need significantly more bits) // Formula: m = -10000 * ln(0.00001) / 0.4804 = 239627 // Rounded to next power of 2 = 262144 - let bits_lower_fp = BloomFilter::::optimal_bits(10000, 0.00001); + let fp_0_001pct = BigRational::from_frac_u64(1, 100_000); + let bits_lower_fp = BloomFilter::::optimal_bits(10000, &fp_0_001pct); assert_eq!(bits_lower_fp, 262144); assert!(bits_lower_fp.is_power_of_two()); } #[test] fn test_bits_extreme_values() { - // Very large expected_items, uses saturation arithmetic - let bits = BloomFilter::::optimal_bits(usize::MAX / 2, 0.0001); + let fp_0_01pct = BigRational::from_frac_u64(1, 10_000); + let fp_1pct = BigRational::from_frac_u64(1, 100); + + // Very large expected_items + let bits = BloomFilter::::optimal_bits(usize::MAX / 2, &fp_0_01pct); assert!(bits.is_power_of_two()); assert!(bits > 0); // Large but reasonable values - let bits = BloomFilter::::optimal_bits(1_000_000_000, 0.0001); + let bits = BloomFilter::::optimal_bits(1_000_000_000, &fp_0_01pct); assert!(bits.is_power_of_two()); // Zero items - let bits = BloomFilter::::optimal_bits(0, 0.01); + let bits = BloomFilter::::optimal_bits(0, &fp_1pct); assert!(bits.is_power_of_two()); - assert_eq!(bits, 1); // 0 * bpe rounds up to 1 + assert_eq!(bits, 1); // zero items still yield the 1-bit minimum } #[test] - fn test_q16_constants() { - // Verify Q16.16 fixed-point constants match the formulas - - // LN2_Q16 = ln(2) * 65536 + fn test_q16_ln2_constant() { + // Verify Q16.16 fixed-point constant for ln(2) matches the formula let ln2 = core::f64::consts::LN_2; let expected_ln2_q16 = (ln2 * 65536.0).round() as u64; assert_eq!(LN2_Q16, expected_ln2_q16); + } - // bpe = -ln(p) / (ln(2))^2 - // bpe_q16 = bpe * 65536 - let ln2_sq = ln2 * ln2; - - let bpe_1e1 = -0.1_f64.ln() / ln2_sq; - let bpe_1e2 = 
-0.01_f64.ln() / ln2_sq; - let bpe_1e3 = -0.001_f64.ln() / ln2_sq; - let bpe_1e4 = -0.0001_f64.ln() / ln2_sq; + #[test] + fn test_with_rate_deterministic() { + let fp_rate = BigRational::from_frac_u64(1, 100); + let bf1 = BloomFilter::::with_rate(NZUsize!(1000), fp_rate.clone()); + let bf2 = BloomFilter::::with_rate(NZUsize!(1000), fp_rate); + assert_eq!(bf1.bits(), bf2.bits()); + assert_eq!(bf1.hashers(), bf2.hashers()); + } - assert_eq!(bpe::FP_1E1, (bpe_1e1 * 65536.0).round() as u64); - assert_eq!(bpe::FP_1E2, (bpe_1e2 * 65536.0).round() as u64); - assert_eq!(bpe::FP_1E3, (bpe_1e3 * 65536.0).round() as u64); - assert_eq!(bpe::FP_1E4, (bpe_1e4 * 65536.0).round() as u64); + #[test] + fn test_optimal_bits_matches_formula() { + // For 1000 items at 1% FP rate + // m = -1000 * log2(0.01) / ln(2) = 9585 + // Rounded to power of 2 = 16384 + let fp_rate = BigRational::from_frac_u64(1, 100); + let bits = BloomFilter::::optimal_bits(1000, &fp_rate); + assert_eq!(bits, 16384); } }