diff --git a/halo2_proofs/Cargo.toml b/halo2_proofs/Cargo.toml index 6368bddc9..56385ddfc 100644 --- a/halo2_proofs/Cargo.toml +++ b/halo2_proofs/Cargo.toml @@ -31,6 +31,10 @@ harness = false name = "hashtocurve" harness = false +[[bench]] +name = "msm" +harness = false + [[bench]] name = "plonk" harness = false diff --git a/halo2_proofs/benches/msm.rs b/halo2_proofs/benches/msm.rs new file mode 100644 index 000000000..21c5508a0 --- /dev/null +++ b/halo2_proofs/benches/msm.rs @@ -0,0 +1,27 @@ +#[macro_use] +extern crate criterion; + +use crate::arithmetic::best_multiexp; +use crate::pasta::{EqAffine, Fp}; +use crate::poly::commitment::Params; +use criterion::{BenchmarkId, Criterion}; +use group::ff::Field; +use halo2_proofs::*; +use rand_core::OsRng; + +fn criterion_benchmark(c: &mut Criterion) { + let mut group = c.benchmark_group("msm"); + for k in 8..16 { + group + .bench_function(BenchmarkId::new("k", k), |b| { + let coeffs = (0..(1 << k)).map(|_| Fp::random(OsRng)).collect::>(); + let bases = Params::::new(k).get_g(); + + b.iter(|| best_multiexp(&coeffs, &bases)) + }) + .sample_size(30); + } +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/halo2_proofs/src/arithmetic.rs b/halo2_proofs/src/arithmetic.rs index 4cb0039d3..09d4cec81 100644 --- a/halo2_proofs/src/arithmetic.rs +++ b/halo2_proofs/src/arithmetic.rs @@ -1,15 +1,16 @@ //! This module provides common utilities, traits and structures for group, //! field and polynomial arithmetic. -use super::multicore; pub use ff::Field; use group::{ ff::{BatchInvert, PrimeField}, Group as _, GroupOpsOwned, ScalarMulOwned, }; - +use maybe_rayon::prelude::*; pub use pasta_curves::arithmetic::*; +use crate::multicore::{self, TheBestReduce}; + /// This represents an element of a group with basic operations that can be /// performed. This allows an FFT implementation (for example) to operate /// generically over either a field or elliptic curve group. @@ -25,92 +26,87 @@ where { } -fn multiexp_serial(coeffs: &[C::Scalar], bases: &[C], acc: &mut C::Curve) { - let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_repr()).collect(); - - let c = if bases.len() < 4 { - 1 - } else if bases.len() < 32 { - 3 - } else { - (f64::from(bases.len() as u32)).ln().ceil() as usize - }; - - fn get_at(segment: usize, c: usize, bytes: &F::Repr) -> usize { - let skip_bits = segment * c; - let skip_bytes = skip_bits / 8; - - if skip_bytes >= 32 { - return 0; - } +#[derive(Clone, Copy)] +enum Bucket { + None, + Affine(C), + Projective(C::Curve), +} - let mut v = [0; 8]; - for (v, o) in v.iter_mut().zip(bytes.as_ref()[skip_bytes..].iter()) { - *v = *o; +impl Bucket { + fn add_assign(&mut self, other: &C) { + *self = match *self { + Bucket::None => Bucket::Affine(*other), + Bucket::Affine(a) => Bucket::Projective(a + *other), + Bucket::Projective(mut a) => { + a += *other; + Bucket::Projective(a) + } } - - let mut tmp = u64::from_le_bytes(v); - tmp >>= skip_bits - (skip_bytes * 8); - tmp %= 1 << c; - - tmp as usize } - let segments = (256 / c) + 1; - - for current_segment in (0..segments).rev() { - for _ in 0..c { - *acc = acc.double(); - } - - #[derive(Clone, Copy)] - enum Bucket { - None, - Affine(C), - Projective(C::Curve), + fn add(self, mut other: C::Curve) -> C::Curve { + match self { + Bucket::None => other, + Bucket::Affine(a) => { + other += a; + other + } + Bucket::Projective(a) => other + &a, } + } +} - impl Bucket { - fn add_assign(&mut self, other: &C) { - *self = match *self { - Bucket::None => Bucket::Affine(*other), - Bucket::Affine(a) => Bucket::Projective(a + *other), - Bucket::Projective(mut a) => { - a += *other; - Bucket::Projective(a) - } - } - } +#[derive(Clone)] +struct Buckets { + c: usize, + coeffs: Vec>, +} - fn add(self, mut other: C::Curve) -> C::Curve { - match self { - Bucket::None => other, - Bucket::Affine(a) => { - other += a; - other - } - Bucket::Projective(a) => other + &a, - } - } +impl Buckets { + fn new(c: usize) -> Self { + Self { + c, + coeffs: vec![Bucket::None; (1 << c) - 1], } + } - let mut buckets: Vec> = vec![Bucket::None; (1 << c) - 1]; - + fn sum(&mut self, coeffs: &[C::Scalar], bases: &[C], i: usize) -> C::Curve { + // get segmentation and add coeff to buckets content for (coeff, base) in coeffs.iter().zip(bases.iter()) { - let coeff = get_at::(current_segment, c, coeff); - if coeff != 0 { - buckets[coeff - 1].add_assign(base); + let seg = self.get_at::(i, &coeff.to_repr()); + if seg != 0 { + self.coeffs[seg - 1].add_assign(base); } } - // Summation by parts // e.g. 3a + 2b + 1c = a + // (a) + b + // ((a) + b) + c - let mut running_sum = C::Curve::identity(); - for exp in buckets.into_iter().rev() { - running_sum = exp.add(running_sum); - *acc += &running_sum; + let mut acc = C::Curve::identity(); + let mut sum = C::Curve::identity(); + self.coeffs.iter().rev().for_each(|b| { + sum = b.add(sum); + acc += sum; + }); + acc + } + + fn get_at(&self, segment: usize, bytes: &F::Repr) -> usize { + let skip_bits = segment * self.c; + let skip_bytes = skip_bits / 8; + + if skip_bytes >= 32 { + 0 + } else { + let mut v = [0; 8]; + for (v, o) in v.iter_mut().zip(bytes.as_ref()[skip_bytes..].iter()) { + *v = *o; + } + + let mut tmp = u64::from_le_bytes(v); + tmp >>= skip_bits - (skip_bytes * 8); + (tmp % (1 << self.c)) as usize } } } @@ -147,29 +143,39 @@ pub fn small_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::C pub fn best_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve { assert_eq!(coeffs.len(), bases.len()); + let c = if bases.len() < 4 { + 1 + } else if bases.len() < 32 { + 3 + } else { + (f64::from(bases.len() as u32)).ln().ceil() as usize + }; + + let mut multi_buckets: Vec> = vec![Buckets::new(c); (256 / c) + 1]; let num_threads = multicore::current_num_threads(); if coeffs.len() > num_threads { - let chunk = coeffs.len() / num_threads; - let num_chunks = coeffs.chunks(chunk).len(); - let mut results = vec![C::Curve::identity(); num_chunks]; - multicore::scope(|scope| { - let chunk = coeffs.len() / num_threads; - - for ((coeffs, bases), acc) in coeffs - .chunks(chunk) - .zip(bases.chunks(chunk)) - .zip(results.iter_mut()) - { - scope.spawn(move |_| { - multiexp_serial(coeffs, bases, acc); - }); - } - }); - results.iter().fold(C::Curve::identity(), |a, b| a + b) + multi_buckets + .par_iter_mut() + .enumerate() + .rev() + .map(|(i, buckets)| { + let mut acc = buckets.sum(coeffs, bases, i); + (0..c * i).for_each(|_| acc = acc.double()); + acc + }) + .the_best_reduce(C::Curve::identity, |a, b| a + b) + .expect("multi_buckets always contains at least 1 bucket") } else { - let mut acc = C::Curve::identity(); - multiexp_serial(coeffs, bases, &mut acc); - acc + multi_buckets + .iter_mut() + .enumerate() + .rev() + .map(|(i, buckets)| buckets.sum(coeffs, bases, i)) + .fold(C::Curve::identity(), |mut sum, bucket| { + // restore original evaluation point + (0..c).for_each(|_| sum = sum.double()); + sum + bucket + }) } } @@ -429,7 +435,27 @@ pub fn lagrange_interpolate(points: &[F], evals: &[F]) -> Vec { use rand_core::OsRng; #[cfg(test)] -use crate::pasta::Fp; +use crate::pasta::{Eq, EqAffine, Fp}; + +#[test] +fn test_multiexp() { + let rng = OsRng; + let k = 8; + + let coeffs = (0..(1 << k)).map(|_| Fp::random(rng)).collect::>(); + let bases = (0..(1 << k)) + .map(|_| EqAffine::from(Eq::random(rng))) + .collect::>(); + + let expected = best_multiexp(&coeffs, &bases); + let actual = coeffs + .iter() + .zip(bases) + .map(|(coeff, base)| base * coeff) + .fold(Eq::identity(), |acc, val| acc + val); + + assert_eq!(expected, actual); +} #[test] fn test_lagrange_interpolate() { diff --git a/halo2_proofs/src/multicore.rs b/halo2_proofs/src/multicore.rs index 245965a2a..1e3469358 100644 --- a/halo2_proofs/src/multicore.rs +++ b/halo2_proofs/src/multicore.rs @@ -71,3 +71,46 @@ where self.try_fold(identity(), fold_op) } } + +pub(crate) trait TheBestReduce { + type Item; + + /// Combines the best of `std::iter` and `rayon` reductions. + fn the_best_reduce( + self, + identity: impl Fn() -> Self::Item + Send + Sync, + op: impl Fn(Self::Item, Self::Item) -> Self::Item + Send + Sync, + ) -> Option; +} + +#[cfg(feature = "multicore")] +impl TheBestReduce for I +where + I: maybe_rayon::iter::ParallelIterator, +{ + type Item = ::Item; + + fn the_best_reduce( + self, + identity: impl Fn() -> Self::Item + Send + Sync, + op: impl Fn(Self::Item, Self::Item) -> Self::Item + Send + Sync, + ) -> Option { + Some(self.reduce(identity, op)) + } +} + +#[cfg(not(feature = "multicore"))] +impl TheBestReduce for I +where + I: std::iter::Iterator, +{ + type Item = ::Item; + + fn the_best_reduce( + self, + _: impl Fn() -> Self::Item + Send + Sync, + f: impl Fn(Self::Item, Self::Item) -> Self::Item + Send + Sync, + ) -> Option { + self.reduce(f) + } +}