4 changes: 4 additions & 0 deletions halo2_proofs/Cargo.toml
@@ -31,6 +31,10 @@ harness = false
name = "hashtocurve"
harness = false

[[bench]]
name = "msm"
harness = false

[[bench]]
name = "plonk"
harness = false
27 changes: 27 additions & 0 deletions halo2_proofs/benches/msm.rs
@@ -0,0 +1,27 @@
#[macro_use]
extern crate criterion;

use criterion::{BenchmarkId, Criterion};
use group::ff::Field;
use halo2_proofs::arithmetic::best_multiexp;
use halo2_proofs::pasta::{EqAffine, Fp};
use halo2_proofs::poly::commitment::Params;
use rand_core::OsRng;

fn criterion_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("msm");
for k in 8..16 {
group
.bench_function(BenchmarkId::new("k", k), |b| {
let coeffs = (0..(1 << k)).map(|_| Fp::random(OsRng)).collect::<Vec<_>>();
let bases = Params::<EqAffine>::new(k).get_g();

b.iter(|| best_multiexp(&coeffs, &bases))
})
.sample_size(30);
}
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
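With the [[bench]] entry added to Cargo.toml above, this benchmark can be run on its own via cargo bench --bench msm. One Criterion aside (not part of the diff): group-level settings such as sample_size appear to apply only to benchmarks registered after the call, so chaining .sample_size(30) after bench_function likely leaves the first k at the default sample size. A sketch of a variant that configures the group up front, using the same imports as the benchmark above:

// Illustrative variant (not part of the diff): set the sample size before any
// benchmark is registered so that it applies to every value of `k`.
fn criterion_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("msm");
    group.sample_size(30);
    for k in 8..16 {
        group.bench_function(BenchmarkId::new("k", k), |b| {
            let coeffs = (0..(1 << k)).map(|_| Fp::random(OsRng)).collect::<Vec<_>>();
            let bases = Params::<EqAffine>::new(k).get_g();

            b.iter(|| best_multiexp(&coeffs, &bases))
        });
    }
    group.finish();
}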
212 changes: 119 additions & 93 deletions halo2_proofs/src/arithmetic.rs
@@ -1,15 +1,16 @@
//! This module provides common utilities, traits and structures for group,
//! field and polynomial arithmetic.

use super::multicore;
pub use ff::Field;
use group::{
ff::{BatchInvert, PrimeField},
Group as _, GroupOpsOwned, ScalarMulOwned,
};

use maybe_rayon::prelude::*;
pub use pasta_curves::arithmetic::*;

use crate::multicore::{self, TheBestReduce};

/// This represents an element of a group with basic operations that can be
/// performed. This allows an FFT implementation (for example) to operate
/// generically over either a field or elliptic curve group.
@@ -25,92 +26,87 @@ where
{
}

fn multiexp_serial<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C], acc: &mut C::Curve) {
let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_repr()).collect();

let c = if bases.len() < 4 {
1
} else if bases.len() < 32 {
3
} else {
(f64::from(bases.len() as u32)).ln().ceil() as usize
};

fn get_at<F: PrimeField>(segment: usize, c: usize, bytes: &F::Repr) -> usize {
let skip_bits = segment * c;
let skip_bytes = skip_bits / 8;

if skip_bytes >= 32 {
return 0;
}
#[derive(Clone, Copy)]
enum Bucket<C: CurveAffine> {
None,
Affine(C),
Projective(C::Curve),
}

let mut v = [0; 8];
for (v, o) in v.iter_mut().zip(bytes.as_ref()[skip_bytes..].iter()) {
*v = *o;
impl<C: CurveAffine> Bucket<C> {
fn add_assign(&mut self, other: &C) {
*self = match *self {
Bucket::None => Bucket::Affine(*other),
Bucket::Affine(a) => Bucket::Projective(a + *other),
Bucket::Projective(mut a) => {
a += *other;
Bucket::Projective(a)
}
}

let mut tmp = u64::from_le_bytes(v);
tmp >>= skip_bits - (skip_bytes * 8);
tmp %= 1 << c;

tmp as usize
}

let segments = (256 / c) + 1;

for current_segment in (0..segments).rev() {
for _ in 0..c {
*acc = acc.double();
}

#[derive(Clone, Copy)]
enum Bucket<C: CurveAffine> {
None,
Affine(C),
Projective(C::Curve),
fn add(self, mut other: C::Curve) -> C::Curve {
Review comment (Contributor): Bucket::add was an existing function that is preserved in this PR, so in the interest of not needing to go through another round of CI, I'm going to defer this internal rename.

match self {
Bucket::None => other,
Bucket::Affine(a) => {
other += a;
other
}
Bucket::Projective(a) => other + &a,
}
}
}

impl<C: CurveAffine> Bucket<C> {
fn add_assign(&mut self, other: &C) {
*self = match *self {
Bucket::None => Bucket::Affine(*other),
Bucket::Affine(a) => Bucket::Projective(a + *other),
Bucket::Projective(mut a) => {
a += *other;
Bucket::Projective(a)
}
}
}
#[derive(Clone)]
struct Buckets<C: CurveAffine> {
c: usize,
coeffs: Vec<Bucket<C>>,
}

fn add(self, mut other: C::Curve) -> C::Curve {
match self {
Bucket::None => other,
Bucket::Affine(a) => {
other += a;
other
}
Bucket::Projective(a) => other + &a,
}
}
impl<C: CurveAffine> Buckets<C> {
fn new(c: usize) -> Self {
Self {
c,
coeffs: vec![Bucket::None; (1 << c) - 1],
}
}

let mut buckets: Vec<Bucket<C>> = vec![Bucket::None; (1 << c) - 1];

fn sum(&mut self, coeffs: &[C::Scalar], bases: &[C], i: usize) -> C::Curve {
// For this window ("segment"), add each base into the bucket selected by the window value of its coefficient
for (coeff, base) in coeffs.iter().zip(bases.iter()) {
let coeff = get_at::<C::Scalar>(current_segment, c, coeff);
if coeff != 0 {
buckets[coeff - 1].add_assign(base);
let seg = self.get_at::<C::Scalar>(i, &coeff.to_repr());
if seg != 0 {
self.coeffs[seg - 1].add_assign(base);
}
}

// Summation by parts
// e.g. 3a + 2b + 1c = a +
// (a) + b +
// ((a) + b) + c
let mut running_sum = C::Curve::identity();
for exp in buckets.into_iter().rev() {
running_sum = exp.add(running_sum);
*acc += &running_sum;
let mut acc = C::Curve::identity();
let mut sum = C::Curve::identity();
self.coeffs.iter().rev().for_each(|b| {
sum = b.add(sum);
acc += sum;
});
acc
}

fn get_at<F: PrimeField>(&self, segment: usize, bytes: &F::Repr) -> usize {
let skip_bits = segment * self.c;
let skip_bytes = skip_bits / 8;

if skip_bytes >= 32 {
0
} else {
let mut v = [0; 8];
for (v, o) in v.iter_mut().zip(bytes.as_ref()[skip_bytes..].iter()) {
*v = *o;
}

let mut tmp = u64::from_le_bytes(v);
tmp >>= skip_bits - (skip_bytes * 8);
(tmp % (1 << self.c)) as usize
}
}
}
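For orientation (not part of the diff): this is the windowed bucket method. Each Buckets instance owns one c-bit window ("segment") of the scalars; get_at extracts that window from the little-endian byte encoding of a scalar, and sum drops each base into the bucket indexed by its coefficient's window value, then collapses the buckets with a running sum so that the bucket for window value w contributes w times. The standalone sketch below replays the same structure with u64 scalars and i64 values standing in for curve points; window_at and toy_msm are illustrative names only, not crate functions.

// Standalone sketch of the windowed bucket ("Pippenger") decomposition, with
// i64 values standing in for curve points and integer addition standing in
// for point addition. `window_at` mirrors `Buckets::get_at`; the loop body
// mirrors `Buckets::sum` plus the per-window doublings applied by the caller.
fn window_at(scalar: u64, segment: usize, c: usize) -> usize {
    let skip_bits = segment * c;
    if skip_bits >= 64 {
        0
    } else {
        ((scalar >> skip_bits) & ((1u64 << c) - 1)) as usize
    }
}

fn toy_msm(coeffs: &[u64], bases: &[i64], c: usize) -> i64 {
    let segments = (64 / c) + 1;
    let mut acc = 0i64;
    for segment in (0..segments).rev() {
        // Shift the accumulator up by one window; in the group setting these
        // are the c doublings per segment.
        for _ in 0..c {
            acc += acc;
        }
        // Bucket w - 1 collects the sum of all bases whose coefficient has
        // window value w in this segment.
        let mut buckets = vec![0i64; (1 << c) - 1];
        for (&coeff, &base) in coeffs.iter().zip(bases.iter()) {
            let w = window_at(coeff, segment, c);
            if w != 0 {
                buckets[w - 1] += base;
            }
        }
        // Running-sum trick: the bucket for window value w ends up added to
        // the accumulator w times, without any scalar multiplications.
        let mut running_sum = 0i64;
        for bucket in buckets.into_iter().rev() {
            running_sum += bucket;
            acc += running_sum;
        }
    }
    acc
}

// Example: toy_msm(&[3, 2, 1], &[5, 7, 9], 4) == 3 * 5 + 2 * 7 + 1 * 9 == 38.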
@@ -147,29 +143,39 @@ pub fn small_multiexp<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C]) -> C::C
pub fn best_multiexp<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve {
assert_eq!(coeffs.len(), bases.len());

let c = if bases.len() < 4 {
1
} else if bases.len() < 32 {
3
} else {
(f64::from(bases.len() as u32)).ln().ceil() as usize
};

let mut multi_buckets: Vec<Buckets<C>> = vec![Buckets::new(c); (256 / c) + 1];
let num_threads = multicore::current_num_threads();
if coeffs.len() > num_threads {
let chunk = coeffs.len() / num_threads;
let num_chunks = coeffs.chunks(chunk).len();
let mut results = vec![C::Curve::identity(); num_chunks];
multicore::scope(|scope| {
let chunk = coeffs.len() / num_threads;

for ((coeffs, bases), acc) in coeffs
.chunks(chunk)
.zip(bases.chunks(chunk))
.zip(results.iter_mut())
{
scope.spawn(move |_| {
multiexp_serial(coeffs, bases, acc);
});
}
});
results.iter().fold(C::Curve::identity(), |a, b| a + b)
multi_buckets
.par_iter_mut()
.enumerate()
.rev()
.map(|(i, buckets)| {
let mut acc = buckets.sum(coeffs, bases, i);
(0..c * i).for_each(|_| acc = acc.double());
acc
})
.the_best_reduce(C::Curve::identity, |a, b| a + b)
.expect("multi_buckets always contains at least 1 bucket")
} else {
let mut acc = C::Curve::identity();
multiexp_serial(coeffs, bases, &mut acc);
acc
multi_buckets
.iter_mut()
.enumerate()
.rev()
.map(|(i, buckets)| buckets.sum(coeffs, bases, i))
.fold(C::Curve::identity(), |mut sum, bucket| {
// restore original evaluation point
(0..c).for_each(|_| sum = sum.double());
sum + bucket
})
}
}
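The two branches above recombine the per-window partial sums in different orders but compute the same value, the sum over i of 2^(c*i) times the window-i sum: the parallel branch applies c*i doublings to each partial sum before a single reduction, while the serial branch folds from the highest window down, doubling the running total c times per step (Horner's rule). A small sketch of that identity (illustrative only; combine_parallel and combine_serial are not crate functions, integer shifts stand in for doublings, and c*i is assumed to stay below 64):

// Each window i contributes W_i = Buckets::sum(coeffs, bases, i), and the
// final result is sum_i 2^(c*i) * W_i, whether each W_i is scaled up front
// (parallel branch) or the running total is repeatedly shifted (serial branch).
fn combine_parallel(window_sums: &[i64], c: usize) -> i64 {
    window_sums
        .iter()
        .enumerate()
        .map(|(i, w)| w << (c * i)) // 2^(c*i) * W_i, i.e. c*i doublings
        .sum()
}

fn combine_serial(window_sums: &[i64], c: usize) -> i64 {
    window_sums
        .iter()
        .rev()
        // c doublings of the running total, then add the next window's sum.
        .fold(0, |sum, w| (sum << c) + w)
}

// combine_parallel(&[1, 2, 3], 4) == combine_serial(&[1, 2, 3], 4) == 801.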

@@ -429,7 +435,27 @@ pub fn lagrange_interpolate<F: Field>(points: &[F], evals: &[F]) -> Vec<F> {
use rand_core::OsRng;

#[cfg(test)]
use crate::pasta::Fp;
use crate::pasta::{Eq, EqAffine, Fp};

#[test]
fn test_multiexp() {
let rng = OsRng;
let k = 8;

let coeffs = (0..(1 << k)).map(|_| Fp::random(rng)).collect::<Vec<_>>();
let bases = (0..(1 << k))
.map(|_| EqAffine::from(Eq::random(rng)))
.collect::<Vec<_>>();

let expected = best_multiexp(&coeffs, &bases);
let actual = coeffs
.iter()
.zip(bases)
.map(|(coeff, base)| base * coeff)
.fold(Eq::identity(), |acc, val| acc + val);

assert_eq!(expected, actual);
}

#[test]
fn test_lagrange_interpolate() {
43 changes: 43 additions & 0 deletions halo2_proofs/src/multicore.rs
@@ -71,3 +71,46 @@ where
self.try_fold(identity(), fold_op)
}
}

pub(crate) trait TheBestReduce {
type Item;

/// Combines the best of `std::iter` and `rayon` reductions.
fn the_best_reduce(
Review comment (Contributor): 🙃 at the name, but this is fine.

Review comment (Contributor, in reply): I needed something that was not likely to clash in future with a std or rayon name 😄

self,
identity: impl Fn() -> Self::Item + Send + Sync,
op: impl Fn(Self::Item, Self::Item) -> Self::Item + Send + Sync,
) -> Option<Self::Item>;
}

#[cfg(feature = "multicore")]
impl<I> TheBestReduce for I
where
I: maybe_rayon::iter::ParallelIterator,
{
type Item = <Self as maybe_rayon::iter::ParallelIterator>::Item;

fn the_best_reduce(
self,
identity: impl Fn() -> Self::Item + Send + Sync,
op: impl Fn(Self::Item, Self::Item) -> Self::Item + Send + Sync,
) -> Option<Self::Item> {
Some(self.reduce(identity, op))
}
}

#[cfg(not(feature = "multicore"))]
impl<I> TheBestReduce for I
where
I: std::iter::Iterator,
{
type Item = <Self as std::iter::Iterator>::Item;

fn the_best_reduce(
self,
_: impl Fn() -> Self::Item + Send + Sync,
f: impl Fn(Self::Item, Self::Item) -> Self::Item + Send + Sync,
) -> Option<Self::Item> {
self.reduce(f)
}
}
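For context (not part of the diff): the two impls paper over an asymmetry between the libraries. rayon's ParallelIterator::reduce takes an identity closure because every worker folds its own chunk independently before the chunks are combined, while std's Iterator::reduce takes no identity and returns None for an empty iterator, which is why both impls return an Option. A standalone sketch of what each impl delegates to, using a direct rayon dependency and u64 values purely for illustration (parallel_total and serial_total are hypothetical names):

use rayon::prelude::*;

// Mirrors the "multicore" impl: rayon folds each worker's chunk starting from
// the identity, then combines the per-chunk results with the same operator.
fn parallel_total(values: &[u64]) -> Option<u64> {
    Some(values.par_iter().map(|v| v * 2).reduce(|| 0, |a, b| a + b))
}

// Mirrors the serial fallback: the identity closure goes unused and an empty
// iterator yields None.
fn serial_total(values: &[u64]) -> Option<u64> {
    values.iter().map(|v| v * 2).reduce(|a, b| a + b)
}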