Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fewer methods in trait Engine. Introduce utils.rs #44

Merged
merged 2 commits into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 2 additions & 29 deletions benches/benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -338,46 +338,19 @@ fn benchmarks_engine_one<E: Engine>(c: &mut Criterion, name: &str, engine: E) {
b.iter(|| E::eval_poly(black_box(&mut data), GF_ORDER / 8))
});

// XOR MUL
// MUL

let mut x = &mut generate_shards_64(1, shard_len_64, 0)[0];
let y = &generate_shards_64(1, shard_len_64, 1)[0];

group.bench_function("xor", |b| {
b.iter(|| E::xor(black_box(&mut x), black_box(&y)))
});

group.bench_function("mul", |b| {
b.iter(|| engine.mul(black_box(&mut x), black_box(12345)))
});

// XOR_WITHIN

let shards_256_data = &mut generate_shards_64(1, 256 * shard_len_64, 0)[0];
let mut shards_256 = ShardsRefMut::new(256, shard_len_64, shards_256_data.as_mut());

group.bench_function("xor_within 128*2", |b| {
b.iter(|| {
E::xor_within(
black_box(&mut shards_256),
black_box(0),
black_box(128),
black_box(128),
)
})
});

// FORMAL DERIVATIVE
// FFT IFFT

let shards_128_data = &mut generate_shards_64(1, 128 * shard_len_64, 0)[0];
let mut shards_128 = ShardsRefMut::new(128, shard_len_64, shards_128_data.as_mut());

group.bench_function("formal_derivative 128", |b| {
b.iter(|| E::formal_derivative(black_box(&mut shards_128)))
});

// FFT IFFT

group.bench_function("FFT 128", |b| {
b.iter(|| {
engine.fft(
Expand Down
113 changes: 8 additions & 105 deletions src/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,8 @@
//! [`ReedSolomonDecoder`]: crate::ReedSolomonDecoder
//! [`rate`]: crate::rate

use std::iter::zip;

pub(crate) use self::shards::Shards;
pub(crate) use utils::{fft_skew_end, formal_derivative, ifft_skew_end, xor_within};

pub use self::{
engine_default::DefaultEngine, engine_naive::Naive, engine_nosimd::NoSimd, shards::ShardsRefMut,
Expand All @@ -60,6 +59,7 @@ mod engine_neon;

mod fwht;
mod shards;
mod utils;

pub mod tables;

Expand Down Expand Up @@ -90,50 +90,15 @@ pub const CANTOR_BASIS: [GfElement; GF_BITS] = [
/// Galois field element.
pub type GfElement = u16;

// ======================================================================
// FUNCTIONS - PUBLIC - Galois field operations

/// Adds two [`GfElement`] values with an end-around carry.
///
/// Both operands are widened to `u32` and summed; whatever lands above
/// bit `GF_BITS` is then added back into the sum before the result is
/// truncated to [`GfElement`].
///
/// NOTE(review): this looks like addition modulo `2^GF_BITS - 1` where
/// the all-ones value may be returned unreduced — confirm against callers.
#[inline(always)]
pub fn add_mod(x: GfElement, y: GfElement) -> GfElement {
    let wide = u32::from(x) + u32::from(y);
    let carry = wide >> GF_BITS;
    (wide + carry) as GfElement
}

/// Subtracts `y` from `x` with an end-around borrow.
///
/// Both operands are widened to `u32` and subtracted with wrapping
/// arithmetic; the bits above `GF_BITS` of that difference are then
/// added back (again wrapping) before truncating to [`GfElement`].
///
/// NOTE(review): this looks like subtraction modulo `2^GF_BITS - 1`,
/// the counterpart of `add_mod` — confirm against callers.
#[inline(always)]
pub fn sub_mod(x: GfElement, y: GfElement) -> GfElement {
    let wide = u32::from(x).wrapping_sub(u32::from(y));
    let borrow = wide >> GF_BITS;
    wide.wrapping_add(borrow) as GfElement
}

// ======================================================================
// FUNCTIONS - CRATE - Evaluate polynomial

// Kept as a free function rather than a method of 'trait Engine' so
// that the SIMD engines can include it and have it compiled with
// their SIMD features enabled.
//
// Steps, in order: one `fwht` pass given `truncated_size`, an
// element-wise multiply of `erasures` by the log-Walsh table, then a
// second `fwht` pass given `GF_ORDER`.
#[inline(always)]
pub(crate) fn eval_poly(erasures: &mut [GfElement; GF_ORDER], truncated_size: usize) {
    let log_walsh = tables::initialize_log_walsh();

    fwht::fwht(erasures, truncated_size);

    erasures
        .iter_mut()
        .zip(log_walsh.iter())
        .for_each(|(value, factor)| {
            // Multiply in 32 bits, then fold the high part of the
            // product into the low part via `add_mod`.
            let wide = u32::from(*value) * u32::from(*factor);
            let low = wide as GfElement;
            let high = (wide >> GF_BITS) as GfElement;
            *value = add_mod(low, high);
        });

    fwht::fwht(erasures, GF_ORDER);
}

// ======================================================================
// Engine - PUBLIC

/// Implementation of basic low-level algorithms needed
/// for Reed-Solomon encoding/decoding.
/// Implementation of compute-intensive low-level algorithms needed
/// for Reed-Solomon encoding/decoding. This is the trait you would
/// implement to provide SIMD support for a CPU architecture not
/// already provided.
///
/// These algorithms are not properly documented.
/// These algorithms are not properly documented in this library.
///
/// [`Naive`] engine is provided for those who want to
/// study the source code to understand [`Engine`].
Expand Down Expand Up @@ -187,74 +152,12 @@ pub trait Engine {
// ============================================================
// PROVIDED

/// `x[] ^= y[]`
#[inline(always)]
fn xor(xs: &mut [[u8; 64]], ys: &[[u8; 64]])
where
Self: Sized,
{
debug_assert_eq!(xs.len(), ys.len());

for (x_chunk, y_chunk) in zip(xs.iter_mut(), ys.iter()) {
for (x, y) in zip(x_chunk.iter_mut(), y_chunk.iter()) {
*x ^= y;
}
}
}

/// Evaluate polynomial.
///
/// Default implementation: forwards `erasures` and `truncated_size`
/// unchanged to the module-level `eval_poly` function.
fn eval_poly(erasures: &mut [GfElement; GF_ORDER], truncated_size: usize)
where
Self: Sized,
{
// An unqualified call resolves to the free function in the enclosing
// module, not to this trait method, so this does not recurse.
eval_poly(erasures, truncated_size)
}

/// FFT with `skew_delta = pos + size`.
///
/// Thin wrapper around `self.fft`: `data`, `pos`, `size` and
/// `truncated_size` are passed through unchanged, and the skew delta
/// is set to the end of the `pos .. pos + size` range.
#[inline(always)]
fn fft_skew_end(
    &self,
    data: &mut ShardsRefMut,
    pos: usize,
    size: usize,
    truncated_size: usize,
) {
    let skew_delta = pos + size;
    self.fft(data, pos, size, truncated_size, skew_delta)
}

/// Formal derivative.
///
/// For every index `i` in `1 .. data.len()`, takes `span`, the value of
/// the lowest set bit of `i`, and calls
/// `Self::xor_within(data, i - span, i, span)`.
fn formal_derivative(data: &mut ShardsRefMut)
where
    Self: Sized,
{
    let len = data.len();
    for index in 1..len {
        // `1 << trailing_zeros` isolates the lowest set bit of `index`.
        let span: usize = 1 << index.trailing_zeros();
        let target = index - span;
        Self::xor_within(data, target, index, span);
    }
}

/// IFFT with `skew_delta = pos + size`.
///
/// Thin wrapper around `self.ifft`: `data`, `pos`, `size` and
/// `truncated_size` are passed through unchanged, and the skew delta
/// is set to the end of the `pos .. pos + size` range.
#[inline(always)]
fn ifft_skew_end(
    &self,
    data: &mut ShardsRefMut,
    pos: usize,
    size: usize,
    truncated_size: usize,
) {
    let skew_delta = pos + size;
    self.ifft(data, pos, size, truncated_size, skew_delta)
}

/// `data[x .. x + count] ^= data[y .. y + count]`
///
/// Ranges must not overlap.
#[inline(always)]
fn xor_within(data: &mut ShardsRefMut, x: usize, y: usize, count: usize)
where
Self: Sized,
{
let (xs, ys) = data.flat2_mut(x, y, count);
Self::xor(xs, ys);
utils::eval_poly(erasures, truncated_size)
}
}

Expand Down
25 changes: 12 additions & 13 deletions src/engine/engine_avx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@ use std::arch::x86::*;
use std::arch::x86_64::*;

use crate::engine::{
self,
tables::{self, Mul128, Multiply128lutT, Skew},
Engine, GfElement, ShardsRefMut, GF_MODULUS, GF_ORDER,
utils, Engine, GfElement, ShardsRefMut, GF_MODULUS, GF_ORDER,
};

// ======================================================================
Expand Down Expand Up @@ -238,8 +237,8 @@ impl Avx2 {
// FIRST LAYER

if log_m02 == GF_MODULUS {
Self::xor(s2, s0);
Self::xor(s3, s1);
utils::xor(s2, s0);
utils::xor(s3, s1);
} else {
self.fft_butterfly_partial(s0, s2, log_m02);
self.fft_butterfly_partial(s1, s3, log_m02);
Expand All @@ -248,13 +247,13 @@ impl Avx2 {
// SECOND LAYER

if log_m01 == GF_MODULUS {
Self::xor(s1, s0);
utils::xor(s1, s0);
} else {
self.fft_butterfly_partial(s0, s1, log_m01);
}

if log_m23 == GF_MODULUS {
Self::xor(s3, s2);
utils::xor(s3, s2);
} else {
self.fft_butterfly_partial(s2, s3, log_m23);
}
Expand Down Expand Up @@ -315,7 +314,7 @@ impl Avx2 {
let (x, y) = data.dist2_mut(pos + r, 1);

if log_m == GF_MODULUS {
Self::xor(y, x);
utils::xor(y, x);
} else {
self.fft_butterfly_partial(x, y, log_m)
}
Expand Down Expand Up @@ -379,22 +378,22 @@ impl Avx2 {
// FIRST LAYER

if log_m01 == GF_MODULUS {
Self::xor(s1, s0);
utils::xor(s1, s0);
} else {
self.ifft_butterfly_partial(s0, s1, log_m01);
}

if log_m23 == GF_MODULUS {
Self::xor(s3, s2);
utils::xor(s3, s2);
} else {
self.ifft_butterfly_partial(s2, s3, log_m23);
}

// SECOND LAYER

if log_m02 == GF_MODULUS {
Self::xor(s2, s0);
Self::xor(s3, s1);
utils::xor(s2, s0);
utils::xor(s3, s1);
} else {
self.ifft_butterfly_partial(s0, s2, log_m02);
self.ifft_butterfly_partial(s1, s3, log_m02);
Expand Down Expand Up @@ -451,7 +450,7 @@ impl Avx2 {
if dist < size {
let log_m = self.skew[dist + skew_delta - 1];
if log_m == GF_MODULUS {
Self::xor_within(data, pos + dist, pos, dist);
utils::xor_within(data, pos + dist, pos, dist);
} else {
let (mut a, mut b) = data.split_at_mut(pos + dist);
for i in 0..dist {
Expand All @@ -472,7 +471,7 @@ impl Avx2 {
impl Avx2 {
#[target_feature(enable = "avx2")]
unsafe fn eval_poly_avx2(erasures: &mut [GfElement; GF_ORDER], truncated_size: usize) {
engine::eval_poly(erasures, truncated_size)
utils::eval_poly(erasures, truncated_size)
}
}

Expand Down
6 changes: 3 additions & 3 deletions src/engine/engine_naive.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::engine::{
tables::{self, Exp, Log, Skew},
Engine, GfElement, ShardsRefMut, GF_MODULUS,
utils, Engine, GfElement, ShardsRefMut, GF_MODULUS,
};

// ======================================================================
Expand Down Expand Up @@ -60,7 +60,7 @@ impl Engine for Naive {
if log_m != GF_MODULUS {
self.mul_add(a, b, log_m);
}
Self::xor(b, a);
utils::xor(b, a);
}
r += dist * 2;
}
Expand Down Expand Up @@ -89,7 +89,7 @@ impl Engine for Naive {

// IFFT BUTTERFLY

Self::xor(b, a);
utils::xor(b, a);
if log_m != GF_MODULUS {
self.mul_add(a, b, log_m);
}
Expand Down
Loading