From 1f5695d442690e444d6b44c8327a38f528ef1f26 Mon Sep 17 00:00:00 2001 From: AndersTrier Date: Mon, 19 Aug 2024 11:45:29 +0200 Subject: [PATCH 1/2] Engine: trait Engine only for CPU heavy methods. Move everything else to engine/utils.rs --- src/engine.rs | 113 +++--------------------------------- src/engine/engine_avx2.rs | 25 ++++---- src/engine/engine_naive.rs | 6 +- src/engine/engine_neon.rs | 25 ++++---- src/engine/engine_nosimd.rs | 26 ++++----- src/engine/engine_ssse3.rs | 25 ++++---- src/engine/fwht.rs | 6 +- src/engine/tables.rs | 6 +- src/engine/utils.rs | 91 +++++++++++++++++++++++++++++ src/rate/rate_high.rs | 14 ++--- src/rate/rate_low.rs | 8 +-- 11 files changed, 168 insertions(+), 177 deletions(-) create mode 100644 src/engine/utils.rs diff --git a/src/engine.rs b/src/engine.rs index 94d76f5..41d96cc 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -32,9 +32,8 @@ //! [`ReedSolomonDecoder`]: crate::ReedSolomonDecoder //! [`rate`]: crate::rate -use std::iter::zip; - pub(crate) use self::shards::Shards; +pub(crate) use utils::{fft_skew_end, formal_derivative, ifft_skew_end, xor_within}; pub use self::{ engine_default::DefaultEngine, engine_naive::Naive, engine_nosimd::NoSimd, shards::ShardsRefMut, @@ -60,6 +59,7 @@ mod engine_neon; mod fwht; mod shards; +mod utils; pub mod tables; @@ -90,50 +90,15 @@ pub const CANTOR_BASIS: [GfElement; GF_BITS] = [ /// Galois field element. pub type GfElement = u16; -// ====================================================================== -// FUNCTIONS - PUBLIC - Galois field operations - -/// Some kind of addition. -#[inline(always)] -pub fn add_mod(x: GfElement, y: GfElement) -> GfElement { - let sum = u32::from(x) + u32::from(y); - (sum + (sum >> GF_BITS)) as GfElement -} - -/// Some kind of subtraction. -#[inline(always)] -pub fn sub_mod(x: GfElement, y: GfElement) -> GfElement { - let dif = u32::from(x).wrapping_sub(u32::from(y)); - dif.wrapping_add(dif >> GF_BITS) as GfElement -} - -// ====================================================================== -// FUNCTIONS - CRATE - Evaluate polynomial - -// We have this function here instead of inside 'trait Engine' to allow -// it to be included and compiled with SIMD features enabled within the -// SIMD engines. -#[inline(always)] -pub(crate) fn eval_poly(erasures: &mut [GfElement; GF_ORDER], truncated_size: usize) { - let log_walsh = tables::initialize_log_walsh(); - - fwht::fwht(erasures, truncated_size); - - for (e, factor) in std::iter::zip(erasures.iter_mut(), log_walsh.iter()) { - let product = u32::from(*e) * u32::from(*factor); - *e = add_mod(product as GfElement, (product >> GF_BITS) as GfElement); - } - - fwht::fwht(erasures, GF_ORDER); -} - // ====================================================================== // Engine - PUBLIC -/// Implementation of basic low-level algorithms needed -/// for Reed-Solomon encoding/decoding. +/// Implementation of compute-intensive low-level algorithms needed +/// for Reed-Solomon encoding/decoding. This is the trait you would +/// implement to provide SIMD support for a CPU architecture not +/// already provided. /// -/// These algorithms are not properly documented. +/// These algorithms are not properly documented in this library. /// /// [`Naive`] engine is provided for those who want to /// study the source code to understand [`Engine`]. @@ -187,74 +152,12 @@ pub trait Engine { // ============================================================ // PROVIDED - /// `x[] ^= y[]` - #[inline(always)] - fn xor(xs: &mut [[u8; 64]], ys: &[[u8; 64]]) - where - Self: Sized, - { - debug_assert_eq!(xs.len(), ys.len()); - - for (x_chunk, y_chunk) in zip(xs.iter_mut(), ys.iter()) { - for (x, y) in zip(x_chunk.iter_mut(), y_chunk.iter()) { - *x ^= y; - } - } - } - /// Evaluate polynomial. fn eval_poly(erasures: &mut [GfElement; GF_ORDER], truncated_size: usize) where Self: Sized, { - eval_poly(erasures, truncated_size) - } - - /// FFT with `skew_delta = pos + size`. - #[inline(always)] - fn fft_skew_end( - &self, - data: &mut ShardsRefMut, - pos: usize, - size: usize, - truncated_size: usize, - ) { - self.fft(data, pos, size, truncated_size, pos + size) - } - - /// Formal derivative. - fn formal_derivative(data: &mut ShardsRefMut) - where - Self: Sized, - { - for i in 1..data.len() { - let width: usize = 1 << i.trailing_zeros(); - Self::xor_within(data, i - width, i, width); - } - } - - /// IFFT with `skew_delta = pos + size`. - #[inline(always)] - fn ifft_skew_end( - &self, - data: &mut ShardsRefMut, - pos: usize, - size: usize, - truncated_size: usize, - ) { - self.ifft(data, pos, size, truncated_size, pos + size) - } - - /// `data[x .. x + count] ^= data[y .. y + count]` - /// - /// Ranges must not overlap. - #[inline(always)] - fn xor_within(data: &mut ShardsRefMut, x: usize, y: usize, count: usize) - where - Self: Sized, - { - let (xs, ys) = data.flat2_mut(x, y, count); - Self::xor(xs, ys); + utils::eval_poly(erasures, truncated_size) } } diff --git a/src/engine/engine_avx2.rs b/src/engine/engine_avx2.rs index 8961c0c..707b723 100644 --- a/src/engine/engine_avx2.rs +++ b/src/engine/engine_avx2.rs @@ -6,9 +6,8 @@ use std::arch::x86::*; use std::arch::x86_64::*; use crate::engine::{ - self, tables::{self, Mul128, Multiply128lutT, Skew}, - Engine, GfElement, ShardsRefMut, GF_MODULUS, GF_ORDER, + utils, Engine, GfElement, ShardsRefMut, GF_MODULUS, GF_ORDER, }; // ====================================================================== @@ -238,8 +237,8 @@ impl Avx2 { // FIRST LAYER if log_m02 == GF_MODULUS { - Self::xor(s2, s0); - Self::xor(s3, s1); + utils::xor(s2, s0); + utils::xor(s3, s1); } else { self.fft_butterfly_partial(s0, s2, log_m02); self.fft_butterfly_partial(s1, s3, log_m02); @@ -248,13 +247,13 @@ impl Avx2 { // SECOND LAYER if log_m01 == GF_MODULUS { - Self::xor(s1, s0); + utils::xor(s1, s0); } else { self.fft_butterfly_partial(s0, s1, log_m01); } if log_m23 == GF_MODULUS { - Self::xor(s3, s2); + utils::xor(s3, s2); } else { self.fft_butterfly_partial(s2, s3, log_m23); } @@ -315,7 +314,7 @@ impl Avx2 { let (x, y) = data.dist2_mut(pos + r, 1); if log_m == GF_MODULUS { - Self::xor(y, x); + utils::xor(y, x); } else { self.fft_butterfly_partial(x, y, log_m) } @@ -379,13 +378,13 @@ impl Avx2 { // FIRST LAYER if log_m01 == GF_MODULUS { - Self::xor(s1, s0); + utils::xor(s1, s0); } else { self.ifft_butterfly_partial(s0, s1, log_m01); } if log_m23 == GF_MODULUS { - Self::xor(s3, s2); + utils::xor(s3, s2); } else { self.ifft_butterfly_partial(s2, s3, log_m23); } @@ -393,8 +392,8 @@ impl Avx2 { // SECOND LAYER if log_m02 == GF_MODULUS { - Self::xor(s2, s0); - Self::xor(s3, s1); + utils::xor(s2, s0); + utils::xor(s3, s1); } else { self.ifft_butterfly_partial(s0, s2, log_m02); self.ifft_butterfly_partial(s1, s3, log_m02); @@ -451,7 +450,7 @@ impl Avx2 { if dist < size { let log_m = self.skew[dist + skew_delta - 1]; if log_m == GF_MODULUS { - Self::xor_within(data, pos + dist, pos, dist); + utils::xor_within(data, pos + dist, pos, dist); } else { let (mut a, mut b) = data.split_at_mut(pos + dist); for i in 0..dist { @@ -472,7 +471,7 @@ impl Avx2 { impl Avx2 { #[target_feature(enable = "avx2")] unsafe fn eval_poly_avx2(erasures: &mut [GfElement; GF_ORDER], truncated_size: usize) { - engine::eval_poly(erasures, truncated_size) + utils::eval_poly(erasures, truncated_size) } } diff --git a/src/engine/engine_naive.rs b/src/engine/engine_naive.rs index e6d782f..b4954ab 100644 --- a/src/engine/engine_naive.rs +++ b/src/engine/engine_naive.rs @@ -1,6 +1,6 @@ use crate::engine::{ tables::{self, Exp, Log, Skew}, - Engine, GfElement, ShardsRefMut, GF_MODULUS, + utils, Engine, GfElement, ShardsRefMut, GF_MODULUS, }; // ====================================================================== @@ -60,7 +60,7 @@ impl Engine for Naive { if log_m != GF_MODULUS { self.mul_add(a, b, log_m); } - Self::xor(b, a); + utils::xor(b, a); } r += dist * 2; } @@ -89,7 +89,7 @@ impl Engine for Naive { // IFFT BUTTERFLY - Self::xor(b, a); + utils::xor(b, a); if log_m != GF_MODULUS { self.mul_add(a, b, log_m); } diff --git a/src/engine/engine_neon.rs b/src/engine/engine_neon.rs index cfde21b..d97fa09 100644 --- a/src/engine/engine_neon.rs +++ b/src/engine/engine_neon.rs @@ -1,7 +1,6 @@ use crate::engine::{ - self, tables::{self, Mul128, Multiply128lutT, Skew}, - Engine, GfElement, ShardsRefMut, GF_MODULUS, GF_ORDER, + utils, Engine, GfElement, ShardsRefMut, GF_MODULUS, GF_ORDER, }; use std::arch::aarch64::*; use std::iter::zip; @@ -239,8 +238,8 @@ impl Neon { // FIRST LAYER if log_m02 == GF_MODULUS { - Self::xor(s2, s0); - Self::xor(s3, s1); + utils::xor(s2, s0); + utils::xor(s3, s1); } else { self.fft_butterfly_partial(s0, s2, log_m02); self.fft_butterfly_partial(s1, s3, log_m02); @@ -249,13 +248,13 @@ impl Neon { // SECOND LAYER if log_m01 == GF_MODULUS { - Self::xor(s1, s0); + utils::xor(s1, s0); } else { self.fft_butterfly_partial(s0, s1, log_m01); } if log_m23 == GF_MODULUS { - Self::xor(s3, s2); + utils::xor(s3, s2); } else { self.fft_butterfly_partial(s2, s3, log_m23); } @@ -316,7 +315,7 @@ impl Neon { let (x, y) = data.dist2_mut(pos + r, 1); if log_m == GF_MODULUS { - Self::xor(y, x); + utils::xor(y, x); } else { self.fft_butterfly_partial(x, y, log_m) } @@ -391,13 +390,13 @@ impl Neon { // FIRST LAYER if log_m01 == GF_MODULUS { - Self::xor(s1, s0); + utils::xor(s1, s0); } else { self.ifft_butterfly_partial(s0, s1, log_m01); } if log_m23 == GF_MODULUS { - Self::xor(s3, s2); + utils::xor(s3, s2); } else { self.ifft_butterfly_partial(s2, s3, log_m23); } @@ -405,8 +404,8 @@ impl Neon { // SECOND LAYER if log_m02 == GF_MODULUS { - Self::xor(s2, s0); - Self::xor(s3, s1); + utils::xor(s2, s0); + utils::xor(s3, s1); } else { self.ifft_butterfly_partial(s0, s2, log_m02); self.ifft_butterfly_partial(s1, s3, log_m02); @@ -463,7 +462,7 @@ impl Neon { if dist < size { let log_m = self.skew[dist + skew_delta - 1]; if log_m == GF_MODULUS { - Self::xor_within(data, pos + dist, pos, dist); + utils::xor_within(data, pos + dist, pos, dist); } else { let (mut a, mut b) = data.split_at_mut(pos + dist); for i in 0..dist { @@ -484,7 +483,7 @@ impl Neon { impl Neon { #[target_feature(enable = "neon")] unsafe fn eval_poly_neon(erasures: &mut [GfElement; GF_ORDER], truncated_size: usize) { - engine::eval_poly(erasures, truncated_size) + utils::eval_poly(erasures, truncated_size) } } diff --git a/src/engine/engine_nosimd.rs b/src/engine/engine_nosimd.rs index ded90bd..abf8f45 100644 --- a/src/engine/engine_nosimd.rs +++ b/src/engine/engine_nosimd.rs @@ -2,7 +2,7 @@ use std::iter::zip; use crate::engine::{ tables::{self, Mul16, Skew}, - Engine, GfElement, ShardsRefMut, GF_MODULUS, + utils, Engine, GfElement, ShardsRefMut, GF_MODULUS, }; // ====================================================================== @@ -119,7 +119,7 @@ impl NoSimd { #[inline(always)] fn fft_butterfly_partial(&self, x: &mut [[u8; 64]], y: &mut [[u8; 64]], log_m: GfElement) { self.mul_add(x, y, log_m); - Self::xor(y, x); + utils::xor(y, x); } #[inline(always)] @@ -137,8 +137,8 @@ impl NoSimd { // FIRST LAYER if log_m02 == GF_MODULUS { - Self::xor(s2, s0); - Self::xor(s3, s1); + utils::xor(s2, s0); + utils::xor(s3, s1); } else { self.fft_butterfly_partial(s0, s2, log_m02); self.fft_butterfly_partial(s1, s3, log_m02); @@ -147,13 +147,13 @@ impl NoSimd { // SECOND LAYER if log_m01 == GF_MODULUS { - Self::xor(s1, s0); + utils::xor(s1, s0); } else { self.fft_butterfly_partial(s0, s1, log_m01); } if log_m23 == GF_MODULUS { - Self::xor(s3, s2); + utils::xor(s3, s2); } else { self.fft_butterfly_partial(s2, s3, log_m23); } @@ -201,7 +201,7 @@ impl NoSimd { let (x, y) = data.dist2_mut(pos + r, 1); if log_m == GF_MODULUS { - Self::xor(y, x); + utils::xor(y, x); } else { self.fft_butterfly_partial(x, y, log_m) } @@ -219,7 +219,7 @@ impl NoSimd { // Partial butterfly, caller must do `GF_MODULUS` check with `xor`. #[inline(always)] fn ifft_butterfly_partial(&self, x: &mut [[u8; 64]], y: &mut [[u8; 64]], log_m: GfElement) { - Self::xor(y, x); + utils::xor(y, x); self.mul_add(x, y, log_m); } @@ -238,13 +238,13 @@ impl NoSimd { // FIRST LAYER if log_m01 == GF_MODULUS { - Self::xor(s1, s0); + utils::xor(s1, s0); } else { self.ifft_butterfly_partial(s0, s1, log_m01); } if log_m23 == GF_MODULUS { - Self::xor(s3, s2); + utils::xor(s3, s2); } else { self.ifft_butterfly_partial(s2, s3, log_m23); } @@ -252,8 +252,8 @@ impl NoSimd { // SECOND LAYER if log_m02 == GF_MODULUS { - Self::xor(s2, s0); - Self::xor(s3, s1); + utils::xor(s2, s0); + utils::xor(s3, s1); } else { self.ifft_butterfly_partial(s0, s2, log_m02); self.ifft_butterfly_partial(s1, s3, log_m02); @@ -297,7 +297,7 @@ impl NoSimd { if dist < size { let log_m = self.skew[dist + skew_delta - 1]; if log_m == GF_MODULUS { - Self::xor_within(data, pos + dist, pos, dist); + utils::xor_within(data, pos + dist, pos, dist); } else { let (mut a, mut b) = data.split_at_mut(pos + dist); for i in 0..dist { diff --git a/src/engine/engine_ssse3.rs b/src/engine/engine_ssse3.rs index ae23140..26329de 100644 --- a/src/engine/engine_ssse3.rs +++ b/src/engine/engine_ssse3.rs @@ -6,9 +6,8 @@ use std::arch::x86::*; use std::arch::x86_64::*; use crate::engine::{ - self, tables::{self, Mul128, Multiply128lutT, Skew}, - Engine, GfElement, ShardsRefMut, GF_MODULUS, GF_ORDER, + utils, Engine, GfElement, ShardsRefMut, GF_MODULUS, GF_ORDER, }; // ====================================================================== @@ -238,8 +237,8 @@ impl Ssse3 { // FIRST LAYER if log_m02 == GF_MODULUS { - Self::xor(s2, s0); - Self::xor(s3, s1); + utils::xor(s2, s0); + utils::xor(s3, s1); } else { self.fft_butterfly_partial(s0, s2, log_m02); self.fft_butterfly_partial(s1, s3, log_m02); @@ -248,13 +247,13 @@ impl Ssse3 { // SECOND LAYER if log_m01 == GF_MODULUS { - Self::xor(s1, s0); + utils::xor(s1, s0); } else { self.fft_butterfly_partial(s0, s1, log_m01); } if log_m23 == GF_MODULUS { - Self::xor(s3, s2); + utils::xor(s3, s2); } else { self.fft_butterfly_partial(s2, s3, log_m23); } @@ -315,7 +314,7 @@ impl Ssse3 { let (x, y) = data.dist2_mut(pos + r, 1); if log_m == GF_MODULUS { - Self::xor(y, x); + utils::xor(y, x); } else { self.fft_butterfly_partial(x, y, log_m) } @@ -390,13 +389,13 @@ impl Ssse3 { // FIRST LAYER if log_m01 == GF_MODULUS { - Self::xor(s1, s0); + utils::xor(s1, s0); } else { self.ifft_butterfly_partial(s0, s1, log_m01); } if log_m23 == GF_MODULUS { - Self::xor(s3, s2); + utils::xor(s3, s2); } else { self.ifft_butterfly_partial(s2, s3, log_m23); } @@ -404,8 +403,8 @@ impl Ssse3 { // SECOND LAYER if log_m02 == GF_MODULUS { - Self::xor(s2, s0); - Self::xor(s3, s1); + utils::xor(s2, s0); + utils::xor(s3, s1); } else { self.ifft_butterfly_partial(s0, s2, log_m02); self.ifft_butterfly_partial(s1, s3, log_m02); @@ -462,7 +461,7 @@ impl Ssse3 { if dist < size { let log_m = self.skew[dist + skew_delta - 1]; if log_m == GF_MODULUS { - Self::xor_within(data, pos + dist, pos, dist); + utils::xor_within(data, pos + dist, pos, dist); } else { let (mut a, mut b) = data.split_at_mut(pos + dist); for i in 0..dist { @@ -483,7 +482,7 @@ impl Ssse3 { impl Ssse3 { #[target_feature(enable = "ssse3")] unsafe fn eval_poly_ssse3(erasures: &mut [GfElement; GF_ORDER], truncated_size: usize) { - engine::eval_poly(erasures, truncated_size) + utils::eval_poly(erasures, truncated_size) } } diff --git a/src/engine/fwht.rs b/src/engine/fwht.rs index 31630dc..203c8c4 100644 --- a/src/engine/fwht.rs +++ b/src/engine/fwht.rs @@ -1,4 +1,4 @@ -use crate::engine::{self, GfElement, GF_ORDER}; +use crate::engine::{utils, GfElement, GF_ORDER}; // ====================================================================== // FWHT (fast Walsh-Hadamard transform) - CRATE @@ -29,8 +29,8 @@ pub(crate) fn fwht(data: &mut [GfElement; GF_ORDER], m_truncated: usize) { #[inline(always)] fn fwht_2(a: GfElement, b: GfElement) -> (GfElement, GfElement) { - let sum = engine::add_mod(a, b); - let dif = engine::sub_mod(a, b); + let sum = utils::add_mod(a, b); + let dif = utils::sub_mod(a, b); (sum, dif) } diff --git a/src/engine/tables.rs b/src/engine/tables.rs index 3621407..2ab4afb 100644 --- a/src/engine/tables.rs +++ b/src/engine/tables.rs @@ -22,7 +22,7 @@ use once_cell::sync::OnceCell; use crate::engine::{ - self, fwht, GfElement, CANTOR_BASIS, GF_BITS, GF_MODULUS, GF_ORDER, GF_POLYNOMIAL, + fwht, utils, GfElement, CANTOR_BASIS, GF_BITS, GF_MODULUS, GF_ORDER, GF_POLYNOMIAL, }; // ====================================================================== @@ -99,7 +99,7 @@ pub fn mul(x: GfElement, log_m: GfElement, exp: &Exp, log: &Log) -> GfElement { if x == 0 { 0 } else { - exp[engine::add_mod(log[x as usize], log_m) as usize] + exp[utils::add_mod(log[x as usize], log_m) as usize] } } @@ -252,7 +252,7 @@ pub fn initialize_skew() -> &'static Skew { GF_MODULUS - log[mul(temp[m], log[(temp[m] ^ 1) as usize], exp, log) as usize]; for i in m + 1..GF_BITS - 1 { - let sum = engine::add_mod(log[(temp[i] ^ 1) as usize], temp[m]); + let sum = utils::add_mod(log[(temp[i] ^ 1) as usize], temp[m]); temp[i] = mul(temp[i], sum, exp, log); } } diff --git a/src/engine/utils.rs b/src/engine/utils.rs new file mode 100644 index 0000000..8fe50d8 --- /dev/null +++ b/src/engine/utils.rs @@ -0,0 +1,91 @@ +use crate::engine::{fwht, tables, Engine, GfElement, ShardsRefMut, GF_BITS, GF_ORDER}; +use std::iter::zip; + +// ====================================================================== +// FUNCTIONS - CRATE - Galois field operations + +/// Some kind of addition. +#[inline(always)] +pub(crate) fn add_mod(x: GfElement, y: GfElement) -> GfElement { + let sum = u32::from(x) + u32::from(y); + (sum + (sum >> GF_BITS)) as GfElement +} + +/// Some kind of subtraction. +#[inline(always)] +pub(crate) fn sub_mod(x: GfElement, y: GfElement) -> GfElement { + let dif = u32::from(x).wrapping_sub(u32::from(y)); + dif.wrapping_add(dif >> GF_BITS) as GfElement +} + +// ====================================================================== +// FUNCTIONS - CRATE + +/// FFT with `skew_delta = pos + size`. +#[inline(always)] +pub(crate) fn fft_skew_end( + engine: &impl Engine, + data: &mut ShardsRefMut, + pos: usize, + size: usize, + truncated_size: usize, +) { + engine.fft(data, pos, size, truncated_size, pos + size) +} + +/// IFFT with `skew_delta = pos + size`. +#[inline(always)] +pub(crate) fn ifft_skew_end( + engine: &impl Engine, + data: &mut ShardsRefMut, + pos: usize, + size: usize, + truncated_size: usize, +) { + engine.ifft(data, pos, size, truncated_size, pos + size) +} + +// Meant to be included and compiled with SIMD features enabled within the +// SIMD engines. +#[inline(always)] +pub(crate) fn eval_poly(erasures: &mut [GfElement; GF_ORDER], truncated_size: usize) { + let log_walsh = tables::initialize_log_walsh(); + + fwht::fwht(erasures, truncated_size); + + for (e, factor) in std::iter::zip(erasures.iter_mut(), log_walsh.iter()) { + let product = u32::from(*e) * u32::from(*factor); + *e = add_mod(product as GfElement, (product >> GF_BITS) as GfElement); + } + + fwht::fwht(erasures, GF_ORDER); +} + +/// `x[] ^= y[]` +#[inline(always)] +pub(crate) fn xor(xs: &mut [[u8; 64]], ys: &[[u8; 64]]) { + debug_assert_eq!(xs.len(), ys.len()); + + for (x_chunk, y_chunk) in zip(xs.iter_mut(), ys.iter()) { + for (x, y) in zip(x_chunk.iter_mut(), y_chunk.iter()) { + *x ^= y; + } + } +} + +// `data[x .. x + count] ^= data[y .. y + count]` +// +// Ranges must not overlap. +#[inline(always)] +pub(crate) fn xor_within(data: &mut ShardsRefMut, x: usize, y: usize, count: usize) { + let (xs, ys) = data.flat2_mut(x, y, count); + xor(xs, ys); +} + +// Formal derivative. +pub(crate) fn formal_derivative(data: &mut ShardsRefMut) { + for i in 1..data.len() { + let width: usize = 1 << i.trailing_zeros(); + xor_within(data, i - width, i, width); + } +} diff --git a/src/rate/rate_high.rs b/src/rate/rate_high.rs index 97f23b2..0694f6d 100644 --- a/src/rate/rate_high.rs +++ b/src/rate/rate_high.rs @@ -1,7 +1,7 @@ use std::marker::PhantomData; use crate::{ - engine::{Engine, GF_MODULUS, GF_ORDER}, + engine::{self, Engine, GF_MODULUS, GF_ORDER}, rate::{DecoderWork, EncoderWork, Rate, RateDecoder, RateEncoder}, DecoderResult, EncoderResult, Error, }; @@ -51,15 +51,15 @@ impl RateEncoder for HighRateEncoder { let first_count = std::cmp::min(original_count, chunk_size); work.zero(first_count..chunk_size); - engine.ifft_skew_end(&mut work, 0, chunk_size, first_count); + engine::ifft_skew_end(engine, &mut work, 0, chunk_size, first_count); if original_count > chunk_size { // FULL CHUNKS let mut chunk_start = chunk_size; while chunk_start + chunk_size <= original_count { - engine.ifft_skew_end(&mut work, chunk_start, chunk_size, chunk_size); - E::xor_within(&mut work, 0, chunk_start, chunk_size); + engine::ifft_skew_end(engine, &mut work, chunk_start, chunk_size, chunk_size); + engine::xor_within(&mut work, 0, chunk_start, chunk_size); chunk_start += chunk_size; } @@ -68,8 +68,8 @@ impl RateEncoder for HighRateEncoder { let last_count = original_count % chunk_size; if last_count > 0 { work.zero(chunk_start + last_count..); - engine.ifft_skew_end(&mut work, chunk_start, chunk_size, last_count); - E::xor_within(&mut work, 0, chunk_start, chunk_size); + engine::ifft_skew_end(engine, &mut work, chunk_start, chunk_size, last_count); + engine::xor_within(&mut work, 0, chunk_start, chunk_size); } } @@ -230,7 +230,7 @@ impl RateDecoder for HighRateDecoder { // IFFT / FORMAL DERIVATIVE / FFT self.engine.ifft(&mut work, 0, work_count, original_end, 0); - E::formal_derivative(&mut work); + engine::formal_derivative(&mut work); self.engine.fft(&mut work, 0, work_count, original_end, 0); // REVEAL ERASURES diff --git a/src/rate/rate_low.rs b/src/rate/rate_low.rs index c6ada23..cb3d707 100644 --- a/src/rate/rate_low.rs +++ b/src/rate/rate_low.rs @@ -1,7 +1,7 @@ use std::marker::PhantomData; use crate::{ - engine::{Engine, GF_MODULUS, GF_ORDER}, + engine::{self, Engine, GF_MODULUS, GF_ORDER}, rate::{DecoderWork, EncoderWork, Rate, RateDecoder, RateEncoder}, DecoderResult, EncoderResult, Error, }; @@ -66,7 +66,7 @@ impl RateEncoder for LowRateEncoder { let mut chunk_start = 0; while chunk_start + chunk_size <= recovery_count { - engine.fft_skew_end(&mut work, chunk_start, chunk_size, chunk_size); + engine::fft_skew_end(engine, &mut work, chunk_start, chunk_size, chunk_size); chunk_start += chunk_size; } @@ -74,7 +74,7 @@ impl RateEncoder for LowRateEncoder { let last_count = recovery_count % chunk_size; if last_count > 0 { - engine.fft_skew_end(&mut work, chunk_start, chunk_size, last_count); + engine::fft_skew_end(engine, &mut work, chunk_start, chunk_size, last_count); } // DONE @@ -230,7 +230,7 @@ impl RateDecoder for LowRateDecoder { // IFFT / FORMAL DERIVATIVE / FFT self.engine.ifft(&mut work, 0, work_count, recovery_end, 0); - E::formal_derivative(&mut work); + engine::formal_derivative(&mut work); self.engine.fft(&mut work, 0, work_count, recovery_end, 0); // REVEAL ERASURES From 52a8f57c94ab5d8e15de1a286f56174d09ae149f Mon Sep 17 00:00:00 2001 From: AndersTrier Date: Mon, 19 Aug 2024 11:48:33 +0200 Subject: [PATCH 2/2] Benches: No longer test xor --- benches/benchmarks.rs | 31 ++----------------------------- 1 file changed, 2 insertions(+), 29 deletions(-) diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs index b25586c..664843b 100644 --- a/benches/benchmarks.rs +++ b/benches/benchmarks.rs @@ -338,46 +338,19 @@ fn benchmarks_engine_one(c: &mut Criterion, name: &str, engine: E) { b.iter(|| E::eval_poly(black_box(&mut data), GF_ORDER / 8)) }); - // XOR MUL + // MUL let mut x = &mut generate_shards_64(1, shard_len_64, 0)[0]; - let y = &generate_shards_64(1, shard_len_64, 1)[0]; - - group.bench_function("xor", |b| { - b.iter(|| E::xor(black_box(&mut x), black_box(&y))) - }); group.bench_function("mul", |b| { b.iter(|| engine.mul(black_box(&mut x), black_box(12345))) }); - // XOR_WITHIN - - let shards_256_data = &mut generate_shards_64(1, 256 * shard_len_64, 0)[0]; - let mut shards_256 = ShardsRefMut::new(256, shard_len_64, shards_256_data.as_mut()); - - group.bench_function("xor_within 128*2", |b| { - b.iter(|| { - E::xor_within( - black_box(&mut shards_256), - black_box(0), - black_box(128), - black_box(128), - ) - }) - }); - - // FORMAL DERIVATIVE + // FFT IFFT let shards_128_data = &mut generate_shards_64(1, 128 * shard_len_64, 0)[0]; let mut shards_128 = ShardsRefMut::new(128, shard_len_64, shards_128_data.as_mut()); - group.bench_function("formal_derivative 128", |b| { - b.iter(|| E::formal_derivative(black_box(&mut shards_128))) - }); - - // FFT IFFT - group.bench_function("FFT 128", |b| { b.iter(|| { engine.fft(