diff --git a/ext/crates/fp/build.rs b/ext/crates/fp/build.rs index 6b5a6d0896..42fb63650d 100644 --- a/ext/crates/fp/build.rs +++ b/ext/crates/fp/build.rs @@ -29,8 +29,6 @@ fn main() -> io::Result<()> { writer.add_value("NUM_PRIMES", "usize", num_primes); writer.add_raw("/// The `NUM_PRIMES`th prime number."); writer.add_value("MAX_PRIME", "usize", max_prime); - // `NOT_A_PRIME` is never used if odd-primes is disabled. - writer.add_raw("#[allow(dead_code)]"); writer.add_raw( "/// A sentinel value. `PRIME_TO_INDEX_MAP[i] == NOT_A_PRIME` if and only if `i` is not", ); diff --git a/ext/crates/fp/src/constants.rs b/ext/crates/fp/src/constants.rs index ca1235745d..d0827feafb 100644 --- a/ext/crates/fp/src/constants.rs +++ b/ext/crates/fp/src/constants.rs @@ -1,3 +1,5 @@ +#![allow(dead_code)] + use build_const::build_const; build_const!("constants"); diff --git a/ext/crates/fp/src/simd/generic.rs b/ext/crates/fp/src/simd/generic.rs index 13786681f7..6a93631108 100644 --- a/ext/crates/fp/src/simd/generic.rs +++ b/ext/crates/fp/src/simd/generic.rs @@ -1,15 +1,7 @@ use crate::limb::Limb; -pub(crate) type SimdLimb = Limb; - -pub(crate) unsafe fn load(limb: *const Limb) -> SimdLimb { - *limb -} - -pub(crate) unsafe fn store(limb: *mut Limb, val: SimdLimb) { - *limb = val; -} - -pub(crate) unsafe fn xor(left: SimdLimb, right: SimdLimb) -> SimdLimb { - left ^ right +pub(super) fn add_simd(target: &mut [Limb], source: &[Limb], min_limb: usize) { + for (target_limb, source_limb) in target.iter_mut().zip(source.iter()).skip(min_limb) { + *target_limb ^= source_limb + } } diff --git a/ext/crates/fp/src/simd/mod.rs b/ext/crates/fp/src/simd/mod.rs index c40d8850d9..9036128225 100644 --- a/ext/crates/fp/src/simd/mod.rs +++ b/ext/crates/fp/src/simd/mod.rs @@ -1,34 +1,16 @@ -cfg_if::cfg_if! { - if #[cfg(target_arch = "x86_64")] { - mod x86_64; - use x86_64::*; - } else { - mod generic; - use generic::*; - } -} +use crate::limb::Limb; -use super::limb::Limb; +mod generic; -const LIMBS_PER_SIMD: usize = std::mem::size_of::() / crate::constants::BYTES_PER_LIMB; +#[cfg(target_arch = "x86_64")] +mod x86_64; pub(crate) fn add_simd(target: &mut [Limb], source: &[Limb], min_limb: usize) { - let max_limb = target.len(); - let target = target.as_mut_ptr(); - let source = source.as_ptr(); - let chunks = (max_limb - min_limb) / LIMBS_PER_SIMD; - for i in 0..chunks { - unsafe { - let mut target_chunk = load(target.add(LIMBS_PER_SIMD * i + min_limb)); - let source_chunk = load(source.add(LIMBS_PER_SIMD * i + min_limb)); - target_chunk = xor(target_chunk, source_chunk); - store(target.add(LIMBS_PER_SIMD * i + min_limb), target_chunk); - } - } - for i in (min_limb + LIMBS_PER_SIMD * chunks)..max_limb { - unsafe { - // pointer arithmetic - *target.add(i) = *target.add(i) ^ *source.add(i); + cfg_if::cfg_if! { + if #[cfg(target_arch = "x86_64")] { + x86_64::add_simd(target, source, min_limb) + } else { + generic::add_simd(target, source, min_limb) } } } diff --git a/ext/crates/fp/src/simd/x86_64.rs b/ext/crates/fp/src/simd/x86_64.rs deleted file mode 100644 index fe836d0827..0000000000 --- a/ext/crates/fp/src/simd/x86_64.rs +++ /dev/null @@ -1,58 +0,0 @@ -#[allow(unused_imports)] -use std::arch::x86_64; - -use crate::limb::Limb; - -cfg_if::cfg_if! { - if #[cfg(target_feature="avx2")] { - pub(crate) type SimdLimb = x86_64::__m256i; - } else if #[cfg(target_feature="avx")] { - pub(crate) type SimdLimb = x86_64::__m256; - } else if #[cfg(target_feature="sse2")] { - pub(crate) type SimdLimb = x86_64::__m128i; - } else { - pub(crate) type SimdLimb = Limb; - } -} - -pub(crate) unsafe fn load(limb: *const Limb) -> SimdLimb { - cfg_if::cfg_if! { - if #[cfg(target_feature="avx2")] { - x86_64::_mm256_loadu_si256(limb as *const SimdLimb) - } else if #[cfg(target_feature="avx")] { - x86_64::_mm256_loadu_ps(limb as *const f32) - } else if #[cfg(target_feature="sse2")] { - x86_64::_mm_loadu_si128(limb as *const SimdLimb) - } else { - *limb - } - } -} - -pub(crate) unsafe fn store(limb: *mut Limb, val: SimdLimb) { - cfg_if::cfg_if! { - if #[cfg(target_feature="avx2")] { - x86_64::_mm256_storeu_si256(limb as *mut SimdLimb, val); - } else if #[cfg(target_feature="avx")] { - x86_64::_mm256_storeu_ps(limb as *mut f32, val); - } else if #[cfg(target_feature="sse2")] { - x86_64::_mm_storeu_si128(limb as *mut SimdLimb, val) - } else { - *limb = val; - } - } -} - -pub(crate) unsafe fn xor(left: SimdLimb, right: SimdLimb) -> SimdLimb { - cfg_if::cfg_if! { - if #[cfg(target_feature="avx2")] { - x86_64::_mm256_xor_si256(left, right) - } else if #[cfg(target_feature="avx")] { - x86_64::_mm256_xor_ps(left, right) - } else if #[cfg(target_feature="sse2")] { - x86_64::_mm_xor_si128(left, right) - } else { - left ^ right - } - } -} diff --git a/ext/crates/fp/src/simd/x86_64/avx.rs b/ext/crates/fp/src/simd/x86_64/avx.rs new file mode 100644 index 0000000000..2632f13783 --- /dev/null +++ b/ext/crates/fp/src/simd/x86_64/avx.rs @@ -0,0 +1,22 @@ +use std::arch::x86_64; + +use crate::limb::Limb; + +type SimdLimb = x86_64::__m256; + +#[target_feature(enable = "avx")] +fn load(limb: *const Limb) -> SimdLimb { + unsafe { x86_64::_mm256_loadu_ps(limb as *const f32) } +} + +#[target_feature(enable = "avx")] +fn store(limb: *mut Limb, val: SimdLimb) { + unsafe { x86_64::_mm256_storeu_ps(limb as *mut f32, val) } +} + +#[target_feature(enable = "avx")] +fn xor(left: SimdLimb, right: SimdLimb) -> SimdLimb { + x86_64::_mm256_xor_ps(left, right) +} + +super::add_simd_arch!("avx"); diff --git a/ext/crates/fp/src/simd/x86_64/avx2.rs b/ext/crates/fp/src/simd/x86_64/avx2.rs new file mode 100644 index 0000000000..395b111cc3 --- /dev/null +++ b/ext/crates/fp/src/simd/x86_64/avx2.rs @@ -0,0 +1,22 @@ +use std::arch::x86_64; + +use crate::limb::Limb; + +type SimdLimb = x86_64::__m256i; + +#[target_feature(enable = "avx2")] +fn load(limb: *const Limb) -> SimdLimb { + unsafe { x86_64::_mm256_loadu_si256(limb as *const SimdLimb) } +} + +#[target_feature(enable = "avx2")] +fn store(limb: *mut Limb, val: SimdLimb) { + unsafe { x86_64::_mm256_storeu_si256(limb as *mut SimdLimb, val) } +} + +#[target_feature(enable = "avx2")] +fn xor(left: SimdLimb, right: SimdLimb) -> SimdLimb { + x86_64::_mm256_xor_si256(left, right) +} + +super::add_simd_arch!("avx2"); diff --git a/ext/crates/fp/src/simd/x86_64/avx512.rs b/ext/crates/fp/src/simd/x86_64/avx512.rs new file mode 100644 index 0000000000..9bf8cffdee --- /dev/null +++ b/ext/crates/fp/src/simd/x86_64/avx512.rs @@ -0,0 +1,22 @@ +use std::arch::x86_64; + +use crate::limb::Limb; + +type SimdLimb = x86_64::__m512i; + +#[target_feature(enable = "avx512f")] +fn load(limb: *const Limb) -> SimdLimb { + unsafe { x86_64::_mm512_loadu_si512(limb as *const SimdLimb) } +} + +#[target_feature(enable = "avx512f")] +fn store(limb: *mut Limb, val: SimdLimb) { + unsafe { x86_64::_mm512_storeu_si512(limb as *mut SimdLimb, val) } +} + +#[target_feature(enable = "avx512f")] +fn xor(left: SimdLimb, right: SimdLimb) -> SimdLimb { + x86_64::_mm512_xor_si512(left, right) +} + +super::add_simd_arch!("avx512f"); diff --git a/ext/crates/fp/src/simd/x86_64/mod.rs b/ext/crates/fp/src/simd/x86_64/mod.rs new file mode 100644 index 0000000000..754a894164 --- /dev/null +++ b/ext/crates/fp/src/simd/x86_64/mod.rs @@ -0,0 +1,51 @@ +mod avx; +mod avx2; +mod avx512; +mod sse2; + +use crate::limb::Limb; + +macro_rules! add_simd_arch { + ($arch:tt) => { + const LIMBS_PER_SIMD: usize = + std::mem::size_of::() / crate::constants::BYTES_PER_LIMB; + + #[target_feature(enable = $arch)] + pub(super) fn add_simd(target: &mut [Limb], source: &[Limb], min_limb: usize) { + let max_limb = target.len(); + let target = target.as_mut_ptr(); + let source = source.as_ptr(); + let chunks = (max_limb - min_limb) / LIMBS_PER_SIMD; + for i in 0..chunks { + unsafe { + let mut target_chunk = load(target.add(LIMBS_PER_SIMD * i + min_limb)); + let source_chunk = load(source.add(LIMBS_PER_SIMD * i + min_limb)); + target_chunk = xor(target_chunk, source_chunk); + store(target.add(LIMBS_PER_SIMD * i + min_limb), target_chunk); + } + } + for i in (min_limb + LIMBS_PER_SIMD * chunks)..max_limb { + unsafe { + // pointer arithmetic + *target.add(i) = *target.add(i) ^ *source.add(i); + } + } + } + }; +} + +use add_simd_arch; + +pub(super) fn add_simd(target: &mut [Limb], source: &[Limb], min_limb: usize) { + if is_x86_feature_detected!("avx512f") { + unsafe { avx512::add_simd(target, source, min_limb) } + } else if is_x86_feature_detected!("avx2") { + unsafe { avx2::add_simd(target, source, min_limb) } + } else if is_x86_feature_detected!("avx") { + unsafe { avx::add_simd(target, source, min_limb) } + } else if is_x86_feature_detected!("sse2") { + unsafe { sse2::add_simd(target, source, min_limb) } + } else { + super::generic::add_simd(target, source, min_limb) + } +} diff --git a/ext/crates/fp/src/simd/x86_64/sse2.rs b/ext/crates/fp/src/simd/x86_64/sse2.rs new file mode 100644 index 0000000000..9d7c27237e --- /dev/null +++ b/ext/crates/fp/src/simd/x86_64/sse2.rs @@ -0,0 +1,22 @@ +use std::arch::x86_64; + +use crate::limb::Limb; + +type SimdLimb = x86_64::__m128i; + +#[target_feature(enable = "sse2")] +fn load(limb: *const Limb) -> SimdLimb { + unsafe { x86_64::_mm_loadu_si128(limb as *const SimdLimb) } +} + +#[target_feature(enable = "sse2")] +fn store(limb: *mut Limb, val: SimdLimb) { + unsafe { x86_64::_mm_storeu_si128(limb as *mut SimdLimb, val) } +} + +#[target_feature(enable = "sse2")] +fn xor(left: SimdLimb, right: SimdLimb) -> SimdLimb { + x86_64::_mm_xor_si128(left, right) +} + +super::add_simd_arch!("sse2");