Skip to content

Commit 2381c25

Browse files
authored
Determine SIMD at runtime (#209)
* Determine SIMD at runtime * Allow unused constants in `fp` * Disregard 32-bit x86 architectures * Add appropriate decorations to intrinsics wrappers
1 parent 5044634 commit 2381c25

10 files changed

Lines changed: 154 additions & 99 deletions

File tree

ext/crates/fp/build.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@ fn main() -> io::Result<()> {
2929
writer.add_value("NUM_PRIMES", "usize", num_primes);
3030
writer.add_raw("/// The `NUM_PRIMES`th prime number.");
3131
writer.add_value("MAX_PRIME", "usize", max_prime);
32-
// `NOT_A_PRIME` is never used if odd-primes is disabled.
33-
writer.add_raw("#[allow(dead_code)]");
3432
writer.add_raw(
3533
"/// A sentinel value. `PRIME_TO_INDEX_MAP[i] == NOT_A_PRIME` if and only if `i` is not",
3634
);

ext/crates/fp/src/constants.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#![allow(dead_code)]
2+
13
use build_const::build_const;
24

35
build_const!("constants");

ext/crates/fp/src/simd/generic.rs

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,7 @@
11
use crate::limb::Limb;
22

3-
pub(crate) type SimdLimb = Limb;
4-
5-
pub(crate) unsafe fn load(limb: *const Limb) -> SimdLimb {
6-
*limb
7-
}
8-
9-
pub(crate) unsafe fn store(limb: *mut Limb, val: SimdLimb) {
10-
*limb = val;
11-
}
12-
13-
pub(crate) unsafe fn xor(left: SimdLimb, right: SimdLimb) -> SimdLimb {
14-
left ^ right
3+
pub(super) fn add_simd(target: &mut [Limb], source: &[Limb], min_limb: usize) {
4+
for (target_limb, source_limb) in target.iter_mut().zip(source.iter()).skip(min_limb) {
5+
*target_limb ^= source_limb
6+
}
157
}

ext/crates/fp/src/simd/mod.rs

Lines changed: 9 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,16 @@
1-
cfg_if::cfg_if! {
2-
if #[cfg(target_arch = "x86_64")] {
3-
mod x86_64;
4-
use x86_64::*;
5-
} else {
6-
mod generic;
7-
use generic::*;
8-
}
9-
}
1+
use crate::limb::Limb;
102

11-
use super::limb::Limb;
3+
mod generic;
124

13-
const LIMBS_PER_SIMD: usize = std::mem::size_of::<SimdLimb>() / crate::constants::BYTES_PER_LIMB;
5+
#[cfg(target_arch = "x86_64")]
6+
mod x86_64;
147

158
pub(crate) fn add_simd(target: &mut [Limb], source: &[Limb], min_limb: usize) {
16-
let max_limb = target.len();
17-
let target = target.as_mut_ptr();
18-
let source = source.as_ptr();
19-
let chunks = (max_limb - min_limb) / LIMBS_PER_SIMD;
20-
for i in 0..chunks {
21-
unsafe {
22-
let mut target_chunk = load(target.add(LIMBS_PER_SIMD * i + min_limb));
23-
let source_chunk = load(source.add(LIMBS_PER_SIMD * i + min_limb));
24-
target_chunk = xor(target_chunk, source_chunk);
25-
store(target.add(LIMBS_PER_SIMD * i + min_limb), target_chunk);
26-
}
27-
}
28-
for i in (min_limb + LIMBS_PER_SIMD * chunks)..max_limb {
29-
unsafe {
30-
// pointer arithmetic
31-
*target.add(i) = *target.add(i) ^ *source.add(i);
9+
cfg_if::cfg_if! {
10+
if #[cfg(target_arch = "x86_64")] {
11+
x86_64::add_simd(target, source, min_limb)
12+
} else {
13+
generic::add_simd(target, source, min_limb)
3214
}
3315
}
3416
}

ext/crates/fp/src/simd/x86_64.rs

Lines changed: 0 additions & 58 deletions
This file was deleted.
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
use std::arch::x86_64;
2+
3+
use crate::limb::Limb;
4+
5+
type SimdLimb = x86_64::__m256;
6+
7+
#[target_feature(enable = "avx")]
8+
fn load(limb: *const Limb) -> SimdLimb {
9+
unsafe { x86_64::_mm256_loadu_ps(limb as *const f32) }
10+
}
11+
12+
#[target_feature(enable = "avx")]
13+
fn store(limb: *mut Limb, val: SimdLimb) {
14+
unsafe { x86_64::_mm256_storeu_ps(limb as *mut f32, val) }
15+
}
16+
17+
#[target_feature(enable = "avx")]
18+
fn xor(left: SimdLimb, right: SimdLimb) -> SimdLimb {
19+
x86_64::_mm256_xor_ps(left, right)
20+
}
21+
22+
super::add_simd_arch!("avx");
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
use std::arch::x86_64;
2+
3+
use crate::limb::Limb;
4+
5+
type SimdLimb = x86_64::__m256i;
6+
7+
#[target_feature(enable = "avx2")]
8+
fn load(limb: *const Limb) -> SimdLimb {
9+
unsafe { x86_64::_mm256_loadu_si256(limb as *const SimdLimb) }
10+
}
11+
12+
#[target_feature(enable = "avx2")]
13+
fn store(limb: *mut Limb, val: SimdLimb) {
14+
unsafe { x86_64::_mm256_storeu_si256(limb as *mut SimdLimb, val) }
15+
}
16+
17+
#[target_feature(enable = "avx2")]
18+
fn xor(left: SimdLimb, right: SimdLimb) -> SimdLimb {
19+
x86_64::_mm256_xor_si256(left, right)
20+
}
21+
22+
super::add_simd_arch!("avx2");
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
use std::arch::x86_64;
2+
3+
use crate::limb::Limb;
4+
5+
type SimdLimb = x86_64::__m512i;
6+
7+
#[target_feature(enable = "avx512f")]
8+
fn load(limb: *const Limb) -> SimdLimb {
9+
unsafe { x86_64::_mm512_loadu_si512(limb as *const SimdLimb) }
10+
}
11+
12+
#[target_feature(enable = "avx512f")]
13+
fn store(limb: *mut Limb, val: SimdLimb) {
14+
unsafe { x86_64::_mm512_storeu_si512(limb as *mut SimdLimb, val) }
15+
}
16+
17+
#[target_feature(enable = "avx512f")]
18+
fn xor(left: SimdLimb, right: SimdLimb) -> SimdLimb {
19+
x86_64::_mm512_xor_si512(left, right)
20+
}
21+
22+
super::add_simd_arch!("avx512f");
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
mod avx;
2+
mod avx2;
3+
mod avx512;
4+
mod sse2;
5+
6+
use crate::limb::Limb;
7+
8+
macro_rules! add_simd_arch {
9+
($arch:tt) => {
10+
const LIMBS_PER_SIMD: usize =
11+
std::mem::size_of::<SimdLimb>() / crate::constants::BYTES_PER_LIMB;
12+
13+
#[target_feature(enable = $arch)]
14+
pub(super) fn add_simd(target: &mut [Limb], source: &[Limb], min_limb: usize) {
15+
let max_limb = target.len();
16+
let target = target.as_mut_ptr();
17+
let source = source.as_ptr();
18+
let chunks = (max_limb - min_limb) / LIMBS_PER_SIMD;
19+
for i in 0..chunks {
20+
unsafe {
21+
let mut target_chunk = load(target.add(LIMBS_PER_SIMD * i + min_limb));
22+
let source_chunk = load(source.add(LIMBS_PER_SIMD * i + min_limb));
23+
target_chunk = xor(target_chunk, source_chunk);
24+
store(target.add(LIMBS_PER_SIMD * i + min_limb), target_chunk);
25+
}
26+
}
27+
for i in (min_limb + LIMBS_PER_SIMD * chunks)..max_limb {
28+
unsafe {
29+
// pointer arithmetic
30+
*target.add(i) = *target.add(i) ^ *source.add(i);
31+
}
32+
}
33+
}
34+
};
35+
}
36+
37+
use add_simd_arch;
38+
39+
pub(super) fn add_simd(target: &mut [Limb], source: &[Limb], min_limb: usize) {
40+
if is_x86_feature_detected!("avx512f") {
41+
unsafe { avx512::add_simd(target, source, min_limb) }
42+
} else if is_x86_feature_detected!("avx2") {
43+
unsafe { avx2::add_simd(target, source, min_limb) }
44+
} else if is_x86_feature_detected!("avx") {
45+
unsafe { avx::add_simd(target, source, min_limb) }
46+
} else if is_x86_feature_detected!("sse2") {
47+
unsafe { sse2::add_simd(target, source, min_limb) }
48+
} else {
49+
super::generic::add_simd(target, source, min_limb)
50+
}
51+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
use std::arch::x86_64;
2+
3+
use crate::limb::Limb;
4+
5+
type SimdLimb = x86_64::__m128i;
6+
7+
#[target_feature(enable = "sse2")]
8+
fn load(limb: *const Limb) -> SimdLimb {
9+
unsafe { x86_64::_mm_loadu_si128(limb as *const SimdLimb) }
10+
}
11+
12+
#[target_feature(enable = "sse2")]
13+
fn store(limb: *mut Limb, val: SimdLimb) {
14+
unsafe { x86_64::_mm_storeu_si128(limb as *mut SimdLimb, val) }
15+
}
16+
17+
#[target_feature(enable = "sse2")]
18+
fn xor(left: SimdLimb, right: SimdLimb) -> SimdLimb {
19+
x86_64::_mm_xor_si128(left, right)
20+
}
21+
22+
super::add_simd_arch!("sse2");

0 commit comments

Comments
 (0)