|
| 1 | +mod avx; |
| 2 | +mod avx2; |
| 3 | +mod avx512; |
| 4 | +mod sse2; |
| 5 | + |
| 6 | +use crate::limb::Limb; |
| 7 | + |
/// Generates the `LIMBS_PER_SIMD` constant and an `add_simd` function for one
/// x86 SIMD extension.
///
/// `$arch` is the feature name handed to `#[target_feature(enable = ...)]`.
/// The expansion refers to `Limb`, `SimdLimb`, `load`, `xor` and `store` by
/// bare name, so the invoking module must have all of them in scope.
macro_rules! add_simd_arch {
    ($arch:tt) => {
        /// Number of limbs that fit in one `SimdLimb`.
        const LIMBS_PER_SIMD: usize =
            std::mem::size_of::<SimdLimb>() / crate::constants::BYTES_PER_LIMB;

        /// XORs `source` into `target` limb by limb, starting at index `min_limb`.
        ///
        /// Only indices that are valid for *both* slices are touched: the range
        /// processed is `min_limb..min(target.len(), source.len())`. If that
        /// range is empty (including `min_limb` past the end) nothing happens.
        /// Full groups of `LIMBS_PER_SIMD` limbs go through `load`/`xor`/`store`;
        /// the leftover limbs are combined one at a time.
        ///
        /// The caller must make sure the CPU supports the enabled target
        /// feature before calling this function.
        #[target_feature(enable = $arch)]
        pub(super) fn add_simd(target: &mut [Limb], source: &[Limb], min_limb: usize) {
            // Bound by the shorter slice so the raw-pointer reads below can
            // never run past the end of `source`.
            let max_limb = target.len().min(source.len());
            if min_limb >= max_limb {
                // Also keeps `max_limb - min_limb` from underflowing.
                return;
            }
            let chunks = (max_limb - min_limb) / LIMBS_PER_SIMD;
            let tail_start = min_limb + LIMBS_PER_SIMD * chunks;

            let target_ptr = target.as_mut_ptr();
            let source_ptr = source.as_ptr();
            for i in 0..chunks {
                let offset = min_limb + LIMBS_PER_SIMD * i;
                // SAFETY: `offset + LIMBS_PER_SIMD <= tail_start <= max_limb`,
                // and `max_limb` does not exceed the length of either slice, so
                // every limb accessed here is in bounds.
                // NOTE(review): assumes `load`/`store` accept any limb-aligned
                // pointer (no wider alignment) — confirm in the per-arch modules.
                unsafe {
                    let sum = xor(load(target_ptr.add(offset)), load(source_ptr.add(offset)));
                    store(target_ptr.add(offset), sum);
                }
            }

            // Scalar tail: fewer than `LIMBS_PER_SIMD` limbs remain.
            let tail_target = &mut target[tail_start..max_limb];
            let tail_source = &source[tail_start..max_limb];
            for (t, s) in tail_target.iter_mut().zip(tail_source) {
                // Spelled as `a = a ^ b` so only `BitXor` is required of `Limb`.
                *t = *t ^ *s;
            }
        }
    };
}
| 36 | + |
| 37 | +use add_simd_arch; |
| 38 | + |
| 39 | +pub(super) fn add_simd(target: &mut [Limb], source: &[Limb], min_limb: usize) { |
| 40 | + if is_x86_feature_detected!("avx512f") { |
| 41 | + unsafe { avx512::add_simd(target, source, min_limb) } |
| 42 | + } else if is_x86_feature_detected!("avx2") { |
| 43 | + unsafe { avx2::add_simd(target, source, min_limb) } |
| 44 | + } else if is_x86_feature_detected!("avx") { |
| 45 | + unsafe { avx::add_simd(target, source, min_limb) } |
| 46 | + } else if is_x86_feature_detected!("sse2") { |
| 47 | + unsafe { sse2::add_simd(target, source, min_limb) } |
| 48 | + } else { |
| 49 | + super::generic::add_simd(target, source, min_limb) |
| 50 | + } |
| 51 | +} |
0 commit comments