Skip to content

Commit

Permalink
Tune AVX512 code
Browse files Browse the repository at this point in the history
  • Loading branch information
kimwalisch committed Jun 19, 2024
1 parent f307dd8 commit af7700e
Showing 1 changed file with 9 additions and 5 deletions.
14 changes: 9 additions & 5 deletions include/Sieve.hpp
Original file line number Diff line number Diff line change
@@ -208,14 +208,18 @@ class Sieve
  // Compute this for loop using AVX512.
  // for (i = start_idx + 1; i < stop_idx; i++)
  // cnt += popcnt64(sieve64[i]);
- do {
-   __mmask8 mask = (i + 8 < stop_idx) ? 0xff : (__mmask8) _bzhi_u64(0xff, stop_idx - i);
-   __m512i vec = _mm512_maskz_loadu_epi64(mask , &sieve64[i]);
+ //
+ for (; i + 8 < stop_idx; i += 8)
+ {
+   __m512i vec = _mm512_loadu_epi64(&sieve64[i]);
    vec = _mm512_popcnt_epi64(vec);
    vcnt = _mm512_add_epi64(vcnt, vec);
-   i += 8;
  }
- while (i < stop_idx);
+
+ __mmask8 mask = (__mmask8) _bzhi_u64(0xff, stop_idx - i);
+ __m512i vec = _mm512_maskz_loadu_epi64(mask , &sieve64[i]);
+ vec = _mm512_popcnt_epi64(vec);
+ vcnt = _mm512_add_epi64(vcnt, vec);

  cnt += _mm512_reduce_add_epi64(vcnt);
  cnt += popcnt64(sieve64[stop_idx] & m2);
Expand Down

0 comments on commit af7700e

Please sign in to comment.