Skip to content

Commit

Permalink
Fix compiler warning
Browse files Browse the repository at this point in the history
  • Loading branch information
kimwalisch committed Jul 11, 2024
1 parent 2dd6268 commit dcdb6f0
Showing 1 changed file with 149 additions and 22 deletions.
171 changes: 149 additions & 22 deletions include/popcnt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@
#include <cpu_supports_popcnt.hpp>
#endif

// GCC & Clang
#if defined(__GNUC__) || \
__has_builtin(__builtin_popcountl)

// CPUID is only enabled on x86 and x86-64 CPUs
// if the user compiles without -mpopcnt.
#if defined(ENABLE_MULTIARCH_x86_POPCNT)
#if defined(__x86_64__)

namespace {

/// This uses fewer arithmetic operations than any other known
Expand All @@ -40,19 +49,6 @@ NOINLINE uint64_t popcnt64_bitwise(uint64_t x)
return (x * h01) >> 56;
}

} // namespace

// GCC & Clang
#if defined(__GNUC__) || \
__has_builtin(__builtin_popcountl)

// CPUID is only enabled on x86 and x86-64 CPUs
// if the user compiles without -mpopcnt.
#if defined(ENABLE_MULTIARCH_x86_POPCNT)
#if defined(__x86_64__)

namespace {

ALWAYS_INLINE uint64_t popcnt64(uint64_t x)
{
// On my AMD EPYC 7642 CPU using GCC 12 this runtime
Expand All @@ -72,6 +68,25 @@ ALWAYS_INLINE uint64_t popcnt64(uint64_t x)

namespace {

/// Portable population count (Hamming weight) of a 64-bit integer.
/// The bits are counted in parallel inside 2-bit, 4-bit and 8-bit
/// fields, then the 8 byte counts are summed using one multiply.
/// On machines with fast multiplication this uses 12 arithmetic
/// operations, one of which is a multiply.
/// http://en.wikipedia.org/wiki/Hamming_weight#Efficient_implementation
///
NOINLINE uint64_t popcnt64_bitwise(uint64_t x)
{
  const uint64_t pair_mask   = 0x5555555555555555ull;
  const uint64_t nibble_mask = 0x3333333333333333ull;
  const uint64_t byte_mask   = 0x0F0F0F0F0F0F0F0Full;
  const uint64_t byte_ones   = 0x0101010101010101ull;

  // Each 2-bit field now holds the number of bits set in it.
  const uint64_t per_pair = x - ((x >> 1) & pair_mask);

  // Add adjacent 2-bit counts: one count per 4-bit field.
  const uint64_t per_nibble = (per_pair & nibble_mask) +
                              ((per_pair >> 2) & nibble_mask);

  // Add adjacent 4-bit counts: one count per byte.
  const uint64_t per_byte = (per_nibble + (per_nibble >> 4)) & byte_mask;

  // The multiply accumulates all byte counts in the top byte.
  return (per_byte * byte_ones) >> 56;
}

ALWAYS_INLINE uint64_t popcnt64(uint64_t x)
{
if_likely(cpu_supports_popcnt)
Expand Down Expand Up @@ -120,23 +135,66 @@ ALWAYS_INLINE uint64_t popcnt64(uint64_t x)

namespace {

ALWAYS_INLINE uint64_t popcnt64(uint64_t x)
{
#if defined(__POPCNT__) || \
defined(__AVX__)

/// Returns the result of the __popcnt64() compiler intrinsic for x.
/// This variant is compiled when __POPCNT__ or __AVX__ is defined.
/// NOTE(review): presumably those macros imply the POPCNT instruction
/// is available at compile time, so no runtime check is needed — confirm.
ALWAYS_INLINE uint64_t popcnt64(uint64_t x)
{
return __popcnt64(x);
}

#elif defined(ENABLE_MULTIARCH_x86_POPCNT)

/// Portable population count (Hamming weight) of a 64-bit integer.
/// The bits are counted in parallel inside 2-bit, 4-bit and 8-bit
/// fields, then the 8 byte counts are summed using one multiply.
/// On machines with fast multiplication this uses 12 arithmetic
/// operations, one of which is a multiply.
/// http://en.wikipedia.org/wiki/Hamming_weight#Efficient_implementation
///
NOINLINE uint64_t popcnt64_bitwise(uint64_t x)
{
  const uint64_t pair_mask   = 0x5555555555555555ull;
  const uint64_t nibble_mask = 0x3333333333333333ull;
  const uint64_t byte_mask   = 0x0F0F0F0F0F0F0F0Full;
  const uint64_t byte_ones   = 0x0101010101010101ull;

  // Each 2-bit field now holds the number of bits set in it.
  const uint64_t per_pair = x - ((x >> 1) & pair_mask);

  // Add adjacent 2-bit counts: one count per 4-bit field.
  const uint64_t per_nibble = (per_pair & nibble_mask) +
                              ((per_pair >> 2) & nibble_mask);

  // Add adjacent 4-bit counts: one count per byte.
  const uint64_t per_byte = (per_nibble + (per_nibble >> 4)) & byte_mask;

  // The multiply accumulates all byte counts in the top byte.
  return (per_byte * byte_ones) >> 56;
}

/// Counts the 1 bits of x, choosing the implementation at runtime:
/// the __popcnt64() intrinsic when cpu_supports_popcnt is true,
/// otherwise the portable popcnt64_bitwise() algorithm.
/// NOTE(review): cpu_supports_popcnt presumably comes from
/// <cpu_supports_popcnt.hpp> and reflects a CPUID check — confirm.
ALWAYS_INLINE uint64_t popcnt64(uint64_t x)
{
// if_likely marks the intrinsic path as the expected one.
if_likely(cpu_supports_popcnt)
return __popcnt64(x);
else
return popcnt64_bitwise(x);
}

#else
return popcnt64_bitwise(x);
#endif

/// Portable population count (Hamming weight) of a 64-bit integer.
/// The bits are counted in parallel inside 2-bit, 4-bit and 8-bit
/// fields, then the 8 byte counts are summed using one multiply.
/// On machines with fast multiplication this uses 12 arithmetic
/// operations, one of which is a multiply.
/// http://en.wikipedia.org/wiki/Hamming_weight#Efficient_implementation
///
ALWAYS_INLINE uint64_t popcnt64(uint64_t x)
{
  const uint64_t pair_mask   = 0x5555555555555555ull;
  const uint64_t nibble_mask = 0x3333333333333333ull;
  const uint64_t byte_mask   = 0x0F0F0F0F0F0F0F0Full;
  const uint64_t byte_ones   = 0x0101010101010101ull;

  // Each 2-bit field now holds the number of bits set in it.
  const uint64_t per_pair = x - ((x >> 1) & pair_mask);

  // Add adjacent 2-bit counts: one count per 4-bit field.
  const uint64_t per_nibble = (per_pair & nibble_mask) +
                              ((per_pair >> 2) & nibble_mask);

  // Add adjacent 4-bit counts: one count per byte.
  const uint64_t per_byte = (per_nibble + (per_nibble >> 4)) & byte_mask;

  // The multiply accumulates all byte counts in the top byte.
  return (per_byte * byte_ones) >> 56;
}

#endif

} // namespace

#elif defined(_MSC_VER) && \
Expand All @@ -147,28 +205,72 @@ ALWAYS_INLINE uint64_t popcnt64(uint64_t x)

namespace {

ALWAYS_INLINE uint64_t popcnt64(uint64_t x)
{
#if defined(__POPCNT__) || \
defined(__AVX__)

/// Counts the 1 bits of x by applying the 32-bit __popcnt()
/// intrinsic to the low and the high half of x and adding both
/// results. This variant is compiled when __POPCNT__ or __AVX__
/// is defined.
/// NOTE(review): presumably the 64-bit intrinsic is not available
/// on this target, hence the two 32-bit calls — confirm.
ALWAYS_INLINE uint64_t popcnt64(uint64_t x)
{
return __popcnt(uint32_t(x)) +
__popcnt(uint32_t(x >> 32));
}

#elif defined(ENABLE_MULTIARCH_x86_POPCNT)

/// Portable population count (Hamming weight) of a 64-bit integer.
/// The bits are counted in parallel inside 2-bit, 4-bit and 8-bit
/// fields, then the 8 byte counts are summed using one multiply.
/// On machines with fast multiplication this uses 12 arithmetic
/// operations, one of which is a multiply.
/// http://en.wikipedia.org/wiki/Hamming_weight#Efficient_implementation
///
NOINLINE uint64_t popcnt64_bitwise(uint64_t x)
{
  const uint64_t pair_mask   = 0x5555555555555555ull;
  const uint64_t nibble_mask = 0x3333333333333333ull;
  const uint64_t byte_mask   = 0x0F0F0F0F0F0F0F0Full;
  const uint64_t byte_ones   = 0x0101010101010101ull;

  // Each 2-bit field now holds the number of bits set in it.
  const uint64_t per_pair = x - ((x >> 1) & pair_mask);

  // Add adjacent 2-bit counts: one count per 4-bit field.
  const uint64_t per_nibble = (per_pair & nibble_mask) +
                              ((per_pair >> 2) & nibble_mask);

  // Add adjacent 4-bit counts: one count per byte.
  const uint64_t per_byte = (per_nibble + (per_nibble >> 4)) & byte_mask;

  // The multiply accumulates all byte counts in the top byte.
  return (per_byte * byte_ones) >> 56;
}

/// Counts the 1 bits of x, choosing the implementation at runtime:
/// when cpu_supports_popcnt is true the 32-bit __popcnt() intrinsic
/// is applied to the low and the high half of x and both results
/// are added, otherwise the portable popcnt64_bitwise() is used.
/// NOTE(review): cpu_supports_popcnt presumably comes from
/// <cpu_supports_popcnt.hpp> and reflects a CPUID check — confirm.
ALWAYS_INLINE uint64_t popcnt64(uint64_t x)
{
// if_likely marks the intrinsic path as the expected one.
if_likely(cpu_supports_popcnt)
return __popcnt(uint32_t(x)) +
__popcnt(uint32_t(x >> 32));
else
return popcnt64_bitwise(x);
}

#else
return popcnt64_bitwise(x);
#endif

/// Portable population count (Hamming weight) of a 64-bit integer.
/// The bits are counted in parallel inside 2-bit, 4-bit and 8-bit
/// fields, then the 8 byte counts are summed using one multiply.
/// On machines with fast multiplication this uses 12 arithmetic
/// operations, one of which is a multiply.
/// http://en.wikipedia.org/wiki/Hamming_weight#Efficient_implementation
///
ALWAYS_INLINE uint64_t popcnt64(uint64_t x)
{
  const uint64_t pair_mask   = 0x5555555555555555ull;
  const uint64_t nibble_mask = 0x3333333333333333ull;
  const uint64_t byte_mask   = 0x0F0F0F0F0F0F0F0Full;
  const uint64_t byte_ones   = 0x0101010101010101ull;

  // Each 2-bit field now holds the number of bits set in it.
  const uint64_t per_pair = x - ((x >> 1) & pair_mask);

  // Add adjacent 2-bit counts: one count per 4-bit field.
  const uint64_t per_nibble = (per_pair & nibble_mask) +
                              ((per_pair >> 2) & nibble_mask);

  // Add adjacent 4-bit counts: one count per byte.
  const uint64_t per_byte = (per_nibble + (per_nibble >> 4)) & byte_mask;

  // The multiply accumulates all byte counts in the top byte.
  return (per_byte * byte_ones) >> 56;
}

#endif

} // namespace

#elif __cplusplus >= 202002L
#elif __cplusplus >= 202002L && \
__has_include(<bit>)

#include <bit>

Expand All @@ -184,6 +286,31 @@ ALWAYS_INLINE uint64_t popcnt64(uint64_t x)

} // namespace

#else

namespace {

/// Portable population count (Hamming weight) of a 64-bit integer.
/// The bits are counted in parallel inside 2-bit, 4-bit and 8-bit
/// fields, then the 8 byte counts are summed using one multiply.
/// On machines with fast multiplication this uses 12 arithmetic
/// operations, one of which is a multiply.
/// http://en.wikipedia.org/wiki/Hamming_weight#Efficient_implementation
///
ALWAYS_INLINE uint64_t popcnt64(uint64_t x)
{
  const uint64_t pair_mask   = 0x5555555555555555ull;
  const uint64_t nibble_mask = 0x3333333333333333ull;
  const uint64_t byte_mask   = 0x0F0F0F0F0F0F0F0Full;
  const uint64_t byte_ones   = 0x0101010101010101ull;

  // Each 2-bit field now holds the number of bits set in it.
  const uint64_t per_pair = x - ((x >> 1) & pair_mask);

  // Add adjacent 2-bit counts: one count per 4-bit field.
  const uint64_t per_nibble = (per_pair & nibble_mask) +
                              ((per_pair >> 2) & nibble_mask);

  // Add adjacent 4-bit counts: one count per byte.
  const uint64_t per_byte = (per_nibble + (per_nibble >> 4)) & byte_mask;

  // The multiply accumulates all byte counts in the top byte.
  return (per_byte * byte_ones) >> 56;
}

} // namespace

#endif

#endif // POPCNT_HPP

0 comments on commit dcdb6f0

Please sign in to comment.