diff --git a/blosc/CMakeLists.txt b/blosc/CMakeLists.txt index af5a1ec7..0a3e0617 100644 --- a/blosc/CMakeLists.txt +++ b/blosc/CMakeLists.txt @@ -300,17 +300,11 @@ if(COMPILER_SUPPORT_SSE2) set_source_files_properties( shuffle-sse2.c bitshuffle-sse2.c blosclz.c fastcopy.c PROPERTIES COMPILE_OPTIONS "/arch:SSE2") - set_property( - SOURCE shuffle.c - APPEND PROPERTY COMPILE_OPTIONS "/arch:SSE2") endif() else() set_source_files_properties( shuffle-sse2.c bitshuffle-sse2.c blosclz.c fastcopy.c PROPERTIES COMPILE_OPTIONS -msse2) - set_property( - SOURCE shuffle.c - APPEND PROPERTY COMPILE_OPTIONS -msse2) # Add SIMD flags for the bytedelta filter and Intel (it seems that ARM64 does not need these) set_source_files_properties( ${PROJECT_SOURCE_DIR}/plugins/filters/bytedelta/bytedelta.c @@ -330,16 +324,10 @@ if(COMPILER_SUPPORT_AVX2) set_source_files_properties( shuffle-avx2.c bitshuffle-avx2.c PROPERTIES COMPILE_OPTIONS "/arch:AVX2") - set_property( - SOURCE shuffle.c - APPEND PROPERTY COMPILE_OPTIONS "/arch:AVX2") else() set_source_files_properties( shuffle-avx2.c bitshuffle-avx2.c PROPERTIES COMPILE_OPTIONS -mavx2) - set_property( - SOURCE shuffle.c - APPEND PROPERTY COMPILE_OPTIONS -mavx2) endif() # Define a symbol for the shuffle-dispatch implementation @@ -354,16 +342,10 @@ if(COMPILER_SUPPORT_AVX512) set_source_files_properties( bitshuffle-avx512.c PROPERTIES COMPILE_OPTIONS "/arch:AVX512") - set_property( - SOURCE shuffle.c - APPEND PROPERTY COMPILE_OPTIONS "/arch:AVX512") else() set_source_files_properties( bitshuffle-avx512.c PROPERTIES COMPILE_OPTIONS "-mavx512f;-mavx512bw") - set_property( - SOURCE shuffle.c - APPEND PROPERTY COMPILE_OPTIONS "-mavx512f;-mavx512bw") endif() # Define a symbol for the shuffle-dispatch implementation @@ -377,17 +359,11 @@ if(COMPILER_SUPPORT_NEON) set_source_files_properties( shuffle-neon.c bitshuffle-neon.c PROPERTIES COMPILE_OPTIONS "-flax-vector-conversions") - set_property( - SOURCE shuffle.c - APPEND PROPERTY COMPILE_OPTIONS 
"-flax-vector-conversions") if(CMAKE_SYSTEM_PROCESSOR STREQUAL armv7l) # Only armv7l needs special -mfpu=neon flag; aarch64 doesn't. set_source_files_properties( shuffle-neon.c bitshuffle-neon.c PROPERTIES COMPILE_OPTIONS "-mfpu=neon;-flax-vector-conversions") - set_property( - SOURCE shuffle.c - APPEND PROPERTY COMPILE_OPTIONS "-mfpu=neon;-flax-vector-conversions") endif() # Define a symbol for the shuffle-dispatch implementation # so it knows NEON is supported even though that file is diff --git a/blosc/bitshuffle-altivec.c b/blosc/bitshuffle-altivec.c index 754df757..81e961f5 100644 --- a/blosc/bitshuffle-altivec.c +++ b/blosc/bitshuffle-altivec.c @@ -24,6 +24,7 @@ #include "bitshuffle-altivec.h" #include "bitshuffle-generic.h" +#include /* Make sure ALTIVEC is available for the compilation target and compiler. */ #if defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8) @@ -33,7 +34,6 @@ #include #include -#include /* The next is useful for debugging purposes */ #if 0 @@ -592,4 +592,23 @@ int64_t bshuf_untrans_bit_elem_altivec(const void* in, void* out, const size_t s return count; } -#endif /* defined(__ALTIVEC__) */ + +const bool is_bshuf_altivec = true; + +#else /* defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8) */ + +const bool is_bshuf_altivec = false; + +int64_t +bshuf_trans_bit_elem_altivec(const void* in, void* out, const size_t size, + const size_t elem_size) { + abort(); +} + +int64_t +bshuf_untrans_bit_elem_altivec(const void* in, void* out, const size_t size, + const size_t elem_size) { + abort(); +} + +#endif /* defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8) */ diff --git a/blosc/bitshuffle-altivec.h b/blosc/bitshuffle-altivec.h index 78fdf0bb..695fff96 100644 --- a/blosc/bitshuffle-altivec.h +++ b/blosc/bitshuffle-altivec.h @@ -17,6 +17,12 @@ #include #include +#include + +/** + * ALTIVEC-accelerated bit(un)shuffle routines availability. 
+*/ +extern const bool is_bshuf_altivec; BLOSC_NO_EXPORT int64_t bshuf_trans_byte_elem_altivec(const void* in, void* out, const size_t size, diff --git a/blosc/bitshuffle-avx2.c b/blosc/bitshuffle-avx2.c index f0f3eaab..d00aed58 100644 --- a/blosc/bitshuffle-avx2.c +++ b/blosc/bitshuffle-avx2.c @@ -23,6 +23,7 @@ #include "bitshuffle-avx2.h" #include "bitshuffle-sse2.h" #include "bitshuffle-generic.h" +#include /* Make sure AVX2 is available for the compilation target and compiler. */ #if defined(__AVX2__) @@ -262,4 +263,22 @@ int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size, return count; } +const bool is_bshuf_AVX = true; + +#else /* defined(__AVX2__) */ + +const bool is_bshuf_AVX = false; + +int64_t +bshuf_trans_bit_elem_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + abort(); +} + +int64_t +bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + abort(); +} + #endif /* defined(__AVX2__) */ diff --git a/blosc/bitshuffle-avx2.h b/blosc/bitshuffle-avx2.h index 0b02227a..edd87283 100644 --- a/blosc/bitshuffle-avx2.h +++ b/blosc/bitshuffle-avx2.h @@ -17,6 +17,13 @@ #include #include +#include + +/** + * AVX2-accelerated bit(un)shuffle routines availability. +*/ +extern const bool is_bshuf_AVX; + /** * AVX2-accelerated bitshuffle routine. diff --git a/blosc/bitshuffle-avx512.c b/blosc/bitshuffle-avx512.c index ed3717b3..4b3594db 100644 --- a/blosc/bitshuffle-avx512.c +++ b/blosc/bitshuffle-avx512.c @@ -20,13 +20,15 @@ rights to use. **********************************************************************/ -/* Make sure AVX512 is available for the compilation target and compiler. */ -#if defined(__AVX512F__) && defined (__AVX512BW__) -#include #include "bitshuffle-avx512.h" #include "bitshuffle-avx2.h" #include "bitshuffle-sse2.h" #include "bitshuffle-generic.h" +#include + +/* Make sure AVX512 is available for the compilation target and compiler. 
*/ +#if defined(__AVX512F__) && defined (__AVX512BW__) +#include /* Transpose bits within bytes. */ @@ -158,4 +160,22 @@ int64_t bshuf_untrans_bit_elem_AVX512(const void* in, void* out, const size_t si return count; } -#endif +const bool is_bshuf_AVX512 = true; + +#else /* defined(__AVX512F__) && defined (__AVX512BW__) */ + +const bool is_bshuf_AVX512 = false; + +int64_t +bshuf_trans_bit_elem_AVX512(const void* in, void* out, const size_t size, + const size_t elem_size) { + abort(); +} + +int64_t +bshuf_untrans_bit_elem_AVX512(const void* in, void* out, const size_t size, + const size_t elem_size) { + abort(); +} + +#endif /* defined(__AVX512F__) && defined (__AVX512BW__) */ diff --git a/blosc/bitshuffle-avx512.h b/blosc/bitshuffle-avx512.h index bb805e60..b2f361c7 100644 --- a/blosc/bitshuffle-avx512.h +++ b/blosc/bitshuffle-avx512.h @@ -17,6 +17,12 @@ #include #include +#include + +/** + * AVX512-accelerated bit(un)shuffle routines availability. +*/ +extern const bool is_bshuf_AVX512; BLOSC_NO_EXPORT int64_t bshuf_trans_bit_elem_AVX512(const void* in, void* out, const size_t size, diff --git a/blosc/bitshuffle-neon.c b/blosc/bitshuffle-neon.c index 357810b6..adc40ac5 100644 --- a/blosc/bitshuffle-neon.c +++ b/blosc/bitshuffle-neon.c @@ -22,13 +22,13 @@ #include "bitshuffle-neon.h" #include "bitshuffle-generic.h" +#include /* Make sure NEON is available for the compilation target and compiler. 
*/ #if defined(__ARM_NEON) #include -#include /* The next is useful for debugging purposes */ #if 0 @@ -491,4 +491,20 @@ int64_t bshuf_untrans_bit_elem_NEON(const void* in, void* out, const size_t size return count; } +const bool is_bshuf_NEON = true; + +#else /* defined(__ARM_NEON) */ + +const bool is_bshuf_NEON = false; + +int64_t bshuf_trans_bit_elem_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + abort(); +} + +int64_t bshuf_untrans_bit_elem_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + abort(); +} + #endif /* defined(__ARM_NEON) */ diff --git a/blosc/bitshuffle-neon.h b/blosc/bitshuffle-neon.h index 11d4a91d..17ddc8b4 100644 --- a/blosc/bitshuffle-neon.h +++ b/blosc/bitshuffle-neon.h @@ -17,6 +17,12 @@ #include #include +#include + +/** + * NEON-accelerated bit(un)shuffle routines availability. +*/ +extern const bool is_bshuf_NEON; /** NEON-accelerated bitshuffle routine. diff --git a/blosc/bitshuffle-sse2.c b/blosc/bitshuffle-sse2.c index c6fe5eee..7d95ac47 100644 --- a/blosc/bitshuffle-sse2.c +++ b/blosc/bitshuffle-sse2.c @@ -23,6 +23,7 @@ #include "bitshuffle-sse2.h" #include "bitshuffle-generic.h" +#include /* Make sure SSE2 is available for the compilation target and compiler. 
*/ #if defined(__SSE2__) @@ -481,5 +482,22 @@ int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size, return count; } +const bool is_bshuf_SSE = true; + +#else /* defined(__SSE2__) */ + +const bool is_bshuf_SSE = false; + +int64_t +bshuf_trans_bit_elem_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + abort(); +} + +int64_t +bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + abort(); +} #endif /* defined(__SSE2__) */ diff --git a/blosc/bitshuffle-sse2.h b/blosc/bitshuffle-sse2.h index a008f419..87dc0bec 100644 --- a/blosc/bitshuffle-sse2.h +++ b/blosc/bitshuffle-sse2.h @@ -17,6 +17,12 @@ #include #include +#include + +/** + * SSE2-accelerated bit(un)shuffle routines availability. +*/ +extern const bool is_bshuf_SSE; BLOSC_NO_EXPORT int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size, diff --git a/blosc/shuffle-altivec.c b/blosc/shuffle-altivec.c index 7e9eb5f7..6f928e6c 100644 --- a/blosc/shuffle-altivec.c +++ b/blosc/shuffle-altivec.c @@ -10,6 +10,7 @@ #include "shuffle-altivec.h" #include "shuffle-generic.h" +#include /* Make sure ALTIVEC is available for the compilation target and compiler. 
*/ #if defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8) @@ -423,4 +424,20 @@ unshuffle_altivec(const int32_t bytesoftype, const int32_t blocksize, } } -#endif /* defined(__ALTIVEC__) */ +const bool is_shuffle_altivec = true; + +#else /* defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8) */ + +const bool is_shuffle_altivec = false; + +void shuffle_altivec(const int32_t bytesoftype, const int32_t blocksize, + const uint8_t *_src, uint8_t *_dest) { + abort(); +} + +void unshuffle_altivec(const int32_t bytesoftype, const int32_t blocksize, + const uint8_t *_src, uint8_t *_dest) { + abort(); +} + +#endif /* defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8) */ diff --git a/blosc/shuffle-altivec.h b/blosc/shuffle-altivec.h index 4f716bc1..b9963fed 100644 --- a/blosc/shuffle-altivec.h +++ b/blosc/shuffle-altivec.h @@ -16,6 +16,12 @@ #include "blosc2/blosc2-common.h" #include +#include + +/** + * ALTIVEC-accelerated (un)shuffle routines availability. +*/ +extern const bool is_shuffle_altivec; /** ALTIVEC-accelerated shuffle routine. diff --git a/blosc/shuffle-avx2.c b/blosc/shuffle-avx2.c index 080b7554..499ea162 100644 --- a/blosc/shuffle-avx2.c +++ b/blosc/shuffle-avx2.c @@ -10,13 +10,13 @@ #include "shuffle-avx2.h" #include "shuffle-generic.h" +#include /* Make sure AVX2 is available for the compilation target and compiler. 
*/ #if defined(__AVX2__) #include -#include #include /* The next is useful for debugging purposes */ @@ -746,4 +746,20 @@ unshuffle_avx2(const int32_t bytesoftype, const int32_t blocksize, } } +const bool is_shuffle_avx2 = true; + +#else + +const bool is_shuffle_avx2 = false; + +void shuffle_avx2(const int32_t bytesoftype, const int32_t blocksize, + const uint8_t *_src, uint8_t *_dest) { + abort(); +} + +void unshuffle_avx2(const int32_t bytesoftype, const int32_t blocksize, + const uint8_t *_src, uint8_t *_dest) { + abort(); +} + #endif /* defined(__AVX2__) */ diff --git a/blosc/shuffle-avx2.h b/blosc/shuffle-avx2.h index 4520ee28..fe6f9830 100644 --- a/blosc/shuffle-avx2.h +++ b/blosc/shuffle-avx2.h @@ -16,6 +16,12 @@ #include "blosc2/blosc2-common.h" #include +#include + +/** + * AVX2-accelerated (un)shuffle routines availability. +*/ +extern const bool is_shuffle_avx2; /** AVX2-accelerated shuffle routine. diff --git a/blosc/shuffle-neon.c b/blosc/shuffle-neon.c index c1940c1f..de8bb4b7 100644 --- a/blosc/shuffle-neon.c +++ b/blosc/shuffle-neon.c @@ -11,6 +11,7 @@ #include "shuffle-neon.h" #include "shuffle-generic.h" +#include /* Make sure NEON is available for the compilation target and compiler. 
*/ #if defined(__ARM_NEON) @@ -414,4 +415,20 @@ unshuffle_neon(const int32_t bytesoftype, const int32_t blocksize, } } +const bool is_shuffle_neon = true; + +#else /* defined(__ARM_NEON) */ + +const bool is_shuffle_neon = false; + +void shuffle_neon(const int32_t bytesoftype, const int32_t blocksize, + const uint8_t* const _src, uint8_t* const _dest) { + abort(); +} + +void unshuffle_neon(const int32_t bytesoftype, const int32_t blocksize, + const uint8_t *_src, uint8_t *_dest) { + abort(); +} + #endif /* defined(__ARM_NEON) */ diff --git a/blosc/shuffle-neon.h b/blosc/shuffle-neon.h index 6f42e83a..ca9675fd 100644 --- a/blosc/shuffle-neon.h +++ b/blosc/shuffle-neon.h @@ -18,6 +18,12 @@ #include "blosc2/blosc2-common.h" #include +#include + +/** + * NEON-accelerated (un)shuffle routines availability. +*/ +extern const bool is_shuffle_neon; /** NEON-accelerated shuffle routine. diff --git a/blosc/shuffle-sse2.c b/blosc/shuffle-sse2.c index 4c3388f1..ceb7b378 100644 --- a/blosc/shuffle-sse2.c +++ b/blosc/shuffle-sse2.c @@ -10,6 +10,7 @@ #include "shuffle-sse2.h" #include "shuffle-generic.h" +#include /* Make sure SSE2 is available for the compilation target and compiler. */ #if defined(__SSE2__) @@ -615,4 +616,20 @@ unshuffle_sse2(const int32_t bytesoftype, const int32_t blocksize, } } +const bool is_shuffle_sse2 = true; + +#else /* defined(__SSE2__) */ + +const bool is_shuffle_sse2 = false; + +void shuffle_sse2(const int32_t bytesoftype, const int32_t blocksize, + const uint8_t *_src, uint8_t *_dest) { + abort(); +} + +void unshuffle_sse2(const int32_t bytesoftype, const int32_t blocksize, + const uint8_t *_src, uint8_t *_dest) { + abort(); +} + #endif /* defined(__SSE2__) */ diff --git a/blosc/shuffle-sse2.h b/blosc/shuffle-sse2.h index 7e63a1da..3df110c9 100644 --- a/blosc/shuffle-sse2.h +++ b/blosc/shuffle-sse2.h @@ -16,6 +16,12 @@ #include "blosc2/blosc2-common.h" #include +#include + +/** + * SSE2-accelerated (un)shuffle routines availability. 
+*/ +extern const bool is_shuffle_sse2; /** SSE2-accelerated shuffle routine. diff --git a/blosc/shuffle.c b/blosc/shuffle.c index 8468f98e..af9a765b 100644 --- a/blosc/shuffle.c +++ b/blosc/shuffle.c @@ -13,21 +13,21 @@ /* Include hardware-accelerated shuffle/unshuffle routines based on the target architecture. Note that a target architecture may support more than one type of acceleration!*/ -#if defined(SHUFFLE_USE_AVX512) +#if defined(SHUFFLE_AVX512_ENABLED) #include "bitshuffle-avx512.h" -#endif /* defined(SHUFFLE_USE_AVX512) */ +#endif /* defined(SHUFFLE_AVX512_ENABLED) */ -#if defined(SHUFFLE_USE_AVX2) +#if defined(SHUFFLE_AVX2_ENABLED) #include "shuffle-avx2.h" #include "bitshuffle-avx2.h" -#endif /* defined(SHUFFLE_USE_AVX2) */ +#endif /* defined(SHUFFLE_AVX2_ENABLED) */ -#if defined(SHUFFLE_USE_SSE2) +#if defined(SHUFFLE_SSE2_ENABLED) #include "shuffle-sse2.h" #include "bitshuffle-sse2.h" -#endif /* defined(SHUFFLE_USE_SSE2) */ +#endif /* defined(SHUFFLE_SSE2_ENABLED) */ -#if defined(SHUFFLE_USE_NEON) +#if defined(SHUFFLE_NEON_ENABLED) #if defined(__linux__) #include #ifdef ARM_ASM_HWCAP @@ -36,12 +36,12 @@ #endif #include "shuffle-neon.h" #include "bitshuffle-neon.h" -#endif /* defined(SHUFFLE_USE_NEON) */ +#endif /* defined(SHUFFLE_NEON_ENABLED) */ -#if defined(SHUFFLE_USE_ALTIVEC) +#if defined(SHUFFLE_ALTIVEC_ENABLED) #include "shuffle-altivec.h" #include "bitshuffle-altivec.h" -#endif /* defined(SHUFFLE_USE_ALTIVEC) */ +#endif /* defined(SHUFFLE_ALTIVEC_ENABLED) */ #include "shuffle-generic.h" #include "bitshuffle-generic.h" @@ -91,7 +91,8 @@ typedef enum { /* Detect hardware and set function pointers to the best shuffle/unshuffle implementations supported by the host processor. 
*/ -#if defined(SHUFFLE_USE_AVX2) || defined(SHUFFLE_USE_SSE2) /* Intel/i686 */ +#if (defined(SHUFFLE_AVX2_ENABLED) || defined(SHUFFLE_SSE2_ENABLED)) && \ + (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)) /* Intel/i686 */ #if defined(HAVE_CPU_FEAT_INTRIN) static blosc_cpu_features blosc_get_cpu_features(void) { @@ -261,7 +262,7 @@ static blosc_cpu_features blosc_get_cpu_features(void) { } #endif /* HAVE_CPU_FEAT_INTRIN */ -#elif defined(SHUFFLE_USE_NEON) /* ARM-NEON */ +#elif defined(SHUFFLE_NEON_ENABLED) /* ARM-NEON */ static blosc_cpu_features blosc_get_cpu_features(void) { blosc_cpu_features cpu_features = BLOSC_HAVE_NOTHING; #if defined(__aarch64__) @@ -274,7 +275,7 @@ static blosc_cpu_features blosc_get_cpu_features(void) { #endif return cpu_features; } -#elif defined(SHUFFLE_USE_ALTIVEC) /* POWER9-ALTIVEC preliminary test*/ +#elif defined(SHUFFLE_ALTIVEC_ENABLED) /* POWER9-ALTIVEC preliminary test*/ static blosc_cpu_features blosc_get_cpu_features(void) { blosc_cpu_features cpu_features = BLOSC_HAVE_NOTHING; cpu_features |= BLOSC_HAVE_ALTIVEC; @@ -291,12 +292,12 @@ static blosc_cpu_features blosc_get_cpu_features(void) { return BLOSC_HAVE_NOTHING; } -#endif /* defined(SHUFFLE_USE_AVX2) || defined(SHUFFLE_USE_SSE2) */ +#endif /* (defined(SHUFFLE_AVX2_ENABLED) || defined(SHUFFLE_SSE2_ENABLED)) && x86 target */ static shuffle_implementation_t get_shuffle_implementation(void) { blosc_cpu_features cpu_features = blosc_get_cpu_features(); -#if defined(SHUFFLE_USE_AVX512) - if (cpu_features & BLOSC_HAVE_AVX512) { +#if defined(SHUFFLE_AVX512_ENABLED) + if (cpu_features & BLOSC_HAVE_AVX512 && is_shuffle_avx2 && is_bshuf_AVX512) { shuffle_implementation_t impl_avx512; impl_avx512.name = "avx512"; impl_avx512.shuffle = (shuffle_func)shuffle_avx2; @@ -305,10 +306,10 @@ static shuffle_implementation_t get_shuffle_implementation(void) { impl_avx512.bitunshuffle = (bitunshuffle_func)bshuf_untrans_bit_elem_AVX512; return impl_avx512; } -#endif /* 
defined(SHUFFLE_USE_AVX512) */ +#endif /* defined(SHUFFLE_AVX512_ENABLED) */ -#if defined(SHUFFLE_USE_AVX2) - if (cpu_features & BLOSC_HAVE_AVX2) { +#if defined(SHUFFLE_AVX2_ENABLED) + if (cpu_features & BLOSC_HAVE_AVX2 && is_shuffle_avx2 && is_bshuf_AVX) { shuffle_implementation_t impl_avx2; impl_avx2.name = "avx2"; impl_avx2.shuffle = (shuffle_func)shuffle_avx2; @@ -317,10 +318,10 @@ static shuffle_implementation_t get_shuffle_implementation(void) { impl_avx2.bitunshuffle = (bitunshuffle_func)bshuf_untrans_bit_elem_AVX; return impl_avx2; } -#endif /* defined(SHUFFLE_USE_AVX2) */ +#endif /* defined(SHUFFLE_AVX2_ENABLED) */ -#if defined(SHUFFLE_USE_SSE2) - if (cpu_features & BLOSC_HAVE_SSE2) { +#if defined(SHUFFLE_SSE2_ENABLED) + if (cpu_features & BLOSC_HAVE_SSE2 && is_shuffle_sse2 && is_bshuf_SSE) { shuffle_implementation_t impl_sse2; impl_sse2.name = "sse2"; impl_sse2.shuffle = (shuffle_func)shuffle_sse2; @@ -329,10 +330,10 @@ static shuffle_implementation_t get_shuffle_implementation(void) { impl_sse2.bitunshuffle = (bitunshuffle_func) bshuf_untrans_bit_elem_SSE; return impl_sse2; } -#endif /* defined(SHUFFLE_USE_SSE2) */ +#endif /* defined(SHUFFLE_SSE2_ENABLED) */ -#if defined(SHUFFLE_USE_NEON) - if (cpu_features & BLOSC_HAVE_NEON) { +#if defined(SHUFFLE_NEON_ENABLED) + if (cpu_features & BLOSC_HAVE_NEON && is_shuffle_neon) { /* is_bshuf_NEON is not tested: this branch installs the scalar bitshuffle; add "&& is_bshuf_NEON" if the NEON bitshuffle routines are used */ shuffle_implementation_t impl_neon; impl_neon.name = "neon"; impl_neon.shuffle = (shuffle_func)shuffle_neon; @@ -348,10 +349,10 @@ static shuffle_implementation_t get_shuffle_implementation(void) { impl_neon.bitunshuffle = (bitunshuffle_func)bshuf_untrans_bit_elem_scal; return impl_neon; } -#endif /* defined(SHUFFLE_USE_NEON) */ +#endif /* defined(SHUFFLE_NEON_ENABLED) */ -#if defined(SHUFFLE_USE_ALTIVEC) - if (cpu_features & BLOSC_HAVE_ALTIVEC) { +#if defined(SHUFFLE_ALTIVEC_ENABLED) + if (cpu_features & BLOSC_HAVE_ALTIVEC && is_shuffle_altivec && is_bshuf_altivec) { shuffle_implementation_t 
impl_altivec; impl_altivec.name = "altivec"; impl_altivec.shuffle = (shuffle_func)shuffle_altivec; @@ -360,7 +361,7 @@ static shuffle_implementation_t get_shuffle_implementation(void) { impl_altivec.bitunshuffle = (bitunshuffle_func)bshuf_untrans_bit_elem_altivec; return impl_altivec; } -#endif /* defined(SHUFFLE_USE_ALTIVEC) */ +#endif /* defined(SHUFFLE_ALTIVEC_ENABLED) */ /* Processor doesn't support any of the hardware-accelerated implementations, so use the generic implementation. */ diff --git a/blosc/shuffle.h b/blosc/shuffle.h index c7a7594b..63355b26 100644 --- a/blosc/shuffle.h +++ b/blosc/shuffle.h @@ -23,30 +23,6 @@ #include -/* Toggle hardware-accelerated routines based on SHUFFLE_*_ENABLED macros - and availability on the target architecture. -*/ -#if defined(SHUFFLE_AVX512_ENABLED) && defined(__AVX512F__) && defined (__AVX512BW__) -#define SHUFFLE_USE_AVX512 -#define SHUFFLE_USE_AVX512 -#endif - -#if defined(SHUFFLE_AVX2_ENABLED) && defined(__AVX2__) -#define SHUFFLE_USE_AVX2 -#endif - -#if defined(SHUFFLE_SSE2_ENABLED) && defined(__SSE2__) -#define SHUFFLE_USE_SSE2 -#endif - -#if defined(SHUFFLE_ALTIVEC_ENABLED) && defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8) -#define SHUFFLE_USE_ALTIVEC -#endif - -#if defined(SHUFFLE_NEON_ENABLED) && defined(__ARM_NEON) -#define SHUFFLE_USE_NEON -#endif - /** Primary shuffle and bitshuffle routines. This function dynamically dispatches to the appropriate hardware-accelerated