From b15c187c72f0702036a27bef25f97b329538231f Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Tue, 25 Jun 2024 14:46:47 +0200 Subject: [PATCH 1/6] Add stubs and runtime checks for AVX2 shuffle/bitshuffle implementation --- blosc/bitshuffle-avx2.c | 19 +++++++++++++++++++ blosc/bitshuffle-avx2.h | 7 +++++++ blosc/shuffle-avx2.c | 18 +++++++++++++++++- blosc/shuffle-avx2.h | 6 ++++++ blosc/shuffle.c | 2 +- 5 files changed, 50 insertions(+), 2 deletions(-) diff --git a/blosc/bitshuffle-avx2.c b/blosc/bitshuffle-avx2.c index f0f3eaab..d00aed58 100644 --- a/blosc/bitshuffle-avx2.c +++ b/blosc/bitshuffle-avx2.c @@ -23,6 +23,7 @@ #include "bitshuffle-avx2.h" #include "bitshuffle-sse2.h" #include "bitshuffle-generic.h" +#include /* Make sure AVX2 is available for the compilation target and compiler. */ #if defined(__AVX2__) @@ -262,4 +263,22 @@ int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size, return count; } +const bool is_bshuf_AVX = true; + +#else /* defined(__AVX2__) */ + +const bool is_bshuf_AVX = false; + +int64_t +bshuf_trans_bit_elem_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + abort(); +} + +int64_t +bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + abort(); +} + #endif /* defined(__AVX2__) */ diff --git a/blosc/bitshuffle-avx2.h b/blosc/bitshuffle-avx2.h index 0b02227a..edd87283 100644 --- a/blosc/bitshuffle-avx2.h +++ b/blosc/bitshuffle-avx2.h @@ -17,6 +17,13 @@ #include #include +#include + +/** + * AVX2-accelerated bit(un)shuffle routines availability. +*/ +extern const bool is_bshuf_AVX; + /** * AVX2-accelerated bitshuffle routine. diff --git a/blosc/shuffle-avx2.c b/blosc/shuffle-avx2.c index 080b7554..499ea162 100644 --- a/blosc/shuffle-avx2.c +++ b/blosc/shuffle-avx2.c @@ -10,13 +10,13 @@ #include "shuffle-avx2.h" #include "shuffle-generic.h" +#include /* Make sure AVX2 is available for the compilation target and compiler. */ #if defined(__AVX2__) #include -#include #include /* The next is useful for debugging purposes */ @@ -746,4 +746,20 @@ unshuffle_avx2(const int32_t bytesoftype, const int32_t blocksize, } } +const bool is_shuffle_avx2 = true; + +#else + +const bool is_shuffle_avx2 = false; + +void shuffle_avx2(const int32_t bytesoftype, const int32_t blocksize, + const uint8_t *_src, uint8_t *_dest) { + abort(); +} + +void unshuffle_avx2(const int32_t bytesoftype, const int32_t blocksize, + const uint8_t *_src, uint8_t *_dest) { + abort(); +} + #endif /* defined(__AVX2__) */ diff --git a/blosc/shuffle-avx2.h b/blosc/shuffle-avx2.h index 4520ee28..fe6f9830 100644 --- a/blosc/shuffle-avx2.h +++ b/blosc/shuffle-avx2.h @@ -16,6 +16,12 @@ #include "blosc2/blosc2-common.h" #include +#include + +/** + * AVX2-accelerated (un)shuffle routines availability. +*/ +extern const bool is_shuffle_avx2; /** AVX2-accelerated shuffle routine. diff --git a/blosc/shuffle.c b/blosc/shuffle.c index ab9ba055..78e48c21 100644 --- a/blosc/shuffle.c +++ b/blosc/shuffle.c @@ -308,7 +308,7 @@ static shuffle_implementation_t get_shuffle_implementation(void) { #endif /* defined(SHUFFLE_AVX512_ENABLED) */ #if defined(SHUFFLE_AVX2_ENABLED) - if (cpu_features & BLOSC_HAVE_AVX2) { + if (cpu_features & BLOSC_HAVE_AVX2 && is_shuffle_avx2 && is_bshuf_AVX) { shuffle_implementation_t impl_avx2; impl_avx2.name = "avx2"; impl_avx2.shuffle = (shuffle_func)shuffle_avx2; From 06b5da3f6c6528a087e1e73debc20a236cdd8c3c Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Wed, 3 Jul 2024 15:33:09 +0200 Subject: [PATCH 2/6] Add stubs and runtime checks for SSE2 shuffle/bitshuffle implementation --- blosc/bitshuffle-sse2.c | 18 ++++++++++++++++++ blosc/bitshuffle-sse2.h | 6 ++++++ blosc/shuffle-sse2.c | 17 +++++++++++++++++ blosc/shuffle-sse2.h | 6 ++++++ blosc/shuffle.c | 2 +- 5 files changed, 48 insertions(+), 1 deletion(-) diff --git a/blosc/bitshuffle-sse2.c b/blosc/bitshuffle-sse2.c index c6fe5eee..7d95ac47 100644 --- a/blosc/bitshuffle-sse2.c +++ b/blosc/bitshuffle-sse2.c @@ -23,6 +23,7 @@ #include "bitshuffle-sse2.h" #include "bitshuffle-generic.h" +#include /* Make sure SSE2 is available for the compilation target and compiler. */ #if defined(__SSE2__) @@ -481,5 +482,22 @@ int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size, return count; } +const bool is_bshuf_SSE = true; + +#else /* defined(__SSE2__) */ + +const bool is_bshuf_SSE = false; + +int64_t +bshuf_trans_bit_elem_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + abort(); +} + +int64_t +bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + abort(); +} #endif /* defined(__SSE2__) */ diff --git a/blosc/bitshuffle-sse2.h b/blosc/bitshuffle-sse2.h index a008f419..87dc0bec 100644 --- a/blosc/bitshuffle-sse2.h +++ b/blosc/bitshuffle-sse2.h @@ -17,6 +17,12 @@ #include #include +#include + +/** + * SSE2-accelerated bit(un)shuffle routines availability. +*/ +extern const bool is_bshuf_SSE; BLOSC_NO_EXPORT int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size, diff --git a/blosc/shuffle-sse2.c b/blosc/shuffle-sse2.c index 4c3388f1..ceb7b378 100644 --- a/blosc/shuffle-sse2.c +++ b/blosc/shuffle-sse2.c @@ -10,6 +10,7 @@ #include "shuffle-sse2.h" #include "shuffle-generic.h" +#include /* Make sure SSE2 is available for the compilation target and compiler. */ #if defined(__SSE2__) @@ -615,4 +616,20 @@ unshuffle_sse2(const int32_t bytesoftype, const int32_t blocksize, } } +const bool is_shuffle_sse2 = true; + +#else /* defined(__SSE2__) */ + +const bool is_shuffle_sse2 = false; + +void shuffle_sse2(const int32_t bytesoftype, const int32_t blocksize, + const uint8_t *_src, uint8_t *_dest) { + abort(); +} + +void unshuffle_sse2(const int32_t bytesoftype, const int32_t blocksize, + const uint8_t *_src, uint8_t *_dest) { + abort(); +} + #endif /* defined(__SSE2__) */ diff --git a/blosc/shuffle-sse2.h b/blosc/shuffle-sse2.h index 7e63a1da..3df110c9 100644 --- a/blosc/shuffle-sse2.h +++ b/blosc/shuffle-sse2.h @@ -16,6 +16,12 @@ #include "blosc2/blosc2-common.h" #include +#include + +/** + * SSE2-accelerated (un)shuffle routines availability. +*/ +extern const bool is_shuffle_sse2; /** SSE2-accelerated shuffle routine. diff --git a/blosc/shuffle.c b/blosc/shuffle.c index 78e48c21..af55458a 100644 --- a/blosc/shuffle.c +++ b/blosc/shuffle.c @@ -320,7 +320,7 @@ static shuffle_implementation_t get_shuffle_implementation(void) { #endif /* defined(SHUFFLE_AVX2_ENABLED) */ #if defined(SHUFFLE_SSE2_ENABLED) - if (cpu_features & BLOSC_HAVE_SSE2) { + if (cpu_features & BLOSC_HAVE_SSE2 && is_shuffle_sse2 && is_bshuf_SSE) { shuffle_implementation_t impl_sse2; impl_sse2.name = "sse2"; impl_sse2.shuffle = (shuffle_func)shuffle_sse2; From 4817a41a077874bbdb3822ab219e8a58d8b2310a Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Wed, 3 Jul 2024 15:39:03 +0200 Subject: [PATCH 3/6] Add stubs and runtime checks for AVX512 bitshuffle implementation --- blosc/bitshuffle-avx512.c | 28 ++++++++++++++++++++++++---- blosc/bitshuffle-avx512.h | 6 ++++++ blosc/shuffle.c | 2 +- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/blosc/bitshuffle-avx512.c b/blosc/bitshuffle-avx512.c index ed3717b3..4b3594db 100644 --- a/blosc/bitshuffle-avx512.c +++ b/blosc/bitshuffle-avx512.c @@ -20,13 +20,15 @@ rights to use. **********************************************************************/ -/* Make sure AVX512 is available for the compilation target and compiler. */ -#if defined(__AVX512F__) && defined (__AVX512BW__) -#include #include "bitshuffle-avx512.h" #include "bitshuffle-avx2.h" #include "bitshuffle-sse2.h" #include "bitshuffle-generic.h" +#include + +/* Make sure AVX512 is available for the compilation target and compiler. */ +#if defined(__AVX512F__) && defined (__AVX512BW__) +#include /* Transpose bits within bytes. */ @@ -158,4 +160,22 @@ int64_t bshuf_untrans_bit_elem_AVX512(const void* in, void* out, const size_t si return count; } -#endif +const bool is_bshuf_AVX512 = true; + +#else /* defined(__AVX512F__) && defined (__AVX512BW__) */ + +const bool is_bshuf_AVX512 = false; + +int64_t +bshuf_trans_bit_elem_AVX512(const void* in, void* out, const size_t size, + const size_t elem_size) { + abort(); +} + +int64_t +bshuf_untrans_bit_elem_AVX512(const void* in, void* out, const size_t size, + const size_t elem_size) { + abort(); +} + +#endif /* defined(__AVX512F__) && defined (__AVX512BW__) */ diff --git a/blosc/bitshuffle-avx512.h b/blosc/bitshuffle-avx512.h index bb805e60..b2f361c7 100644 --- a/blosc/bitshuffle-avx512.h +++ b/blosc/bitshuffle-avx512.h @@ -17,6 +17,12 @@ #include #include +#include + +/** + * AVX512-accelerated bit(un)shuffle routines availability. +*/ +extern const bool is_bshuf_AVX512; BLOSC_NO_EXPORT int64_t bshuf_trans_bit_elem_AVX512(const void* in, void* out, const size_t size, diff --git a/blosc/shuffle.c b/blosc/shuffle.c index af55458a..9acfae2b 100644 --- a/blosc/shuffle.c +++ b/blosc/shuffle.c @@ -296,7 +296,7 @@ return BLOSC_HAVE_NOTHING; static shuffle_implementation_t get_shuffle_implementation(void) { blosc_cpu_features cpu_features = blosc_get_cpu_features(); #if defined(SHUFFLE_AVX512_ENABLED) - if (cpu_features & BLOSC_HAVE_AVX512) { + if (cpu_features & BLOSC_HAVE_AVX512 && is_shuffle_avx2 && is_bshuf_AVX512) { shuffle_implementation_t impl_avx512; impl_avx512.name = "avx512"; impl_avx512.shuffle = (shuffle_func)shuffle_avx2; From 9afb7b9f1daf39af748599f1650035bb29b89a0c Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Wed, 3 Jul 2024 15:50:55 +0200 Subject: [PATCH 4/6] Add stubs and runtime checks for NEON shuffle/bitshuffle implementation --- blosc/bitshuffle-neon.c | 18 +++++++++++++++++- blosc/bitshuffle-neon.h | 6 ++++++ blosc/shuffle-neon.c | 17 +++++++++++++++++ blosc/shuffle-neon.h | 6 ++++++ blosc/shuffle.c | 2 +- 5 files changed, 47 insertions(+), 2 deletions(-) diff --git a/blosc/bitshuffle-neon.c b/blosc/bitshuffle-neon.c index 357810b6..adc40ac5 100644 --- a/blosc/bitshuffle-neon.c +++ b/blosc/bitshuffle-neon.c @@ -22,13 +22,13 @@ #include "bitshuffle-neon.h" #include "bitshuffle-generic.h" +#include /* Make sure NEON is available for the compilation target and compiler. */ #if defined(__ARM_NEON) #include -#include /* The next is useful for debugging purposes */ #if 0 @@ -491,4 +491,20 @@ int64_t bshuf_untrans_bit_elem_NEON(const void* in, void* out, const size_t size return count; } +const bool is_bshuf_NEON = true; + +#else /* defined(__ARM_NEON) */ + +const bool is_bshuf_NEON = false; + +int64_t bshuf_trans_bit_elem_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + abort(); +} + +int64_t bshuf_untrans_bit_elem_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + abort(); +} + #endif /* defined(__ARM_NEON) */ diff --git a/blosc/bitshuffle-neon.h b/blosc/bitshuffle-neon.h index 11d4a91d..17ddc8b4 100644 --- a/blosc/bitshuffle-neon.h +++ b/blosc/bitshuffle-neon.h @@ -17,6 +17,12 @@ #include #include +#include + +/** + * NEON-accelerated bit(un)shuffle routines availability. +*/ +extern const bool is_bshuf_NEON; /** NEON-accelerated bitshuffle routine. diff --git a/blosc/shuffle-neon.c b/blosc/shuffle-neon.c index c1940c1f..de8bb4b7 100644 --- a/blosc/shuffle-neon.c +++ b/blosc/shuffle-neon.c @@ -11,6 +11,7 @@ #include "shuffle-neon.h" #include "shuffle-generic.h" +#include /* Make sure NEON is available for the compilation target and compiler. */ #if defined(__ARM_NEON) @@ -414,4 +415,20 @@ unshuffle_neon(const int32_t bytesoftype, const int32_t blocksize, } } +const bool is_shuffle_neon = true; + +#else /* defined(__ARM_NEON) */ + +const bool is_shuffle_neon = false; + +void shuffle_neon(const int32_t bytesoftype, const int32_t blocksize, + const uint8_t* const _src, uint8_t* const _dest) { + abort(); +} + +void unshuffle_neon(const int32_t bytesoftype, const int32_t blocksize, + const uint8_t *_src, uint8_t *_dest) { + abort(); +} + #endif /* defined(__ARM_NEON) */ diff --git a/blosc/shuffle-neon.h b/blosc/shuffle-neon.h index 6f42e83a..ca9675fd 100644 --- a/blosc/shuffle-neon.h +++ b/blosc/shuffle-neon.h @@ -18,6 +18,12 @@ #include "blosc2/blosc2-common.h" #include +#include + +/** + * NEON-accelerated (un)shuffle routines availability. +*/ +extern const bool is_shuffle_neon; /** NEON-accelerated shuffle routine. diff --git a/blosc/shuffle.c b/blosc/shuffle.c index 9acfae2b..e65cb3ce 100644 --- a/blosc/shuffle.c +++ b/blosc/shuffle.c @@ -332,7 +332,7 @@ static shuffle_implementation_t get_shuffle_implementation(void) { #endif /* defined(SHUFFLE_SSE2_ENABLED) */ #if defined(SHUFFLE_NEON_ENABLED) - if (cpu_features & BLOSC_HAVE_NEON) { + if (cpu_features & BLOSC_HAVE_NEON && is_shuffle_neon) { // && is_bshuf_NEON is using NEON bitshuffle shuffle_implementation_t impl_neon; impl_neon.name = "neon"; impl_neon.shuffle = (shuffle_func)shuffle_neon; From a7fdcef1c1e64c4264153e14f21c3d075e5af705 Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Wed, 3 Jul 2024 16:00:46 +0200 Subject: [PATCH 5/6] Add stubs and runtime checks for ALTIVEC shuffle/bitshuffle implementation --- blosc/bitshuffle-altivec.c | 23 +++++++++++++++++++++-- blosc/bitshuffle-altivec.h | 6 ++++++ blosc/shuffle-altivec.c | 19 ++++++++++++++++++- blosc/shuffle-altivec.h | 6 ++++++ blosc/shuffle.c | 4 ++-- 5 files changed, 53 insertions(+), 5 deletions(-) diff --git a/blosc/bitshuffle-altivec.c b/blosc/bitshuffle-altivec.c index 754df757..81e961f5 100644 --- a/blosc/bitshuffle-altivec.c +++ b/blosc/bitshuffle-altivec.c @@ -24,6 +24,7 @@ #include "bitshuffle-altivec.h" #include "bitshuffle-generic.h" +#include /* Make sure ALTIVEC is available for the compilation target and compiler. */ #if defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8) @@ -33,7 +34,6 @@ #include #include -#include /* The next is useful for debugging purposes */ #if 0 @@ -592,4 +592,23 @@ int64_t bshuf_untrans_bit_elem_altivec(const void* in, void* out, const size_t s return count; } -#endif /* defined(__ALTIVEC__) */ + +const bool is_bshuf_altivec = true; + +#else /* defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8) */ + +const bool is_bshuf_altivec = false; + +int64_t +bshuf_trans_bit_elem_altivec(const void* in, void* out, const size_t size, + const size_t elem_size) { + abort(); +} + +int64_t +bshuf_untrans_bit_elem_altivec(const void* in, void* out, const size_t size, + const size_t elem_size) { + abort(); +} + +#endif /* defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8) */ diff --git a/blosc/bitshuffle-altivec.h b/blosc/bitshuffle-altivec.h index 78fdf0bb..695fff96 100644 --- a/blosc/bitshuffle-altivec.h +++ b/blosc/bitshuffle-altivec.h @@ -17,6 +17,12 @@ #include #include +#include + +/** + * ALTIVEC-accelerated bit(un)shuffle routines availability. +*/ +extern const bool is_bshuf_altivec; BLOSC_NO_EXPORT int64_t bshuf_trans_byte_elem_altivec(const void* in, void* out, const size_t size, diff --git a/blosc/shuffle-altivec.c b/blosc/shuffle-altivec.c index 7e9eb5f7..6f928e6c 100644 --- a/blosc/shuffle-altivec.c +++ b/blosc/shuffle-altivec.c @@ -10,6 +10,7 @@ #include "shuffle-altivec.h" #include "shuffle-generic.h" +#include /* Make sure ALTIVEC is available for the compilation target and compiler. */ #if defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8) @@ -423,4 +424,20 @@ unshuffle_altivec(const int32_t bytesoftype, const int32_t blocksize, } } -#endif /* defined(__ALTIVEC__) */ +const bool is_shuffle_altivec = true; + +#else /* defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8) */ + +const bool is_shuffle_altivec = false; + +void shuffle_altivec(const int32_t bytesoftype, const int32_t blocksize, + const uint8_t *_src, uint8_t *_dest) { + abort(); +} + +void unshuffle_altivec(const int32_t bytesoftype, const int32_t blocksize, + const uint8_t *_src, uint8_t *_dest) { + abort(); +} + +#endif /* defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8) */ diff --git a/blosc/shuffle-altivec.h b/blosc/shuffle-altivec.h index 4f716bc1..b9963fed 100644 --- a/blosc/shuffle-altivec.h +++ b/blosc/shuffle-altivec.h @@ -16,6 +16,12 @@ #include "blosc2/blosc2-common.h" #include +#include + +/** + * ALTIVEC-accelerated (un)shuffle routines availability. +*/ +extern const bool is_shuffle_altivec; /** ALTIVEC-accelerated shuffle routine. diff --git a/blosc/shuffle.c b/blosc/shuffle.c index e65cb3ce..dcfa5b41 100644 --- a/blosc/shuffle.c +++ b/blosc/shuffle.c @@ -332,7 +332,7 @@ static shuffle_implementation_t get_shuffle_implementation(void) { #endif /* defined(SHUFFLE_SSE2_ENABLED) */ #if defined(SHUFFLE_NEON_ENABLED) - if (cpu_features & BLOSC_HAVE_NEON && is_shuffle_neon) { // && is_bshuf_NEON is using NEON bitshuffle + if (cpu_features & BLOSC_HAVE_NEON && is_shuffle_neon) { // && is_bshuf_NEON if using NEON bitshuffle shuffle_implementation_t impl_neon; impl_neon.name = "neon"; impl_neon.shuffle = (shuffle_func)shuffle_neon; @@ -351,7 +351,7 @@ static shuffle_implementation_t get_shuffle_implementation(void) { #endif /* defined(SHUFFLE_NEON_ENABLED) */ #if defined(SHUFFLE_ALTIVEC_ENABLED) - if (cpu_features & BLOSC_HAVE_ALTIVEC) { + if (cpu_features & BLOSC_HAVE_ALTIVEC && is_shuffle_altivec && is_bshuf_altivec) { shuffle_implementation_t impl_altivec; impl_altivec.name = "altivec"; impl_altivec.shuffle = (shuffle_func)shuffle_altivec; From a0c0b881edf3587db6ee07c9b056378c61b6a710 Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Wed, 3 Jul 2024 17:00:47 +0200 Subject: [PATCH 6/6] Add a check of architecture to select simd probing implementation --- blosc/shuffle.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/blosc/shuffle.c b/blosc/shuffle.c index dcfa5b41..af9a765b 100644 --- a/blosc/shuffle.c +++ b/blosc/shuffle.c @@ -91,7 +91,8 @@ typedef enum { /* Detect hardware and set function pointers to the best shuffle/unshuffle implementations supported by the host processor. */ -#if defined(SHUFFLE_AVX2_ENABLED) || defined(SHUFFLE_SSE2_ENABLED) /* Intel/i686 */ +#if (defined(SHUFFLE_AVX2_ENABLED) || defined(SHUFFLE_SSE2_ENABLED)) && \ + (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)) /* Intel/i686 */ #if defined(HAVE_CPU_FEAT_INTRIN) static blosc_cpu_features blosc_get_cpu_features(void) {