Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable macos universal2 build with AVX2/SSE2 optimisation for x86_64 target #347

Merged
merged 2 commits into from
Oct 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions blosc/bitshuffle-avx2.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@


/* Make sure AVX2 is available for the compilation target and compiler. */
#if !defined(__AVX2__)
#error AVX2 is not supported by the target architecture/platform and/or this compiler.
#endif
#if defined(__AVX2__)

#include <immintrin.h>

Expand Down Expand Up @@ -243,3 +241,5 @@ int64_t blosc_internal_bshuf_untrans_bit_elem_avx2(void* in, void* out, const si

return count;
}

#endif /* defined(__AVX2__) */
6 changes: 3 additions & 3 deletions blosc/bitshuffle-sse2.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@
#include "bitshuffle-sse2.h"

/* Make sure SSE2 is available for the compilation target and compiler. */
#if !defined(__SSE2__)
#error SSE2 is not supported by the target architecture/platform and/or this compiler.
#endif
#if defined(__SSE2__)

#include <emmintrin.h>

Expand Down Expand Up @@ -465,3 +463,5 @@ int64_t blosc_internal_bshuf_untrans_bit_elem_sse2(void* in, void* out, const si

return count;
}

#endif /* defined(__SSE2__) */
6 changes: 3 additions & 3 deletions blosc/shuffle-avx2.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@
#include "shuffle-avx2.h"

/* Make sure AVX2 is available for the compilation target and compiler. */
#if !defined(__AVX2__)
#error AVX2 is not supported by the target architecture/platform and/or this compiler.
#endif
#if defined(__AVX2__)

#include <immintrin.h>

Expand Down Expand Up @@ -755,3 +753,5 @@ blosc_internal_unshuffle_avx2(const size_t bytesoftype, const size_t blocksize,
unshuffle_generic_inline(bytesoftype, vectorizable_bytes, blocksize, _src, _dest);
}
}

#endif /* defined(__AVX2__) */
6 changes: 3 additions & 3 deletions blosc/shuffle-sse2.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@
#include "shuffle-sse2.h"

/* Make sure SSE2 is available for the compilation target and compiler. */
#if !defined(__SSE2__)
#error SSE2 is not supported by the target architecture/platform and/or this compiler.
#endif
#if defined(__SSE2__)

#include <emmintrin.h>

Expand Down Expand Up @@ -624,3 +622,5 @@ blosc_internal_unshuffle_sse2(const size_t bytesoftype, const size_t blocksize,
unshuffle_generic_inline(bytesoftype, vectorizable_bytes, blocksize, _src, _dest);
}
}

#endif /* defined(__SSE2__) */
26 changes: 17 additions & 9 deletions blosc/shuffle.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,26 @@ typedef unsigned char bool;
#define HAVE_CPU_FEAT_INTRIN
#endif

#if defined(SHUFFLE_AVX2_ENABLED) && defined(__AVX2__)
#define SHUFFLE_USE_AVX2
#endif

#if defined(SHUFFLE_SSE2_ENABLED) && defined(__SSE2__)
#define SHUFFLE_USE_SSE2
#endif

/* Include hardware-accelerated shuffle/unshuffle routines based on
the target architecture. Note that a target architecture may support
more than one type of acceleration!*/
#if defined(SHUFFLE_AVX2_ENABLED)
#if defined(SHUFFLE_USE_AVX2)
#include "shuffle-avx2.h"
#include "bitshuffle-avx2.h"
#endif /* defined(SHUFFLE_AVX2_ENABLED) */
#endif /* defined(SHUFFLE_USE_AVX2) */

#if defined(SHUFFLE_SSE2_ENABLED)
#if defined(SHUFFLE_USE_SSE2)
#include "shuffle-sse2.h"
#include "bitshuffle-sse2.h"
#endif /* defined(SHUFFLE_SSE2_ENABLED) */
#endif /* defined(SHUFFLE_USE_SSE2) */


/* Define function pointer types for shuffle/unshuffle routines. */
Expand Down Expand Up @@ -77,7 +85,7 @@ typedef enum {

/* Detect hardware and set function pointers to the best shuffle/unshuffle
implementations supported by the host processor. */
#if defined(SHUFFLE_AVX2_ENABLED) || defined(SHUFFLE_SSE2_ENABLED) /* Intel/i686 */
#if defined(SHUFFLE_USE_AVX2) || defined(SHUFFLE_USE_SSE2) /* Intel/i686 */

/* Disabled the __builtin_cpu_supports() call, as it has issues with
new versions of gcc (like 5.3.1 in forthcoming ubuntu/xenial:
Expand Down Expand Up @@ -316,7 +324,7 @@ static shuffle_implementation_t get_shuffle_implementation(void) {
blosc_cpu_features cpu_features = blosc_get_cpu_features();
shuffle_implementation_t impl_generic;

#if defined(SHUFFLE_AVX2_ENABLED)
#if defined(SHUFFLE_USE_AVX2)
if (cpu_features & BLOSC_HAVE_AVX2) {
shuffle_implementation_t impl_avx2;
impl_avx2.name = "avx2";
Expand All @@ -326,9 +334,9 @@ static shuffle_implementation_t get_shuffle_implementation(void) {
impl_avx2.bitunshuffle = (bitunshuffle_func)blosc_internal_bshuf_untrans_bit_elem_avx2;
return impl_avx2;
}
#endif /* defined(SHUFFLE_AVX2_ENABLED) */
#endif /* defined(SHUFFLE_USE_AVX2) */

#if defined(SHUFFLE_SSE2_ENABLED)
#if defined(SHUFFLE_USE_SSE2)
if (cpu_features & BLOSC_HAVE_SSE2) {
shuffle_implementation_t impl_sse2;
impl_sse2.name = "sse2";
Expand All @@ -338,7 +346,7 @@ static shuffle_implementation_t get_shuffle_implementation(void) {
impl_sse2.bitunshuffle = (bitunshuffle_func)blosc_internal_bshuf_untrans_bit_elem_sse2;
return impl_sse2;
}
#endif /* defined(SHUFFLE_SSE2_ENABLED) */
#endif /* defined(SHUFFLE_USE_SSE2) */

/* Processor doesn't support any of the hardware-accelerated implementations,
so use the generic implementation. */
Expand Down