Skip to content

Commit

Permalink
Merge pull request #1 from cskiraly/arm64
Browse files Browse the repository at this point in the history
Add support for Apple M1 and other arm64 + NEON architectures
  • Loading branch information
cskiraly authored May 19, 2022
2 parents b58d1ea + a16be41 commit 20eb7c8
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "sse2neon"]
path = sse2neon
url = https://github.com/DLTcollab/sse2neon
2 changes: 2 additions & 0 deletions LeopardCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ static void _cpuid(unsigned int cpu_info[4U], const unsigned int cpu_info_type)
#endif
}

#elif defined(LEO_USE_SSE2NEON)
bool CpuHasSSSE3 = true;
#endif // defined(LEO_TARGET_MOBILE)


Expand Down
11 changes: 11 additions & 0 deletions LeopardCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,11 @@
// Unroll inner loops 4 times
#define LEO_USE_VECTOR4_OPT

// 64-bit ARM targets (any compiler that predefines __aarch64__, e.g. Apple M1):
//  - LEO_USE_SSE2NEON requests the sse2neon compatibility header so the
//    SSE-style __m128i code paths can be built on top of NEON.
//  - LEO_TARGET_MOBILE is set alongside it; presumably this keeps the
//    x86-only intrinsic headers and CPUID probing out of the build
//    (NOTE(review): confirm against the LEO_TARGET_MOBILE guards).
#ifdef __aarch64__
    #define LEO_TARGET_MOBILE
    #define LEO_USE_SSE2NEON
#endif // __aarch64__

//------------------------------------------------------------------------------
// Debug
Expand Down Expand Up @@ -256,6 +261,8 @@
// Note: MSVC currently only supports SSSE3 but not AVX2
#include <tmmintrin.h> // SSSE3: _mm_shuffle_epi8
#include <emmintrin.h> // SSE2
#elif defined(LEO_USE_SSE2NEON)
#include "sse2neon/sse2neon.h"
#endif // LEO_TARGET_MOBILE

#if defined(HAVE_ARM_NEON_H)
Expand All @@ -270,6 +277,8 @@
// Compiler-specific 128-bit SIMD register keyword
#define LEO_M128 uint8x16_t
#define LEO_TRY_NEON
#elif defined(LEO_USE_SSE2NEON)
#define LEO_M128 __m128i
#else
#define LEO_M128 uint64_t
# endif
Expand Down Expand Up @@ -335,6 +344,8 @@ void InitializeCPUArch();
# endif
// Does CPU support SSSE3?
extern bool CpuHasSSSE3;
#elif defined(LEO_USE_SSE2NEON)
extern bool CpuHasSSSE3;
#endif // LEO_TARGET_MOBILE


Expand Down
1 change: 1 addition & 0 deletions sse2neon
Submodule sse2neon added at cad518

0 comments on commit 20eb7c8

Please sign in to comment.