Add support for Mac M1, and possibly other arm64+NEON targets as well

There was already some NEON support through a separate code
path. This version relies on the sse2neon library to add
NEON support.

Signed-off-by: Csaba Kiraly <csaba.kiraly@gmail.com>
This commit is contained in:
Csaba Kiraly 2022-05-17 13:00:08 +02:00
parent b58d1eaf59
commit a16be41a8d
No known key found for this signature in database
GPG Key ID: 0FE274EE8C95166E
4 changed files with 17 additions and 0 deletions

3
.gitmodules vendored Normal file
View File

@ -0,0 +1,3 @@
[submodule "sse2neon"]
path = sse2neon
url = https://github.com/DLTcollab/sse2neon

View File

@ -107,6 +107,8 @@ static void _cpuid(unsigned int cpu_info[4U], const unsigned int cpu_info_type)
#endif
}
#elif defined(LEO_USE_SSE2NEON)
bool CpuHasSSSE3 = true;
#endif // defined(LEO_TARGET_MOBILE)

View File

@ -186,6 +186,11 @@
// Unroll inner loops 4 times
#define LEO_USE_VECTOR4_OPT
// AArch64 targets (e.g. Apple M1; the check itself is not macOS-specific).
// LEO_USE_SSE2NEON selects the sse2neon-based code path: the SSE intrinsics
// are then provided by sse2neon/sse2neon.h instead of the x86 headers.
// NOTE(review): LEO_TARGET_MOBILE presumably disables the x86 CPUID-based
// feature detection on these targets -- confirm against the rest of the code.
#if defined(__aarch64__)
#define LEO_USE_SSE2NEON
#define LEO_TARGET_MOBILE
#endif
//------------------------------------------------------------------------------
// Debug
@ -256,6 +261,8 @@
// Note: MSVC currently only supports SSSE3 but not AVX2
#include <tmmintrin.h> // SSSE3: _mm_shuffle_epi8
#include <emmintrin.h> // SSE2
#elif defined(LEO_USE_SSE2NEON)
#include "sse2neon/sse2neon.h"
#endif // LEO_TARGET_MOBILE
#if defined(HAVE_ARM_NEON_H)
@ -270,6 +277,8 @@
// Compiler-specific 128-bit SIMD register keyword
#define LEO_M128 uint8x16_t
#define LEO_TRY_NEON
#elif defined(LEO_USE_SSE2NEON)
#define LEO_M128 __m128i
#else
#define LEO_M128 uint64_t
# endif
@ -335,6 +344,8 @@ void InitializeCPUArch();
# endif
// Does CPU support SSSE3?
extern bool CpuHasSSSE3;
#elif defined(LEO_USE_SSE2NEON)
extern bool CpuHasSSSE3;
#endif // LEO_TARGET_MOBILE

1
sse2neon Submodule

@ -0,0 +1 @@
Subproject commit cad518a93b326f0f644b7972d488d04eaa2b0475