mirror of https://github.com/status-im/leopard.git
Add support for Mac M1, and possibly other arm64+NEON targets as well
There was already some NEON support, through a separate code path. This version relies on the sse2neon library to add NEON support. Signed-off-by: Csaba Kiraly <csaba.kiraly@gmail.com>
This commit is contained in:
parent
b58d1eaf59
commit
a16be41a8d
|
@ -0,0 +1,3 @@
|
|||
[submodule "sse2neon"]
|
||||
path = sse2neon
|
||||
url = https://github.com/DLTcollab/sse2neon
|
|
@ -107,6 +107,8 @@ static void _cpuid(unsigned int cpu_info[4U], const unsigned int cpu_info_type)
|
|||
#endif
|
||||
}
|
||||
|
||||
#elif defined(LEO_USE_SSE2NEON)
|
||||
bool CpuHasSSSE3 = true;
|
||||
#endif // defined(LEO_TARGET_MOBILE)
|
||||
|
||||
|
||||
|
|
|
@ -186,6 +186,11 @@
|
|||
// Unroll inner loops 4 times
|
||||
#define LEO_USE_VECTOR4_OPT
|
||||
|
||||
// macOS M1 and any other AArch64 target: map the SSE intrinsics to NEON via sse2neon
|
||||
#if defined(__aarch64__)
|
||||
#define LEO_USE_SSE2NEON
|
||||
#define LEO_TARGET_MOBILE
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Debug
|
||||
|
@ -256,6 +261,8 @@
|
|||
// Note: MSVC currently only supports SSSE3 but not AVX2
|
||||
#include <tmmintrin.h> // SSSE3: _mm_shuffle_epi8
|
||||
#include <emmintrin.h> // SSE2
|
||||
#elif defined(LEO_USE_SSE2NEON)
|
||||
#include "sse2neon/sse2neon.h"
|
||||
#endif // LEO_TARGET_MOBILE
|
||||
|
||||
#if defined(HAVE_ARM_NEON_H)
|
||||
|
@ -270,6 +277,8 @@
|
|||
// Compiler-specific 128-bit SIMD register keyword
|
||||
#define LEO_M128 uint8x16_t
|
||||
#define LEO_TRY_NEON
|
||||
#elif defined(LEO_USE_SSE2NEON)
|
||||
#define LEO_M128 __m128i
|
||||
#else
|
||||
#define LEO_M128 uint64_t
|
||||
# endif
|
||||
|
@ -335,6 +344,8 @@ void InitializeCPUArch();
|
|||
# endif
|
||||
// Does CPU support SSSE3?
|
||||
extern bool CpuHasSSSE3;
|
||||
#elif defined(LEO_USE_SSE2NEON)
|
||||
extern bool CpuHasSSSE3;
|
||||
#endif // LEO_TARGET_MOBILE
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Subproject commit cad518a93b326f0f644b7972d488d04eaa2b0475
|
Loading…
Reference in New Issue