From a16be41a8d76a677cd3e275e831c26d5b18cfc3b Mon Sep 17 00:00:00 2001 From: Csaba Kiraly Date: Tue, 17 May 2022 13:00:08 +0200 Subject: [PATCH] add support for Mac M1, maybe other arm64+neon as well There was already some NEON support, through a separate code path. This version relies on the sse2neon library to add NEON support. Signed-off-by: Csaba Kiraly --- .gitmodules | 3 +++ LeopardCommon.cpp | 2 ++ LeopardCommon.h | 11 +++++++++++ sse2neon | 1 + 4 files changed, 17 insertions(+) create mode 100644 .gitmodules create mode 160000 sse2neon diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..a11d0ff --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "sse2neon"] + path = sse2neon + url = https://github.com/DLTcollab/sse2neon diff --git a/LeopardCommon.cpp b/LeopardCommon.cpp index 76d87a3..9632915 100644 --- a/LeopardCommon.cpp +++ b/LeopardCommon.cpp @@ -107,6 +107,8 @@ static void _cpuid(unsigned int cpu_info[4U], const unsigned int cpu_info_type) #endif } +#elif defined(LEO_USE_SSE2NEON) +bool CpuHasSSSE3 = true; #endif // defined(LEO_TARGET_MOBILE) diff --git a/LeopardCommon.h b/LeopardCommon.h index 467f829..45bac3d 100644 --- a/LeopardCommon.h +++ b/LeopardCommon.h @@ -186,6 +186,11 @@ // Unroll inner loops 4 times #define LEO_USE_VECTOR4_OPT +// MacOS M1 +#if defined(__aarch64__) + #define LEO_USE_SSE2NEON + #define LEO_TARGET_MOBILE +#endif //------------------------------------------------------------------------------ // Debug @@ -256,6 +261,8 @@ // Note: MSVC currently only supports SSSE3 but not AVX2 #include <tmmintrin.h> // SSSE3: _mm_shuffle_epi8 #include <emmintrin.h> // SSE2 +#elif defined(LEO_USE_SSE2NEON) + #include "sse2neon/sse2neon.h" #endif // LEO_TARGET_MOBILE #if defined(HAVE_ARM_NEON_H) @@ -270,6 +277,8 @@ // Compiler-specific 128-bit SIMD register keyword #define LEO_M128 uint8x16_t #define LEO_TRY_NEON +#elif defined(LEO_USE_SSE2NEON) + #define LEO_M128 __m128i #else #define LEO_M128 uint64_t # endif @@ -335,6 +344,8 @@ void
InitializeCPUArch(); # endif // Does CPU support SSSE3? extern bool CpuHasSSSE3; +#elif defined(LEO_USE_SSE2NEON) + extern bool CpuHasSSSE3; #endif // LEO_TARGET_MOBILE diff --git a/sse2neon b/sse2neon new file mode 160000 index 0000000..cad518a --- /dev/null +++ b/sse2neon @@ -0,0 +1 @@ +Subproject commit cad518a93b326f0f644b7972d488d04eaa2b0475