Add CMakeLists and fix OS X compat

2025-02-19 17:34:19 +00:00 · 2017-06-20 22:47:41 -07:00 · 2017-06-20 22:47:41 -07:00 · bf5795fd17
commit bf5795fd17
parent dee7d414de
5 changed files with 58 additions and 11 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -0,0 +1,37 @@
+cmake_minimum_required(VERSION 3.7)
+project(leopard)
+
+# Offer only Debug/Release, and only on multi-config generators (VS, Xcode).
+if(CMAKE_CONFIGURATION_TYPES)
+    set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "" FORCE)
+elseif(NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE Release) # single-config default when none was requested
+endif()
+
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# GCC/Clang-style flags: skipped for MSVC, appended so user CXXFLAGS survive.
+if(NOT MSVC)
+    string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra")
+    set(CMAKE_CXX_FLAGS_DEBUG "-g -O0")
+    set(CMAKE_CXX_FLAGS_RELEASE "-O3")
+endif()
+
+# Core static library; headers are listed alongside the sources.
+add_library(libleopard STATIC
+        leopard.cpp
+        leopard.h
+        LeopardCommon.cpp
+        LeopardCommon.h
+        LeopardFF16.cpp
+        LeopardFF16.h
+        LeopardFF8.cpp
+        LeopardFF8.h)
+
+add_executable(bench_leopard tests/benchmark.cpp)
+target_link_libraries(bench_leopard PRIVATE libleopard)
+
+# NOTE(review): built without linking libleopard, as before -- confirm
+# tests/experiments.cpp needs nothing from the library.
+add_executable(experiment_leopard tests/experiments.cpp)
--- a/LeopardCommon.h
+++ b/LeopardCommon.h
@ -153,7 +153,9 @@
 #include "leopard.h"

 #include <stdint.h>
+#ifdef _WIN32
 #include <malloc.h>
+#endif //_WIN32
 #include <vector>
 #include <atomic>
 #include <memory>
--- a/LeopardFF16.cpp
+++ b/LeopardFF16.cpp
@ -381,7 +381,7 @@ static void InitializeMultiplyTables()

        // For each log_m multiplicand:
 #pragma omp parallel for
-        for (int log_m = 0; log_m < kOrder; ++log_m)
+        for (int log_m = 0; log_m < (int)kOrder; ++log_m)
        {
            const Product16Table& lut = Multiply16LUT[log_m];

@ -400,14 +400,16 @@ static void InitializeMultiplyTables()
        return;
    }

+#if defined(LEO_TRY_AVX2)
    if (CpuHasAVX2)
        Multiply256LUT = reinterpret_cast<const Multiply256LUT_t*>(SIMDSafeAllocate(sizeof(Multiply256LUT_t) * kOrder));
    else
+#endif // LEO_TRY_AVX2
        Multiply128LUT = reinterpret_cast<const Multiply128LUT_t*>(SIMDSafeAllocate(sizeof(Multiply128LUT_t) * kOrder));

    // For each value we could multiply by:
 #pragma omp parallel for
-    for (int log_m = 0; log_m < kOrder; ++log_m)
+    for (int log_m = 0; log_m < (int)kOrder; ++log_m)
    {
        // For each 4 bits of the finite field width in bits:
        for (unsigned i = 0, shift = 0; i < 4; ++i, shift += 4)
@ -425,7 +427,9 @@ static void InitializeMultiplyTables()
            const LEO_M128 value_hi = _mm_loadu_si128((LEO_M128*)prod_hi);

            // Store in 128-bit wide table
+#if defined(LEO_TRY_AVX2)
            if (!CpuHasAVX2)
+#endif // LEO_TRY_AVX2
            {
                _mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Lo[i], value_lo);
                _mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Hi[i], value_hi);
@ -1341,9 +1345,6 @@ static void FFT_DIT(
    unsigned dist4 = m, dist = m >> 2;
    for (; dist != 0; dist4 = dist, dist >>= 2)
    {
-        const unsigned thread_u = m_truncated / dist4;
-        const unsigned thread_v = dist;
-
        // For each set of dist*4 elements:
 #pragma omp parallel for
        for (int r = 0; r < (int)m_truncated; r += dist4)
@ -1439,8 +1440,6 @@ void ReedSolomonEncode(
    // Handle final partial set of m pieces:
    if (last_count != 0)
    {
-        const unsigned i = original_count - last_count;
-
        data += m;
        skewLUT += m;

@ -1692,7 +1691,7 @@ void ReedSolomonDecode(
    FWHT(error_locations, kOrder, m + original_count);

 #pragma omp parallel for
-    for (int i = 0; i < kOrder; ++i)
+    for (int i = 0; i < (int)kOrder; ++i)
        error_locations[i] = ((unsigned)error_locations[i] * (unsigned)LogWalsh[i]) % kModulus;

    FWHT(error_locations, kOrder, kOrder);
--- a/LeopardFF8.cpp
+++ b/LeopardFF8.cpp
@ -368,9 +368,11 @@ static void InitializeMultiplyTables()
        return;
    }

+#ifdef LEO_TRY_AVX2
    if (CpuHasAVX2)
        Multiply256LUT = reinterpret_cast<const Multiply256LUT_t*>(SIMDSafeAllocate(sizeof(Multiply256LUT_t) * kOrder));
    else
+#endif // LEO_TRY_AVX2
        Multiply128LUT = reinterpret_cast<const Multiply128LUT_t*>(SIMDSafeAllocate(sizeof(Multiply128LUT_t) * kOrder));

    // For each value we could multiply by:
@ -388,7 +390,9 @@ static void InitializeMultiplyTables()
            const LEO_M128 value = _mm_loadu_si128(v_ptr);

            // Store in 128-bit wide table
+#if defined(LEO_TRY_AVX2)
            if (!CpuHasAVX2)
+#endif // LEO_TRY_AVX2
                _mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Value[i], value);

            // Store in 256-bit wide table
@ -1397,6 +1401,7 @@ static void FFT_DIT4(
 {
 #ifdef LEO_INTERLEAVE_BUTTERFLY4_OPT

+#if defined(LEO_TRY_AVX2)
    if (CpuHasAVX2)
    {
        const LEO_M256 t01_lo = _mm256_loadu_si256(&Multiply256LUT[log_m01].Value[0]);
@ -1451,6 +1456,7 @@ static void FFT_DIT4(

        return;
    }
+#endif // LEO_TRY_AVX2

    if (CpuHasSSSE3)
    {
@ -1639,8 +1645,6 @@ void ReedSolomonEncode(
    // Handle final partial set of m pieces:
    if (last_count != 0)
    {
-        const unsigned i = original_count - last_count;
-
        data += m;
        skewLUT += m;

--- a/tests/benchmark.cpp
+++ b/tests/benchmark.cpp
@ -91,7 +91,8 @@ static bool SetCurrentThreadPriority()
 #ifdef _WIN32
    return 0 != ::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_ABOVE_NORMAL);
 #else
-    return -1 != nice(2);
+    // TODO: call setpriority() on Mac OS X; for now the priority is left unchanged.
+    return true;
 #endif
 }

@ -99,6 +100,10 @@ static bool SetCurrentThreadPriority()
 //------------------------------------------------------------------------------
 // Timing

+#ifndef _WIN32
+#include <sys/time.h>
+#endif
+
 static uint64_t GetTimeUsec()
 {
 #ifdef _WIN32