Add CMakeLists and fix OS X compat

2017-06-20 22:47:41 -07:00 · 2017-06-20 22:47:41 -07:00 · bf5795fd17
parent dee7d414de
commit bf5795fd17
5 changed files with 58 additions and 11 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -0,0 +1,37 @@
 # Build script for the leopard static library, its benchmark and its
 # experiments program.
 cmake_minimum_required(VERSION 3.7)
 project(leopard)
 # Restrict multi-config generators (Visual Studio, Xcode) to Debug/Release.
 # project() only populates CMAKE_CONFIGURATION_TYPES for those generators, so
 # the guard keeps the variable undefined for single-config generators
 # (Makefiles, Ninja), which would otherwise look multi-config to any code that
 # tests it.
 if(CMAKE_CONFIGURATION_TYPES)
     set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "" FORCE)
 endif()
 # Require C++11 outright instead of letting CMake silently fall back to an
 # older standard when the compiler does not support it.
 set(CMAKE_CXX_STANDARD 11)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(LIB_SOURCE_FILES
         leopard.cpp
         leopard.h
         LeopardCommon.cpp
         LeopardCommon.h
         LeopardFF16.cpp
         LeopardFF16.h
         LeopardFF8.cpp
         LeopardFF8.h)
 set(BENCH_SOURCE_FILES
         tests/benchmark.cpp)
 set(EXPERIMENT_SOURCE_FILES
         tests/experiments.cpp)
 # Default to an optimized build when none was requested. CMAKE_BUILD_TYPE is
 # ignored by multi-config generators, so only default it for single-config ones.
 if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
     set(CMAKE_BUILD_TYPE Release)
 endif()
 # The flags below use GCC/Clang spelling; MSVC keeps CMake's own defaults.
 if(NOT MSVC)
     # Append the warning flags so flags supplied by the user, the toolchain
     # file or the CXXFLAGS environment variable are preserved.
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
     # These replace (rather than extend) CMake's per-configuration defaults.
     set(CMAKE_CXX_FLAGS_DEBUG "-g -O0")
     set(CMAKE_CXX_FLAGS_RELEASE "-O3")
 endif()
 add_library(libleopard STATIC ${LIB_SOURCE_FILES})
 add_executable(bench_leopard ${BENCH_SOURCE_FILES})
 target_link_libraries(bench_leopard PRIVATE libleopard)
 # NOTE(review): experiment_leopard is not linked against libleopard - confirm
 # that tests/experiments.cpp needs nothing from the library.
 add_executable(experiment_leopard ${EXPERIMENT_SOURCE_FILES})
--- a/LeopardCommon.h
+++ b/LeopardCommon.h
@ -153,7 +153,9 @@
 #include "leopard.h"
 #include <stdint.h>
 #ifdef _WIN32
 #include <malloc.h>
 #endif //_WIN32
 #include <vector>
 #include <atomic>
 #include <memory>
--- a/LeopardFF16.cpp
+++ b/LeopardFF16.cpp
@ -381,7 +381,7 @@ static void InitializeMultiplyTables()
        // For each log_m multiplicand:
 #pragma omp parallel for
-        for (int log_m = 0; log_m < kOrder; ++log_m)
+        for (int log_m = 0; log_m < (int)kOrder; ++log_m)
        {
            const Product16Table& lut = Multiply16LUT[log_m];
@ -400,14 +400,16 @@ static void InitializeMultiplyTables()
        return;
    }
 #if defined(LEO_TRY_AVX2)
    if (CpuHasAVX2)
        Multiply256LUT = reinterpret_cast<const Multiply256LUT_t*>(SIMDSafeAllocate(sizeof(Multiply256LUT_t) * kOrder));
    else
 #endif // LEO_TRY_AVX2
        Multiply128LUT = reinterpret_cast<const Multiply128LUT_t*>(SIMDSafeAllocate(sizeof(Multiply128LUT_t) * kOrder));
    // For each value we could multiply by:
 #pragma omp parallel for
-    for (int log_m = 0; log_m < kOrder; ++log_m)
+    for (int log_m = 0; log_m < (int)kOrder; ++log_m)
    {
        // For each 4 bits of the finite field width in bits:
        for (unsigned i = 0, shift = 0; i < 4; ++i, shift += 4)
@ -425,7 +427,9 @@ static void InitializeMultiplyTables()
            const LEO_M128 value_hi = _mm_loadu_si128((LEO_M128*)prod_hi);
            // Store in 128-bit wide table
 #if defined(LEO_TRY_AVX2)
            if (!CpuHasAVX2)
 #endif // LEO_TRY_AVX2
            {
                _mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Lo[i], value_lo);
                _mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Hi[i], value_hi);
@ -1341,9 +1345,6 @@ static void FFT_DIT(
    unsigned dist4 = m, dist = m >> 2;
    for (; dist != 0; dist4 = dist, dist >>= 2)
    {
        const unsigned thread_u = m_truncated / dist4;
        const unsigned thread_v = dist;
        // For each set of dist*4 elements:
 #pragma omp parallel for
        for (int r = 0; r < (int)m_truncated; r += dist4)
@ -1439,8 +1440,6 @@ void ReedSolomonEncode(
    // Handle final partial set of m pieces:
    if (last_count != 0)
    {
        const unsigned i = original_count - last_count;
        data += m;
        skewLUT += m;
@ -1692,7 +1691,7 @@ void ReedSolomonDecode(
    FWHT(error_locations, kOrder, m + original_count);
 #pragma omp parallel for
-    for (int i = 0; i < kOrder; ++i)
+    for (int i = 0; i < (int)kOrder; ++i)
        error_locations[i] = ((unsigned)error_locations[i] * (unsigned)LogWalsh[i]) % kModulus;
    FWHT(error_locations, kOrder, kOrder);
--- a/LeopardFF8.cpp
+++ b/LeopardFF8.cpp
@ -368,9 +368,11 @@ static void InitializeMultiplyTables()
        return;
    }
 #ifdef LEO_TRY_AVX2
    if (CpuHasAVX2)
        Multiply256LUT = reinterpret_cast<const Multiply256LUT_t*>(SIMDSafeAllocate(sizeof(Multiply256LUT_t) * kOrder));
    else
 #endif // LEO_TRY_AVX2
        Multiply128LUT = reinterpret_cast<const Multiply128LUT_t*>(SIMDSafeAllocate(sizeof(Multiply128LUT_t) * kOrder));
    // For each value we could multiply by:
@ -388,7 +390,9 @@ static void InitializeMultiplyTables()
            const LEO_M128 value = _mm_loadu_si128(v_ptr);
            // Store in 128-bit wide table
 #if defined(LEO_TRY_AVX2)
            if (!CpuHasAVX2)
 #endif // LEO_TRY_AVX2
                _mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Value[i], value);
            // Store in 256-bit wide table
@ -1397,6 +1401,7 @@ static void FFT_DIT4(
 {
 #ifdef LEO_INTERLEAVE_BUTTERFLY4_OPT
 #if defined(LEO_TRY_AVX2)
    if (CpuHasAVX2)
    {
        const LEO_M256 t01_lo = _mm256_loadu_si256(&Multiply256LUT[log_m01].Value[0]);
@ -1451,6 +1456,7 @@ static void FFT_DIT4(
        return;
    }
 #endif // LEO_TRY_AVX2
    if (CpuHasSSSE3)
    {
@ -1639,8 +1645,6 @@ void ReedSolomonEncode(
    // Handle final partial set of m pieces:
    if (last_count != 0)
    {
        const unsigned i = original_count - last_count;
        data += m;
        skewLUT += m;
--- a/tests/benchmark.cpp
+++ b/tests/benchmark.cpp
@ -91,7 +91,8 @@ static bool SetCurrentThreadPriority()
 #ifdef _WIN32
    return 0 != ::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_ABOVE_NORMAL);
 #else
-    return -1 != nice(2);
+    // TODO: raise priority via setpriority() on Mac OS X; non-Windows builds currently leave it unchanged
    return true;
 #endif
 }
@ -99,6 +100,10 @@ static bool SetCurrentThreadPriority()
 //------------------------------------------------------------------------------
 // Timing
 #ifndef _WIN32
 #include <sys/time.h>
 #endif
 static uint64_t GetTimeUsec()
 {
 #ifdef _WIN32