Add CMakeLists and fix OS X compat

This commit is contained in:
Christopher Taylor 2017-06-20 22:47:41 -07:00
parent dee7d414de
commit bf5795fd17
5 changed files with 58 additions and 11 deletions

37
CMakeLists.txt Normal file
View File

@ -0,0 +1,37 @@
# Top-level build script for the leopard library and its test programs.
cmake_minimum_required(VERSION 3.7)

# Every listed source is C++, so only the C++ toolchain needs to be enabled.
project(leopard LANGUAGES CXX)

# Limit multi-config generators (e.g. Visual Studio, Xcode) to Debug and
# Release. project() has already populated CMAKE_CONFIGURATION_TYPES in the
# cache for those generators, which is why FORCE is needed to narrow it.
# Single-config generators leave the variable unset; it is not defined for
# them so that "is this generator multi-config?" checks keep working.
if(CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "" FORCE)
endif()

# Build as C++11 and fail at configure time if the compiler cannot provide it,
# instead of silently falling back to an older dialect.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Files that make up the library target. Headers are listed next to their
# sources so that IDE generators show them in the project; CMake does not
# compile them.
set(LIB_SOURCE_FILES
  leopard.cpp
  leopard.h
  LeopardCommon.cpp
  LeopardCommon.h
  LeopardFF16.cpp
  LeopardFF16.h
  LeopardFF8.cpp
  LeopardFF8.h
)

# Single-file programs that live under tests/.
set(BENCH_SOURCE_FILES
  tests/benchmark.cpp
)

set(EXPERIMENT_SOURCE_FILES
  tests/experiments.cpp
)
# Default to an optimized build when no build type was requested.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

# The flags below use GCC/Clang spellings, so they are skipped for MSVC-style
# compilers, which keep CMake's default flags instead.
if(NOT MSVC)
  # Append the warning flags rather than overwrite the variable, so flags
  # supplied by the user or toolchain (CXXFLAGS, -DCMAKE_CXX_FLAGS=...) survive.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")

  # Per-configuration flags. These intentionally replace CMake's defaults.
  # NOTE(review): CMake's default Release flags also define NDEBUG; this
  # override does not, so assert() stays active in Release - confirm intended.
  set(CMAKE_CXX_FLAGS_DEBUG "-g -O0")
  set(CMAKE_CXX_FLAGS_RELEASE "-O3")
endif()
# Static library containing the leopard sources.
add_library(libleopard STATIC ${LIB_SOURCE_FILES})

# Benchmark program, linked against the library. PRIVATE: an executable has no
# consumers, and the keyword form avoids the legacy keyword-less signature.
add_executable(bench_leopard ${BENCH_SOURCE_FILES})
target_link_libraries(bench_leopard PRIVATE libleopard)

# Experiments program. Linked against the library like the benchmark so that
# any library symbols it references resolve; with a static library the linker
# only pulls in the objects that are actually used.
# NOTE(review): presumably tests/experiments.cpp uses the library - confirm.
add_executable(experiment_leopard ${EXPERIMENT_SOURCE_FILES})
target_link_libraries(experiment_leopard PRIVATE libleopard)

View File

@ -153,7 +153,9 @@
#include "leopard.h" #include "leopard.h"
#include <stdint.h> #include <stdint.h>
#ifdef _WIN32
#include <malloc.h> #include <malloc.h>
#endif //_WIN32
#include <vector> #include <vector>
#include <atomic> #include <atomic>
#include <memory> #include <memory>

View File

@ -381,7 +381,7 @@ static void InitializeMultiplyTables()
// For each log_m multiplicand: // For each log_m multiplicand:
#pragma omp parallel for #pragma omp parallel for
for (int log_m = 0; log_m < kOrder; ++log_m) for (int log_m = 0; log_m < (int)kOrder; ++log_m)
{ {
const Product16Table& lut = Multiply16LUT[log_m]; const Product16Table& lut = Multiply16LUT[log_m];
@ -400,14 +400,16 @@ static void InitializeMultiplyTables()
return; return;
} }
#if defined(LEO_TRY_AVX2)
if (CpuHasAVX2) if (CpuHasAVX2)
Multiply256LUT = reinterpret_cast<const Multiply256LUT_t*>(SIMDSafeAllocate(sizeof(Multiply256LUT_t) * kOrder)); Multiply256LUT = reinterpret_cast<const Multiply256LUT_t*>(SIMDSafeAllocate(sizeof(Multiply256LUT_t) * kOrder));
else else
#endif // LEO_TRY_AVX2
Multiply128LUT = reinterpret_cast<const Multiply128LUT_t*>(SIMDSafeAllocate(sizeof(Multiply128LUT_t) * kOrder)); Multiply128LUT = reinterpret_cast<const Multiply128LUT_t*>(SIMDSafeAllocate(sizeof(Multiply128LUT_t) * kOrder));
// For each value we could multiply by: // For each value we could multiply by:
#pragma omp parallel for #pragma omp parallel for
for (int log_m = 0; log_m < kOrder; ++log_m) for (int log_m = 0; log_m < (int)kOrder; ++log_m)
{ {
// For each 4 bits of the finite field width in bits: // For each 4 bits of the finite field width in bits:
for (unsigned i = 0, shift = 0; i < 4; ++i, shift += 4) for (unsigned i = 0, shift = 0; i < 4; ++i, shift += 4)
@ -425,7 +427,9 @@ static void InitializeMultiplyTables()
const LEO_M128 value_hi = _mm_loadu_si128((LEO_M128*)prod_hi); const LEO_M128 value_hi = _mm_loadu_si128((LEO_M128*)prod_hi);
// Store in 128-bit wide table // Store in 128-bit wide table
#if defined(LEO_TRY_AVX2)
if (!CpuHasAVX2) if (!CpuHasAVX2)
#endif // LEO_TRY_AVX2
{ {
_mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Lo[i], value_lo); _mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Lo[i], value_lo);
_mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Hi[i], value_hi); _mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Hi[i], value_hi);
@ -1341,9 +1345,6 @@ static void FFT_DIT(
unsigned dist4 = m, dist = m >> 2; unsigned dist4 = m, dist = m >> 2;
for (; dist != 0; dist4 = dist, dist >>= 2) for (; dist != 0; dist4 = dist, dist >>= 2)
{ {
const unsigned thread_u = m_truncated / dist4;
const unsigned thread_v = dist;
// For each set of dist*4 elements: // For each set of dist*4 elements:
#pragma omp parallel for #pragma omp parallel for
for (int r = 0; r < (int)m_truncated; r += dist4) for (int r = 0; r < (int)m_truncated; r += dist4)
@ -1439,8 +1440,6 @@ void ReedSolomonEncode(
// Handle final partial set of m pieces: // Handle final partial set of m pieces:
if (last_count != 0) if (last_count != 0)
{ {
const unsigned i = original_count - last_count;
data += m; data += m;
skewLUT += m; skewLUT += m;
@ -1692,7 +1691,7 @@ void ReedSolomonDecode(
FWHT(error_locations, kOrder, m + original_count); FWHT(error_locations, kOrder, m + original_count);
#pragma omp parallel for #pragma omp parallel for
for (int i = 0; i < kOrder; ++i) for (int i = 0; i < (int)kOrder; ++i)
error_locations[i] = ((unsigned)error_locations[i] * (unsigned)LogWalsh[i]) % kModulus; error_locations[i] = ((unsigned)error_locations[i] * (unsigned)LogWalsh[i]) % kModulus;
FWHT(error_locations, kOrder, kOrder); FWHT(error_locations, kOrder, kOrder);

View File

@ -368,9 +368,11 @@ static void InitializeMultiplyTables()
return; return;
} }
#ifdef LEO_TRY_AVX2
if (CpuHasAVX2) if (CpuHasAVX2)
Multiply256LUT = reinterpret_cast<const Multiply256LUT_t*>(SIMDSafeAllocate(sizeof(Multiply256LUT_t) * kOrder)); Multiply256LUT = reinterpret_cast<const Multiply256LUT_t*>(SIMDSafeAllocate(sizeof(Multiply256LUT_t) * kOrder));
else else
#endif // LEO_TRY_AVX2
Multiply128LUT = reinterpret_cast<const Multiply128LUT_t*>(SIMDSafeAllocate(sizeof(Multiply128LUT_t) * kOrder)); Multiply128LUT = reinterpret_cast<const Multiply128LUT_t*>(SIMDSafeAllocate(sizeof(Multiply128LUT_t) * kOrder));
// For each value we could multiply by: // For each value we could multiply by:
@ -388,7 +390,9 @@ static void InitializeMultiplyTables()
const LEO_M128 value = _mm_loadu_si128(v_ptr); const LEO_M128 value = _mm_loadu_si128(v_ptr);
// Store in 128-bit wide table // Store in 128-bit wide table
#if defined(LEO_TRY_AVX2)
if (!CpuHasAVX2) if (!CpuHasAVX2)
#endif // LEO_TRY_AVX2
_mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Value[i], value); _mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Value[i], value);
// Store in 256-bit wide table // Store in 256-bit wide table
@ -1397,6 +1401,7 @@ static void FFT_DIT4(
{ {
#ifdef LEO_INTERLEAVE_BUTTERFLY4_OPT #ifdef LEO_INTERLEAVE_BUTTERFLY4_OPT
#if defined(LEO_TRY_AVX2)
if (CpuHasAVX2) if (CpuHasAVX2)
{ {
const LEO_M256 t01_lo = _mm256_loadu_si256(&Multiply256LUT[log_m01].Value[0]); const LEO_M256 t01_lo = _mm256_loadu_si256(&Multiply256LUT[log_m01].Value[0]);
@ -1451,6 +1456,7 @@ static void FFT_DIT4(
return; return;
} }
#endif // LEO_TRY_AVX2
if (CpuHasSSSE3) if (CpuHasSSSE3)
{ {
@ -1639,8 +1645,6 @@ void ReedSolomonEncode(
// Handle final partial set of m pieces: // Handle final partial set of m pieces:
if (last_count != 0) if (last_count != 0)
{ {
const unsigned i = original_count - last_count;
data += m; data += m;
skewLUT += m; skewLUT += m;

View File

@ -91,7 +91,8 @@ static bool SetCurrentThreadPriority()
#ifdef _WIN32 #ifdef _WIN32
return 0 != ::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_ABOVE_NORMAL); return 0 != ::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_ABOVE_NORMAL);
#else #else
return -1 != nice(2); // setpriority on mac os x
return true;
#endif #endif
} }
@ -99,6 +100,10 @@ static bool SetCurrentThreadPriority()
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Timing // Timing
#ifndef _WIN32
#include <sys/time.h>
#endif
static uint64_t GetTimeUsec() static uint64_t GetTimeUsec()
{ {
#ifdef _WIN32 #ifdef _WIN32