Add CMakeLists and fix OS X compat

This commit is contained in:
Christopher Taylor 2017-06-20 22:47:41 -07:00
parent dee7d414de
commit bf5795fd17
5 changed files with 58 additions and 11 deletions

37
CMakeLists.txt Normal file
View File

@ -0,0 +1,37 @@
# Build script for the leopard static library and its two test programs
# (bench_leopard and experiment_leopard).
cmake_minimum_required(VERSION 3.7)
project(leopard)

# Multi-config generators (Visual Studio, Xcode) populate
# CMAKE_CONFIGURATION_TYPES during project(); restrict them to Debug/Release.
# The guard keeps single-config generators (Makefiles, Ninja) from receiving a
# cache entry that would make them look like multi-config generators.
if(CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "" FORCE)
endif()

# Single-config generators: default to an optimized build when the user did not
# pick one. CMAKE_BUILD_TYPE is ignored by multi-config generators.
if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

# Build as C++11, and fail at configure time instead of silently falling back
# to an older standard when the compiler cannot provide it.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# The warning and optimization flags below use GCC/Clang spelling, so they are
# only applied to those compilers (AppleClang also matches "Clang").
set(LEOPARD_GNU_LIKE_COMPILER OFF)
if(NOT MSVC AND "${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU|Clang")
  set(LEOPARD_GNU_LIKE_COMPILER ON)
endif()

if(LEOPARD_GNU_LIKE_COMPILER)
  # Per-configuration flags are kept exactly as before for these compilers.
  set(CMAKE_CXX_FLAGS_DEBUG "-g -O0")
  set(CMAKE_CXX_FLAGS_RELEASE "-O3")
endif()

set(LIB_SOURCE_FILES
  leopard.cpp
  leopard.h
  LeopardCommon.cpp
  LeopardCommon.h
  LeopardFF16.cpp
  LeopardFF16.h
  LeopardFF8.cpp
  LeopardFF8.h)
set(BENCH_SOURCE_FILES
  tests/benchmark.cpp)
set(EXPERIMENT_SOURCE_FILES
  tests/experiments.cpp)

# NOTE(review): the target is named "libleopard", so on Unix-like platforms the
# archive is emitted as "liblibleopard.a". The name is left untouched because
# other scripts may already refer to it.
add_library(libleopard STATIC ${LIB_SOURCE_FILES})

add_executable(bench_leopard ${BENCH_SOURCE_FILES})
target_link_libraries(bench_leopard PRIVATE libleopard)

add_executable(experiment_leopard ${EXPERIMENT_SOURCE_FILES})
# NOTE(review): assumed that the experiments may call into the library the same
# way the benchmark does -- confirm. Linking a static library whose symbols are
# not referenced does not change the resulting executable.
target_link_libraries(experiment_leopard PRIVATE libleopard)

# Attach the warnings to the targets rather than overwriting CMAKE_CXX_FLAGS,
# so flags passed by the user (CXXFLAGS, -DCMAKE_CXX_FLAGS=...) are preserved.
if(LEOPARD_GNU_LIKE_COMPILER)
  foreach(leopard_target libleopard bench_leopard experiment_leopard)
    target_compile_options(${leopard_target} PRIVATE -Wall -Wextra)
  endforeach()
endif()

View File

@ -153,7 +153,9 @@
#include "leopard.h"
#include <stdint.h>
#ifdef _WIN32
#include <malloc.h>
#endif //_WIN32
#include <vector>
#include <atomic>
#include <memory>

View File

@ -381,7 +381,7 @@ static void InitializeMultiplyTables()
// For each log_m multiplicand:
#pragma omp parallel for
for (int log_m = 0; log_m < kOrder; ++log_m)
for (int log_m = 0; log_m < (int)kOrder; ++log_m)
{
const Product16Table& lut = Multiply16LUT[log_m];
@ -400,14 +400,16 @@ static void InitializeMultiplyTables()
return;
}
#if defined(LEO_TRY_AVX2)
if (CpuHasAVX2)
Multiply256LUT = reinterpret_cast<const Multiply256LUT_t*>(SIMDSafeAllocate(sizeof(Multiply256LUT_t) * kOrder));
else
#endif // LEO_TRY_AVX2
Multiply128LUT = reinterpret_cast<const Multiply128LUT_t*>(SIMDSafeAllocate(sizeof(Multiply128LUT_t) * kOrder));
// For each value we could multiply by:
#pragma omp parallel for
for (int log_m = 0; log_m < kOrder; ++log_m)
for (int log_m = 0; log_m < (int)kOrder; ++log_m)
{
// For each 4 bits of the finite field width in bits:
for (unsigned i = 0, shift = 0; i < 4; ++i, shift += 4)
@ -425,7 +427,9 @@ static void InitializeMultiplyTables()
const LEO_M128 value_hi = _mm_loadu_si128((LEO_M128*)prod_hi);
// Store in 128-bit wide table
#if defined(LEO_TRY_AVX2)
if (!CpuHasAVX2)
#endif // LEO_TRY_AVX2
{
_mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Lo[i], value_lo);
_mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Hi[i], value_hi);
@ -1341,9 +1345,6 @@ static void FFT_DIT(
unsigned dist4 = m, dist = m >> 2;
for (; dist != 0; dist4 = dist, dist >>= 2)
{
const unsigned thread_u = m_truncated / dist4;
const unsigned thread_v = dist;
// For each set of dist*4 elements:
#pragma omp parallel for
for (int r = 0; r < (int)m_truncated; r += dist4)
@ -1439,8 +1440,6 @@ void ReedSolomonEncode(
// Handle final partial set of m pieces:
if (last_count != 0)
{
const unsigned i = original_count - last_count;
data += m;
skewLUT += m;
@ -1692,7 +1691,7 @@ void ReedSolomonDecode(
FWHT(error_locations, kOrder, m + original_count);
#pragma omp parallel for
for (int i = 0; i < kOrder; ++i)
for (int i = 0; i < (int)kOrder; ++i)
error_locations[i] = ((unsigned)error_locations[i] * (unsigned)LogWalsh[i]) % kModulus;
FWHT(error_locations, kOrder, kOrder);

View File

@ -368,9 +368,11 @@ static void InitializeMultiplyTables()
return;
}
#ifdef LEO_TRY_AVX2
if (CpuHasAVX2)
Multiply256LUT = reinterpret_cast<const Multiply256LUT_t*>(SIMDSafeAllocate(sizeof(Multiply256LUT_t) * kOrder));
else
#endif // LEO_TRY_AVX2
Multiply128LUT = reinterpret_cast<const Multiply128LUT_t*>(SIMDSafeAllocate(sizeof(Multiply128LUT_t) * kOrder));
// For each value we could multiply by:
@ -388,7 +390,9 @@ static void InitializeMultiplyTables()
const LEO_M128 value = _mm_loadu_si128(v_ptr);
// Store in 128-bit wide table
#if defined(LEO_TRY_AVX2)
if (!CpuHasAVX2)
#endif // LEO_TRY_AVX2
_mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Value[i], value);
// Store in 256-bit wide table
@ -1397,6 +1401,7 @@ static void FFT_DIT4(
{
#ifdef LEO_INTERLEAVE_BUTTERFLY4_OPT
#if defined(LEO_TRY_AVX2)
if (CpuHasAVX2)
{
const LEO_M256 t01_lo = _mm256_loadu_si256(&Multiply256LUT[log_m01].Value[0]);
@ -1451,6 +1456,7 @@ static void FFT_DIT4(
return;
}
#endif // LEO_TRY_AVX2
if (CpuHasSSSE3)
{
@ -1639,8 +1645,6 @@ void ReedSolomonEncode(
// Handle final partial set of m pieces:
if (last_count != 0)
{
const unsigned i = original_count - last_count;
data += m;
skewLUT += m;

View File

@ -91,7 +91,8 @@ static bool SetCurrentThreadPriority()
#ifdef _WIN32
return 0 != ::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_ABOVE_NORMAL);
#else
return -1 != nice(2);
// setpriority on mac os x
return true;
#endif
}
@ -99,6 +100,10 @@ static bool SetCurrentThreadPriority()
//------------------------------------------------------------------------------
// Timing
#ifndef _WIN32
#include <sys/time.h>
#endif
static uint64_t GetTimeUsec()
{
#ifdef _WIN32