mirror of https://github.com/status-im/leopard.git
Add CMakeLists and fix OS X compat
This commit is contained in:
parent
dee7d414de
commit
bf5795fd17
|
@ -0,0 +1,37 @@
|
|||
# Build script for the leopard project: one static library (libleopard) and
# two executables built from the tests/ directory.
cmake_minimum_required(VERSION 3.7)
project(leopard LANGUAGES CXX)

# Multi-config generators (Visual Studio, Xcode) populate
# CMAKE_CONFIGURATION_TYPES; restrict them to the two configurations this
# project sets flags for. The cache entry is only rewritten when the generator
# already defined it, so single-config generators are left untouched.
if(CMAKE_CONFIGURATION_TYPES)
    set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "" FORCE)
endif()

# Request C++11 and fail at configure time if the compiler cannot provide it,
# instead of silently falling back to an older standard.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Library sources; headers are listed so they show up in IDE projects.
set(LIB_SOURCE_FILES
    leopard.cpp
    leopard.h
    LeopardCommon.cpp
    LeopardCommon.h
    LeopardFF16.cpp
    LeopardFF16.h
    LeopardFF8.cpp
    LeopardFF8.h)

set(BENCH_SOURCE_FILES
    tests/benchmark.cpp)

set(EXPERIMENT_SOURCE_FILES
    tests/experiments.cpp)

# Default to a Release build, but only for single-config generators:
# multi-config generators ignore CMAKE_BUILD_TYPE and choose the
# configuration at build time.
if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

# The per-configuration flags below use GCC/Clang spelling, so they are not
# applied when building with MSVC (which keeps CMake's defaults).
if(NOT MSVC)
    set(CMAKE_CXX_FLAGS_DEBUG "-g -O0")
    set(CMAKE_CXX_FLAGS_RELEASE "-O3")
endif()

add_library(libleopard STATIC ${LIB_SOURCE_FILES})

# Export the directory holding the library headers listed above so that
# targets linking libleopard can include them without relative paths.
target_include_directories(libleopard PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")

add_executable(bench_leopard ${BENCH_SOURCE_FILES})
target_link_libraries(bench_leopard PRIVATE libleopard)

add_executable(experiment_leopard ${EXPERIMENT_SOURCE_FILES})
# NOTE(review): linked for parity with bench_leopard. Linking a static library
# that ends up unused is harmless; confirm whether tests/experiments.cpp
# actually calls into the library.
target_link_libraries(experiment_leopard PRIVATE libleopard)

# Enable warnings per target rather than overwriting CMAKE_CXX_FLAGS, so flags
# supplied by the user or a toolchain file (e.g. via CXXFLAGS) are preserved.
if(NOT MSVC)
    foreach(leo_target libleopard bench_leopard experiment_leopard)
        target_compile_options(${leo_target} PRIVATE -Wall -Wextra)
    endforeach()
endif()
|
|
@ -153,7 +153,9 @@
|
|||
#include "leopard.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#ifdef _WIN32
|
||||
#include <malloc.h>
|
||||
#endif //_WIN32
|
||||
#include <vector>
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
|
|
|
@ -381,7 +381,7 @@ static void InitializeMultiplyTables()
|
|||
|
||||
// For each log_m multiplicand:
|
||||
#pragma omp parallel for
|
||||
for (int log_m = 0; log_m < kOrder; ++log_m)
|
||||
for (int log_m = 0; log_m < (int)kOrder; ++log_m)
|
||||
{
|
||||
const Product16Table& lut = Multiply16LUT[log_m];
|
||||
|
||||
|
@ -400,14 +400,16 @@ static void InitializeMultiplyTables()
|
|||
return;
|
||||
}
|
||||
|
||||
#if defined(LEO_TRY_AVX2)
|
||||
if (CpuHasAVX2)
|
||||
Multiply256LUT = reinterpret_cast<const Multiply256LUT_t*>(SIMDSafeAllocate(sizeof(Multiply256LUT_t) * kOrder));
|
||||
else
|
||||
#endif // LEO_TRY_AVX2
|
||||
Multiply128LUT = reinterpret_cast<const Multiply128LUT_t*>(SIMDSafeAllocate(sizeof(Multiply128LUT_t) * kOrder));
|
||||
|
||||
// For each value we could multiply by:
|
||||
#pragma omp parallel for
|
||||
for (int log_m = 0; log_m < kOrder; ++log_m)
|
||||
for (int log_m = 0; log_m < (int)kOrder; ++log_m)
|
||||
{
|
||||
// For each 4 bits of the finite field width in bits:
|
||||
for (unsigned i = 0, shift = 0; i < 4; ++i, shift += 4)
|
||||
|
@ -425,7 +427,9 @@ static void InitializeMultiplyTables()
|
|||
const LEO_M128 value_hi = _mm_loadu_si128((LEO_M128*)prod_hi);
|
||||
|
||||
// Store in 128-bit wide table
|
||||
#if defined(LEO_TRY_AVX2)
|
||||
if (!CpuHasAVX2)
|
||||
#endif // LEO_TRY_AVX2
|
||||
{
|
||||
_mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Lo[i], value_lo);
|
||||
_mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Hi[i], value_hi);
|
||||
|
@ -1341,9 +1345,6 @@ static void FFT_DIT(
|
|||
unsigned dist4 = m, dist = m >> 2;
|
||||
for (; dist != 0; dist4 = dist, dist >>= 2)
|
||||
{
|
||||
const unsigned thread_u = m_truncated / dist4;
|
||||
const unsigned thread_v = dist;
|
||||
|
||||
// For each set of dist*4 elements:
|
||||
#pragma omp parallel for
|
||||
for (int r = 0; r < (int)m_truncated; r += dist4)
|
||||
|
@ -1439,8 +1440,6 @@ void ReedSolomonEncode(
|
|||
// Handle final partial set of m pieces:
|
||||
if (last_count != 0)
|
||||
{
|
||||
const unsigned i = original_count - last_count;
|
||||
|
||||
data += m;
|
||||
skewLUT += m;
|
||||
|
||||
|
@ -1692,7 +1691,7 @@ void ReedSolomonDecode(
|
|||
FWHT(error_locations, kOrder, m + original_count);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < kOrder; ++i)
|
||||
for (int i = 0; i < (int)kOrder; ++i)
|
||||
error_locations[i] = ((unsigned)error_locations[i] * (unsigned)LogWalsh[i]) % kModulus;
|
||||
|
||||
FWHT(error_locations, kOrder, kOrder);
|
||||
|
|
|
@ -368,9 +368,11 @@ static void InitializeMultiplyTables()
|
|||
return;
|
||||
}
|
||||
|
||||
#ifdef LEO_TRY_AVX2
|
||||
if (CpuHasAVX2)
|
||||
Multiply256LUT = reinterpret_cast<const Multiply256LUT_t*>(SIMDSafeAllocate(sizeof(Multiply256LUT_t) * kOrder));
|
||||
else
|
||||
#endif // LEO_TRY_AVX2
|
||||
Multiply128LUT = reinterpret_cast<const Multiply128LUT_t*>(SIMDSafeAllocate(sizeof(Multiply128LUT_t) * kOrder));
|
||||
|
||||
// For each value we could multiply by:
|
||||
|
@ -388,7 +390,9 @@ static void InitializeMultiplyTables()
|
|||
const LEO_M128 value = _mm_loadu_si128(v_ptr);
|
||||
|
||||
// Store in 128-bit wide table
|
||||
#if defined(LEO_TRY_AVX2)
|
||||
if (!CpuHasAVX2)
|
||||
#endif // LEO_TRY_AVX2
|
||||
_mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Value[i], value);
|
||||
|
||||
// Store in 256-bit wide table
|
||||
|
@ -1397,6 +1401,7 @@ static void FFT_DIT4(
|
|||
{
|
||||
#ifdef LEO_INTERLEAVE_BUTTERFLY4_OPT
|
||||
|
||||
#if defined(LEO_TRY_AVX2)
|
||||
if (CpuHasAVX2)
|
||||
{
|
||||
const LEO_M256 t01_lo = _mm256_loadu_si256(&Multiply256LUT[log_m01].Value[0]);
|
||||
|
@ -1451,6 +1456,7 @@ static void FFT_DIT4(
|
|||
|
||||
return;
|
||||
}
|
||||
#endif // LEO_TRY_AVX2
|
||||
|
||||
if (CpuHasSSSE3)
|
||||
{
|
||||
|
@ -1639,8 +1645,6 @@ void ReedSolomonEncode(
|
|||
// Handle final partial set of m pieces:
|
||||
if (last_count != 0)
|
||||
{
|
||||
const unsigned i = original_count - last_count;
|
||||
|
||||
data += m;
|
||||
skewLUT += m;
|
||||
|
||||
|
|
|
@ -91,7 +91,8 @@ static bool SetCurrentThreadPriority()
|
|||
#ifdef _WIN32
|
||||
return 0 != ::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_ABOVE_NORMAL);
|
||||
#else
|
||||
return -1 != nice(2);
|
||||
// setpriority on mac os x
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -99,6 +100,10 @@ static bool SetCurrentThreadPriority()
|
|||
//------------------------------------------------------------------------------
|
||||
// Timing
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
|
||||
static uint64_t GetTimeUsec()
|
||||
{
|
||||
#ifdef _WIN32
|
||||
|
|
Loading…
Reference in New Issue