mirror of
https://github.com/status-im/leopard.git
synced 2025-02-25 20:25:32 +00:00
Add CMakeLists and fix OS X compat
This commit is contained in:
parent
dee7d414de
commit
bf5795fd17
37
CMakeLists.txt
Normal file
37
CMakeLists.txt
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
# Build script for the leopard static library and its two test executables
# (bench_leopard and experiment_leopard).
cmake_minimum_required(VERSION 3.7)

# Only C++ sources are compiled, so do not require a C toolchain.
project(leopard LANGUAGES CXX)

# Multi-config generators (Visual Studio, Xcode) define CMAKE_CONFIGURATION_TYPES;
# restrict them to the two configurations this project supports.  The variable
# is deliberately left undefined for single-config generators, where defining
# it would make the build look multi-config to other CMake code.
if(CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "" FORCE)
endif()

# Single-config generators (Makefiles, Ninja): default to an optimized build
# when the user did not pick one.  Stored in the cache so the choice is visible
# in ccmake / cmake-gui; an explicit -DCMAKE_BUILD_TYPE=... is never overridden.
if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type: Debug or Release" FORCE)
endif()

# The sources use <atomic> and <memory>, so C++11 is a hard requirement rather
# than a preference.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Per-configuration optimization flags, using the same values as before.  They
# are GCC/Clang spellings, so they are no longer forced onto MSVC, which keeps
# CMake's own defaults.
# NOTE(review): these replace CMake's default Release flags, so -DNDEBUG is not
# defined in Release builds -- confirm that this is intended.
if(NOT MSVC)
  set(CMAKE_CXX_FLAGS_DEBUG "-g -O0")
  set(CMAKE_CXX_FLAGS_RELEASE "-O3")
endif()

# leopard_enable_warnings(<target>)
#
# Adds the project's warning flags to <target> only.  Flags supplied by the
# user or toolchain through CMAKE_CXX_FLAGS / CXXFLAGS are left untouched, and
# nothing is added for MSVC, which does not understand these options.
function(leopard_enable_warnings target)
  if(NOT MSVC)
    target_compile_options(${target} PRIVATE -Wall -Wextra)
  endif()
endfunction()

# Core library.
# NOTE(review): with the default "lib" prefix on Unix-like platforms this target
# name produces liblibleopard.a; the name is kept so existing references to the
# target and its artifact keep working.
add_library(libleopard STATIC
  leopard.cpp
  leopard.h
  LeopardCommon.cpp
  LeopardCommon.h
  LeopardFF16.cpp
  LeopardFF16.h
  LeopardFF8.cpp
  LeopardFF8.h
)
leopard_enable_warnings(libleopard)

# Benchmark executable.
add_executable(bench_leopard
  tests/benchmark.cpp
)
target_link_libraries(bench_leopard PRIVATE libleopard)
leopard_enable_warnings(bench_leopard)

# Experiment executable.
# NOTE(review): linked against the library for consistency with bench_leopard.
# For a static library this is harmless if tests/experiments.cpp turns out not
# to reference any library symbol -- confirm and drop the link if unneeded.
add_executable(experiment_leopard
  tests/experiments.cpp
)
target_link_libraries(experiment_leopard PRIVATE libleopard)
leopard_enable_warnings(experiment_leopard)
|
@ -153,7 +153,9 @@
|
|||||||
#include "leopard.h"
|
#include "leopard.h"
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#ifdef _WIN32
|
||||||
#include <malloc.h>
|
#include <malloc.h>
|
||||||
|
#endif //_WIN32
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
@ -381,7 +381,7 @@ static void InitializeMultiplyTables()
|
|||||||
|
|
||||||
// For each log_m multiplicand:
|
// For each log_m multiplicand:
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (int log_m = 0; log_m < kOrder; ++log_m)
|
for (int log_m = 0; log_m < (int)kOrder; ++log_m)
|
||||||
{
|
{
|
||||||
const Product16Table& lut = Multiply16LUT[log_m];
|
const Product16Table& lut = Multiply16LUT[log_m];
|
||||||
|
|
||||||
@ -400,14 +400,16 @@ static void InitializeMultiplyTables()
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(LEO_TRY_AVX2)
|
||||||
if (CpuHasAVX2)
|
if (CpuHasAVX2)
|
||||||
Multiply256LUT = reinterpret_cast<const Multiply256LUT_t*>(SIMDSafeAllocate(sizeof(Multiply256LUT_t) * kOrder));
|
Multiply256LUT = reinterpret_cast<const Multiply256LUT_t*>(SIMDSafeAllocate(sizeof(Multiply256LUT_t) * kOrder));
|
||||||
else
|
else
|
||||||
|
#endif // LEO_TRY_AVX2
|
||||||
Multiply128LUT = reinterpret_cast<const Multiply128LUT_t*>(SIMDSafeAllocate(sizeof(Multiply128LUT_t) * kOrder));
|
Multiply128LUT = reinterpret_cast<const Multiply128LUT_t*>(SIMDSafeAllocate(sizeof(Multiply128LUT_t) * kOrder));
|
||||||
|
|
||||||
// For each value we could multiply by:
|
// For each value we could multiply by:
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (int log_m = 0; log_m < kOrder; ++log_m)
|
for (int log_m = 0; log_m < (int)kOrder; ++log_m)
|
||||||
{
|
{
|
||||||
// For each 4 bits of the finite field width in bits:
|
// For each 4 bits of the finite field width in bits:
|
||||||
for (unsigned i = 0, shift = 0; i < 4; ++i, shift += 4)
|
for (unsigned i = 0, shift = 0; i < 4; ++i, shift += 4)
|
||||||
@ -425,7 +427,9 @@ static void InitializeMultiplyTables()
|
|||||||
const LEO_M128 value_hi = _mm_loadu_si128((LEO_M128*)prod_hi);
|
const LEO_M128 value_hi = _mm_loadu_si128((LEO_M128*)prod_hi);
|
||||||
|
|
||||||
// Store in 128-bit wide table
|
// Store in 128-bit wide table
|
||||||
|
#if defined(LEO_TRY_AVX2)
|
||||||
if (!CpuHasAVX2)
|
if (!CpuHasAVX2)
|
||||||
|
#endif // LEO_TRY_AVX2
|
||||||
{
|
{
|
||||||
_mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Lo[i], value_lo);
|
_mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Lo[i], value_lo);
|
||||||
_mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Hi[i], value_hi);
|
_mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Hi[i], value_hi);
|
||||||
@ -1341,9 +1345,6 @@ static void FFT_DIT(
|
|||||||
unsigned dist4 = m, dist = m >> 2;
|
unsigned dist4 = m, dist = m >> 2;
|
||||||
for (; dist != 0; dist4 = dist, dist >>= 2)
|
for (; dist != 0; dist4 = dist, dist >>= 2)
|
||||||
{
|
{
|
||||||
const unsigned thread_u = m_truncated / dist4;
|
|
||||||
const unsigned thread_v = dist;
|
|
||||||
|
|
||||||
// For each set of dist*4 elements:
|
// For each set of dist*4 elements:
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (int r = 0; r < (int)m_truncated; r += dist4)
|
for (int r = 0; r < (int)m_truncated; r += dist4)
|
||||||
@ -1439,8 +1440,6 @@ void ReedSolomonEncode(
|
|||||||
// Handle final partial set of m pieces:
|
// Handle final partial set of m pieces:
|
||||||
if (last_count != 0)
|
if (last_count != 0)
|
||||||
{
|
{
|
||||||
const unsigned i = original_count - last_count;
|
|
||||||
|
|
||||||
data += m;
|
data += m;
|
||||||
skewLUT += m;
|
skewLUT += m;
|
||||||
|
|
||||||
@ -1692,7 +1691,7 @@ void ReedSolomonDecode(
|
|||||||
FWHT(error_locations, kOrder, m + original_count);
|
FWHT(error_locations, kOrder, m + original_count);
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (int i = 0; i < kOrder; ++i)
|
for (int i = 0; i < (int)kOrder; ++i)
|
||||||
error_locations[i] = ((unsigned)error_locations[i] * (unsigned)LogWalsh[i]) % kModulus;
|
error_locations[i] = ((unsigned)error_locations[i] * (unsigned)LogWalsh[i]) % kModulus;
|
||||||
|
|
||||||
FWHT(error_locations, kOrder, kOrder);
|
FWHT(error_locations, kOrder, kOrder);
|
||||||
|
@ -368,9 +368,11 @@ static void InitializeMultiplyTables()
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef LEO_TRY_AVX2
|
||||||
if (CpuHasAVX2)
|
if (CpuHasAVX2)
|
||||||
Multiply256LUT = reinterpret_cast<const Multiply256LUT_t*>(SIMDSafeAllocate(sizeof(Multiply256LUT_t) * kOrder));
|
Multiply256LUT = reinterpret_cast<const Multiply256LUT_t*>(SIMDSafeAllocate(sizeof(Multiply256LUT_t) * kOrder));
|
||||||
else
|
else
|
||||||
|
#endif // LEO_TRY_AVX2
|
||||||
Multiply128LUT = reinterpret_cast<const Multiply128LUT_t*>(SIMDSafeAllocate(sizeof(Multiply128LUT_t) * kOrder));
|
Multiply128LUT = reinterpret_cast<const Multiply128LUT_t*>(SIMDSafeAllocate(sizeof(Multiply128LUT_t) * kOrder));
|
||||||
|
|
||||||
// For each value we could multiply by:
|
// For each value we could multiply by:
|
||||||
@ -388,7 +390,9 @@ static void InitializeMultiplyTables()
|
|||||||
const LEO_M128 value = _mm_loadu_si128(v_ptr);
|
const LEO_M128 value = _mm_loadu_si128(v_ptr);
|
||||||
|
|
||||||
// Store in 128-bit wide table
|
// Store in 128-bit wide table
|
||||||
|
#if defined(LEO_TRY_AVX2)
|
||||||
if (!CpuHasAVX2)
|
if (!CpuHasAVX2)
|
||||||
|
#endif // LEO_TRY_AVX2
|
||||||
_mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Value[i], value);
|
_mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Value[i], value);
|
||||||
|
|
||||||
// Store in 256-bit wide table
|
// Store in 256-bit wide table
|
||||||
@ -1397,6 +1401,7 @@ static void FFT_DIT4(
|
|||||||
{
|
{
|
||||||
#ifdef LEO_INTERLEAVE_BUTTERFLY4_OPT
|
#ifdef LEO_INTERLEAVE_BUTTERFLY4_OPT
|
||||||
|
|
||||||
|
#if defined(LEO_TRY_AVX2)
|
||||||
if (CpuHasAVX2)
|
if (CpuHasAVX2)
|
||||||
{
|
{
|
||||||
const LEO_M256 t01_lo = _mm256_loadu_si256(&Multiply256LUT[log_m01].Value[0]);
|
const LEO_M256 t01_lo = _mm256_loadu_si256(&Multiply256LUT[log_m01].Value[0]);
|
||||||
@ -1451,6 +1456,7 @@ static void FFT_DIT4(
|
|||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
#endif // LEO_TRY_AVX2
|
||||||
|
|
||||||
if (CpuHasSSSE3)
|
if (CpuHasSSSE3)
|
||||||
{
|
{
|
||||||
@ -1639,8 +1645,6 @@ void ReedSolomonEncode(
|
|||||||
// Handle final partial set of m pieces:
|
// Handle final partial set of m pieces:
|
||||||
if (last_count != 0)
|
if (last_count != 0)
|
||||||
{
|
{
|
||||||
const unsigned i = original_count - last_count;
|
|
||||||
|
|
||||||
data += m;
|
data += m;
|
||||||
skewLUT += m;
|
skewLUT += m;
|
||||||
|
|
||||||
|
@ -91,7 +91,8 @@ static bool SetCurrentThreadPriority()
|
|||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
return 0 != ::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_ABOVE_NORMAL);
|
return 0 != ::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_ABOVE_NORMAL);
|
||||||
#else
|
#else
|
||||||
return -1 != nice(2);
|
// setpriority on mac os x
|
||||||
|
return true;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -99,6 +100,10 @@ static bool SetCurrentThreadPriority()
|
|||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Timing
|
// Timing
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
#include <sys/time.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
static uint64_t GetTimeUsec()
|
static uint64_t GetTimeUsec()
|
||||||
{
|
{
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
|
Loading…
x
Reference in New Issue
Block a user