This commit is contained in:
Christopher Taylor 2017-05-26 19:51:30 -07:00
parent 49dbcdc8b1
commit 5cba1989ec
21 changed files with 2458 additions and 8201 deletions

File diff suppressed because it is too large Load Diff

View File

@ -30,42 +30,20 @@
/*
TODO:
+ Refactor software
+ I think it should be split up into several C++ modules
+ Replace GFSymbol with a file data pointer
+ New 16-bit Muladd inner loops
+ Class to contain the (large) muladd tables
+ Preliminary benchmarks for large data!
+ New 8-bit Muladd inner loops
+ Benchmarks for smaller data!
+ Write detailed comments for all the routines
+ Look into getting EncodeL working so we can support smaller data (Ask Lin)
+ Look into using k instead of k2 to speed up decoder (Ask Lin)
+ Avoid performing FFT/IFFT intermediate calculations we're not going to use
+ Benchmarks, fun!
+ Benchmarks for smaller data!
+ New 16-bit Muladd inner loops
+ Benchmarks for large data!
+ Use parallel row ops
+ Add multi-threading to split up long parallelizable calculations
+ Final benchmarks!
+ Finish up documentation
+ Write detailed comments for all the routines
+ Final benchmarks!
+ Release version 1
+ Finish up documentation
Muladd implementation notes:
Specialize for 1-3 rows at a time since often times we're multiplying by
the same (skew) value repeatedly, as the ISA-L library does here:
https://github.com/01org/isa-l/blob/master/erasure_code/gf_3vect_mad_avx.asm#L258
Except we should be doing it for 16-bit Galois Field.
To implement that use the ALTMAP trick from Jerasure:
http://lab.jerasure.org/jerasure/gf-complete/blob/master/src/gf_w16.c#L1140
Except we should also support AVX2 since that is a 40% perf boost, so put
the high and low bytes 32 bytes instead of 16 bytes apart.
Also I think we should go ahead and precompute the multiply tables since
it avoids a bunch of memory lookups for each muladd, and only costs 8 MB.
TBD:
+ Look into getting EncodeL working so we can support smaller data (Ask Lin)
+ Look into using FFT_m instead of FFT_n for decoder
*/
#include <stdint.h>
@ -191,4 +169,57 @@ extern bool CpuHasSSSE3;
#endif // LEO_TARGET_MOBILE
//------------------------------------------------------------------------------
// Portable Intrinsics
#ifdef _MSC_VER
#include <intrin.h>
#endif
// Returns the index (0..31) of the most significant set bit of x
// Precondition: x != 0 (neither code path defines a result for zero)
LEO_FORCE_INLINE unsigned LastNonzeroBit32(unsigned x)
{
#ifdef _MSC_VER
    unsigned long index;
    // Note: Ignoring result because x != 0
    _BitScanReverse(&index, (uint32_t)x);
    return (unsigned)index;
#else
    // Use the `unsigned int` builtin here. __builtin_clzl() counts leading
    // zeros of an `unsigned long`, which is 64 bits wide on LP64 targets, so
    // `31 - clz` would wrap around instead of yielding a bit index.
    // Note: Ignoring the undefined result for 0 because x != 0
    return 31 - (unsigned)__builtin_clz(x);
#endif
}
// Returns next power of two at or above given value.
// NextPow2(1) is 1. NextPow2(0) returns 0, since no shift can produce a
// meaningful answer for it.
LEO_FORCE_INLINE unsigned NextPow2(unsigned n)
{
    // n == 1 would call LastNonzeroBit32(0), which violates its x != 0
    // precondition, so small inputs are answered directly.
    if (n <= 1)
        return n;
    return 2UL << LastNonzeroBit32(n - 1);
}
//------------------------------------------------------------------------------
// XOR Memory
//
// This works for both 8-bit and 16-bit finite fields
// x[] ^= y[]
void xor_mem(
void * LEO_RESTRICT x, const void * LEO_RESTRICT y,
unsigned bytes);
// For i = {0, 1}: x_i[] ^= y_i[]
void xor_mem2(
void * LEO_RESTRICT x_0, const void * LEO_RESTRICT y_0,
void * LEO_RESTRICT x_1, const void * LEO_RESTRICT y_1,
unsigned bytes);
// For i = {0, 1, 2}: x_i[] ^= y_i[]
void xor_mem3(
void * LEO_RESTRICT x_0, const void * LEO_RESTRICT y_0,
void * LEO_RESTRICT x_1, const void * LEO_RESTRICT y_1,
void * LEO_RESTRICT x_2, const void * LEO_RESTRICT y_2,
unsigned bytes);
} // namespace leopard

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -9,7 +9,7 @@
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of LHC-RS nor the names of its contributors may be
* Neither the name of Leopard-RS nor the names of its contributors may be
used to endorse or promote products derived from this software without
specific prior written permission.
@ -27,6 +27,10 @@
*/
#include "LeopardFF8.h"
#include <string.h>
// Define this to enable the optimized version of FWHT()
#define LEO_FF8_FWHT_OPTIMIZED
namespace leopard { namespace ff8 {
@ -34,6 +38,9 @@ namespace leopard { namespace ff8 {
//------------------------------------------------------------------------------
// Datatypes and Constants
// Modulus for field operations
static const ffe_t kModulus = 255;
// LFSR Polynomial that generates the field elements
static const unsigned kPolynomial = 0x11D;
@ -47,9 +54,6 @@ static const ffe_t kBasis[kBits] = {
//------------------------------------------------------------------------------
// Field Operations
// Modulus for field operations
static const ffe_t kModulus = 255;
// z = x + y (mod kModulus)
static inline ffe_t AddMod(const ffe_t a, const ffe_t b)
{
@ -69,50 +73,6 @@ static inline ffe_t SubMod(const ffe_t a, const ffe_t b)
}
//------------------------------------------------------------------------------
// Logarithm Tables
static ffe_t LogLUT[kOrder];
static ffe_t ExpLUT[kOrder];
// Initialize LogLUT[], ExpLUT[]
// Fills the logarithm/exponent table pair that FFEMultiply() reads.
// The steps are order-dependent: ExpLUT[] is used as scratch space first and
// only holds its final contents after the last loop below.
static void InitializeLogarithmTables()
{
// LFSR table generation:
// Step the shift register starting from state 1, XORing in kPolynomial
// whenever the state reaches kOrder. At this stage ExpLUT[state] records the
// step index i at which that state was produced.
unsigned state = 1;
for (unsigned i = 0; i < kModulus; ++i)
{
ExpLUT[state] = static_cast<ffe_t>(i);
state <<= 1;
if (state >= kOrder)
state ^= kPolynomial;
}
// State 0 is assigned explicitly rather than by the loop above
ExpLUT[0] = kModulus;
// Conversion to chosen basis:
// After this loop LogLUT[x] holds the XOR of the kBasis[] entries selected
// by the set bits of x (built by doubling the filled range on each pass).
LogLUT[0] = 0;
for (unsigned i = 0; i < kBits; ++i)
{
const ffe_t basis = kBasis[i];
const unsigned width = static_cast<unsigned>(1UL << i);
for (unsigned j = 0; j < width; ++j)
LogLUT[j + width] = LogLUT[j] ^ basis;
}
// Map each basis-converted value through the step-index table built above
for (unsigned i = 0; i < kOrder; ++i)
LogLUT[i] = ExpLUT[LogLUT[i]];
// Rebuild ExpLUT[] as the reverse mapping of the finished LogLUT[]
for (unsigned i = 0; i < kOrder; ++i)
ExpLUT[LogLUT[i]] = i;
// Entry kModulus mirrors entry 0
ExpLUT[kModulus] = ExpLUT[0];
}
//------------------------------------------------------------------------------
// Fast Walsh-Hadamard Transform (FWHT) (mod kModulus)
@ -248,234 +208,47 @@ void FWHT(ffe_t data[kOrder])
//------------------------------------------------------------------------------
// XOR Memory
// Logarithm Tables
void xor_mem(
void * LEO_RESTRICT vx, const void * LEO_RESTRICT vy,
unsigned bytes)
static ffe_t LogLUT[kOrder];
static ffe_t ExpLUT[kOrder];
// Initialize LogLUT[], ExpLUT[]
static void InitializeLogarithmTables()
{
#if defined(LEO_TRY_AVX2)
if (CpuHasAVX2)
{
LEO_M256 * LEO_RESTRICT x32 = reinterpret_cast<LEO_M256 *>(vx);
const LEO_M256 * LEO_RESTRICT y32 = reinterpret_cast<const LEO_M256 *>(vy);
do
{
const LEO_M256 x0 = _mm256_xor_si256(_mm256_loadu_si256(x32), _mm256_loadu_si256(y32));
const LEO_M256 x1 = _mm256_xor_si256(_mm256_loadu_si256(x32 + 1), _mm256_loadu_si256(y32 + 1));
const LEO_M256 x2 = _mm256_xor_si256(_mm256_loadu_si256(x32 + 2), _mm256_loadu_si256(y32 + 2));
const LEO_M256 x3 = _mm256_xor_si256(_mm256_loadu_si256(x32 + 3), _mm256_loadu_si256(y32 + 3));
_mm256_storeu_si256(x32, x0);
_mm256_storeu_si256(x32 + 1, x1);
_mm256_storeu_si256(x32 + 2, x2);
_mm256_storeu_si256(x32 + 3, x3);
bytes -= 128, x32 += 4, y32 += 4;
} while (bytes >= 128);
if (bytes > 0)
{
const LEO_M256 x0 = _mm256_xor_si256(_mm256_loadu_si256(x32), _mm256_loadu_si256(y32));
const LEO_M256 x1 = _mm256_xor_si256(_mm256_loadu_si256(x32 + 1), _mm256_loadu_si256(y32 + 1));
_mm256_storeu_si256(x32, x0);
_mm256_storeu_si256(x32 + 1, x1);
}
return;
}
#endif // LEO_TRY_AVX2
LEO_M128 * LEO_RESTRICT x16 = reinterpret_cast<LEO_M128 *>(vx);
const LEO_M128 * LEO_RESTRICT y16 = reinterpret_cast<const LEO_M128 *>(vy);
do
{
const LEO_M128 x0 = _mm_xor_si128(_mm_loadu_si128(x16), _mm_loadu_si128(y16));
const LEO_M128 x1 = _mm_xor_si128(_mm_loadu_si128(x16 + 1), _mm_loadu_si128(y16 + 1));
const LEO_M128 x2 = _mm_xor_si128(_mm_loadu_si128(x16 + 2), _mm_loadu_si128(y16 + 2));
const LEO_M128 x3 = _mm_xor_si128(_mm_loadu_si128(x16 + 3), _mm_loadu_si128(y16 + 3));
_mm_storeu_si128(x16, x0);
_mm_storeu_si128(x16 + 1, x1);
_mm_storeu_si128(x16 + 2, x2);
_mm_storeu_si128(x16 + 3, x3);
bytes -= 64, x16 += 4, y16 += 4;
} while (bytes > 0);
}
// LFSR table generation:
void xor_mem2(
void * LEO_RESTRICT vx_0, const void * LEO_RESTRICT vy_0,
void * LEO_RESTRICT vx_1, const void * LEO_RESTRICT vy_1,
unsigned bytes)
{
#if defined(LEO_TRY_AVX2)
if (CpuHasAVX2)
unsigned state = 1;
for (unsigned i = 0; i < kModulus; ++i)
{
LEO_M256 * LEO_RESTRICT x32_0 = reinterpret_cast<LEO_M256 *> (vx_0);
const LEO_M256 * LEO_RESTRICT y32_0 = reinterpret_cast<const LEO_M256 *>(vy_0);
LEO_M256 * LEO_RESTRICT x32_1 = reinterpret_cast<LEO_M256 *> (vx_1);
const LEO_M256 * LEO_RESTRICT y32_1 = reinterpret_cast<const LEO_M256 *>(vy_1);
do
{
const LEO_M256 x0_0 = _mm256_xor_si256(_mm256_loadu_si256(x32_0), _mm256_loadu_si256(y32_0));
const LEO_M256 x1_0 = _mm256_xor_si256(_mm256_loadu_si256(x32_0 + 1), _mm256_loadu_si256(y32_0 + 1));
const LEO_M256 x2_0 = _mm256_xor_si256(_mm256_loadu_si256(x32_0 + 2), _mm256_loadu_si256(y32_0 + 2));
const LEO_M256 x3_0 = _mm256_xor_si256(_mm256_loadu_si256(x32_0 + 3), _mm256_loadu_si256(y32_0 + 3));
const LEO_M256 x0_1 = _mm256_xor_si256(_mm256_loadu_si256(x32_1), _mm256_loadu_si256(y32_1));
const LEO_M256 x1_1 = _mm256_xor_si256(_mm256_loadu_si256(x32_1 + 1), _mm256_loadu_si256(y32_1 + 1));
const LEO_M256 x2_1 = _mm256_xor_si256(_mm256_loadu_si256(x32_1 + 2), _mm256_loadu_si256(y32_1 + 2));
const LEO_M256 x3_1 = _mm256_xor_si256(_mm256_loadu_si256(x32_1 + 3), _mm256_loadu_si256(y32_1 + 3));
_mm256_storeu_si256(x32_0, x0_0);
_mm256_storeu_si256(x32_0 + 1, x1_0);
_mm256_storeu_si256(x32_0 + 2, x2_0);
_mm256_storeu_si256(x32_0 + 3, x3_0);
_mm256_storeu_si256(x32_1, x0_1);
_mm256_storeu_si256(x32_1 + 1, x1_1);
_mm256_storeu_si256(x32_1 + 2, x2_1);
_mm256_storeu_si256(x32_1 + 3, x3_1);
x32_0 += 4, y32_0 += 4;
x32_1 += 4, y32_1 += 4;
bytes -= 128;
} while (bytes >= 128);
if (bytes > 0)
{
const LEO_M256 x0_0 = _mm256_xor_si256(_mm256_loadu_si256(x32_0), _mm256_loadu_si256(y32_0));
const LEO_M256 x1_0 = _mm256_xor_si256(_mm256_loadu_si256(x32_0 + 1), _mm256_loadu_si256(y32_0 + 1));
const LEO_M256 x0_1 = _mm256_xor_si256(_mm256_loadu_si256(x32_1), _mm256_loadu_si256(y32_1));
const LEO_M256 x1_1 = _mm256_xor_si256(_mm256_loadu_si256(x32_1 + 1), _mm256_loadu_si256(y32_1 + 1));
_mm256_storeu_si256(x32_0, x0_0);
_mm256_storeu_si256(x32_0 + 1, x1_0);
_mm256_storeu_si256(x32_1, x0_1);
_mm256_storeu_si256(x32_1 + 1, x1_1);
}
return;
ExpLUT[state] = static_cast<ffe_t>(i);
state <<= 1;
if (state >= kOrder)
state ^= kPolynomial;
}
#endif // LEO_TRY_AVX2
LEO_M128 * LEO_RESTRICT x16_0 = reinterpret_cast<LEO_M128 *> (vx_0);
const LEO_M128 * LEO_RESTRICT y16_0 = reinterpret_cast<const LEO_M128 *>(vy_0);
LEO_M128 * LEO_RESTRICT x16_1 = reinterpret_cast<LEO_M128 *> (vx_1);
const LEO_M128 * LEO_RESTRICT y16_1 = reinterpret_cast<const LEO_M128 *>(vy_1);
do
{
const LEO_M128 x0_0 = _mm_xor_si128(_mm_loadu_si128(x16_0), _mm_loadu_si128(y16_0));
const LEO_M128 x1_0 = _mm_xor_si128(_mm_loadu_si128(x16_0 + 1), _mm_loadu_si128(y16_0 + 1));
const LEO_M128 x2_0 = _mm_xor_si128(_mm_loadu_si128(x16_0 + 2), _mm_loadu_si128(y16_0 + 2));
const LEO_M128 x3_0 = _mm_xor_si128(_mm_loadu_si128(x16_0 + 3), _mm_loadu_si128(y16_0 + 3));
const LEO_M128 x0_1 = _mm_xor_si128(_mm_loadu_si128(x16_1), _mm_loadu_si128(y16_1));
const LEO_M128 x1_1 = _mm_xor_si128(_mm_loadu_si128(x16_1 + 1), _mm_loadu_si128(y16_1 + 1));
const LEO_M128 x2_1 = _mm_xor_si128(_mm_loadu_si128(x16_1 + 2), _mm_loadu_si128(y16_1 + 2));
const LEO_M128 x3_1 = _mm_xor_si128(_mm_loadu_si128(x16_1 + 3), _mm_loadu_si128(y16_1 + 3));
_mm_storeu_si128(x16_0, x0_0);
_mm_storeu_si128(x16_0 + 1, x1_0);
_mm_storeu_si128(x16_0 + 2, x2_0);
_mm_storeu_si128(x16_0 + 3, x3_0);
_mm_storeu_si128(x16_1, x0_1);
_mm_storeu_si128(x16_1 + 1, x1_1);
_mm_storeu_si128(x16_1 + 2, x2_1);
_mm_storeu_si128(x16_1 + 3, x3_1);
x16_0 += 4, y16_0 += 4;
x16_1 += 4, y16_1 += 4;
bytes -= 64;
} while (bytes > 0);
}
ExpLUT[0] = kModulus;
void xor_mem3(
void * LEO_RESTRICT vx_0, const void * LEO_RESTRICT vy_0,
void * LEO_RESTRICT vx_1, const void * LEO_RESTRICT vy_1,
void * LEO_RESTRICT vx_2, const void * LEO_RESTRICT vy_2,
unsigned bytes)
{
#if defined(LEO_TRY_AVX2)
if (CpuHasAVX2)
// Conversion to chosen basis:
LogLUT[0] = 0;
for (unsigned i = 0; i < kBits; ++i)
{
LEO_M256 * LEO_RESTRICT x32_0 = reinterpret_cast<LEO_M256 *> (vx_0);
const LEO_M256 * LEO_RESTRICT y32_0 = reinterpret_cast<const LEO_M256 *>(vy_0);
LEO_M256 * LEO_RESTRICT x32_1 = reinterpret_cast<LEO_M256 *> (vx_1);
const LEO_M256 * LEO_RESTRICT y32_1 = reinterpret_cast<const LEO_M256 *>(vy_1);
LEO_M256 * LEO_RESTRICT x32_2 = reinterpret_cast<LEO_M256 *> (vx_2);
const LEO_M256 * LEO_RESTRICT y32_2 = reinterpret_cast<const LEO_M256 *>(vy_2);
do
{
const LEO_M256 x0_0 = _mm256_xor_si256(_mm256_loadu_si256(x32_0), _mm256_loadu_si256(y32_0));
const LEO_M256 x1_0 = _mm256_xor_si256(_mm256_loadu_si256(x32_0 + 1), _mm256_loadu_si256(y32_0 + 1));
const LEO_M256 x2_0 = _mm256_xor_si256(_mm256_loadu_si256(x32_0 + 2), _mm256_loadu_si256(y32_0 + 2));
const LEO_M256 x3_0 = _mm256_xor_si256(_mm256_loadu_si256(x32_0 + 3), _mm256_loadu_si256(y32_0 + 3));
const LEO_M256 x0_1 = _mm256_xor_si256(_mm256_loadu_si256(x32_1), _mm256_loadu_si256(y32_1));
const LEO_M256 x1_1 = _mm256_xor_si256(_mm256_loadu_si256(x32_1 + 1), _mm256_loadu_si256(y32_1 + 1));
const LEO_M256 x2_1 = _mm256_xor_si256(_mm256_loadu_si256(x32_1 + 2), _mm256_loadu_si256(y32_1 + 2));
const LEO_M256 x3_1 = _mm256_xor_si256(_mm256_loadu_si256(x32_1 + 3), _mm256_loadu_si256(y32_1 + 3));
const LEO_M256 x0_2 = _mm256_xor_si256(_mm256_loadu_si256(x32_2), _mm256_loadu_si256(y32_2));
const LEO_M256 x1_2 = _mm256_xor_si256(_mm256_loadu_si256(x32_2 + 1), _mm256_loadu_si256(y32_2 + 1));
const LEO_M256 x2_2 = _mm256_xor_si256(_mm256_loadu_si256(x32_2 + 2), _mm256_loadu_si256(y32_2 + 2));
const LEO_M256 x3_2 = _mm256_xor_si256(_mm256_loadu_si256(x32_2 + 3), _mm256_loadu_si256(y32_2 + 3));
_mm256_storeu_si256(x32_0, x0_0);
_mm256_storeu_si256(x32_0 + 1, x1_0);
_mm256_storeu_si256(x32_0 + 2, x2_0);
_mm256_storeu_si256(x32_0 + 3, x3_0);
_mm256_storeu_si256(x32_1, x0_1);
_mm256_storeu_si256(x32_1 + 1, x1_1);
_mm256_storeu_si256(x32_1 + 2, x2_1);
_mm256_storeu_si256(x32_1 + 3, x3_1);
_mm256_storeu_si256(x32_2, x0_2);
_mm256_storeu_si256(x32_2 + 1, x1_2);
_mm256_storeu_si256(x32_2 + 2, x2_2);
_mm256_storeu_si256(x32_2 + 3, x3_2);
x32_0 += 4, y32_0 += 4;
x32_1 += 4, y32_1 += 4;
x32_2 += 4, y32_2 += 4;
bytes -= 128;
} while (bytes >= 128);
if (bytes > 0)
{
const LEO_M256 x0_0 = _mm256_xor_si256(_mm256_loadu_si256(x32_0), _mm256_loadu_si256(y32_0));
const LEO_M256 x1_0 = _mm256_xor_si256(_mm256_loadu_si256(x32_0 + 1), _mm256_loadu_si256(y32_0 + 1));
const LEO_M256 x0_1 = _mm256_xor_si256(_mm256_loadu_si256(x32_1), _mm256_loadu_si256(y32_1));
const LEO_M256 x1_1 = _mm256_xor_si256(_mm256_loadu_si256(x32_1 + 1), _mm256_loadu_si256(y32_1 + 1));
const LEO_M256 x0_2 = _mm256_xor_si256(_mm256_loadu_si256(x32_2), _mm256_loadu_si256(y32_2));
const LEO_M256 x1_2 = _mm256_xor_si256(_mm256_loadu_si256(x32_2 + 1), _mm256_loadu_si256(y32_2 + 1));
_mm256_storeu_si256(x32_0, x0_0);
_mm256_storeu_si256(x32_0 + 1, x1_0);
_mm256_storeu_si256(x32_1, x0_1);
_mm256_storeu_si256(x32_1 + 1, x1_1);
_mm256_storeu_si256(x32_2, x0_2);
_mm256_storeu_si256(x32_2 + 1, x1_2);
}
return;
const ffe_t basis = kBasis[i];
const unsigned width = static_cast<unsigned>(1UL << i);
for (unsigned j = 0; j < width; ++j)
LogLUT[j + width] = LogLUT[j] ^ basis;
}
#endif // LEO_TRY_AVX2
LEO_M128 * LEO_RESTRICT x16_0 = reinterpret_cast<LEO_M128 *> (vx_0);
const LEO_M128 * LEO_RESTRICT y16_0 = reinterpret_cast<const LEO_M128 *>(vy_0);
LEO_M128 * LEO_RESTRICT x16_1 = reinterpret_cast<LEO_M128 *> (vx_1);
const LEO_M128 * LEO_RESTRICT y16_1 = reinterpret_cast<const LEO_M128 *>(vy_1);
LEO_M128 * LEO_RESTRICT x16_2 = reinterpret_cast<LEO_M128 *> (vx_2);
const LEO_M128 * LEO_RESTRICT y16_2 = reinterpret_cast<const LEO_M128 *>(vy_2);
do
{
const LEO_M128 x0_0 = _mm_xor_si128(_mm_loadu_si128(x16_0), _mm_loadu_si128(y16_0));
const LEO_M128 x1_0 = _mm_xor_si128(_mm_loadu_si128(x16_0 + 1), _mm_loadu_si128(y16_0 + 1));
const LEO_M128 x2_0 = _mm_xor_si128(_mm_loadu_si128(x16_0 + 2), _mm_loadu_si128(y16_0 + 2));
const LEO_M128 x3_0 = _mm_xor_si128(_mm_loadu_si128(x16_0 + 3), _mm_loadu_si128(y16_0 + 3));
const LEO_M128 x0_1 = _mm_xor_si128(_mm_loadu_si128(x16_1), _mm_loadu_si128(y16_1));
const LEO_M128 x1_1 = _mm_xor_si128(_mm_loadu_si128(x16_1 + 1), _mm_loadu_si128(y16_1 + 1));
const LEO_M128 x2_1 = _mm_xor_si128(_mm_loadu_si128(x16_1 + 2), _mm_loadu_si128(y16_1 + 2));
const LEO_M128 x3_1 = _mm_xor_si128(_mm_loadu_si128(x16_1 + 3), _mm_loadu_si128(y16_1 + 3));
const LEO_M128 x0_2 = _mm_xor_si128(_mm_loadu_si128(x16_2), _mm_loadu_si128(y16_2));
const LEO_M128 x1_2 = _mm_xor_si128(_mm_loadu_si128(x16_2 + 1), _mm_loadu_si128(y16_2 + 1));
const LEO_M128 x2_2 = _mm_xor_si128(_mm_loadu_si128(x16_2 + 2), _mm_loadu_si128(y16_2 + 2));
const LEO_M128 x3_2 = _mm_xor_si128(_mm_loadu_si128(x16_2 + 3), _mm_loadu_si128(y16_2 + 3));
_mm_storeu_si128(x16_0, x0_0);
_mm_storeu_si128(x16_0 + 1, x1_0);
_mm_storeu_si128(x16_0 + 2, x2_0);
_mm_storeu_si128(x16_0 + 3, x3_0);
_mm_storeu_si128(x16_1, x0_1);
_mm_storeu_si128(x16_1 + 1, x1_1);
_mm_storeu_si128(x16_1 + 2, x2_1);
_mm_storeu_si128(x16_1 + 3, x3_1);
_mm_storeu_si128(x16_2, x0_2);
_mm_storeu_si128(x16_2 + 1, x1_2);
_mm_storeu_si128(x16_2 + 2, x2_2);
_mm_storeu_si128(x16_2 + 3, x3_2);
x16_0 += 4, y16_0 += 4;
x16_1 += 4, y16_1 += 4;
x16_2 += 4, y16_2 += 4;
bytes -= 64;
} while (bytes > 0);
}
for (unsigned i = 0; i < kOrder; ++i)
LogLUT[i] = ExpLUT[LogLUT[i]];
for (unsigned i = 0; i < kOrder; ++i)
ExpLUT[LogLUT[i]] = i;
ExpLUT[kModulus] = ExpLUT[0];
}
//------------------------------------------------------------------------------
// Multiplies
@ -485,12 +258,12 @@ void xor_mem3(
struct {
LEO_ALIGNED LEO_M128 Lo[256];
LEO_ALIGNED LEO_M128 Hi[256];
} Multiply128LUT;
} static Multiply128LUT;
#if defined(LEO_TRY_AVX2)
struct {
LEO_ALIGNED LEO_M256 Lo[256];
LEO_ALIGNED LEO_M256 Hi[256];
} Multiply256LUT;
} static Multiply256LUT;
#endif // LEO_TRY_AVX2
// Returns a * b
@ -501,14 +274,19 @@ static ffe_t FFEMultiply(ffe_t a, ffe_t b)
return ExpLUT[AddMod(LogLUT[a], LogLUT[b])];
}
// Returns a * Exp(log_b): multiplies a by the field element whose logarithm
// is log_b. A zero operand a always yields 0.
static ffe_t FFEMultiplyLog(ffe_t a, ffe_t log_b)
{
    if (a == 0)
        return 0;
    // log_b is already a logarithm, so it is added to Log(a) directly
    // without going through LogLUT[]
    return ExpLUT[AddMod(LogLUT[a], log_b)];
}
bool InitializeMultiplyTables()
{
// Reuse aligned self test buffers to load table data
uint8_t* lo = m_SelfTestBuffers.A;
uint8_t* hi = m_SelfTestBuffers.B;
for (int y = 0; y < 256; ++y)
{
uint8_t lo[16], hi[16];
for (unsigned char x = 0; x < 16; ++x)
{
lo[x] = FFEMultiply(x, static_cast<uint8_t>(y));
@ -517,15 +295,17 @@ bool InitializeMultiplyTables()
const LEO_M128 table_lo = _mm_loadu_si128((LEO_M128*)lo);
const LEO_M128 table_hi = _mm_loadu_si128((LEO_M128*)hi);
_mm_storeu_si128(Multiply128LUT.Lo + y, table_lo);
_mm_storeu_si128(Multiply128LUT.Hi + y, table_hi);
#if defined(LEO_TRY_AVX2)
if (CpuHasAVX2)
{
const LEO_M256 table_lo2 = _mm256_broadcastsi128_si256(table_lo);
const LEO_M256 table_hi2 = _mm256_broadcastsi128_si256(table_hi);
_mm256_storeu_si256(Multiply256LUT.Lo + y, table_lo2);
_mm256_storeu_si256(Multiply256LUT.Hi + y, table_hi2);
_mm256_storeu_si256(Multiply256LUT.Lo + y,
_mm256_broadcastsi128_si256(table_lo));
_mm256_storeu_si256(Multiply256LUT.Hi + y,
_mm256_broadcastsi128_si256(table_hi));
}
#endif // LEO_TRY_AVX2
}
@ -536,7 +316,7 @@ bool InitializeMultiplyTables()
// vx[] = vy[] * m
void mul_mem_set(
void * LEO_RESTRICT vx, const void * LEO_RESTRICT vy,
ffe_t m, unsigned bytes)
ffe_t m, uint64_t bytes)
{
if (m <= 1)
{
@ -633,7 +413,7 @@ void mul_mem_set(
void mul_mem2_inplace(
void * LEO_RESTRICT vx_0,
void * LEO_RESTRICT vx_1,
ffe_t m, unsigned bytes)
ffe_t m, uint64_t bytes)
{
if (m <= 1)
{
@ -759,28 +539,28 @@ void mul_mem2_inplace(
// FFT Operations
// x[] ^= y[] * m, y[] ^= x[]
void mul_fft(
void fft_butterfly(
void * LEO_RESTRICT x, void * LEO_RESTRICT y,
ffe_t m, unsigned bytes)
ffe_t m, uint64_t bytes)
{
}
// For i = {0, 1}: x_i[] ^= y_i[] * m, y_i[] ^= x_i[]
void mul_fft2(
void fft_butterfly2(
void * LEO_RESTRICT x_0, void * LEO_RESTRICT y_0,
void * LEO_RESTRICT x_1, void * LEO_RESTRICT y_1,
ffe_t m, unsigned bytes)
ffe_t m, uint64_t bytes)
{
}
// For i = {0, 1, 2}: x_i[] ^= y_i[] * m, y_i[] ^= x_i[]
void mul_fft3(
void fft_butterfly3(
void * LEO_RESTRICT x_0, void * LEO_RESTRICT y_0,
void * LEO_RESTRICT x_1, void * LEO_RESTRICT y_1,
void * LEO_RESTRICT x_2, void * LEO_RESTRICT y_2,
ffe_t m, unsigned bytes)
ffe_t m, uint64_t bytes)
{
}
@ -790,33 +570,348 @@ void mul_fft3(
// IFFT Operations
// y[] ^= x[], x[] ^= y[] * m
void mul_ifft(
void ifft_butterfly(
void * LEO_RESTRICT x, void * LEO_RESTRICT y,
ffe_t m, unsigned bytes)
ffe_t m, uint64_t bytes)
{
}
// For i = {0, 1}: y_i[] ^= x_i[], x_i[] ^= y_i[] * m
void mul_ifft2(
void ifft_butterfly2(
void * LEO_RESTRICT x_0, void * LEO_RESTRICT y_0,
void * LEO_RESTRICT x_1, void * LEO_RESTRICT y_1,
ffe_t m, unsigned bytes)
ffe_t m, uint64_t bytes)
{
}
// For i = {0, 1, 2}: y_i[] ^= x_i[], x_i[] ^= y_i[] * m
void mul_ifft3(
void ifft_butterfly3(
void * LEO_RESTRICT x_0, void * LEO_RESTRICT y_0,
void * LEO_RESTRICT x_1, void * LEO_RESTRICT y_1,
void * LEO_RESTRICT x_2, void * LEO_RESTRICT y_2,
ffe_t m, unsigned bytes)
ffe_t m, uint64_t bytes)
{
}
//------------------------------------------------------------------------------
// FFT
static ffe_t FFTSkew[kFieldModulus]; // twisted factors used in FFT
static ffe_t LogWalsh[kOrder]; // factors used in the evaluation of the error locator polynomial
// Precomputes the tables consumed by Encode() and Decode():
//   FFTSkew[]  - per-position skew factors, stored as logarithms
//   LogWalsh[] - FWHT of the logarithm table, multiplied into the error
//                locations by Decode()
// Reads LogLUT[] (directly and via FFEMultiply/FFEMultiplyLog), so the
// logarithm tables must be initialized before this is called.
void FFTInitialize()
{
    // temp[i - 1] starts out as the single-bit value 1 << i
    ffe_t temp[kBits - 1];

    for (unsigned i = 1; i < kBits; ++i)
        temp[i - 1] = (ffe_t)((unsigned)1 << i);

    for (unsigned m = 0; m < (kBits - 1); ++m)
    {
        const unsigned step = (unsigned)1 << (m + 1);

        FFTSkew[((unsigned)1 << m) - 1] = 0;

        for (unsigned i = m; i < (kBits - 1); ++i)
        {
            const unsigned s = ((unsigned)1 << (i + 1));

            for (unsigned j = ((unsigned)1 << m) - 1; j < s; j += step)
                FFTSkew[j + s] = FFTSkew[j] ^ temp[i];
        }

        // TBD: This can be cleaned up
        temp[m] = kFieldModulus - LogLUT[FFEMultiply(temp[m], temp[m] ^ 1)];

        for (unsigned i = m + 1; i < (kBits - 1); ++i)
            temp[i] = FFEMultiplyLog(temp[i], (LogLUT[temp[i] ^ 1] + temp[m]) % kFieldModulus);
    }

    // Convert the skew factors to logarithms.
    // FFTSkew[] has kFieldModulus entries, so iterate over exactly that many:
    // bounding this loop by kOrder would read and write past the end of the
    // array whenever kOrder > kFieldModulus.
    for (unsigned i = 0; i < kFieldModulus; ++i)
        FFTSkew[i] = LogLUT[FFTSkew[i]];

    // Precalculate FWHT(Log[i]):

    for (unsigned i = 0; i < kOrder; ++i)
        LogWalsh[i] = LogLUT[i];
    LogWalsh[0] = 0;

    FWHT(LogWalsh, kBits);
}
//------------------------------------------------------------------------------
// Encode
// Encodes original_count input buffers of buffer_bytes each.
//
// The input is consumed in blocks of m buffers. Each block is run through an
// IFFT using the skew factors for its position, the transformed blocks are
// XORed together in work[0..m), and a final FFT is applied to work[0..m).
//
//   data : original_count input buffers (read only)
//   work : must provide at least 2 * m buffers; work[0..m) accumulates the
//          result and work[m..2m) is scratch space for the later blocks
//
// recovery_count is not read here; m is supplied by the caller.
// NOTE(review): the first block reads data[0..m) unconditionally - confirm
// that callers guarantee original_count >= m.
void Encode(
    uint64_t buffer_bytes,
    unsigned original_count,
    unsigned recovery_count,
    unsigned m,
    void* const * const data,
    void** work)
{
    // work <- data
    // FIXME: Unroll first loop to eliminate this
    for (unsigned i = 0; i < m; ++i)
        memcpy(work[i], data[i], buffer_bytes);

    // work <- IFFT(data, m, m)
    for (unsigned width = 1; width < m; width <<= 1)
    {
        for (unsigned j = width; j < m; j += (width << 1))
        {
            const ffe_t skew = FFTSkew[j + m - 1];

            if (skew != kFieldModulus)
            {
                for (unsigned i = j - width; i < j; ++i)
                    ifft_butterfly(work[i], work[i + width], skew, buffer_bytes);
            }
            else
            {
                for (unsigned i = j - width; i < j; ++i)
                    xor_mem(work[i + width], work[i], buffer_bytes);
            }
        }
    }

    // Remaining full blocks of m inputs
    for (unsigned i = m; i + m <= original_count; i += m)
    {
        // temp <- data + i
        void** temp = work + m;

        // The block starts at input i, so the source index is i + j
        // FIXME: Unroll first loop to eliminate this
        for (unsigned j = 0; j < m; ++j)
            memcpy(temp[j], data[i + j], buffer_bytes);

        // temp <- IFFT(temp, m, m + i)
        for (unsigned width = 1; width < m; width <<= 1)
        {
            for (unsigned j = width; j < m; j += (width << 1))
            {
                const ffe_t skew = FFTSkew[j + m + i - 1];

                if (skew != kFieldModulus)
                {
                    for (unsigned k = j - width; k < j; ++k)
                        ifft_butterfly(temp[k], temp[k + width], skew, buffer_bytes);
                }
                else
                {
                    for (unsigned k = j - width; k < j; ++k)
                        xor_mem(temp[k + width], temp[k], buffer_bytes);
                }
            }
        }

        // work <- work XOR temp
        // FIXME: Unroll last loop to eliminate this
        for (unsigned j = 0; j < m; ++j)
            xor_mem(work[j], temp[j], buffer_bytes);
    }

    // Final partial block, zero-padded up to m inputs
    const unsigned last_count = original_count % m;
    if (last_count != 0)
    {
        const unsigned i = original_count - last_count;

        // temp <- data + i
        void** temp = work + m;

        for (unsigned j = 0; j < last_count; ++j)
            memcpy(temp[j], data[i + j], buffer_bytes);
        for (unsigned j = last_count; j < m; ++j)
            memset(temp[j], 0, buffer_bytes);

        // temp <- IFFT(temp, m, m + i)
        for (unsigned width = 1, shift = 1; width < m; width <<= 1, ++shift)
        {
            // Calculate stop considering that the right is all zeroes
            const unsigned stop = ((last_count + width - 1) >> shift) << shift;

            for (unsigned j = width; j < stop; j += (width << 1))
            {
                const ffe_t skew = FFTSkew[j + m + i - 1];

                if (skew != kFieldModulus)
                {
                    for (unsigned k = j - width; k < j; ++k)
                        ifft_butterfly(temp[k], temp[k + width], skew, buffer_bytes);
                }
                else
                {
                    for (unsigned k = j - width; k < j; ++k)
                        xor_mem(temp[k + width], temp[k], buffer_bytes);
                }
            }
        }

        // work <- work XOR temp
        // FIXME: Unroll last loop to eliminate this
        for (unsigned j = 0; j < m; ++j)
            xor_mem(work[j], temp[j], buffer_bytes);
    }

    // work <- FFT(work, m, 0)
    for (unsigned width = (m >> 1); width > 0; width >>= 1)
    {
        const ffe_t* skewLUT = FFTSkew + width - 1;
        const unsigned range = width << 1;

        for (unsigned j = 0; j < m; j += range)
        {
            const ffe_t skew = skewLUT[j];

            if (skew != kFieldModulus)
            {
                // Both branches operate on work[]; the caller's data[]
                // buffers are inputs and must not be written
                for (unsigned k = j, count = j + width; k < count; ++k)
                    fft_butterfly(work[k], work[k + width], skew, buffer_bytes);
            }
            else
            {
                for (unsigned k = j, count = j + width; k < count; ++k)
                    xor_mem(work[k + width], work[k], buffer_bytes);
            }
        }
    }
}
//------------------------------------------------------------------------------
// Decode
// Recovers missing original buffers.
//
// Buffers are laid out in work[] as positions [0, m) for recovery data and
// [m, m + original_count) for original data; a null pointer in original[] or
// recovery[] marks that buffer as missing. On return, for every missing
// original i the recovered buffer is in work[i].
//
//   m        : NextPow2(recovery_count)
//   n        : NextPow2(m + original_count), the number of work[] buffers
//   original : original_count entries
//   recovery : recovery_count entries
void Decode(
    uint64_t buffer_bytes,
    unsigned original_count,
    unsigned recovery_count,
    unsigned m, // NextPow2(recovery_count)
    unsigned n, // NextPow2(m + original_count) = work_count
    void* const * const original, // original_count entries
    void* const * const recovery, // recovery_count entries
    void** work) // n entries
{
    // Fill in error locations.
    // The whole array is zero-initialized because the transforms below run
    // over all kOrder entries, not just the first n.
    ffe_t ErrorLocations[kOrder] = {};
    for (unsigned i = 0; i < recovery_count; ++i)
        ErrorLocations[i] = recovery[i] ? 0 : 1;
    for (unsigned i = recovery_count; i < m; ++i)
        ErrorLocations[i] = 1;
    for (unsigned i = 0; i < original_count; ++i)
        ErrorLocations[i + m] = original[i] ? 0 : 1;

    // Evaluate error locator polynomial
    FWHT(ErrorLocations, kBits);

    for (unsigned i = 0; i < kOrder; ++i)
        ErrorLocations[i] = ((unsigned)ErrorLocations[i] * (unsigned)LogWalsh[i]) % kFieldModulus;

    FWHT(ErrorLocations, kBits);

    // work <- recovery data
    for (unsigned i = 0; i < recovery_count; ++i)
    {
        if (recovery[i])
            mul_mem_set(work[i], recovery[i], ErrorLocations[i], buffer_bytes);
        else
            memset(work[i], 0, buffer_bytes);
    }
    for (unsigned i = recovery_count; i < m; ++i)
        memset(work[i], 0, buffer_bytes);

    // work <- original data
    for (unsigned i = 0; i < original_count; ++i)
    {
        if (original[i])
            mul_mem_set(work[m + i], original[i], ErrorLocations[m + i], buffer_bytes);
        else
            memset(work[m + i], 0, buffer_bytes);
    }
    for (unsigned i = m + original_count; i < n; ++i)
        memset(work[i], 0, buffer_bytes);

    // work <- IFFT(work, n, 0)
    for (unsigned width = 1; width < n; width <<= 1)
    {
        for (unsigned j = width; j < n; j += (width << 1))
        {
            const ffe_t skew = FFTSkew[j - 1];

            if (skew != kFieldModulus)
            {
                for (unsigned i = j - width; i < j; ++i)
                    ifft_butterfly(work[i], work[i + width], skew, buffer_bytes);
            }
            else
            {
                for (unsigned i = j - width; i < j; ++i)
                    xor_mem(work[i + width], work[i], buffer_bytes);
            }
        }
    }

    // work <- FormalDerivative(work, n)
    for (unsigned i = 1; i < n; ++i)
    {
        // width is the lowest set bit of i
        const unsigned width = ((i ^ (i - 1)) + 1) >> 1;

        // If a large number of values are being XORed:
        for (unsigned j = i - width; j < i; ++j)
            xor_mem(work[j], work[j + width], buffer_bytes);
    }

    // work <- FFT(work, n, 0) truncated to m + original_count
    const unsigned output_count = m + original_count;
    for (unsigned width = (n >> 1); width > 0; width >>= 1)
    {
        const ffe_t* skewLUT = FFTSkew + width - 1;
        const unsigned range = width << 1;

        for (unsigned j = (m < range) ? 0 : m; j < output_count; j += range)
        {
            const ffe_t skew = skewLUT[j];

            if (skew != kFieldModulus)
            {
                for (unsigned i = j; i < j + width; ++i)
                    fft_butterfly(work[i], work[i + width], skew, buffer_bytes);
            }
            else
            {
                for (unsigned i = j; i < j + width; ++i)
                    xor_mem(work[i + width], work[i], buffer_bytes);
            }
        }
    }

    // Reveal erasures.
    // Original i occupies position i + m, so both the source buffer and its
    // error-location entry are taken from index i + m.
    for (unsigned i = 0; i < original_count; ++i)
        if (!original[i])
            mul_mem_set(work[i], work[i + m], kFieldModulus - ErrorLocations[i + m], buffer_bytes);
}
//------------------------------------------------------------------------------
// API
@ -831,6 +926,7 @@ bool Initialize()
return false;
InitializeLogarithmTables();
FFTInitialize();
IsInitialized = true;
return true;

View File

@ -56,9 +56,6 @@ static const unsigned kOrder = 256;
//------------------------------------------------------------------------------
// Fast Walsh-Hadamard Transform (FWHT) (mod kModulus)
// Define this to enable the optimized version of FWHT()
#define LEO_FF8_FWHT_OPTIMIZED
// Transform for a variable number of bits (up to kOrder)
void FWHT(ffe_t* data, const unsigned bits);
@ -66,85 +63,89 @@ void FWHT(ffe_t* data, const unsigned bits);
void FWHT(ffe_t data[kOrder]);
//------------------------------------------------------------------------------
// XOR Memory
// x[] ^= y[]
void xor_mem(
void * LEO_RESTRICT x, const void * LEO_RESTRICT y,
unsigned bytes);
// For i = {0, 1}: x_i[] ^= y_i[]
void xor_mem2(
void * LEO_RESTRICT x_0, const void * LEO_RESTRICT y_0,
void * LEO_RESTRICT x_1, const void * LEO_RESTRICT y_1,
unsigned bytes);
// For i = {0, 1, 2}: x_i[] ^= y_i[]
void xor_mem3(
void * LEO_RESTRICT x_0, const void * LEO_RESTRICT y_0,
void * LEO_RESTRICT x_1, const void * LEO_RESTRICT y_1,
void * LEO_RESTRICT x_2, const void * LEO_RESTRICT y_2,
unsigned bytes);
//------------------------------------------------------------------------------
// Multiplies
// x[] = y[] * m
void mul_mem_set(
void * LEO_RESTRICT x, const void * LEO_RESTRICT y,
ffe_t m, unsigned bytes);
ffe_t m, uint64_t bytes);
// For i = {0, 1}: x_i[] *= m
void mul_mem2_inplace(
void * LEO_RESTRICT x_0,
void * LEO_RESTRICT x_1,
ffe_t m, unsigned bytes);
ffe_t m, uint64_t bytes);
//------------------------------------------------------------------------------
// FFT Operations
// x[] ^= y[] * m, y[] ^= x[]
void mul_fft(
void fft_butterfly(
void * LEO_RESTRICT x, void * LEO_RESTRICT y,
ffe_t m, unsigned bytes);
ffe_t m, uint64_t bytes);
// For i = {0, 1}: x_i[] ^= y_i[] * m, y_i[] ^= x_i[]
void mul_fft2(
void fft_butterfly2(
void * LEO_RESTRICT x_0, void * LEO_RESTRICT y_0,
void * LEO_RESTRICT x_1, void * LEO_RESTRICT y_1,
ffe_t m, unsigned bytes);
ffe_t m, uint64_t bytes);
// For i = {0, 1, 2}: x_i[] ^= y_i[] * m, y_i[] ^= x_i[]
void mul_fft3(
void fft_butterfly3(
void * LEO_RESTRICT x_0, void * LEO_RESTRICT y_0,
void * LEO_RESTRICT x_1, void * LEO_RESTRICT y_1,
void * LEO_RESTRICT x_2, void * LEO_RESTRICT y_2,
ffe_t m, unsigned bytes);
ffe_t m, uint64_t bytes);
//------------------------------------------------------------------------------
// IFFT Operations
// y[] ^= x[], x[] ^= y[] * m
void mul_ifft(
void ifft_butterfly(
void * LEO_RESTRICT x, void * LEO_RESTRICT y,
ffe_t m, unsigned bytes);
ffe_t m, uint64_t bytes);
// For i = {0, 1}: y_i[] ^= x_i[], x_i[] ^= y_i[] * m
void mul_ifft2(
void ifft_butterfly2(
void * LEO_RESTRICT x_0, void * LEO_RESTRICT y_0,
void * LEO_RESTRICT x_1, void * LEO_RESTRICT y_1,
ffe_t m, unsigned bytes);
ffe_t m, uint64_t bytes);
// For i = {0, 1, 2}: y_i[] ^= x_i[], x_i[] ^= y_i[] * m
void mul_ifft3(
void ifft_butterfly3(
void * LEO_RESTRICT x_0, void * LEO_RESTRICT y_0,
void * LEO_RESTRICT x_1, void * LEO_RESTRICT y_1,
void * LEO_RESTRICT x_2, void * LEO_RESTRICT y_2,
ffe_t m, unsigned bytes);
ffe_t m, uint64_t bytes);
//------------------------------------------------------------------------------
// Encode
void Encode(
uint64_t buffer_bytes,
unsigned original_count,
unsigned recovery_count,
unsigned m, // = NextPow2(recovery_count) * 2 = work_count
void* const * const data,
void** work); // Size of GetEncodeWorkCount()
//------------------------------------------------------------------------------
// Decode
void Decode(
uint64_t buffer_bytes,
unsigned original_count,
unsigned recovery_count,
unsigned m, // = NextPow2(recovery_count)
unsigned n, // = NextPow2(m + original_count) = work_count
void* const * const original, // original_count entries
void* const * const recovery, // recovery_count entries
void** work); // n entries
//------------------------------------------------------------------------------

BIN
docs/HighRateDecoder.pdf Normal file

Binary file not shown.

BIN
docs/LowRateDecoder.pdf Normal file

Binary file not shown.

View File

@ -27,8 +27,8 @@
*/
#include "leopard.h"
#include "FecalEncoder.h"
#include "FecalDecoder.h"
#include "LeopardFF8.h"
#include "LeopardFF16.h"
extern "C" {
@ -38,134 +38,152 @@ extern "C" {
static bool m_Initialized = false;
FECAL_EXPORT int fecal_init_(int version)
LEO_EXPORT int leo_init_(int version)
{
if (version != FECAL_VERSION)
return Fecal_InvalidInput;
if (version != LEO_VERSION)
return Leopard_InvalidInput;
if (0 != gf256_init())
return Fecal_Platform;
if (!leopard::ff8::Initialize())
return Leopard_Platform;
if (!leopard::ff16::Initialize())
return Leopard_Platform;
m_Initialized = true;
return Fecal_Success;
return Leopard_Success;
}
//------------------------------------------------------------------------------
// Encoder API
FECAL_EXPORT FecalEncoder fecal_encoder_create(unsigned input_count, void* const * const input_data, uint64_t total_bytes)
LEO_EXPORT unsigned leo_encode_work_count(
unsigned original_count,
unsigned recovery_count)
{
if (input_count <= 0 || !input_data || total_bytes < input_count)
{
FECAL_DEBUG_BREAK; // Invalid input
return nullptr;
}
FECAL_DEBUG_ASSERT(m_Initialized); // Must call fecal_init() first
if (!m_Initialized)
return nullptr;
fecal::Encoder* encoder = new(std::nothrow) fecal::Encoder;
if (!encoder)
{
FECAL_DEBUG_BREAK; // Out of memory
return nullptr;
}
if (Fecal_Success != encoder->Initialize(input_count, input_data, total_bytes))
{
delete encoder;
return nullptr;
}
return reinterpret_cast<FecalEncoder>( encoder );
return leopard::NextPow2(recovery_count) * 2;
}
FECAL_EXPORT int fecal_encode(FecalEncoder encoder_v, FecalSymbol* symbol)
LEO_EXPORT LeopardResult leo_encode(
uint64_t buffer_bytes, // Number of bytes in each data buffer
unsigned original_count, // Number of original_data[] buffer pointers
unsigned recovery_count, // Number of recovery_data[] buffer pointers
unsigned work_count, // Number of work_data[] buffer pointers, from leo_encode_work_count()
void* const * const original_data, // Array of pointers to original data buffers
void** work_data, // Array of work buffers
unsigned flags) // Operation flags
{
fecal::Encoder* encoder = reinterpret_cast<fecal::Encoder*>( encoder_v );
if (!encoder || !symbol)
return Fecal_InvalidInput;
if (buffer_bytes <= 0 || buffer_bytes % 64 != 0)
return Leopard_InvalidSize;
return encoder->Encode(*symbol);
}
if (recovery_count <= 0 || recovery_count > original_count)
return Leopard_InvalidCounts;
FECAL_EXPORT void fecal_free(void* codec_v)
{
if (codec_v)
if (!original_data || !work_data)
return Leopard_InvalidInput;
const unsigned m = leopard::NextPow2(recovery_count);
const unsigned n = leopard::NextPow2(m + original_count);
if (work_count != m * 2)
return Leopard_InvalidCounts;
const bool mt = (flags & LeopardFlags_Multithreaded) != 0;
if (n <= leopard::ff8::kOrder)
{
fecal::ICodec* icodec = reinterpret_cast<fecal::ICodec*>( codec_v );
delete icodec;
leopard::ff8::Encode(
buffer_bytes,
original_count,
recovery_count,
m,
original_data,
work_data);
}
else if (n <= leopard::ff16::kOrder)
{
leopard::ff16::Encode(
buffer_bytes,
original_count,
recovery_count,
m,
original_data,
work_data);
}
else
return Leopard_TooMuchData;
return Leopard_Success;
}
//------------------------------------------------------------------------------
// Decoder API
FECAL_EXPORT FecalDecoder fecal_decoder_create(unsigned input_count, uint64_t total_bytes)
LEO_EXPORT unsigned leo_decode_work_count(
unsigned original_count,
unsigned recovery_count)
{
if (input_count <= 0 || total_bytes < input_count)
const unsigned m = leopard::NextPow2(recovery_count);
const unsigned n = leopard::NextPow2(m + original_count);
return n;
}
LEO_EXPORT LeopardResult leo_decode(
uint64_t buffer_bytes, // Number of bytes in each data buffer
unsigned original_count, // Number of original_data[] buffer pointers
unsigned recovery_count, // Number of recovery_data[] buffer pointers
unsigned work_count, // Number of buffer pointers in work_data[]
void* const * const original_data, // Array of original data buffers
void* const * const recovery_data, // Array of recovery data buffers
void** work_data, // Array of work data buffers
unsigned flags) // Operation flags
{
if (buffer_bytes <= 0 || buffer_bytes % 64 != 0)
return Leopard_InvalidSize;
if (recovery_count <= 0 || recovery_count > original_count)
return Leopard_InvalidCounts;
if (!original_data || !recovery_data || !work_data)
return Leopard_InvalidInput;
const unsigned m = leopard::NextPow2(recovery_count);
const unsigned n = leopard::NextPow2(m + original_count);
if (work_count != n)
return Leopard_InvalidCounts;
const bool mt = (flags & LeopardFlags_Multithreaded) != 0;
if (n <= leopard::ff8::kOrder)
{
FECAL_DEBUG_BREAK; // Invalid input
return nullptr;
leopard::ff8::Decode(
buffer_bytes,
original_count,
recovery_count,
m,
n,
original_data,
recovery_data,
work_data);
}
FECAL_DEBUG_ASSERT(m_Initialized); // Must call fecal_init() first
if (!m_Initialized)
return nullptr;
fecal::Decoder* decoder = new(std::nothrow) fecal::Decoder;
if (!decoder)
else if (n <= leopard::ff16::kOrder)
{
FECAL_DEBUG_BREAK; // Out of memory
return nullptr;
leopard::ff16::Decode(
buffer_bytes,
original_count,
recovery_count,
m,
n,
original_data,
recovery_data,
work_data);
}
else
return Leopard_TooMuchData;
if (Fecal_Success != decoder->Initialize(input_count, total_bytes))
{
delete decoder;
return nullptr;
}
return reinterpret_cast<FecalDecoder>( decoder );
}
FECAL_EXPORT int fecal_decoder_add_original(FecalDecoder decoder_v, const FecalSymbol* symbol)
{
fecal::Decoder* decoder = reinterpret_cast<fecal::Decoder*>( decoder_v );
if (!decoder || !symbol)
return Fecal_InvalidInput;
return decoder->AddOriginal(*symbol);
}
FECAL_EXPORT int fecal_decoder_add_recovery(FecalDecoder decoder_v, const FecalSymbol* symbol)
{
fecal::Decoder* decoder = reinterpret_cast<fecal::Decoder*>( decoder_v );
if (!decoder || !symbol)
return Fecal_InvalidInput;
return decoder->AddRecovery(*symbol);
}
FECAL_EXPORT int fecal_decode(FecalDecoder decoder_v, RecoveredSymbols* symbols)
{
fecal::Decoder* decoder = reinterpret_cast<fecal::Decoder*>( decoder_v );
if (!decoder || !symbols)
return Fecal_InvalidInput;
return decoder->Decode(*symbols);
}
FECAL_EXPORT int fecal_decoder_get(FecalDecoder decoder_v, unsigned input_index, FecalSymbol* symbol)
{
fecal::Decoder* decoder = reinterpret_cast<fecal::Decoder*>( decoder_v );
if (!decoder || !symbol)
return Fecal_InvalidInput;
return decoder->GetOriginal(input_index, *symbol);
return Leopard_Success;
}

View File

@ -59,6 +59,7 @@
# endif
#endif
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
@ -90,14 +91,13 @@ typedef enum LeopardResultT
Leopard_Success = 0, // Operation succeeded
Leopard_TooMuchData = -1, // Buffer counts are too high
Leopard_InvalidBlockSize = -2, // Buffer size must be a multiple of 64 bytes
Leopard_InvalidInput = -3, // A function parameter was invalid
Leopard_Platform = -4, // Platform is unsupported
Leopard_OutOfMemory = -5, // Out of memory error occurred
Leopard_Unexpected = -6, // Unexpected error - Software bug?
Leopard_InvalidSize = -2, // Buffer size must be a multiple of 64 bytes
Leopard_InvalidCounts = -3, // Invalid counts provided
Leopard_InvalidInput = -4, // A function parameter was invalid
Leopard_Platform = -5, // Platform is unsupported
} LeopardResult;
// Results
// Flags
typedef enum LeopardFlagsT
{
LeopardFlags_Defaults = 0, // Default settings
@ -119,7 +119,6 @@ typedef enum LeopardFlagsT
Returns the work_count value to pass into leo_encode().
Returns 0 on invalid input.
*/
LEO_EXPORT unsigned leo_encode_work_count(
unsigned original_count,
unsigned recovery_count);
@ -138,6 +137,8 @@ LEO_EXPORT unsigned leo_encode_work_count(
flags: Flags for encoding e.g. LeopardFlag_Multithreaded
The sum of original_count + recovery_count must not exceed 65536.
The recovery_count <= original_count.
The buffer_bytes must be a multiple of 64.
Each buffer should have the same number of bytes.
Even the last piece must be rounded up to the block size.
@ -153,15 +154,11 @@ LEO_EXPORT unsigned leo_encode_work_count(
((uint64_t)total_bytes + original_count - 1) / original_count);
Returns Leopard_Success on success.
The first set of recovery_count buffers in work_data will be the result.
Returns Leopard_TooMuchData if the data is too large.
Returns Leopard_InvalidBlockSize if the data is the wrong size.
Returns Leopard_InvalidInput on invalid input.
* The first set of recovery_count buffers in work_data will be the result.
Returns other values on errors.
*/
LEO_EXPORT LeopardResult leo_encode(
unsigned buffer_bytes, // Number of bytes in each data buffer
uint64_t buffer_bytes, // Number of bytes in each data buffer
unsigned original_count, // Number of original_data[] buffer pointers
unsigned recovery_count, // Number of recovery_data[] buffer pointers
unsigned work_count, // Number of work_data[] buffer pointers, from leo_encode_work_count()
@ -183,7 +180,6 @@ LEO_EXPORT LeopardResult leo_encode(
Returns the work_count value to pass into leo_decode().
Returns 0 on invalid input.
*/
LEO_EXPORT unsigned leo_decode_work_count(
unsigned original_count,
unsigned recovery_count);
@ -211,7 +207,7 @@ LEO_EXPORT unsigned leo_decode_work_count(
Returns other values on errors.
*/
LEO_EXPORT LeopardResult leo_decode(
unsigned buffer_bytes, // Number of bytes in each data buffer
uint64_t buffer_bytes, // Number of bytes in each data buffer
unsigned original_count, // Number of original_data[] buffer pointers
unsigned recovery_count, // Number of recovery_data[] buffer pointers
unsigned work_count, // Number of buffer pointers in work_data[]

View File

@ -1,12 +1,14 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.26127.3
# Visual Studio 14
VisualStudioVersion = 14.0.25420.1
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Leopard", "Leopard.vcxproj", "{32176592-2F30-4BD5-B645-EB11C8D3453E}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LeopardBenchmark", "..\tests\proj\Benchmark.vcxproj", "{97FCA15F-EAF3-4F1A-AFF8-83E693DA9D45}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LeopardExperiments", "..\tests\proj\Experiments.vcxproj", "{97FCA15F-EAF3-4F1A-AFF8-83E693DA9D65}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
@ -31,6 +33,14 @@ Global
{97FCA15F-EAF3-4F1A-AFF8-83E693DA9D45}.Release|Win32.Build.0 = Release|Win32
{97FCA15F-EAF3-4F1A-AFF8-83E693DA9D45}.Release|x64.ActiveCfg = Release|x64
{97FCA15F-EAF3-4F1A-AFF8-83E693DA9D45}.Release|x64.Build.0 = Release|x64
{97FCA15F-EAF3-4F1A-AFF8-83E693DA9D65}.Debug|Win32.ActiveCfg = Debug|Win32
{97FCA15F-EAF3-4F1A-AFF8-83E693DA9D65}.Debug|Win32.Build.0 = Debug|Win32
{97FCA15F-EAF3-4F1A-AFF8-83E693DA9D65}.Debug|x64.ActiveCfg = Debug|x64
{97FCA15F-EAF3-4F1A-AFF8-83E693DA9D65}.Debug|x64.Build.0 = Debug|x64
{97FCA15F-EAF3-4F1A-AFF8-83E693DA9D65}.Release|Win32.ActiveCfg = Release|Win32
{97FCA15F-EAF3-4F1A-AFF8-83E693DA9D65}.Release|Win32.Build.0 = Release|Win32
{97FCA15F-EAF3-4F1A-AFF8-83E693DA9D65}.Release|x64.ActiveCfg = Release|x64
{97FCA15F-EAF3-4F1A-AFF8-83E693DA9D65}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

View File

@ -21,16 +21,12 @@
<ItemGroup>
<ClInclude Include="..\leopard.h" />
<ClInclude Include="..\LeopardCommon.h" />
<ClInclude Include="..\LeopardDecoder.h" />
<ClInclude Include="..\LeopardEncoder.h" />
<ClInclude Include="..\LeopardFF8.h" />
<ClInclude Include="..\LeopardFF16.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\leopard.cpp" />
<ClCompile Include="..\LeopardCommon.cpp" />
<ClCompile Include="..\LeopardDecoder.cpp" />
<ClCompile Include="..\LeopardEncoder.cpp" />
<ClCompile Include="..\LeopardFF8.cpp" />
<ClCompile Include="..\LeopardFF16.cpp" />
</ItemGroup>
@ -38,34 +34,33 @@
<ProjectGuid>{32176592-2F30-4BD5-B645-EB11C8D3453E}</ProjectGuid>
<RootNamespace>GF65536</RootNamespace>
<ProjectName>Leopard</ProjectName>
<WindowsTargetPlatformVersion>10.0.14393.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v141</PlatformToolset>
<PlatformToolset>v140</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v141</PlatformToolset>
<PlatformToolset>v140</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v141</PlatformToolset>
<PlatformToolset>v140</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v141</PlatformToolset>
<PlatformToolset>v140</PlatformToolset>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">

View File

@ -21,12 +21,6 @@
<ClInclude Include="..\LeopardCommon.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="..\LeopardDecoder.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="..\LeopardEncoder.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="..\LeopardFF16.h">
<Filter>Source Files</Filter>
</ClInclude>
@ -35,12 +29,6 @@
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\LeopardDecoder.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\LeopardEncoder.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\leopard.cpp">
<Filter>Source Files</Filter>
</ClCompile>

615
tests/experiments.cpp Normal file
View File

@ -0,0 +1,615 @@
/*
Copyright (c) 2017 Christopher A. Taylor. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of LHC-RS nor the names of its contributors may be
used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#include <time.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
//------------------------------------------------------------------------------
// Debug
// Some bugs only repro in release mode, so this can be helpful
//#define LEO_DEBUG_IN_RELEASE
#if defined(_DEBUG) || defined(DEBUG) || defined(LEO_DEBUG_IN_RELEASE)
#define LEO_DEBUG
#ifdef _WIN32
#define LEO_DEBUG_BREAK __debugbreak()
#else
#define LEO_DEBUG_BREAK __builtin_trap()
#endif
#define LEO_DEBUG_ASSERT(cond) { if (!(cond)) { LEO_DEBUG_BREAK; } }
#else
#define LEO_DEBUG_BREAK ;
#define LEO_DEBUG_ASSERT(cond) ;
#endif
//------------------------------------------------------------------------------
// Platform/Architecture
// Compiler-specific C++11 restrict keyword
#define LEO_RESTRICT __restrict
// Compiler-specific force inline keyword
#ifdef _MSC_VER
#define LEO_FORCE_INLINE inline __forceinline
#else
#define LEO_FORCE_INLINE inline __attribute__((always_inline))
#endif
//------------------------------------------------------------------------------
// Field
//#define LEO_SHORT_FIELD
// Field selection: defining LEO_SHORT_FIELD picks the 8-bit configuration,
// otherwise the 16-bit configuration is used. Each configuration provides:
//   ffe_t         - storage type for one field element
//   kGFBits       - number of bits per element
//   kGFPolynomial - value XORed into the generator state by InitField()
//                   whenever the state reaches kFieldSize
//   kGFBasis      - kGFBits basis elements consumed by InitField() when it
//                   converts the log table to the chosen basis
#ifdef LEO_SHORT_FIELD
typedef uint8_t ffe_t;
static const unsigned kGFBits = 8;
static const unsigned kGFPolynomial = 0x11D;
ffe_t kGFBasis[kGFBits] = {
1, 214, 152, 146, 86, 200, 88, 230 // Cantor basis
};
#else
typedef uint16_t ffe_t;
static const unsigned kGFBits = 16;
static const unsigned kGFPolynomial = 0x1002D;
ffe_t kGFBasis[kGFBits] = {
0x0001, 0xACCA, 0x3C0E, 0x163E, // Cantor basis
0xC582, 0xED2E, 0x914C, 0x4012,
0x6C98, 0x10D8, 0x6A72, 0xB900,
0xFDB8, 0xFB34, 0xFF38, 0x991E
};
#endif
/*
Cantor Basis introduced by:
D. G. Cantor, "On arithmetical algorithms over finite fields",
Journal of Combinatorial Theory, Series A, vol. 50, no. 2, pp. 285-300, 1989.
*/
// Number of distinct ffe_t values: 2^kGFBits
static const unsigned kFieldSize = (unsigned)1 << kGFBits; //Field size
// Largest element value (kFieldSize - 1); the "Q" of the Mod Q helpers below
static const unsigned kFieldModulus = kFieldSize - 1;
// Lookup tables with one entry per field element, filled in by InitField()
static ffe_t GFLog[kFieldSize];
static ffe_t GFExp[kFieldSize];
// Initialize GFLog[], GFExp[]
static void InitField()
{
unsigned state = 1;
for (unsigned i = 0; i < kFieldModulus; ++i)
{
GFExp[state] = static_cast<ffe_t>(i);
state <<= 1;
if (state >= kFieldSize)
state ^= kGFPolynomial;
}
GFExp[0] = kFieldModulus;
// Conversion to chosen basis:
GFLog[0] = 0;
for (unsigned i = 0; i < kGFBits; ++i)
{
const ffe_t basis = kGFBasis[i];
const unsigned width = (unsigned)(1UL << i);
for (unsigned j = 0; j < width; ++j)
GFLog[j + width] = GFLog[j] ^ basis;
}
for (unsigned i = 0; i < kFieldSize; ++i)
GFLog[i] = GFExp[GFLog[i]];
for (unsigned i = 0; i < kFieldSize; ++i)
GFExp[GFLog[i]] = i;
GFExp[kFieldModulus] = GFExp[0];
}
//------------------------------------------------------------------------------
// Mod Q Field Operations
//
// Q is the maximum symbol value, e.g. 255 or 65535.
// z = a + b (mod Q), partially reduced: the result may be Q itself.
static inline ffe_t AddModQ(ffe_t a, ffe_t b)
{
    unsigned total = static_cast<unsigned>(a);
    total += b;

    // Fold the carry out of the low kGFBits bits back into the result
    total += (total >> kGFBits);

    return static_cast<ffe_t>(total);
}
// z = a - b (mod Q), partially reduced: the result may be Q itself.
static inline ffe_t SubModQ(ffe_t a, ffe_t b)
{
    unsigned delta = static_cast<unsigned>(a);
    delta -= b; // wraps around when b > a

    // Fold the bits above the low kGFBits (the borrow) back into the result
    delta += (delta >> kGFBits);

    return static_cast<ffe_t>(delta);
}
// Returns a * GFExp[b] over GF(2^r): b is supplied as an exponent (log value).
static ffe_t mulE(ffe_t a, ffe_t b)
{
    // Zero times anything is zero; skip the table lookups entirely
    if (a != 0)
    {
        const ffe_t logProduct = AddModQ(GFLog[a], b);
        return GFExp[logProduct];
    }

    return 0;
}
//------------------------------------------------------------------------------
// Fast Walsh-Hadamard Transform (FWHT) Mod Q
//
// Q is the maximum symbol value, e.g. 255 or 65535.
// Define this to enable the optimized version of FWHT()
#define LEO_FWHT_OPTIMIZED
typedef ffe_t fwht_t;
// Two-point transform step: {a, b} = {a + b, a - b} (Mod Q)
static LEO_FORCE_INLINE void FWHT_2(fwht_t& LEO_RESTRICT a, fwht_t& LEO_RESTRICT b)
{
    // Snapshot both inputs before either is overwritten
    const fwht_t lhs = a;
    const fwht_t rhs = b;

    a = AddModQ(lhs, rhs);
    b = SubModQ(lhs, rhs);
}
// Reference implementation: in-place transform over 2^bits elements of data[],
// applying FWHT_2() to pairs whose distance doubles on every pass.
static void FWHT(fwht_t* data, const unsigned bits)
{
    const unsigned count = (unsigned)(1UL << bits);

    for (unsigned half = 1; half < count; half <<= 1)
    {
        const unsigned stride = half << 1;

        for (unsigned base = 0; base < count; base += stride)
        {
            fwht_t* lower = data + base;
            fwht_t* upper = lower + half;

            for (unsigned k = 0; k < half; ++k)
                FWHT_2(lower[k], upper[k]);
        }
    }
}
//------------------------------------------------------------------------------
// Formal Derivative
// Formal derivative of polynomial in the new basis, computed in place on the
// first `size` entries of cos[].
static void formal_derivative(ffe_t* cos, const unsigned size)
{
    /*
        Walks left to right, XORing the data ahead into the data behind.
        If the data ends in all zeroes, this can simply stop.
    */
    for (unsigned i = 1; i < size; ++i)
    {
        // Evaluates to the lowest set bit of i
        const unsigned span = ((i ^ (i - 1)) + 1) >> 1;

        // Fold the `span` entries starting at i into the `span` entries
        // immediately before i
        ffe_t* behind = cos + (i - span);
        const ffe_t* ahead = cos + i;

        for (unsigned k = 0; k < span; ++k)
            behind[k] ^= ahead[k];
    }

    // Doesn't seem to be needed
#if 0
    /*
        Same here - Zeroes on the right are preserved
    */
    for (unsigned i = size; i < kFieldSize; i <<= 1)
    {
        for (unsigned j = 0; j < size; ++j)
            cos[j] ^= cos[j + i];
    }
#endif
}
//------------------------------------------------------------------------------
// Fast Fourier Transform
static ffe_t skewVec[kFieldModulus]; // twisted factors used in FFT
// IFFT butterfly: b ^= a, then a ^= (updated b) * GFExp[skew]
static LEO_FORCE_INLINE void ifft_butterfly(ffe_t& a, ffe_t& b, ffe_t skew)
{
    const ffe_t folded = static_cast<ffe_t>(b ^ a);
    b = folded;
    a ^= mulE(folded, skew);
}
// IFFT in the proposed basis, performed in place on data[0..size).
// `index` offsets the lookups into skewVec[].
static void IFLT(ffe_t* data, const unsigned size, const unsigned index)
{
    for (unsigned half = 1; half < size; half <<= 1)
    {
        const unsigned stride = half << 1;

        for (unsigned mid = half; mid < size; mid += stride)
        {
            const ffe_t skew = skewVec[mid + index - 1];

            ffe_t* lower = data + (mid - half);
            ffe_t* upper = data + mid;

            if (skew == kFieldModulus)
            {
                // This skew value marks a group that only needs the XOR step
                for (unsigned k = 0; k < half; ++k)
                    upper[k] ^= lower[k];
                continue;
            }

            for (unsigned k = 0; k < half; ++k)
                ifft_butterfly(lower[k], upper[k], skew);
        }
    }
}
// FFT butterfly: a ^= b * GFExp[skew], then b ^= (updated a)
static LEO_FORCE_INLINE void fft_butterfly(ffe_t& a, ffe_t& b, ffe_t skew)
{
    const ffe_t mixed = static_cast<ffe_t>(a ^ mulE(b, skew));
    a = mixed;
    b ^= mixed;
}
// FFT in the proposed basis, performed in place on data[].
// `size` sets the starting butterfly width (size / 2), `skewIndex` offsets the
// lookups into skewVec[], and only groups starting below `output_elements`
// are processed.
static void FLT(ffe_t* data, const unsigned size, const unsigned skewIndex, const unsigned output_elements)
{
    unsigned half = size >> 1;

    while (half > 0)
    {
        const unsigned stride = half << 1;
        const ffe_t* skewTable = skewVec + half + skewIndex - 1;

        for (unsigned base = 0; base < output_elements; base += stride)
        {
            const ffe_t skew = skewTable[base];

            ffe_t* lower = data + base;
            ffe_t* upper = lower + half;

            if (skew == kFieldModulus)
            {
                // This skew value marks a group that only needs the XOR step
                for (unsigned k = 0; k < half; ++k)
                    upper[k] ^= lower[k];
                continue;
            }

            for (unsigned k = 0; k < half; ++k)
                fft_butterfly(lower[k], upper[k], skew);
        }

        half >>= 1;
    }
}
//------------------------------------------------------------------------------
// FFT Initialization
//static ffe_t B[kFieldSize >> 1]; // factors used in formal derivative
static fwht_t log_walsh[kFieldSize]; // factors used in the evaluation of the error locator polynomial
// Initialize skewVec[] and log_walsh[] (the B[] table is currently disabled).
// Must run after InitField(), since it reads GFLog[] and calls mulE().
static void InitFieldOperations()
{
    ffe_t temp[kGFBits - 1];

    // temp[] starts out as the single-bit values 2, 4, 8, ...
    for (unsigned i = 1; i < kGFBits; ++i)
        temp[i - 1] = (ffe_t)((unsigned)1 << i);

    for (unsigned m = 0; m < (kGFBits - 1); ++m)
    {
        const unsigned step = (unsigned)1 << (m + 1);

        skewVec[((unsigned)1 << m) - 1] = 0;

        for (unsigned i = m; i < (kGFBits - 1); ++i)
        {
            const unsigned s = ((unsigned)1 << (i + 1));

            // Largest index written is j + s <= kFieldSize - 2, which is in range
            for (unsigned j = ((unsigned)1 << m) - 1; j < s; j += step)
                skewVec[j + s] = skewVec[j] ^ temp[i];
        }

        temp[m] = kFieldModulus - GFLog[mulE(temp[m], GFLog[temp[m] ^ 1])];

        for (unsigned i = m + 1; i < (kGFBits - 1); ++i)
            temp[i] = mulE(temp[i], (GFLog[temp[i] ^ 1] + temp[m]) % kFieldModulus);
    }

    // Convert the skew factors to log form.
    // skewVec[] holds kFieldModulus entries (not kFieldSize), so the loop must
    // stop there: iterating up to kFieldSize touched one element past the end.
    for (unsigned i = 0; i < kFieldModulus; ++i)
        skewVec[i] = GFLog[skewVec[i]];

#if 0
    temp[0] = kFieldModulus - temp[0];
    for (unsigned i = 1; i < (kGFBits - 1); ++i)
        temp[i] = (kFieldModulus - temp[i] + temp[i - 1]) % kFieldModulus;
    B[0] = 0;
    for (unsigned i = 0; i < (kGFBits - 1); ++i)
    {
        const unsigned depart = ((unsigned)1 << i);
        for (unsigned j = 0; j < depart; ++j)
            B[j + depart] = (B[j] + temp[i]) % kFieldModulus;
    }
#endif

    // log_walsh[] is the FWHT of the log table, with entry 0 forced to zero
    for (unsigned i = 0; i < kFieldSize; ++i)
        log_walsh[i] = GFLog[i];
    log_walsh[0] = 0;

    FWHT(log_walsh, kGFBits);
}
//------------------------------------------------------------------------------
// Encoder
// Encoding alg for k/n<0.5: message is a power of two.
// data: k message symbols. codeword: output buffer of kFieldSize symbols;
// its first k entries end up holding a copy of the message.
static void encodeL(ffe_t* data, const unsigned k, ffe_t* codeword)
{
    const size_t messageBytes = sizeof(ffe_t) * k;

    // Inverse-transform the message once, in the front of the codeword buffer
    memcpy(codeword, data, messageBytes);
    IFLT(codeword, k, 0);

    // Each later k-sized chunk is the forward transform of that result,
    // using the chunk's offset as its skew index
    for (unsigned offset = k; offset < kFieldSize; offset += k)
    {
        ffe_t* chunk = codeword + offset;

        memcpy(chunk, codeword, messageBytes);
        FLT(chunk, k, offset, k);
    }

    // Put the untransformed message back at the front
    memcpy(codeword, data, messageBytes);
}
// Encoding alg for k/n>0.5: parity is a power of two.
// data: message array. parity: parity array (m symbols, output).
// mem: scratch buffer holding at least m symbols.
static void encodeH(const ffe_t* data, const unsigned m, const unsigned original_count, ffe_t* parity, ffe_t* mem)
{
    // Note: Assumes data is padded with zeroes out to the next multiple of m
    const size_t chunkBytes = m * sizeof(ffe_t);

    // The first chunk is transformed directly inside the parity buffer
    memcpy(parity, data, chunkBytes);
    IFLT(parity, m, m);

    // Every further chunk is transformed in scratch space and XORed in
    unsigned offset = m;
    while (offset < original_count)
    {
        memcpy(mem, data + offset, chunkBytes);
        IFLT(mem, m, m + offset);

        for (unsigned k = 0; k < m; ++k)
            parity[k] ^= mem[k];

        offset += m;
    }

    FLT(parity, m, 0, m);
}
//------------------------------------------------------------------------------
// Decoder
// Erasure decoder, operating in place on codeword[].
//   codeword:       symbols at indices [0, m + original_count); on return the
//                   entries flagged in erasure[] have been overwritten with
//                   the decoder's output (test() compares these to the data)
//   m:              number of leading parity positions
//   original_count: number of message positions following the parity
//   n:              transform size handed to IFLT/formal_derivative/FLT
//   erasure:        flags indexed 0..kFieldSize-1; every entry is read, so the
//                   array must hold kFieldSize elements
// Entries of codeword[] in [m + original_count, n) are cleared, so the buffer
// must hold at least n symbols.
static void decode(ffe_t* codeword, const unsigned m, const unsigned original_count, const unsigned n, const bool* erasure)
{
// Scratch table: kFieldSize entries on the stack
fwht_t log_walsh2[kFieldSize];
// Compute the evaluations of the error locator polynomial
for (unsigned i = 0; i < kFieldSize; ++i)
log_walsh2[i] = erasure[i] ? 1 : 0;
FWHT(log_walsh2, kGFBits);
// Pointwise product with the precomputed log_walsh[] table, reduced mod kFieldModulus
for (unsigned i = 0; i < kFieldSize; ++i)
log_walsh2[i] = ((unsigned)log_walsh2[i] * (unsigned)log_walsh[i]) % kFieldModulus;
FWHT(log_walsh2, kGFBits);
// k2 can be replaced with k
//const unsigned k2 = kFieldSize;
//const unsigned k2 = k; // cannot actually be replaced with k. maybe for encodeL() only?
// Zero the erased positions; scale the surviving ones by GFExp[log_walsh2[i]]
for (unsigned i = 0; i < m + original_count; ++i)
{
if (erasure[i])
{
codeword[i] = 0;
}
else
{
codeword[i] = mulE(codeword[i], log_walsh2[i]);
}
}
// Clear the padding between the used symbols and the transform size
for (unsigned i = m + original_count; i < n; ++i)
codeword[i] = 0;
IFLT(codeword, n, 0);
// Note: This is not needed to recover successfully...
#if 0
// formal derivative
// Note: Preserves zeroes on the right
for (unsigned i = 0; i < m + original_count; i += 2)
{
codeword[i] = mulE(codeword[i], kFieldModulus - B[i >> 1]);
codeword[i + 1] = mulE(codeword[i + 1], kFieldModulus - B[i >> 1]);
}
#endif
formal_derivative(codeword, n);
#if 0
// Note: Preserves zeroes on the right
for (unsigned i = 0; i < m + original_count; i += 2)
{
codeword[i] = mulE(codeword[i], B[i >> 1]);
codeword[i + 1] = mulE(codeword[i + 1], B[i >> 1]);
}
#endif
// Forward transform; only the first m + original_count outputs are requested
FLT(codeword, n, 0, m + original_count);
// Scale each erased position by GFExp[kFieldModulus - log_walsh2[i]]
for (unsigned i = 0; i < kFieldSize; ++i)
{
if (erasure[i])
{
codeword[i] = mulE(codeword[i], kFieldModulus - log_walsh2[i]);
}
}
}
#ifdef _MSC_VER
#include <intrin.h>
#endif
// Returns the index (0..63) of the most significant set bit of x.
// Precondition: x != 0
LEO_FORCE_INLINE unsigned LastNonzeroBit64(uint64_t x)
{
#ifdef _MSC_VER
    unsigned long index;
#ifdef _WIN64
    // Note: Ignoring result because x != 0
    _BitScanReverse64(&index, x);
    return static_cast<unsigned>(index);
#else
    // 32-bit build: the upper word must be scanned first. Any bit set there
    // outranks every bit of the lower word, so scanning the lower word first
    // returned the wrong index whenever both halves were non-zero.
    if (0 != _BitScanReverse(&index, static_cast<uint32_t>(x >> 32)))
        return static_cast<unsigned>(index) + 32;
    // Note: Ignoring result because x != 0
    _BitScanReverse(&index, static_cast<uint32_t>(x));
    return static_cast<unsigned>(index);
#endif
#else
    // Note: Ignoring return value of 0 because x != 0
    return 63 - static_cast<unsigned>(__builtin_clzll(x));
#endif
}
//------------------------------------------------------------------------------
// Test Application
void test(unsigned original_count, unsigned recovery_count, unsigned seed)
{
unsigned m = 2UL << LastNonzeroBit64(recovery_count - 1);
unsigned n = 2UL << LastNonzeroBit64(m + original_count - 1);
srand(seed);
//-----------Generating message----------
// Message array
ffe_t data[kFieldSize] = {0};
// Filled with random numbers
for (unsigned i = m; i < m + original_count; ++i)
data[i] = (ffe_t)rand();
//---------encoding----------
ffe_t codeword[kFieldSize] = {};
// First m codewords are for the parity data
encodeH(data + m, m, original_count, data, codeword);
//encodeL(data, k, codeword); // does not seem to work with any input? what else needs to change?
memcpy(codeword, data, sizeof(ffe_t) * kFieldSize);
//--------erasure simulation---------
// Array indicating erasures
bool erasure[kFieldSize] = {
false
};
// Tag the first "recovery_count" elements as erasures
for (unsigned i = m; i < m + recovery_count; ++i)
erasure[i] = true;
// permuting the erasure array
for (unsigned i = m + original_count - 1; i > 0; --i)
{
unsigned pos = rand() % (i + 1);
if (i != pos)
{
bool tmp = erasure[i];
erasure[i] = erasure[pos];
erasure[pos] = tmp;
}
}
//---------main processing----------
decode(codeword, m, original_count, n, erasure);
// Check the correctness of the result
for (unsigned i = 0; i < kFieldSize; ++i)
{
if (erasure[i])
{
if (data[i] != codeword[i])
{
printf("Decoding Error with seed = %d!\n", seed);
LEO_DEBUG_BREAK;
return;
}
}
}
printf(":D ");
}
//------------------------------------------------------------------------------
// Entrypoint
// Stress harness: builds the field tables once, then runs test() forever with
// a fresh seed on every iteration.
int main(int argc, char **argv)
{
    // Fill GFLog table and GFExp table
    InitField();

    // Compute factors used in erasure decoder
    InitFieldOperations();

    // Problem size depends on which field was compiled in
#ifdef LEO_SHORT_FIELD
    const unsigned input_count = 100;
    const unsigned recovery_count = 20;
#else // LEO_SHORT_FIELD
    const unsigned input_count = 10000;
    const unsigned recovery_count = 2000;
#endif // LEO_SHORT_FIELD

    // Start from the current time and advance the seed by one per round
    for (unsigned seed = (unsigned)time(NULL); ; ++seed)
    {
        test(input_count, recovery_count, seed);
    }

    return 0;
}

View File

@ -20,36 +20,35 @@
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{97FCA15F-EAF3-4F1A-AFF8-83E693DA9D45}</ProjectGuid>
<RootNamespace>Fecal</RootNamespace>
<RootNamespace>Leopard</RootNamespace>
<ProjectName>LeopardBenchmark</ProjectName>
<WindowsTargetPlatformVersion>10.0.14393.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v141</PlatformToolset>
<PlatformToolset>v140</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v141</PlatformToolset>
<PlatformToolset>v140</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v141</PlatformToolset>
<PlatformToolset>v140</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v141</PlatformToolset>
<PlatformToolset>v140</PlatformToolset>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">

View File

@ -0,0 +1,22 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- MSBuild project fragment that declares three named filters and assigns ..\benchmark.cpp to one of them. -->
<!-- NOTE(review): looks like a Visual Studio .vcxproj.filters file (Solution Explorer grouping only); confirm against the file name. -->
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<!-- Filter definitions: each filter has a name, a unique GUID and a semicolon-separated list of file extensions. -->
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<!-- File-to-filter assignment: the only compiled source listed here goes under "Source Files". -->
<ClCompile Include="..\benchmark.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

View File

@ -0,0 +1,181 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- Declares the four configuration/platform pairs of this project: Debug and Release, each for Win32 and x64. -->
<!-- The Condition attributes later in this file select on these same "Configuration|Platform" strings. -->
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<!-- Project-wide identity: the project GUID, root namespace (Leopard) and project name (LeopardExperiments). -->
<!-- ProjectName is what $(ProjectName) expands to in the OutDir/IntDir paths set further down in this file. -->
<PropertyGroup Label="Globals">
<ProjectGuid>{97FCA15F-EAF3-4F1A-AFF8-83E693DA9D65}</ProjectGuid>
<RootNamespace>Leopard</RootNamespace>
<ProjectName>LeopardExperiments</ProjectName>
</PropertyGroup>
<!-- Imports the default C++ property sheet, then sets the basic per-configuration properties. -->
<!-- All four configurations build an Application with the MultiByte character set and the v140 platform toolset. -->
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<!-- Debug configurations: debug libraries enabled. -->
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v140</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v140</PlatformToolset>
</PropertyGroup>
<!-- Release configurations: debug libraries disabled and whole-program optimization enabled. -->
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v140</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v140</PlatformToolset>
</PropertyGroup>
<!-- Imports the main C++ property sheet, followed by the (empty) extension settings group. -->
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<!-- Per-configuration property sheets: each group imports the per-user, per-platform props file, but only if that file exists. -->
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<!-- No user macros are defined for this project. -->
<PropertyGroup Label="UserMacros" />
<!-- Output locations. Every configuration uses the same pattern, so the paths differ only through the macros: -->
<!--   OutDir = Output/$(ProjectName)/$(Configuration)/$(Platform)/ -->
<!--   IntDir = Obj/$(ProjectName)/$(Configuration)/$(Platform)/ -->
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<OutDir>Output/$(ProjectName)/$(Configuration)/$(Platform)/</OutDir>
<IntDir>Obj/$(ProjectName)/$(Configuration)/$(Platform)/</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<OutDir>Output/$(ProjectName)/$(Configuration)/$(Platform)/</OutDir>
<IntDir>Obj/$(ProjectName)/$(Configuration)/$(Platform)/</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<OutDir>Output/$(ProjectName)/$(Configuration)/$(Platform)/</OutDir>
<IntDir>Obj/$(ProjectName)/$(Configuration)/$(Platform)/</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<OutDir>Output/$(ProjectName)/$(Configuration)/$(Platform)/</OutDir>
<IntDir>Obj/$(ProjectName)/$(Configuration)/$(Platform)/</IntDir>
</PropertyGroup>
<!-- Compiler and linker settings for Debug|Win32: optimization disabled, warning level 3, SDL checks on, -->
<!-- MultiThreadedDebug runtime library, _MBCS added to the inherited preprocessor definitions, debug information generated. -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<PreprocessorDefinitions>_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<!-- No additional library directories are set (the element contains only whitespace). -->
<AdditionalLibraryDirectories>
</AdditionalLibraryDirectories>
</Link>
<!-- The post-build command is likewise left blank. -->
<PostBuildEvent>
<Command>
</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<!-- Compiler and linker settings for Debug|x64: optimization disabled, warning level 3, SDL checks on, -->
<!-- MultiThreadedDebug runtime library, _MBCS added to the inherited preprocessor definitions, debug information generated. -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<PreprocessorDefinitions>_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<!-- No additional library directories are set (the element contains only whitespace). -->
<AdditionalLibraryDirectories>
</AdditionalLibraryDirectories>
</Link>
<!-- The post-build command is likewise left blank. -->
<PostBuildEvent>
<Command>
</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<!-- Compiler and linker settings for Release|Win32: optimized for speed (MaxSpeed, favor Speed, inline any suitable function, -->
<!-- intrinsics and function-level linking on) with the static MultiThreaded runtime library. -->
<!-- Frame pointers are kept, buffer security checks stay on, and debug information is still generated at link time. -->
<!-- NOTE(review): only _MBCS is added explicitly to PreprocessorDefinitions; NDEBUG is not. Confirm whether the inherited definitions supply it or whether asserts are meant to stay active in Release. -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<OmitFramePointers>false</OmitFramePointers>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<BufferSecurityCheck>true</BufferSecurityCheck>
<PreprocessorDefinitions>_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<!-- COMDAT folding and reference optimization are both enabled for the linker. -->
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalLibraryDirectories>
</AdditionalLibraryDirectories>
</Link>
<!-- The post-build command is left blank. -->
<PostBuildEvent>
<Command>
</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<!-- Compiler and linker settings for Release|x64: optimized for speed (MaxSpeed, favor Speed, inline any suitable function, -->
<!-- intrinsics and function-level linking on) with the static MultiThreaded runtime library. -->
<!-- Frame pointers are kept, buffer security checks stay on, and debug information is still generated at link time. -->
<!-- NOTE(review): only _MBCS is added explicitly to PreprocessorDefinitions; NDEBUG is not. Confirm whether the inherited definitions supply it or whether asserts are meant to stay active in Release. -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<OmitFramePointers>false</OmitFramePointers>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<BufferSecurityCheck>true</BufferSecurityCheck>
<PreprocessorDefinitions>_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<!-- COMDAT folding and reference optimization are both enabled for the linker. -->
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalLibraryDirectories>
</AdditionalLibraryDirectories>
</Link>
<!-- The post-build command is left blank. -->
<PostBuildEvent>
<Command>
</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<!-- Source files compiled by this project: only ..\experiments.cpp (path relative to this project file). -->
<ItemGroup>
<ClCompile Include="..\experiments.cpp" />
</ItemGroup>
<!-- Empty item group; no other items are listed. -->
<ItemGroup>
</ItemGroup>
<!-- Imports the C++ targets file, followed by the (empty) extension targets group. -->
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>