diff --git a/LeopardFF16.cpp b/LeopardFF16.cpp index 7cfb9b4..65dbfc0 100644 --- a/LeopardFF16.cpp +++ b/LeopardFF16.cpp @@ -38,12 +38,6 @@ namespace leopard { namespace ff16 { //------------------------------------------------------------------------------ // Datatypes and Constants -// Modulus for field operations -static const ffe_t kModulus = 65535; - -// LFSR Polynomial that generates the field elements -static const unsigned kPolynomial = 0x1002D; - // Basis used for generating logarithm tables static const ffe_t kCantorBasis[kBits] = { 0x0001, 0xACCA, 0x3C0E, 0x163E, @@ -634,7 +628,7 @@ void Encode( { // work <- data - // FIXME: Unroll first loop to eliminate this + // TBD: Unroll first loop to eliminate this for (unsigned i = 0; i < m; ++i) memcpy(work[i], data[i], buffer_bytes); @@ -665,7 +659,7 @@ void Encode( void** temp = work + m; - // FIXME: Unroll first loop to eliminate this + // TBD: Unroll first loop to eliminate this for (unsigned j = 0; j < m; ++j) memcpy(temp[j], data[j], buffer_bytes); @@ -692,7 +686,7 @@ void Encode( // work <- work XOR temp - // FIXME: Unroll last loop to eliminate this + // TBD: Unroll last loop to eliminate this for (unsigned j = 0; j < m; ++j) xor_mem(work[j], temp[j], buffer_bytes); } @@ -737,7 +731,7 @@ void Encode( // work <- work XOR temp - // FIXME: Unroll last loop to eliminate this + // TBD: Unroll last loop to eliminate this for (unsigned j = 0; j < m; ++j) xor_mem(work[j], temp[j], buffer_bytes); } diff --git a/LeopardFF16.h b/LeopardFF16.h index b2dc3f5..5305dd3 100644 --- a/LeopardFF16.h +++ b/LeopardFF16.h @@ -54,6 +54,12 @@ static const unsigned kBits = 16; // Finite field order: Number of elements in the field static const unsigned kOrder = 65536; +// Modulus for field operations +static const ffe_t kModulus = 65535; + +// LFSR Polynomial that generates the field elements +static const unsigned kPolynomial = 0x1002D; + //------------------------------------------------------------------------------ // Fast Walsh-Hadamard Transform (FWHT) (mod kModulus) diff --git a/LeopardFF8.cpp b/LeopardFF8.cpp index 15c82ea..12338d3 100644 --- a/LeopardFF8.cpp +++ b/LeopardFF8.cpp @@ -38,18 +38,12 @@ namespace leopard { namespace ff8 { //------------------------------------------------------------------------------ // Datatypes and Constants -// Modulus for field operations -static const ffe_t kModulus = 255; - -// LFSR Polynomial that generates the field elements -static const unsigned kPolynomial = 0x11D; - // Basis used for generating logarithm tables static const ffe_t kCantorBasis[kBits] = { 1, 214, 152, 146, 86, 200, 88, 230 }; -// Using the Cantor basis here enables us to avoid a lot of extra calculations +// Using the Cantor basis {2} here enables us to avoid a lot of extra calculations // when applying the formal derivative in decoding. @@ -59,7 +53,7 @@ static const ffe_t kCantorBasis[kBits] = { // z = x + y (mod kModulus) static inline ffe_t AddMod(const ffe_t a, const ffe_t b) { - const unsigned sum = (unsigned)a + b; + const unsigned sum = static_cast(a) + b; // Partial reduction step, allowing for kModulus to be returned return static_cast(sum + (sum >> kBits)); @@ -68,7 +62,7 @@ static inline ffe_t AddMod(const ffe_t a, const ffe_t b) // z = x - y (mod kModulus) static inline ffe_t SubMod(const ffe_t a, const ffe_t b) { - const unsigned dif = (unsigned)a - b; + const unsigned dif = static_cast(a) - b; // Partial reduction step, allowing for kModulus to be returned return static_cast(dif + (dif >> kBits)); @@ -123,7 +117,7 @@ static LEO_FORCE_INLINE void FWHT_4(ffe_t* data, unsigned s) data[y] = t3; } -static inline void FWHT_8(ffe_t* data) +static void FWHT_8(ffe_t* data) { ffe_t t0 = data[0]; ffe_t t1 = data[1]; @@ -156,9 +150,9 @@ static inline void FWHT_8(ffe_t* data) } // Decimation in time (DIT) version -static void FWHT(ffe_t* data, const unsigned ldn) +static void FWHT(ffe_t* data, const unsigned bits) { - const unsigned n = (1UL << ldn); + const unsigned n = (1UL << bits); if (n <= 2) { @@ -167,16 +161,16 @@ static void FWHT(ffe_t* data, const unsigned ldn) return; } - for (unsigned ldm = ldn; ldm > 3; ldm -= 2) + for (unsigned i = bits; i > 3; i -= 2) { - unsigned m = (1UL << ldm); + unsigned m = (1UL << i); unsigned m4 = (m >> 2); for (unsigned r = 0; r < n; r += m) for (unsigned j = 0; j < m4; j++) FWHT_4(data + j + r, m4); } - if (ldn & 1) + if (bits & 1) { for (unsigned i0 = 0; i0 < n; i0 += 8) FWHT_8(data + i0); @@ -231,7 +225,7 @@ static void InitializeLogarithmTables() } ExpLUT[0] = kModulus; - // Conversion to Cantor basis: + // Conversion to Cantor basis {2}: LogLUT[0] = 0; for (unsigned i = 0; i < kBits; ++i) @@ -246,9 +240,12 @@ static void InitializeLogarithmTables() for (unsigned i = 0; i < kOrder; ++i) LogLUT[i] = ExpLUT[LogLUT[i]]; + // Generate Exp table from Log table: + for (unsigned i = 0; i < kOrder; ++i) ExpLUT[LogLUT[i]] = i; + // Note: Handles modulus wrap around with LUT ExpLUT[kModulus] = ExpLUT[0]; } @@ -271,6 +268,14 @@ struct { // Returns a * Log(b) static ffe_t MultiplyLog(ffe_t a, ffe_t log_b) { + /* + Note that this operation is not a normal multiplication in a finite + field because the right operand is already a logarithm. This is done + because it moves K table lookups from the Decode() method into the + initialization step that is less performance critical. The LogWalsh[] + table below contains precalculated logarithms so it is easier to do + all the other multiplies in that form as well. + */ if (a == 0) return 0; return ExpLUT[AddMod(LogLUT[a], log_b)]; @@ -737,6 +742,8 @@ static void FFTInitialize() { ffe_t temp[kBits - 1]; + // Generate FFT skew vector {1}: + for (unsigned i = 1; i < kBits; ++i) temp[i - 1] = static_cast(1UL << i); @@ -779,9 +786,9 @@ void VectorFFTButterfly( unsigned count, void** x, void** y, - const ffe_t skew) + const ffe_t log_m) { - if (skew == kModulus) + if (log_m == kModulus) { VectorXOR(bytes, count, y, x); return; @@ -795,14 +802,14 @@ void VectorFFTButterfly( x[1], y[1], x[2], y[2], x[3], y[3], - skew, bytes); + log_m, bytes); x += 4, y += 4; count -= 4; } #endif // LEO_USE_VECTOR4_OPT for (unsigned i = 0; i < count; ++i) - fft_butterfly(x[i], y[i], skew, bytes); + fft_butterfly(x[i], y[i], log_m, bytes); } void VectorIFFTButterfly( @@ -810,9 +817,9 @@ void VectorIFFTButterfly( unsigned count, void** x, void** y, - const ffe_t skew) + const ffe_t log_m) { - if (skew == kModulus) + if (log_m == kModulus) { VectorXOR(bytes, count, y, x); return; @@ -826,14 +833,14 @@ void VectorIFFTButterfly( x[1], y[1], x[2], y[2], x[3], y[3], - skew, bytes); + log_m, bytes); x += 4, y += 4; count -= 4; } #endif // LEO_USE_VECTOR4_OPT for (unsigned i = 0; i < count; ++i) - ifft_butterfly(x[i], y[i], skew, bytes); + ifft_butterfly(x[i], y[i], log_m, bytes); } @@ -850,7 +857,7 @@ void Encode( { // work <- data - // FIXME: Unroll first loop to eliminate this + // TBD: Unroll first loop to eliminate this unsigned first_end = m; if (original_count < m) { @@ -893,7 +900,7 @@ void Encode( data += m; void** temp = work + m; - // FIXME: Unroll first loop to eliminate this + // TBD: Unroll first loop to eliminate this for (unsigned j = 0; j < m; ++j) memcpy(temp[j], data[j], buffer_bytes); @@ -916,7 +923,7 @@ void Encode( // work <- work XOR temp - // FIXME: Unroll last loop to eliminate this + // TBD: Unroll last loop to eliminate this VectorXOR( buffer_bytes, m, @@ -965,7 +972,7 @@ void Encode( // work <- work XOR temp - // FIXME: Unroll last loop to eliminate this + // TBD: Unroll last loop to eliminate this VectorXOR( buffer_bytes, m, diff --git a/LeopardFF8.h b/LeopardFF8.h index 4bc7115..98e26b8 100644 --- a/LeopardFF8.h +++ b/LeopardFF8.h @@ -54,6 +54,12 @@ static const unsigned kBits = 8; // Finite field order: Number of elements in the field static const unsigned kOrder = 256; +// Modulus for field operations +static const ffe_t kModulus = 255; + +// LFSR Polynomial that generates the field elements +static const unsigned kPolynomial = 0x11D; + //------------------------------------------------------------------------------ // Fast Walsh-Hadamard Transform (FWHT) (mod kModulus) @@ -78,8 +84,9 @@ void mul_mem( // FFT Operations /* - if (log_m != kModulus) - x[] ^= exp(log(y[]) + log_m) + Precondition: log_m != kModulus + + x[] ^= exp(log(y[]) + log_m) y[] ^= x[] */ void fft_butterfly( @@ -103,9 +110,10 @@ void fft_butterfly4( // IFFT Operations /* + Precondition: log_m != kModulus + y[] ^= x[] - if (log_m != kModulus) - x[] ^= exp(log(y[]) + log_m) + x[] ^= exp(log(y[]) + log_m) */ void ifft_butterfly( void * LEO_RESTRICT x, void * LEO_RESTRICT y, @@ -127,19 +135,29 @@ void ifft_butterfly4( //------------------------------------------------------------------------------ // FFT +/* + if (log_m != kModulus) + x[] ^= exp(log(y[]) + log_m) + y[] ^= x[] +*/ void VectorFFTButterfly( const uint64_t bytes, unsigned count, void** x, void** y, - const ffe_t skew); + const ffe_t log_m); +/* + y[] ^= x[] + if (log_m != kModulus) + x[] ^= exp(log(y[]) + log_m) +*/ void VectorIFFTButterfly( const uint64_t bytes, unsigned count, void** x, void** y, - const ffe_t skew); + const ffe_t log_m); //------------------------------------------------------------------------------ diff --git a/leopard.cpp b/leopard.cpp index 4427bf4..2ff4abc 100644 --- a/leopard.cpp +++ b/leopard.cpp @@ -93,6 +93,9 @@ LEO_EXPORT LeopardResult leo_encode( if (!original_data || !work_data) return Leopard_InvalidInput; + if (!m_Initialized) + return Leopard_CallInitialize; + const unsigned m = leopard::NextPow2(recovery_count); const unsigned n = leopard::NextPow2(m + original_count); @@ -164,6 +167,9 @@ LEO_EXPORT LeopardResult leo_decode( if (!original_data || !recovery_data || !work_data) return Leopard_InvalidInput; + if (!m_Initialized) + return Leopard_CallInitialize; + const unsigned m = leopard::NextPow2(recovery_count); const unsigned n = leopard::NextPow2(m + original_count); diff --git a/leopard.h b/leopard.h index bcfdade..83cdd74 100644 --- a/leopard.h +++ b/leopard.h @@ -32,10 +32,16 @@ /* Leopard-RS: Reed-Solomon Error Correction Coding for Extremely Large Data - S.-J. Lin, T. Y. Al-Naffouri, Y. S. Han, and W.-H. Chung, + + References: + + {1} S.-J. Lin, T. Y. Al-Naffouri, Y. S. Han, and W.-H. Chung, "Novel Polynomial Basis with Fast Fourier Transform and Its Application to Reed-Solomon Erasure Codes" IEEE Trans. on Information Theory, pp. 6284-6299, November, 2016. http://ct.ee.ntust.edu.tw/it2016-2.pdf + + {2} D. G. Cantor, "On arithmetical algorithms over finite fields", + Journal of Combinatorial Theory, Series A, vol. 50, no. 2, pp. 285-300, 1989. */ // Library version @@ -99,6 +105,7 @@ typedef enum LeopardResultT Leopard_InvalidCounts = -3, // Invalid counts provided Leopard_InvalidInput = -4, // A function parameter was invalid Leopard_Platform = -5, // Platform is unsupported + Leopard_CallInitialize = -6, // Call leo_init() first } LeopardResult; // Flags diff --git a/proj/Leopard.vcxproj b/proj/Leopard.vcxproj index 4759b19..e065916 100644 --- a/proj/Leopard.vcxproj +++ b/proj/Leopard.vcxproj @@ -159,13 +159,13 @@ Level3 - MaxSpeed + Full true true true OnlyExplicitInline - Size - false + Speed + true MultiThreaded true _MBCS;%(PreprocessorDefinitions) diff --git a/tests/experiments.cpp b/tests/experiments.cpp index e085bea..c3300fb 100644 --- a/tests/experiments.cpp +++ b/tests/experiments.cpp @@ -105,12 +105,6 @@ ffe_t kGFBasis[kGFBits] = { }; #endif -/* - Cantor Basis introduced by: - D. G. Cantor, "On arithmetical algorithms over finite fields", - Journal of Combinatorial Theory, Series A, vol. 50, no. 2, pp. 285-300, 1989. -*/ - static const unsigned kFieldSize = (unsigned)1 << kGFBits; //Field size static const unsigned kModulus = kFieldSize - 1;