Doc fixes

2017-05-28 13:50:32 -07:00 · 2017-05-28 13:50:32 -07:00 · d31d7c85bf
parent 179d8d5284
commit d31d7c85bf
8 changed files with 86 additions and 54 deletions
--- a/LeopardFF16.cpp
+++ b/LeopardFF16.cpp
@ -38,12 +38,6 @@ namespace leopard { namespace ff16 {
 //------------------------------------------------------------------------------
 // Datatypes and Constants

-// Modulus for field operations
-static const ffe_t kModulus = 65535;
-
-// LFSR Polynomial that generates the field elements
-static const unsigned kPolynomial = 0x1002D;
-
 // Basis used for generating logarithm tables
 static const ffe_t kCantorBasis[kBits] = {
    0x0001, 0xACCA, 0x3C0E, 0x163E,
@ -634,7 +628,7 @@ void Encode(
 {
    // work <- data

-    // FIXME: Unroll first loop to eliminate this
+    // TBD: Unroll first loop to eliminate this
    for (unsigned i = 0; i < m; ++i)
        memcpy(work[i], data[i], buffer_bytes);

@ -665,7 +659,7 @@ void Encode(

        void** temp = work + m;

-        // FIXME: Unroll first loop to eliminate this
+        // TBD: Unroll first loop to eliminate this
        for (unsigned j = 0; j < m; ++j)
            memcpy(temp[j], data[j], buffer_bytes);

@ -692,7 +686,7 @@ void Encode(

        // work <- work XOR temp

-        // FIXME: Unroll last loop to eliminate this
+        // TBD: Unroll last loop to eliminate this
        for (unsigned j = 0; j < m; ++j)
            xor_mem(work[j], temp[j], buffer_bytes);
    }
@ -737,7 +731,7 @@ void Encode(

        // work <- work XOR temp

-        // FIXME: Unroll last loop to eliminate this
+        // TBD: Unroll last loop to eliminate this
        for (unsigned j = 0; j < m; ++j)
            xor_mem(work[j], temp[j], buffer_bytes);
    }
--- a/LeopardFF16.h
+++ b/LeopardFF16.h
@ -54,6 +54,12 @@ static const unsigned kBits = 16;
 // Finite field order: Number of elements in the field
 static const unsigned kOrder = 65536;

+// Modulus for field operations
+static const ffe_t kModulus = 65535;
+
+// LFSR Polynomial that generates the field elements
+static const unsigned kPolynomial = 0x1002D;
+

 //------------------------------------------------------------------------------
 // Fast Walsh-Hadamard Transform (FWHT) (mod kModulus)
--- a/LeopardFF8.cpp
+++ b/LeopardFF8.cpp
@ -38,18 +38,12 @@ namespace leopard { namespace ff8 {
 //------------------------------------------------------------------------------
 // Datatypes and Constants

-// Modulus for field operations
-static const ffe_t kModulus = 255;
-
-// LFSR Polynomial that generates the field elements
-static const unsigned kPolynomial = 0x11D;
-
 // Basis used for generating logarithm tables
 static const ffe_t kCantorBasis[kBits] = {
    1, 214, 152, 146, 86, 200, 88, 230
 };

-// Using the Cantor basis here enables us to avoid a lot of extra calculations
+// Using the Cantor basis {2} here enables us to avoid a lot of extra calculations
 // when applying the formal derivative in decoding.


@ -59,7 +53,7 @@ static const ffe_t kCantorBasis[kBits] = {
 // z = x + y (mod kModulus)
 static inline ffe_t AddMod(const ffe_t a, const ffe_t b)
 {
-    const unsigned sum = (unsigned)a + b;
+    const unsigned sum = static_cast<unsigned>(a) + b;

    // Partial reduction step, allowing for kModulus to be returned
    return static_cast<ffe_t>(sum + (sum >> kBits));
@ -68,7 +62,7 @@ static inline ffe_t AddMod(const ffe_t a, const ffe_t b)
 // z = x - y (mod kModulus)
 static inline ffe_t SubMod(const ffe_t a, const ffe_t b)
 {
-    const unsigned dif = (unsigned)a - b;
+    const unsigned dif = static_cast<unsigned>(a) - b;

    // Partial reduction step, allowing for kModulus to be returned
    return static_cast<ffe_t>(dif + (dif >> kBits));
@ -123,7 +117,7 @@ static LEO_FORCE_INLINE void FWHT_4(ffe_t* data, unsigned s)
    data[y] = t3;
 }

-static inline void FWHT_8(ffe_t* data)
+static void FWHT_8(ffe_t* data)
 {
    ffe_t t0 = data[0];
    ffe_t t1 = data[1];
@ -156,9 +150,9 @@ static inline void FWHT_8(ffe_t* data)
 }

 // Decimation in time (DIT) version
-static void FWHT(ffe_t* data, const unsigned ldn)
+static void FWHT(ffe_t* data, const unsigned bits)
 {
-    const unsigned n = (1UL << ldn);
+    const unsigned n = (1UL << bits);

    if (n <= 2)
    {
@ -167,16 +161,16 @@ static void FWHT(ffe_t* data, const unsigned ldn)
        return;
    }

-    for (unsigned ldm = ldn; ldm > 3; ldm -= 2)
+    for (unsigned i = bits; i > 3; i -= 2)
    {
-        unsigned m = (1UL << ldm);
+        unsigned m = (1UL << i);
        unsigned m4 = (m >> 2);
        for (unsigned r = 0; r < n; r += m)
            for (unsigned j = 0; j < m4; j++)
                FWHT_4(data + j + r, m4);
    }

-    if (ldn & 1)
+    if (bits & 1)
    {
        for (unsigned i0 = 0; i0 < n; i0 += 8)
            FWHT_8(data + i0);
@ -231,7 +225,7 @@ static void InitializeLogarithmTables()
    }
    ExpLUT[0] = kModulus;

-    // Conversion to Cantor basis:
+    // Conversion to Cantor basis {2}:

    LogLUT[0] = 0;
    for (unsigned i = 0; i < kBits; ++i)
@ -246,9 +240,12 @@ static void InitializeLogarithmTables()
    for (unsigned i = 0; i < kOrder; ++i)
        LogLUT[i] = ExpLUT[LogLUT[i]];

+    // Generate Exp table from Log table:
+
    for (unsigned i = 0; i < kOrder; ++i)
        ExpLUT[LogLUT[i]] = i;

+    // Note: Handles modulus wrap around with LUT
    ExpLUT[kModulus] = ExpLUT[0];
 }

@ -271,6 +268,14 @@ struct {
 // Returns a * Log(b)
 static ffe_t MultiplyLog(ffe_t a, ffe_t log_b)
 {
+    /*
+        Note that this operation is not a normal multiplication in a finite
+        field because the right operand is already a logarithm.  This is done
+        because it moves K table lookups from the Decode() method into the
+        initialization step that is less performance critical.  The LogWalsh[]
+        table below contains precalculated logarithms so it is easier to do
+        all the other multiplies in that form as well.
+    */
    if (a == 0)
        return 0;
    return ExpLUT[AddMod(LogLUT[a], log_b)];
@ -737,6 +742,8 @@ static void FFTInitialize()
 {
    ffe_t temp[kBits - 1];

+    // Generate FFT skew vector {1}:
+
    for (unsigned i = 1; i < kBits; ++i)
        temp[i - 1] = static_cast<ffe_t>(1UL << i);

@ -779,9 +786,9 @@ void VectorFFTButterfly(
    unsigned count,
    void** x,
    void** y,
-    const ffe_t skew)
+    const ffe_t log_m)
 {
-    if (skew == kModulus)
+    if (log_m == kModulus)
    {
        VectorXOR(bytes, count, y, x);
        return;
@ -795,14 +802,14 @@ void VectorFFTButterfly(
            x[1], y[1],
            x[2], y[2],
            x[3], y[3],
-            skew, bytes);
+            log_m, bytes);
        x += 4, y += 4;
        count -= 4;
    }
 #endif // LEO_USE_VECTOR4_OPT

    for (unsigned i = 0; i < count; ++i)
-        fft_butterfly(x[i], y[i], skew, bytes);
+        fft_butterfly(x[i], y[i], log_m, bytes);
 }

 void VectorIFFTButterfly(
@ -810,9 +817,9 @@ void VectorIFFTButterfly(
    unsigned count,
    void** x,
    void** y,
-    const ffe_t skew)
+    const ffe_t log_m)
 {
-    if (skew == kModulus)
+    if (log_m == kModulus)
    {
        VectorXOR(bytes, count, y, x);
        return;
@ -826,14 +833,14 @@ void VectorIFFTButterfly(
            x[1], y[1],
            x[2], y[2],
            x[3], y[3],
-            skew, bytes);
+            log_m, bytes);
        x += 4, y += 4;
        count -= 4;
    }
 #endif // LEO_USE_VECTOR4_OPT

    for (unsigned i = 0; i < count; ++i)
-        ifft_butterfly(x[i], y[i], skew, bytes);
+        ifft_butterfly(x[i], y[i], log_m, bytes);
 }


@ -850,7 +857,7 @@ void Encode(
 {
    // work <- data

-    // FIXME: Unroll first loop to eliminate this
+    // TBD: Unroll first loop to eliminate this
    unsigned first_end = m;
    if (original_count < m)
    {
@ -893,7 +900,7 @@ void Encode(
        data += m;
        void** temp = work + m;

-        // FIXME: Unroll first loop to eliminate this
+        // TBD: Unroll first loop to eliminate this
        for (unsigned j = 0; j < m; ++j)
            memcpy(temp[j], data[j], buffer_bytes);

@ -916,7 +923,7 @@ void Encode(

        // work <- work XOR temp

-        // FIXME: Unroll last loop to eliminate this
+        // TBD: Unroll last loop to eliminate this
        VectorXOR(
            buffer_bytes,
            m,
@ -965,7 +972,7 @@ void Encode(

        // work <- work XOR temp

-        // FIXME: Unroll last loop to eliminate this
+        // TBD: Unroll last loop to eliminate this
        VectorXOR(
            buffer_bytes,
            m,
--- a/LeopardFF8.h
+++ b/LeopardFF8.h
@ -54,6 +54,12 @@ static const unsigned kBits = 8;
 // Finite field order: Number of elements in the field
 static const unsigned kOrder = 256;

+// Modulus for field operations
+static const ffe_t kModulus = 255;
+
+// LFSR Polynomial that generates the field elements
+static const unsigned kPolynomial = 0x11D;
+

 //------------------------------------------------------------------------------
 // Fast Walsh-Hadamard Transform (FWHT) (mod kModulus)
@ -78,8 +84,9 @@ void mul_mem(
 // FFT Operations

 /*
-    if (log_m != kModulus)
-        x[] ^= exp(log(y[]) + log_m)
+    Precondition: log_m != kModulus
+
+    x[] ^= exp(log(y[]) + log_m)
    y[] ^= x[]
 */
 void fft_butterfly(
@ -103,9 +110,10 @@ void fft_butterfly4(
 // IFFT Operations

 /*
+    Precondition: log_m != kModulus
+
    y[] ^= x[]
-    if (log_m != kModulus)
-        x[] ^= exp(log(y[]) + log_m)
+    x[] ^= exp(log(y[]) + log_m)
 */
 void ifft_butterfly(
    void * LEO_RESTRICT x, void * LEO_RESTRICT y,
@ -127,19 +135,29 @@ void ifft_butterfly4(
 //------------------------------------------------------------------------------
 // FFT

+/*
+    if (log_m != kModulus)
+        x[] ^= exp(log(y[]) + log_m)
+    y[] ^= x[]
+*/
 void VectorFFTButterfly(
    const uint64_t bytes,
    unsigned count,
    void** x,
    void** y,
-    const ffe_t skew);
+    const ffe_t log_m);

+/*
+    y[] ^= x[]
+    if (log_m != kModulus)
+        x[] ^= exp(log(y[]) + log_m)
+*/
 void VectorIFFTButterfly(
    const uint64_t bytes,
    unsigned count,
    void** x,
    void** y,
-    const ffe_t skew);
+    const ffe_t log_m);


 //------------------------------------------------------------------------------
--- a/leopard.cpp
+++ b/leopard.cpp
@ -93,6 +93,9 @@ LEO_EXPORT LeopardResult leo_encode(
    if (!original_data || !work_data)
        return Leopard_InvalidInput;

+    if (!m_Initialized)
+        return Leopard_CallInitialize;
+
    const unsigned m = leopard::NextPow2(recovery_count);
    const unsigned n = leopard::NextPow2(m + original_count);

@ -164,6 +167,9 @@ LEO_EXPORT LeopardResult leo_decode(
    if (!original_data || !recovery_data || !work_data)
        return Leopard_InvalidInput;

+    if (!m_Initialized)
+        return Leopard_CallInitialize;
+
    const unsigned m = leopard::NextPow2(recovery_count);
    const unsigned n = leopard::NextPow2(m + original_count);

--- a/leopard.h
+++ b/leopard.h
@ -32,10 +32,16 @@
 /*
    Leopard-RS: Reed-Solomon Error Correction Coding for Extremely Large Data

-    S.-J. Lin, T. Y. Al-Naffouri, Y. S. Han, and W.-H. Chung,
+
+    References:
+
+    {1} S.-J. Lin, T. Y. Al-Naffouri, Y. S. Han, and W.-H. Chung,
    "Novel Polynomial Basis with Fast Fourier Transform and Its Application to Reed-Solomon Erasure Codes"
    IEEE Trans. on Information Theory, pp. 6284-6299, November, 2016.
    http://ct.ee.ntust.edu.tw/it2016-2.pdf
+
+    {2} D. G. Cantor, "On arithmetical algorithms over finite fields",
+    Journal of Combinatorial Theory, Series A, vol. 50, no. 2, pp. 285-300, 1989.
 */

 // Library version
@ -99,6 +105,7 @@ typedef enum LeopardResultT
    Leopard_InvalidCounts     = -3, // Invalid counts provided
    Leopard_InvalidInput      = -4, // A function parameter was invalid
    Leopard_Platform          = -5, // Platform is unsupported
+    Leopard_CallInitialize    = -6, // Call leo_init() first
 } LeopardResult;

 // Flags
--- a/proj/Leopard.vcxproj
+++ b/proj/Leopard.vcxproj
@ -159,13 +159,13 @@
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <ClCompile>
      <WarningLevel>Level3</WarningLevel>
-      <Optimization>MaxSpeed</Optimization>
+      <Optimization>Full</Optimization>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <SDLCheck>true</SDLCheck>
      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
-      <FavorSizeOrSpeed>Size</FavorSizeOrSpeed>
-      <OmitFramePointers>false</OmitFramePointers>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
      <BufferSecurityCheck>true</BufferSecurityCheck>
      <PreprocessorDefinitions>_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
--- a/tests/experiments.cpp
+++ b/tests/experiments.cpp
@ -105,12 +105,6 @@ ffe_t kGFBasis[kGFBits] = {
 };
 #endif

-/*
-    Cantor Basis introduced by:
-    D. G. Cantor, "On arithmetical algorithms over finite fields",
-    Journal of Combinatorial Theory, Series A, vol. 50, no. 2, pp. 285-300, 1989.
-*/
-
 static const unsigned kFieldSize = (unsigned)1 << kGFBits; //Field size
 static const unsigned kModulus = kFieldSize - 1;