Fix tabs

2025-02-19 17:34:19 +00:00 · 2017-05-30 02:23:33 -07:00 · 2017-05-30 02:23:33 -07:00 · be5c625d3d
commit be5c625d3d
parent c53b075eda
3 changed files with 28 additions and 32 deletions
--- a/LeopardCommon.h
+++ b/LeopardCommon.h
@ -28,6 +28,17 @@

 #pragma once

+/*
+    TODO:
+
+    + Multithreading
+    + Look into 12-bit fields as a performance optimization
+    + Look into shortening the FWHT() since it takes a lot of decoder runtime
+    + Unroll first/final butterflies to avoid extra copies/xors in encoder
+    + Look into getting EncodeL working so we can support smaller data
+    + Implement the faster decoder algorithm from {3} (see references in leopard.h)
+*/
+
 /*
    FFT Data Layout:

--- a/LeopardFF16.cpp
+++ b/LeopardFF16.cpp
@ -445,7 +445,7 @@ void mul_mem(
        do
        {
 #define LEO_MUL_256(x_ptr, y_ptr) { \
-	        const LEO_M256 A_lo = _mm256_loadu_si256(y_ptr); \
+            const LEO_M256 A_lo = _mm256_loadu_si256(y_ptr); \
            const LEO_M256 A_hi = _mm256_loadu_si256(y_ptr + 1); \
            LEO_M256 data_0 = _mm256_and_si256(A_lo, clr_mask); \
            LEO_M256 data_1 = _mm256_srli_epi64(A_lo, 4); \
@ -494,7 +494,7 @@ void mul_mem(
    do
    {
 #define LEO_MUL_128(x_ptr, y_ptr) { \
-	        const LEO_M128 A_lo = _mm_loadu_si128(y_ptr); \
+            const LEO_M128 A_lo = _mm_loadu_si128(y_ptr); \
            const LEO_M128 A_hi = _mm_loadu_si128(y_ptr + 2); \
            LEO_M128 data_0 = _mm_and_si128(A_lo, clr_mask); \
            LEO_M128 data_1 = _mm_srli_epi64(A_lo, 4); \
@ -542,7 +542,7 @@ void fft_butterfly(
        do
        {
 #define LEO_FFTB_256(x_ptr, y_ptr) { \
-	        LEO_M256 y_lo = _mm256_loadu_si256(y_ptr); \
+            LEO_M256 y_lo = _mm256_loadu_si256(y_ptr); \
            LEO_M256 data_0 = _mm256_and_si256(y_lo, clr_mask); \
            LEO_M256 data_1 = _mm256_srli_epi64(y_lo, 4); \
            data_1 = _mm256_and_si256(data_1, clr_mask); \
@ -558,7 +558,7 @@ void fft_butterfly(
            prod_lo = _mm256_xor_si256(prod_lo, _mm256_shuffle_epi8(T3_lo, data_1)); \
            prod_hi = _mm256_xor_si256(prod_hi, _mm256_shuffle_epi8(T2_hi, data_0)); \
            prod_hi = _mm256_xor_si256(prod_hi, _mm256_shuffle_epi8(T3_hi, data_1)); \
-	        LEO_M256 x_lo = _mm256_loadu_si256(x_ptr); \
+            LEO_M256 x_lo = _mm256_loadu_si256(x_ptr); \
            LEO_M256 x_hi = _mm256_loadu_si256(x_ptr + 1); \
            x_lo = _mm256_xor_si256(prod_lo, x_lo); \
            _mm256_storeu_si256(x_ptr, x_lo); \
@ -589,7 +589,7 @@ void fft_butterfly(
    do
    {
 #define LEO_FFTB_128(x_ptr, y_ptr) { \
-	        LEO_M128 y_lo = _mm_loadu_si128(y_ptr); \
+            LEO_M128 y_lo = _mm_loadu_si128(y_ptr); \
            LEO_M128 data_0 = _mm_and_si128(y_lo, clr_mask); \
            LEO_M128 data_1 = _mm_srli_epi64(y_lo, 4); \
            data_1 = _mm_and_si128(data_1, clr_mask); \
@ -605,7 +605,7 @@ void fft_butterfly(
            prod_lo = _mm_xor_si128(prod_lo, _mm_shuffle_epi8(T3_lo, data_1)); \
            prod_hi = _mm_xor_si128(prod_hi, _mm_shuffle_epi8(T2_hi, data_0)); \
            prod_hi = _mm_xor_si128(prod_hi, _mm_shuffle_epi8(T3_hi, data_1)); \
-	        LEO_M128 x_lo = _mm_loadu_si128(x_ptr); \
+            LEO_M128 x_lo = _mm_loadu_si128(x_ptr); \
            LEO_M128 x_hi = _mm_loadu_si128(x_ptr + 2); \
            x_lo = _mm_xor_si128(prod_lo, x_lo); \
            _mm_storeu_si128(x_ptr, x_lo); \
@ -728,8 +728,8 @@ void ifft_butterfly(
        do
        {
 #define LEO_IFFTB_256(x_ptr, y_ptr) { \
-	        LEO_M256 x_lo = _mm256_loadu_si256(x_ptr); \
-	        LEO_M256 y_lo = _mm256_loadu_si256(y_ptr); \
+            LEO_M256 x_lo = _mm256_loadu_si256(x_ptr); \
+            LEO_M256 y_lo = _mm256_loadu_si256(y_ptr); \
            y_lo = _mm256_xor_si256(y_lo, x_lo); \
            _mm256_storeu_si256(y_ptr, y_lo); \
            LEO_M256 data_0 = _mm256_and_si256(y_lo, clr_mask); \
@ -775,8 +775,8 @@ void ifft_butterfly(
    do
    {
 #define LEO_IFFTB_128(x_ptr, y_ptr) { \
-	        LEO_M128 x_lo = _mm_loadu_si128(x_ptr); \
-	        LEO_M128 y_lo = _mm_loadu_si128(y_ptr); \
+            LEO_M128 x_lo = _mm_loadu_si128(x_ptr); \
+            LEO_M128 y_lo = _mm_loadu_si128(y_ptr); \
            y_lo = _mm_xor_si128(y_lo, x_lo); \
            _mm_storeu_si128(y_ptr, y_lo); \
            LEO_M128 data_0 = _mm_and_si128(y_lo, clr_mask); \
--- a/leopard.h
+++ b/leopard.h
@ -47,7 +47,7 @@

    {1} S.-J. Lin, T. Y. Al-Naffouri, Y. S. Han, and W.-H. Chung,
    "Novel Polynomial Basis with Fast Fourier Transform
-	and Its Application to Reed-Solomon Erasure Codes"
+    and Its Application to Reed-Solomon Erasure Codes"
    IEEE Trans. on Information Theory, pp. 6284-6299, November, 2016.

    {2} D. G. Cantor, "On arithmetical algorithms over finite fields",
@ -58,23 +58,8 @@
    IEEE Commun. Lett., vol.16, no.12, pp. 2036-2039, Dec. 2012.

    {4} Plank, J. S., Greenan, K. M., Miller, E. L., "Screaming fast Galois Field
-	arithmetic using Intel SIMD instructions."  In: FAST-2013: 11th Usenix
-	Conference on File and Storage Technologies, San Jose, 2013
-*/
-
-/*
-    TODO:
-    + Add multi-threading to split up long parallelizable calculations
-        + Final benchmarks!
-    + Release version 1
-        + Finish up documentation
-
-    TBD:
-    + Look into 12-bit fields as a performance optimization
-    + Look into shortening the FWHT() since it takes a lot of decoder runtime
-    + Unroll first/final butterflies to avoid extra copies/xors in encoder
-    + Look into getting EncodeL working so we can support smaller data (Ask Lin)
-    + Look into using FFT_m instead of FFT_n for decoder
+    arithmetic using Intel SIMD instructions."  In: FAST-2013: 11th Usenix
+    Conference on File and Storage Technologies, San Jose, 2013
 */

 // Library version
@ -160,11 +145,11 @@ typedef enum LeopardFlagsT
 /*
    leo_encode_work_count()

-	Calculate the number of work_data buffers to provide to leo_encode().
+    Calculate the number of work_data buffers to provide to leo_encode().

    The sum of original_count + recovery_count must not exceed 65536.

-	Returns the work_count value to pass into leo_encode().
+    Returns the work_count value to pass into leo_encode().
    Returns 0 on invalid input.
 */
 LEO_EXPORT unsigned leo_encode_work_count(
@ -221,11 +206,11 @@ LEO_EXPORT LeopardResult leo_encode(
 /*
    leo_decode_work_count()

-	Calculate the number of work_data buffers to provide to leo_decode().
+    Calculate the number of work_data buffers to provide to leo_decode().

    The sum of original_count + recovery_count must not exceed 65536.

-	Returns the work_count value to pass into leo_encode().
+    Returns the work_count value to pass into leo_decode().
    Returns 0 on invalid input.
 */
 LEO_EXPORT unsigned leo_decode_work_count(