mirror of
https://github.com/status-im/leopard.git
synced 2025-02-19 17:34:19 +00:00
Fix tabs
This commit is contained in:
parent
c53b075eda
commit
be5c625d3d
@ -28,6 +28,17 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
TODO:
|
||||
|
||||
+ Multithreading
|
||||
+ Look into 12-bit fields as a performance optimization
|
||||
+ Look into shortening the FWHT() since it takes a lot of decoder runtime
|
||||
+ Unroll first/final butterflies to avoid extra copies/xors in encoder
|
||||
+ Look into getting EncodeL working so we can support smaller data
|
||||
+ Implement the faster decoder algorithm from {3}
|
||||
*/
|
||||
|
||||
/*
|
||||
FFT Data Layout:
|
||||
|
||||
|
@ -445,7 +445,7 @@ void mul_mem(
|
||||
do
|
||||
{
|
||||
#define LEO_MUL_256(x_ptr, y_ptr) { \
|
||||
const LEO_M256 A_lo = _mm256_loadu_si256(y_ptr); \
|
||||
const LEO_M256 A_lo = _mm256_loadu_si256(y_ptr); \
|
||||
const LEO_M256 A_hi = _mm256_loadu_si256(y_ptr + 1); \
|
||||
LEO_M256 data_0 = _mm256_and_si256(A_lo, clr_mask); \
|
||||
LEO_M256 data_1 = _mm256_srli_epi64(A_lo, 4); \
|
||||
@ -494,7 +494,7 @@ void mul_mem(
|
||||
do
|
||||
{
|
||||
#define LEO_MUL_128(x_ptr, y_ptr) { \
|
||||
const LEO_M128 A_lo = _mm_loadu_si128(y_ptr); \
|
||||
const LEO_M128 A_lo = _mm_loadu_si128(y_ptr); \
|
||||
const LEO_M128 A_hi = _mm_loadu_si128(y_ptr + 2); \
|
||||
LEO_M128 data_0 = _mm_and_si128(A_lo, clr_mask); \
|
||||
LEO_M128 data_1 = _mm_srli_epi64(A_lo, 4); \
|
||||
@ -542,7 +542,7 @@ void fft_butterfly(
|
||||
do
|
||||
{
|
||||
#define LEO_FFTB_256(x_ptr, y_ptr) { \
|
||||
LEO_M256 y_lo = _mm256_loadu_si256(y_ptr); \
|
||||
LEO_M256 y_lo = _mm256_loadu_si256(y_ptr); \
|
||||
LEO_M256 data_0 = _mm256_and_si256(y_lo, clr_mask); \
|
||||
LEO_M256 data_1 = _mm256_srli_epi64(y_lo, 4); \
|
||||
data_1 = _mm256_and_si256(data_1, clr_mask); \
|
||||
@ -558,7 +558,7 @@ void fft_butterfly(
|
||||
prod_lo = _mm256_xor_si256(prod_lo, _mm256_shuffle_epi8(T3_lo, data_1)); \
|
||||
prod_hi = _mm256_xor_si256(prod_hi, _mm256_shuffle_epi8(T2_hi, data_0)); \
|
||||
prod_hi = _mm256_xor_si256(prod_hi, _mm256_shuffle_epi8(T3_hi, data_1)); \
|
||||
LEO_M256 x_lo = _mm256_loadu_si256(x_ptr); \
|
||||
LEO_M256 x_lo = _mm256_loadu_si256(x_ptr); \
|
||||
LEO_M256 x_hi = _mm256_loadu_si256(x_ptr + 1); \
|
||||
x_lo = _mm256_xor_si256(prod_lo, x_lo); \
|
||||
_mm256_storeu_si256(x_ptr, x_lo); \
|
||||
@ -589,7 +589,7 @@ void fft_butterfly(
|
||||
do
|
||||
{
|
||||
#define LEO_FFTB_128(x_ptr, y_ptr) { \
|
||||
LEO_M128 y_lo = _mm_loadu_si128(y_ptr); \
|
||||
LEO_M128 y_lo = _mm_loadu_si128(y_ptr); \
|
||||
LEO_M128 data_0 = _mm_and_si128(y_lo, clr_mask); \
|
||||
LEO_M128 data_1 = _mm_srli_epi64(y_lo, 4); \
|
||||
data_1 = _mm_and_si128(data_1, clr_mask); \
|
||||
@ -605,7 +605,7 @@ void fft_butterfly(
|
||||
prod_lo = _mm_xor_si128(prod_lo, _mm_shuffle_epi8(T3_lo, data_1)); \
|
||||
prod_hi = _mm_xor_si128(prod_hi, _mm_shuffle_epi8(T2_hi, data_0)); \
|
||||
prod_hi = _mm_xor_si128(prod_hi, _mm_shuffle_epi8(T3_hi, data_1)); \
|
||||
LEO_M128 x_lo = _mm_loadu_si128(x_ptr); \
|
||||
LEO_M128 x_lo = _mm_loadu_si128(x_ptr); \
|
||||
LEO_M128 x_hi = _mm_loadu_si128(x_ptr + 2); \
|
||||
x_lo = _mm_xor_si128(prod_lo, x_lo); \
|
||||
_mm_storeu_si128(x_ptr, x_lo); \
|
||||
@ -728,8 +728,8 @@ void ifft_butterfly(
|
||||
do
|
||||
{
|
||||
#define LEO_IFFTB_256(x_ptr, y_ptr) { \
|
||||
LEO_M256 x_lo = _mm256_loadu_si256(x_ptr); \
|
||||
LEO_M256 y_lo = _mm256_loadu_si256(y_ptr); \
|
||||
LEO_M256 x_lo = _mm256_loadu_si256(x_ptr); \
|
||||
LEO_M256 y_lo = _mm256_loadu_si256(y_ptr); \
|
||||
y_lo = _mm256_xor_si256(y_lo, x_lo); \
|
||||
_mm256_storeu_si256(y_ptr, y_lo); \
|
||||
LEO_M256 data_0 = _mm256_and_si256(y_lo, clr_mask); \
|
||||
@ -775,8 +775,8 @@ void ifft_butterfly(
|
||||
do
|
||||
{
|
||||
#define LEO_IFFTB_128(x_ptr, y_ptr) { \
|
||||
LEO_M128 x_lo = _mm_loadu_si128(x_ptr); \
|
||||
LEO_M128 y_lo = _mm_loadu_si128(y_ptr); \
|
||||
LEO_M128 x_lo = _mm_loadu_si128(x_ptr); \
|
||||
LEO_M128 y_lo = _mm_loadu_si128(y_ptr); \
|
||||
y_lo = _mm_xor_si128(y_lo, x_lo); \
|
||||
_mm_storeu_si128(y_ptr, y_lo); \
|
||||
LEO_M128 data_0 = _mm_and_si128(y_lo, clr_mask); \
|
||||
|
29
leopard.h
29
leopard.h
@ -47,7 +47,7 @@
|
||||
|
||||
{1} S.-J. Lin, T. Y. Al-Naffouri, Y. S. Han, and W.-H. Chung,
|
||||
"Novel Polynomial Basis with Fast Fourier Transform
|
||||
and Its Application to Reed-Solomon Erasure Codes"
|
||||
and Its Application to Reed-Solomon Erasure Codes"
|
||||
IEEE Trans. on Information Theory, pp. 6284-6299, November, 2016.
|
||||
|
||||
{2} D. G. Cantor, "On arithmetical algorithms over finite fields",
|
||||
@ -58,23 +58,8 @@
|
||||
IEEE Commun. Lett., vol.16, no.12, pp. 2036-2039, Dec. 2012.
|
||||
|
||||
{4} Plank, J. S., Greenan, K. M., Miller, E. L., "Screaming fast Galois Field
|
||||
arithmetic using Intel SIMD instructions." In: FAST-2013: 11th Usenix
|
||||
Conference on File and Storage Technologies, San Jose, 2013
|
||||
*/
|
||||
|
||||
/*
|
||||
TODO:
|
||||
+ Add multi-threading to split up long parallelizable calculations
|
||||
+ Final benchmarks!
|
||||
+ Release version 1
|
||||
+ Finish up documentation
|
||||
|
||||
TBD:
|
||||
+ Look into 12-bit fields as a performance optimization
|
||||
+ Look into shortening the FWHT() since it takes a lot of decoder runtime
|
||||
+ Unroll first/final butterflies to avoid extra copies/xors in encoder
|
||||
+ Look into getting EncodeL working so we can support smaller data (Ask Lin)
|
||||
+ Look into using FFT_m instead of FFT_n for decoder
|
||||
arithmetic using Intel SIMD instructions." In: FAST-2013: 11th Usenix
|
||||
Conference on File and Storage Technologies, San Jose, 2013
|
||||
*/
|
||||
|
||||
// Library version
|
||||
@ -160,11 +145,11 @@ typedef enum LeopardFlagsT
|
||||
/*
|
||||
leo_encode_work_count()
|
||||
|
||||
Calculate the number of work_data buffers to provide to leo_encode().
|
||||
Calculate the number of work_data buffers to provide to leo_encode().
|
||||
|
||||
The sum of original_count + recovery_count must not exceed 65536.
|
||||
|
||||
Returns the work_count value to pass into leo_encode().
|
||||
Returns the work_count value to pass into leo_encode().
|
||||
Returns 0 on invalid input.
|
||||
*/
|
||||
LEO_EXPORT unsigned leo_encode_work_count(
|
||||
@ -221,11 +206,11 @@ LEO_EXPORT LeopardResult leo_encode(
|
||||
/*
|
||||
leo_decode_work_count()
|
||||
|
||||
Calculate the number of work_data buffers to provide to leo_decode().
|
||||
Calculate the number of work_data buffers to provide to leo_decode().
|
||||
|
||||
The sum of original_count + recovery_count must not exceed 65536.
|
||||
|
||||
Returns the work_count value to pass into leo_decode().
|
||||
Returns the work_count value to pass into leo_decode().
|
||||
Returns 0 on invalid input.
|
||||
*/
|
||||
LEO_EXPORT unsigned leo_decode_work_count(
|
||||
|
Loading…
x
Reference in New Issue
Block a user