Optimize decoder IFFT

2025-02-17 16:37:17 +00:00 · 2017-05-31 23:21:25 -07:00 · 2017-05-31 23:21:25 -07:00 · b2ad9403fe
commit b2ad9403fe
parent c7f0085948
3 changed files with 11 additions and 8 deletions
--- a/LeopardCommon.h
+++ b/LeopardCommon.h
@@ -31,13 +31,16 @@
 /*
    TODO:

-    + Look into 12-bit fields as a performance optimization
+    Short-term:
+    + FF8 decoder needs DIT FFT optimization
+    + Port DIT FFT code to FF16
    + Unroll first/final butterflies to avoid extra copies/xors in encoder
-    + Skip a lot of the initial FWHT() layers that are just operating on zeroes
-    + For the actual FFT(), I should be unrolling the bottom two layers
-        and performing them in a specialized function that does 2 <=> 2 and
-        then 1<=>1, 1<=>1 operations in local registers/cache
    + Multithreading
+
+    Mid-term:
+    + Look into 12-bit fields as a performance optimization
+
+    Long-term:
    + Evaluate the error locator polynomial based on fast polynomial interpolations in O(k log^2 k)
    + Look into getting EncodeL working so we can support larger recovery sets
    + Implement the decoder algorithm from {3} based on the Forney algorithm
--- a/LeopardFF8.cpp
+++ b/LeopardFF8.cpp
@@ -1544,7 +1544,7 @@ void ReedSolomonDecode(
    IFFT_DIT(
        buffer_bytes,
        nullptr,
-        n,
+        m + original_count,
        work,
        nullptr,
        n,
--- a/tests/benchmark.cpp
+++ b/tests/benchmark.cpp
@@ -42,8 +42,8 @@ using namespace std;
 struct TestParameters
 {
 #ifdef LEO_HAS_FF16
-    unsigned original_count = 1000; // under 65536
-    unsigned recovery_count = 200; // under 65536 - original_count
+    unsigned original_count = 100; // under 65536
+    unsigned recovery_count = 20; // under 65536 - original_count
 #else
    unsigned original_count = 128; // under 65536
    unsigned recovery_count = 128; // under 65536 - original_count