From ff13880057c923a7ae000a5aab78a72cf737b703 Mon Sep 17 00:00:00 2001
From: Christopher Taylor
Date: Sun, 4 Jun 2017 22:14:21 -0700
Subject: [PATCH] Comments

---
 LeopardCommon.h | 3 +--
 LeopardFF16.cpp | 7 +++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/LeopardCommon.h b/LeopardCommon.h
index 581e815..f6c16c5 100644
--- a/LeopardCommon.h
+++ b/LeopardCommon.h
@@ -32,11 +32,10 @@
     TODO:
 
     Short-term:
-    + Unroll first/final butterflies to avoid extra copies/xors in encoder (17% of encode time)
-    + Add compile-time selectable XOR-only rowops instead of MULADD
     + Multithreading
 
     Mid-term:
+    + Add compile-time selectable XOR-only rowops instead of MULADD
     + Look into 12-bit fields as a performance optimization
 
     Long-term:
diff --git a/LeopardFF16.cpp b/LeopardFF16.cpp
index 903da9d..1888b4b 100644
--- a/LeopardFF16.cpp
+++ b/LeopardFF16.cpp
@@ -832,11 +832,10 @@ static void IFFT_DIT_Encoder(
         }
     }
 
+    // I tried unrolling this but it does not provide more than 5% performance
+    // improvement for 16-bit finite fields, so it's not worth the complexity.
     if (xor_result)
-    {
-        for (unsigned i = 0; i < m; ++i)
-            xor_mem(xor_result[i], work[i], bytes);
-    }
+        VectorXOR(bytes, m, xor_result, work);
 }
 
 