mirror of https://github.com/status-im/leopard.git
Cleanups
This commit is contained in:
parent
062084c11f
commit
a7955bf0a1
|
@ -77,6 +77,9 @@
|
|||
Unrolling is used in the code to accomplish both these optimizations.
|
||||
* The final FFT can be truncated also if recovery set is not a power of 2.
|
||||
It is easy to truncate the FFT by ending the inner loop early.
|
||||
* The FFT operations can be unrolled two layers at a time so that instead
|
||||
of writing the result of the first layer out and reading it back in for
|
||||
the second layer, those interactions can happen in registers immediately.
|
||||
*/
|
||||
|
||||
/*
|
||||
|
@ -126,6 +129,7 @@
|
|||
it starts mixing with non-zero data.
|
||||
|
||||
The formal derivative is applied to the entire workspace of N chunks.
|
||||
This is a massive XOR loop that runs 4 columns in parallel for speed.
|
||||
|
||||
The FFT is applied to the entire workspace of N chunks.
|
||||
The FFT is optimized by only performing intermediate calculations required
|
||||
|
@ -165,20 +169,23 @@
|
|||
// Define this to enable the optimized version of FWHT()
|
||||
#define LEO_FWHT_OPT
|
||||
|
||||
// Avoid scheduling FFT operations that are unused
|
||||
#define LEO_SCHEDULE_OPT
|
||||
|
||||
// Avoid calculating final FFT values in decoder using bitfield
|
||||
#define LEO_ERROR_BITFIELD_OPT
|
||||
|
||||
// Optimize M=1 case
|
||||
#define LEO_M1_OPT
|
||||
|
||||
// Interleave butterfly operations between layer pairs in FFT
|
||||
#define LEO_INTERLEAVE_BUTTERFLY4_OPT
|
||||
|
||||
|
||||
// FIXME: Remove these when FF16 is done
|
||||
|
||||
// Unroll inner loops 4 times
|
||||
#define LEO_USE_VECTOR4_OPT
|
||||
|
||||
// Interleave butterfly operations between layer pairs in FFT
|
||||
#define LEO_INTERLEAVE_BUTTERFLY4_OPT
|
||||
// Avoid scheduling FFT operations that are unused
|
||||
#define LEO_SCHEDULE_OPT
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -380,41 +387,39 @@ class XORSummer
|
|||
{
|
||||
public:
|
||||
// Set the addition destination and byte count
|
||||
LEO_FORCE_INLINE void Initialize(void* dest, uint64_t bytes)
|
||||
LEO_FORCE_INLINE void Initialize(void* dest)
|
||||
{
|
||||
DestBuffer = dest;
|
||||
Bytes = bytes;
|
||||
Waiting = nullptr;
|
||||
}
|
||||
|
||||
// Accumulate some source data
|
||||
LEO_FORCE_INLINE void Add(const void* src)
|
||||
LEO_FORCE_INLINE void Add(const void* src, const uint64_t bytes)
|
||||
{
|
||||
#ifdef LEO_M1_OPT
|
||||
if (Waiting)
|
||||
{
|
||||
xor_mem_2to1(DestBuffer, src, Waiting, Bytes);
|
||||
xor_mem_2to1(DestBuffer, src, Waiting, bytes);
|
||||
Waiting = nullptr;
|
||||
}
|
||||
else
|
||||
Waiting = src;
|
||||
#else // LEO_M1_OPT
|
||||
xor_mem(DestBuffer, src, Bytes);
|
||||
xor_mem(DestBuffer, src, bytes);
|
||||
#endif // LEO_M1_OPT
|
||||
}
|
||||
|
||||
// Finalize in the destination buffer
|
||||
LEO_FORCE_INLINE void Finalize()
|
||||
LEO_FORCE_INLINE void Finalize(const uint64_t bytes)
|
||||
{
|
||||
#ifdef LEO_M1_OPT
|
||||
if (Waiting)
|
||||
xor_mem(DestBuffer, Waiting, Bytes);
|
||||
xor_mem(DestBuffer, Waiting, bytes);
|
||||
#endif // LEO_M1_OPT
|
||||
}
|
||||
|
||||
protected:
|
||||
void* DestBuffer;
|
||||
uint64_t Bytes;
|
||||
const void* Waiting;
|
||||
};
|
||||
|
||||
|
|
12
leopard.cpp
12
leopard.cpp
|
@ -111,12 +111,12 @@ static void EncodeM1(
|
|||
memcpy(recovery_data, original_data[0], buffer_bytes);
|
||||
|
||||
leopard::XORSummer summer;
|
||||
summer.Initialize(recovery_data, buffer_bytes);
|
||||
summer.Initialize(recovery_data);
|
||||
|
||||
for (unsigned i = 1; i < original_count; ++i)
|
||||
summer.Add(original_data[i]);
|
||||
summer.Add(original_data[i], buffer_bytes);
|
||||
|
||||
summer.Finalize();
|
||||
summer.Finalize(buffer_bytes);
|
||||
}
|
||||
|
||||
LEO_EXPORT LeopardResult leo_encode(
|
||||
|
@ -223,13 +223,13 @@ static void DecodeM1(
|
|||
memcpy(work_data, recovery_data, buffer_bytes);
|
||||
|
||||
leopard::XORSummer summer;
|
||||
summer.Initialize(work_data, buffer_bytes);
|
||||
summer.Initialize(work_data);
|
||||
|
||||
for (unsigned i = 0; i < original_count; ++i)
|
||||
if (original_data[i])
|
||||
summer.Add(original_data[i]);
|
||||
summer.Add(original_data[i], buffer_bytes);
|
||||
|
||||
summer.Finalize();
|
||||
summer.Finalize(buffer_bytes);
|
||||
}
|
||||
|
||||
LEO_EXPORT LeopardResult leo_decode(
|
||||
|
|
Loading…
Reference in New Issue