2017-05-18 03:06:13 +00:00
|
|
|
/*
|
2017-05-25 09:24:15 +00:00
|
|
|
Copyright (c) 2017 Christopher A. Taylor. All rights reserved.
|
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
|
|
modification, are permitted provided that the following conditions are met:
|
|
|
|
|
|
|
|
* Redistributions of source code must retain the above copyright notice,
|
|
|
|
this list of conditions and the following disclaimer.
|
|
|
|
* Redistributions in binary form must reproduce the above copyright notice,
|
|
|
|
this list of conditions and the following disclaimer in the documentation
|
|
|
|
and/or other materials provided with the distribution.
|
|
|
|
* Neither the name of LHC-RS nor the names of its contributors may be
|
|
|
|
used to endorse or promote products derived from this software without
|
|
|
|
specific prior written permission.
|
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
|
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
POSSIBILITY OF SUCH DAMAGE.
|
2017-05-18 03:06:13 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
#include <time.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
|
|
|
2017-05-24 08:23:19 +00:00
|
|
|
/*
|
|
|
|
TODO:
|
|
|
|
+ Write C API and unit tester
|
|
|
|
+ Limit input to multiples of 64 bytes
|
|
|
|
+ Replace GFSymbol with a file data pointer
|
|
|
|
+ New 16-bit Muladd inner loops
|
|
|
|
+ Class to contain the (large) muladd tables
|
|
|
|
+ Preliminary benchmarks for large data!
|
|
|
|
+ New 8-bit Muladd inner loops
|
|
|
|
+ Benchmarks for smaller data!
|
|
|
|
+ Refactor software
|
2017-05-25 09:24:15 +00:00
|
|
|
+ Pick a name for the software better than LEO_RS
|
2017-05-24 08:23:19 +00:00
|
|
|
+ I think it should be split up into several C++ modules
|
|
|
|
+ Write detailed comments for all the routines
|
|
|
|
+ Look into getting EncodeL working so we can support smaller data (Ask Lin)
|
|
|
|
+ Look into using k instead of k2 to speed up decoder (Ask Lin)
|
|
|
|
+ Avoid performing FFT/IFFT intermediate calculations we're not going to use
|
|
|
|
+ Benchmarks, fun!
|
|
|
|
+ Add multi-threading to split up long parallelizable calculations
|
|
|
|
+ Final benchmarks!
|
|
|
|
+ Finish up documentation
|
|
|
|
+ Release version 1
|
|
|
|
|
|
|
|
|
|
|
|
Muladd implementation notes:
|
|
|
|
|
|
|
|
Specialize for 1-3 rows at a time since oftentimes we're multiplying by
|
|
|
|
the same (skew) value repeatedly, as the ISA-L library does here:
|
|
|
|
|
|
|
|
https://github.com/01org/isa-l/blob/master/erasure_code/gf_3vect_mad_avx.asm#L258
|
|
|
|
|
|
|
|
Except we should be doing it for 16-bit Galois Field.
|
|
|
|
To implement that use the ALTMAP trick from Jerasure:
|
|
|
|
|
|
|
|
http://lab.jerasure.org/jerasure/gf-complete/blob/master/src/gf_w16.c#L1140
|
|
|
|
|
|
|
|
Except we should also support AVX2 since that is a 40% perf boost, so put
|
|
|
|
the high and low bytes 32 bytes instead of 16 bytes apart.
|
|
|
|
|
|
|
|
Also I think we should go ahead and precompute the multiply tables since
|
|
|
|
it avoids a bunch of memory lookups for each muladd, and only costs 8 MB.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
2017-05-18 03:06:13 +00:00
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
// Debug
|
|
|
|
|
|
|
|
// Some bugs only repro in release mode, so this can be helpful
|
2017-05-25 09:24:15 +00:00
|
|
|
//#define LEO_DEBUG_IN_RELEASE
|
2017-05-18 03:06:13 +00:00
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
// Debug helpers.
//
// LEO_DEBUG        - defined when any of _DEBUG, DEBUG or LEO_DEBUG_IN_RELEASE is set.
// LEO_DEBUG_BREAK  - stops in the debugger / traps in debug builds, no-op otherwise.
// LEO_DEBUG_ASSERT - breaks when `cond` is false in debug builds; in other builds
//                    `cond` is not evaluated at all.
//
// Both macros expand to a single statement that needs a trailing semicolon,
// so they are safe inside unbraced if/else bodies.
#if defined(_DEBUG) || defined(DEBUG) || defined(LEO_DEBUG_IN_RELEASE)
    #define LEO_DEBUG
    #ifdef _WIN32
        #define LEO_DEBUG_BREAK __debugbreak()
    #else
        #define LEO_DEBUG_BREAK __builtin_trap()
    #endif
    #define LEO_DEBUG_ASSERT(cond) do { if (!(cond)) { LEO_DEBUG_BREAK; } } while (0)
#else
    #define LEO_DEBUG_BREAK ((void)0)
    #define LEO_DEBUG_ASSERT(cond) ((void)0)
#endif
|
|
|
|
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
// Platform/Architecture
|
|
|
|
|
|
|
|
// Mobile targets are selected by the build defining ANDROID or IOS
#if defined(ANDROID) || defined(IOS)
#   define LEO_TARGET_MOBILE
#endif

// AVX2 is attempted when the compiler advertises it (__AVX2__) or when
// building with MSVC where _MSC_VER >= 1900. The buffer alignment follows
// the widest register that may be used: 32 bytes with AVX2, otherwise 16.
#if defined(__AVX2__) || (defined (_MSC_VER) && _MSC_VER >= 1900)
#   define LEO_TRY_AVX2 /* 256-bit */
#   include <immintrin.h>
#   define LEO_ALIGN_BYTES 32
#else
#   define LEO_ALIGN_BYTES 16
#endif

// x86 SIMD headers are only pulled in for non-mobile builds
#ifndef LEO_TARGET_MOBILE
#   include <tmmintrin.h> // SSSE3: _mm_shuffle_epi8
#   include <emmintrin.h> // SSE2
#endif

#ifdef HAVE_ARM_NEON_H
#   include <arm_neon.h>
#endif

// LEO_M128: type used for a 128-bit SIMD register on this target
#ifdef LEO_TARGET_MOBILE
#   define LEO_ALIGNED_ACCESSES /* Inputs must be aligned to LEO_ALIGN_BYTES */
#   ifdef HAVE_ARM_NEON_H
#       define LEO_M128 uint8x16_t
#       define LEO_TRY_NEON
#   else
#       define LEO_M128 uint64_t
#   endif
#else
#   define LEO_M128 __m128i
#endif

// LEO_M256: type used for a 256-bit SIMD register (AVX2 builds only)
#if defined(LEO_TRY_AVX2)
#   define LEO_M256 __m256i
#endif

// Pointer no-alias qualifier
#define LEO_RESTRICT __restrict

// Force-inline keyword for the active compiler
#if defined(_MSC_VER)
#   define LEO_FORCE_INLINE inline __forceinline
#else
#   define LEO_FORCE_INLINE inline __attribute__((always_inline))
#endif

// Alignment attribute for the active compiler (aligns to LEO_ALIGN_BYTES)
// Note: Alignment only matters for ARM NEON where it should be 16
#if defined(_MSC_VER)
#   define LEO_ALIGNED __declspec(align(LEO_ALIGN_BYTES))
#else
#   define LEO_ALIGNED __attribute__((aligned(LEO_ALIGN_BYTES)))
#endif
|
|
|
|
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
// Runtime CPU Architecture Check
|
|
|
|
//
|
|
|
|
// Feature checks stolen shamelessly from
|
|
|
|
// https://github.com/jedisct1/libsodium/blob/master/src/libsodium/sodium/runtime.c
|
|
|
|
|
|
|
|
#if defined(HAVE_ANDROID_GETCPUFEATURES)
|
|
|
|
#include <cpu-features.h>
|
|
|
|
#endif
|
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
// NEON capability flags (only present when LEO_TRY_NEON is defined)
#ifdef LEO_TRY_NEON
#   if defined(IOS) && defined(__ARM_NEON__)
// Requires iPhone 5S or newer: both flags are compile-time constants here
static const bool CpuHasNeon = true;
static const bool CpuHasNeon64 = true;
#   else
// Filled in at runtime by leo_architecture_init().
// Remember to add LOCAL_STATIC_LIBRARIES := cpufeatures
static bool CpuHasNeon = false;   // V6 / V7
static bool CpuHasNeon64 = false; // 64-bit
#   endif
#endif
|
|
|
|
|
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
#if !defined(LEO_TARGET_MOBILE)
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
#if defined(_MSC_VER)
#   include <intrin.h> // __cpuid
#   pragma warning(disable: 4752) // found Intel(R) Advanced Vector Extensions; consider using /arch:AVX
#endif

// x86 capability flags, filled in at runtime by leo_architecture_init()
#if defined(LEO_TRY_AVX2)
static bool CpuHasAVX2 = false;
#endif
static bool CpuHasSSSE3 = false;

// Bit masks applied to the CPUID output registers:
// AVX2 is tested in EBX of leaf 7, SSSE3 in ECX of leaf 1
#define CPUID_EBX_AVX2  0x00000020
#define CPUID_ECX_SSSE3 0x00000200
|
|
|
|
|
|
|
|
// Execute the CPUID instruction for leaf `cpu_info_type` (sub-leaf ECX = 0 on
// the inline-asm paths) and store EAX, EBX, ECX, EDX into cpu_info[0..3].
//
// On MSVC x86/x64 this forwards to the __cpuid intrinsic. Elsewhere the
// output is first cleared and inline assembly is used. On 32-bit x86 the
// function first checks that EFLAGS bit 0x200000 can be toggled (i.e. that
// CPUID exists); if it cannot, it returns early. In that early-return case
// cpu_info[0] and cpu_info[1] hold the two EFLAGS snapshots, not zero.
static void _cpuid(unsigned int cpu_info[4U], const unsigned int cpu_info_type)
{
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86))
    __cpuid((int *) cpu_info, cpu_info_type);
#else
    for (unsigned int reg = 0; reg < 4U; ++reg)
        cpu_info[reg] = 0;

# if defined(__i386__)
    // Try to flip the flag bit and read EFLAGS back
    __asm__ __volatile__ ("pushfl; pushfl; "
                          "popl %0; "
                          "movl %0, %1; xorl %2, %0; "
                          "pushl %0; "
                          "popfl; pushfl; popl %0; popfl" :
                          "=&r" (cpu_info[0]), "=&r" (cpu_info[1]) :
                          "i" (0x200000));
    if (((cpu_info[0] ^ cpu_info[1]) & 0x200000) == 0)
        return; /* LCOV_EXCL_LINE */

    // EBX is swapped through a scratch register around CPUID
    // (presumably because the compiler may reserve EBX, e.g. for PIC)
    __asm__ __volatile__ ("xchgl %%ebx, %k1; cpuid; xchgl %%ebx, %k1" :
                          "=a" (cpu_info[0]), "=&r" (cpu_info[1]),
                          "=c" (cpu_info[2]), "=d" (cpu_info[3]) :
                          "0" (cpu_info_type), "2" (0U));
# elif defined(__x86_64__)
    // Same as above with the 64-bit RBX
    __asm__ __volatile__ ("xchgq %%rbx, %q1; cpuid; xchgq %%rbx, %q1" :
                          "=a" (cpu_info[0]), "=&r" (cpu_info[1]),
                          "=c" (cpu_info[2]), "=d" (cpu_info[3]) :
                          "0" (cpu_info_type), "2" (0U));
# else
    __asm__ __volatile__ ("cpuid" :
                          "=a" (cpu_info[0]), "=b" (cpu_info[1]),
                          "=c" (cpu_info[2]), "=d" (cpu_info[3]) :
                          "0" (cpu_info_type), "2" (0U));
# endif
#endif
}
|
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
#endif // defined(LEO_TARGET_MOBILE)
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
static void leo_architecture_init()
|
2017-05-18 03:06:13 +00:00
|
|
|
{
|
2017-05-25 09:24:15 +00:00
|
|
|
#if defined(LEO_TRY_NEON) && defined(HAVE_ANDROID_GETCPUFEATURES)
|
2017-05-18 03:06:13 +00:00
|
|
|
AndroidCpuFamily family = android_getCpuFamily();
|
|
|
|
if (family == ANDROID_CPU_FAMILY_ARM)
|
|
|
|
{
|
|
|
|
if (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON)
|
|
|
|
CpuHasNeon = true;
|
|
|
|
}
|
|
|
|
else if (family == ANDROID_CPU_FAMILY_ARM64)
|
|
|
|
{
|
|
|
|
CpuHasNeon = true;
|
|
|
|
if (android_getCpuFeatures() & ANDROID_CPU_ARM64_FEATURE_ASIMD)
|
|
|
|
CpuHasNeon64 = true;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
#if !defined(LEO_TARGET_MOBILE)
|
2017-05-18 03:06:13 +00:00
|
|
|
unsigned int cpu_info[4];
|
|
|
|
|
|
|
|
_cpuid(cpu_info, 1);
|
|
|
|
CpuHasSSSE3 = ((cpu_info[2] & CPUID_ECX_SSSE3) != 0);
|
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
#if defined(LEO_TRY_AVX2)
|
2017-05-18 03:06:13 +00:00
|
|
|
_cpuid(cpu_info, 7);
|
|
|
|
CpuHasAVX2 = ((cpu_info[1] & CPUID_EBX_AVX2) != 0);
|
2017-05-25 09:24:15 +00:00
|
|
|
#endif // LEO_TRY_AVX2
|
2017-05-18 03:06:13 +00:00
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
#endif // LEO_TARGET_MOBILE
|
2017-05-18 03:06:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
// SIMD-Safe Aligned Memory Allocations
|
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
static const unsigned kAlignmentBytes = LEO_ALIGN_BYTES;
|
2017-05-18 03:06:13 +00:00
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
LEO_FORCE_INLINE unsigned NextAlignedOffset(unsigned offset)
|
2017-05-18 03:06:13 +00:00
|
|
|
{
|
|
|
|
return (offset + kAlignmentBytes - 1) & ~(kAlignmentBytes - 1);
|
|
|
|
}
|
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
// Allocate `size` zero-initialized bytes aligned to kAlignmentBytes.
//
// The block is over-allocated by kAlignmentBytes; the returned pointer is
// moved forward to the next aligned address and the original misalignment is
// stored in the byte just before it so SIMDSafeFree() can recover the
// pointer that calloc() returned.
//
// Returns nullptr if the allocation fails or if `size` is so large that
// adding the alignment padding would overflow size_t.
// Release the result with SIMDSafeFree(), never with free().
static LEO_FORCE_INLINE uint8_t* SIMDSafeAllocate(size_t size)
{
    // Guard against wrap-around of (kAlignmentBytes + size), which would
    // otherwise yield a buffer far smaller than the caller asked for
    if (size > static_cast<size_t>(-1) - kAlignmentBytes)
        return nullptr;

    uint8_t* data = (uint8_t*)calloc(1, kAlignmentBytes + size);
    if (!data)
        return nullptr;

    // Always advances by 1..kAlignmentBytes bytes, so data[-1] is inside
    // the allocation even when calloc() returned an aligned address
    unsigned offset = (unsigned)((uintptr_t)data % kAlignmentBytes);
    data += kAlignmentBytes - offset;
    data[-1] = (uint8_t)offset;
    return data;
}
|
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
// Free a buffer obtained from SIMDSafeAllocate(). A null pointer is ignored.
//
// The byte immediately before `ptr` holds the misalignment recorded at
// allocation time; it is used to step back to the address calloc() returned.
// If that byte is not a valid offset the buffer is left untouched
// (and a debug break is raised in debug builds).
static LEO_FORCE_INLINE void SIMDSafeFree(void* ptr)
{
    if (ptr)
    {
        uint8_t* aligned = (uint8_t*)ptr;
        const unsigned storedOffset = aligned[-1];

        if (storedOffset < kAlignmentBytes)
        {
            uint8_t* original = aligned - (kAlignmentBytes - storedOffset);
            free(original);
        }
        else
        {
            LEO_DEBUG_BREAK; // Should never happen
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
// Field
|
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
//#define LEO_SHORT_FIELD
|
2017-05-24 08:23:19 +00:00
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
// Galois field parameters.
//
// GFSymbol      - storage type for one field element
// kGFBits       - number of bits per field element
// kGFPolynomial - polynomial XORed in by InitField() when the shift
//                 register overflows kGFBits bits
// kGFBasis      - Cantor basis: kGFBits field elements that InitField()
//                 XOR-combines to map indices into the chosen basis.
//                 The table is read-only, hence static const.
#ifdef LEO_SHORT_FIELD
typedef uint8_t GFSymbol;
static const unsigned kGFBits = 8;
static const unsigned kGFPolynomial = 0x11D;
static const GFSymbol kGFBasis[kGFBits] = {
    1, 214, 152, 146, 86, 200, 88, 230 // Cantor basis
};
#else
typedef uint16_t GFSymbol;
static const unsigned kGFBits = 16;
static const unsigned kGFPolynomial = 0x1002D;
static const GFSymbol kGFBasis[kGFBits] = {
    0x0001, 0xACCA, 0x3C0E, 0x163E, // Cantor basis
    0xC582, 0xED2E, 0x914C, 0x4012,
    0x6C98, 0x10D8, 0x6A72, 0xB900,
    0xFDB8, 0xFB34, 0xFF38, 0x991E
};
#endif
|
|
|
|
|
|
|
|
/*
|
2017-05-21 05:49:41 +00:00
|
|
|
Cantor Basis introduced by:
|
2017-05-18 03:06:13 +00:00
|
|
|
D. G. Cantor, "On arithmetical algorithms over finite fields",
|
|
|
|
Journal of Combinatorial Theory, Series A, vol. 50, no. 2, pp. 285-300, 1989.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static const unsigned kFieldSize = (unsigned)1 << kGFBits; //Field size
|
|
|
|
static const unsigned kFieldModulus = kFieldSize - 1;
|
|
|
|
|
|
|
|
static GFSymbol GFLog[kFieldSize];
|
|
|
|
static GFSymbol GFExp[kFieldSize];
|
|
|
|
|
|
|
|
// Initialize GFLog[], GFExp[]
|
|
|
|
static void InitField()
|
|
|
|
{
|
|
|
|
unsigned state = 1;
|
|
|
|
for (unsigned i = 0; i < kFieldModulus; ++i)
|
|
|
|
{
|
2017-05-21 05:49:41 +00:00
|
|
|
GFExp[state] = static_cast<GFSymbol>(i);
|
2017-05-18 03:06:13 +00:00
|
|
|
state <<= 1;
|
|
|
|
if (state >= kFieldSize)
|
|
|
|
state ^= kGFPolynomial;
|
|
|
|
}
|
2017-05-21 05:49:41 +00:00
|
|
|
GFExp[0] = kFieldModulus;
|
2017-05-18 03:06:13 +00:00
|
|
|
|
2017-05-21 05:49:41 +00:00
|
|
|
// Conversion to chosen basis:
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
GFLog[0] = 0;
|
|
|
|
for (unsigned i = 0; i < kGFBits; ++i)
|
|
|
|
{
|
2017-05-21 05:49:41 +00:00
|
|
|
const GFSymbol basis = kGFBasis[i];
|
2017-05-18 03:06:13 +00:00
|
|
|
const unsigned width = (unsigned)(1UL << i);
|
|
|
|
|
|
|
|
for (unsigned j = 0; j < width; ++j)
|
|
|
|
GFLog[j + width] = GFLog[j] ^ basis;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < kFieldSize; ++i)
|
2017-05-21 05:49:41 +00:00
|
|
|
GFLog[i] = GFExp[GFLog[i]];
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
for (unsigned i = 0; i < kFieldSize; ++i)
|
|
|
|
GFExp[GFLog[i]] = i;
|
|
|
|
|
|
|
|
GFExp[kFieldModulus] = GFExp[0];
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
// Mod Q Field Operations
|
|
|
|
//
|
|
|
|
// Q is the maximum symbol value, e.g. 255 or 65535.
|
|
|
|
|
|
|
|
// z = x + y (mod Q)
|
|
|
|
static inline GFSymbol AddModQ(GFSymbol a, GFSymbol b)
|
|
|
|
{
|
|
|
|
const unsigned sum = (unsigned)a + b;
|
|
|
|
|
|
|
|
// Partial reduction step, allowing for Q to be returned
|
|
|
|
return static_cast<GFSymbol>(sum + (sum >> kGFBits));
|
|
|
|
}
|
|
|
|
|
|
|
|
// z = x - y (mod Q)
|
|
|
|
static inline GFSymbol SubModQ(GFSymbol a, GFSymbol b)
|
|
|
|
{
|
|
|
|
const unsigned dif = (unsigned)a - b;
|
|
|
|
|
|
|
|
// Partial reduction step, allowing for Q to be returned
|
|
|
|
return static_cast<GFSymbol>(dif + (dif >> kGFBits));
|
|
|
|
}
|
|
|
|
|
|
|
|
// vx[] += vy[] * z
|
2017-05-25 09:24:15 +00:00
|
|
|
static void muladd_mem(GFSymbol * LEO_RESTRICT vx, const GFSymbol * LEO_RESTRICT vy, GFSymbol z, unsigned symbolCount)
|
2017-05-18 03:06:13 +00:00
|
|
|
{
|
|
|
|
for (unsigned i = 0; i < symbolCount; ++i)
|
|
|
|
{
|
|
|
|
const GFSymbol a = vy[i];
|
|
|
|
if (a == 0)
|
|
|
|
continue;
|
|
|
|
|
2017-05-21 05:49:41 +00:00
|
|
|
GFSymbol sum1 = static_cast<GFSymbol>(AddModQ(GFLog[a & 0x0f], z));
|
|
|
|
GFSymbol value1 = GFExp[sum1];
|
|
|
|
if ((a & 0x0f) == 0)
|
|
|
|
{
|
|
|
|
value1 = 0;
|
|
|
|
}
|
|
|
|
GFSymbol sum2 = static_cast<GFSymbol>(AddModQ(GFLog[a & 0xf0], z));
|
|
|
|
GFSymbol value2 = GFExp[sum2];
|
|
|
|
if ((a & 0xf0) == 0)
|
|
|
|
{
|
|
|
|
value2 = 0;
|
|
|
|
}
|
|
|
|
GFSymbol sum3 = static_cast<GFSymbol>(AddModQ(GFLog[a & 0x0f00], z));
|
|
|
|
GFSymbol value3 = GFExp[sum3];
|
|
|
|
if ((a & 0x0f00) == 0)
|
|
|
|
{
|
|
|
|
value3 = 0;
|
|
|
|
}
|
|
|
|
GFSymbol sum4 = static_cast<GFSymbol>(AddModQ(GFLog[a & 0xf000], z));
|
|
|
|
GFSymbol value4 = GFExp[sum4];
|
|
|
|
if ((a & 0xf000) == 0)
|
|
|
|
{
|
|
|
|
value4 = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
vx[i] ^= value1;
|
|
|
|
vx[i] ^= value2;
|
|
|
|
vx[i] ^= value3;
|
|
|
|
vx[i] ^= value4;
|
2017-05-18 03:06:13 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// return a*GFExp[b] over GF(2^r)
|
|
|
|
static GFSymbol mulE(GFSymbol a, GFSymbol b)
|
|
|
|
{
|
|
|
|
if (a == 0)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
const GFSymbol sum = static_cast<GFSymbol>(AddModQ(GFLog[a], b));
|
|
|
|
return GFExp[sum];
|
|
|
|
}
|
|
|
|
|
2017-05-24 08:23:19 +00:00
|
|
|
|
2017-05-18 03:06:13 +00:00
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
// Fast Walsh-Hadamard Transform (FWHT) Mod Q
|
|
|
|
//
|
|
|
|
// Q is the maximum symbol value, e.g. 255 or 65535.
|
|
|
|
|
|
|
|
// Define this to enable the optimized version of FWHT()
|
2017-05-25 09:24:15 +00:00
|
|
|
#define LEO_FWHT_OPTIMIZED
|
2017-05-18 03:06:13 +00:00
|
|
|
|
2017-05-24 08:23:19 +00:00
|
|
|
typedef GFSymbol fwht_t;
|
|
|
|
|
|
|
|
// {a, b} = {a + b, a - b} (Mod Q)
|
2017-05-25 09:24:15 +00:00
|
|
|
static LEO_FORCE_INLINE void FWHT_2(fwht_t& LEO_RESTRICT a, fwht_t& LEO_RESTRICT b)
|
2017-05-24 08:23:19 +00:00
|
|
|
{
|
|
|
|
const fwht_t sum = AddModQ(a, b);
|
|
|
|
const fwht_t dif = SubModQ(a, b);
|
|
|
|
a = sum;
|
|
|
|
b = dif;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
FWHT is a minor slice of the runtime and does not grow with data size,
|
|
|
|
but I did attempt a few additional optimizations that failed:
|
|
|
|
|
|
|
|
I've attempted to vectorize (with partial reductions) FWHT_4(data, s),
|
|
|
|
which is 70% of the algorithm, but it was slower. Left in _attic_.
|
|
|
|
|
|
|
|
I've attempted to avoid reductions in all or parts of the FWHT.
|
|
|
|
The final modular reduction ends up being slower than the savings.
|
|
|
|
Specifically I tried doing it for the whole FWHT and also I tried
|
|
|
|
doing it just for the FWHT_2 loop in the main routine, but both
|
|
|
|
approaches are slower than partial reductions.
|
|
|
|
|
|
|
|
Replacing word reads with wider reads does speed up the operation, but
|
|
|
|
at too high a complexity cost relative to minor perf improvement.
|
|
|
|
*/
|
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
#ifndef LEO_FWHT_OPTIMIZED
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
// Reference implementation
|
2017-05-24 08:23:19 +00:00
|
|
|
static void FWHT(fwht_t* data, const unsigned bits)
|
2017-05-18 03:06:13 +00:00
|
|
|
{
|
|
|
|
const unsigned size = (unsigned)(1UL << bits);
|
|
|
|
for (unsigned width = 1; width < size; width <<= 1)
|
|
|
|
for (unsigned i = 0; i < size; i += (width << 1))
|
|
|
|
for (unsigned j = i; j < (width + i); ++j)
|
2017-05-24 08:23:19 +00:00
|
|
|
FWHT_2(data[j], data[j + width]);
|
2017-05-18 03:06:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
// In-place 4-point transform of data[0..3]
static LEO_FORCE_INLINE void FWHT_4(fwht_t* data)
{
    // Work on local copies, then store the results back
    fwht_t v[4];
    for (unsigned k = 0; k < 4; ++k)
        v[k] = data[k];

    // Pass 1: neighbours
    FWHT_2(v[0], v[1]);
    FWHT_2(v[2], v[3]);

    // Pass 2: elements two apart
    FWHT_2(v[0], v[2]);
    FWHT_2(v[1], v[3]);

    for (unsigned k = 0; k < 4; ++k)
        data[k] = v[k];
}
|
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
// In-place 4-point transform of the strided elements
// data[0], data[s], data[2s], data[3s]
static LEO_FORCE_INLINE void FWHT_4(fwht_t* data, unsigned s)
{
    const unsigned i0 = 0;
    const unsigned i1 = i0 + s;
    const unsigned i2 = i1 + s;
    const unsigned i3 = i2 + s;

    fwht_t a = data[i0];
    fwht_t b = data[i1];
    fwht_t c = data[i2];
    fwht_t d = data[i3];

    // Pass 1: adjacent pairs
    FWHT_2(a, b);
    FWHT_2(c, d);

    // Pass 2: pairs two apart
    FWHT_2(a, c);
    FWHT_2(b, d);

    data[i0] = a;
    data[i1] = b;
    data[i2] = c;
    data[i3] = d;
}
|
|
|
|
|
2017-05-24 08:23:19 +00:00
|
|
|
// In-place 8-point transform of data[0..7].
// Three butterfly passes with pair distances 1, 2 and 4.
static inline void FWHT_8(fwht_t* data)
{
    const unsigned kPoints = 8;

    // Work on local copies, then store the results back
    fwht_t v[kPoints];
    for (unsigned k = 0; k < kPoints; ++k)
        v[k] = data[k];

    for (unsigned half = 1; half < kPoints; half <<= 1)
    {
        for (unsigned base = 0; base < kPoints; base += (half << 1))
        {
            for (unsigned k = base; k < base + half; ++k)
                FWHT_2(v[k], v[k + half]);
        }
    }

    for (unsigned k = 0; k < kPoints; ++k)
        data[k] = v[k];
}
|
|
|
|
|
2017-05-24 08:23:19 +00:00
|
|
|
// In-place 16-point transform of data[0..15].
// Four butterfly passes with pair distances 1, 2, 4 and 8.
static inline void FWHT_16(fwht_t* data)
{
    const unsigned kPoints = 16;

    // Work on local copies, then store the results back
    fwht_t v[kPoints];
    for (unsigned k = 0; k < kPoints; ++k)
        v[k] = data[k];

    for (unsigned half = 1; half < kPoints; half <<= 1)
    {
        for (unsigned base = 0; base < kPoints; base += (half << 1))
        {
            for (unsigned k = base; k < base + half; ++k)
                FWHT_2(v[k], v[k + half]);
        }
    }

    for (unsigned k = 0; k < kPoints; ++k)
        data[k] = v[k];
}
|
|
|
|
|
2017-05-24 08:23:19 +00:00
|
|
|
// In-place transform of 2^ldn elements, built from 4-point passes.
//
// Strided 4-point butterflies consume two levels at a time from the top
// down while more than three levels remain. The remaining low levels are
// finished with contiguous 8-point transforms when ldn is odd, or 4-point
// transforms when ldn is even.
static void FWHT_SmallData(fwht_t* data, unsigned ldn)
{
    const unsigned count = (1UL << ldn);

    // Trivial sizes
    if (count < 2)
        return;
    if (count == 2)
    {
        FWHT_2(data[0], data[1]);
        return;
    }

    // Upper levels, two per pass
    unsigned level = ldn;
    while (level > 3)
    {
        const unsigned blockLen = (1UL << level);
        const unsigned quarter = (blockLen >> 2);

        for (unsigned base = 0; base < count; base += blockLen)
        {
            for (unsigned k = 0; k < quarter; k++)
                FWHT_4(data + k + base, quarter);
        }

        level -= 2;
    }

    // Lowest levels
    if ((ldn & 1) != 0)
    {
        for (unsigned pos = 0; pos < count; pos += 8)
            FWHT_8(data + pos);
    }
    else
    {
        for (unsigned pos = 0; pos < count; pos += 4)
            FWHT_4(data + pos);
    }
}
|
|
|
|
|
2017-05-24 08:23:19 +00:00
|
|
|
// Decimation in time (DIT) version
|
|
|
|
static void FWHT(fwht_t* data, const unsigned ldn)
|
2017-05-18 03:06:13 +00:00
|
|
|
{
|
|
|
|
if (ldn <= 13)
|
|
|
|
{
|
|
|
|
FWHT_SmallData(data, ldn);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
FWHT_2(data[2], data[3]);
|
|
|
|
FWHT_4(data + 4);
|
|
|
|
FWHT_8(data + 8);
|
|
|
|
FWHT_16(data + 16);
|
|
|
|
for (unsigned ldm = 5; ldm < ldn; ++ldm)
|
|
|
|
FWHT(data + (unsigned)(1UL << ldm), ldm);
|
|
|
|
|
2017-05-24 08:23:19 +00:00
|
|
|
for (unsigned ldm = 0; ldm < ldn; ++ldm)
|
2017-05-18 03:06:13 +00:00
|
|
|
{
|
2017-05-24 08:23:19 +00:00
|
|
|
const unsigned mh = (1UL << ldm);
|
2017-05-18 03:06:13 +00:00
|
|
|
for (unsigned t1 = 0, t2 = mh; t1 < mh; ++t1, ++t2)
|
|
|
|
FWHT_2(data[t1], data[t2]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
// Memory Buffer XOR
|
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
static void xor_mem(void * LEO_RESTRICT vx, const void * LEO_RESTRICT vy, unsigned bytes)
|
2017-05-18 03:06:13 +00:00
|
|
|
{
|
2017-05-25 09:24:15 +00:00
|
|
|
LEO_M128 * LEO_RESTRICT x16 = reinterpret_cast<LEO_M128 *>(vx);
|
|
|
|
const LEO_M128 * LEO_RESTRICT y16 = reinterpret_cast<const LEO_M128 *>(vy);
|
2017-05-18 03:06:13 +00:00
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
#if defined(LEO_TARGET_MOBILE)
|
|
|
|
# if defined(LEO_TRY_NEON)
|
2017-05-18 03:06:13 +00:00
|
|
|
// Handle multiples of 64 bytes
|
|
|
|
if (CpuHasNeon)
|
|
|
|
{
|
|
|
|
while (bytes >= 64)
|
|
|
|
{
|
2017-05-25 09:24:15 +00:00
|
|
|
LEO_M128 x0 = vld1q_u8(x16);
|
|
|
|
LEO_M128 x1 = vld1q_u8(x16 + 1);
|
|
|
|
LEO_M128 x2 = vld1q_u8(x16 + 2);
|
|
|
|
LEO_M128 x3 = vld1q_u8(x16 + 3);
|
|
|
|
LEO_M128 y0 = vld1q_u8(y16);
|
|
|
|
LEO_M128 y1 = vld1q_u8(y16 + 1);
|
|
|
|
LEO_M128 y2 = vld1q_u8(y16 + 2);
|
|
|
|
LEO_M128 y3 = vld1q_u8(y16 + 3);
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
vst1q_u8(x16, veorq_u8(x0, y0));
|
|
|
|
vst1q_u8(x16 + 1, veorq_u8(x1, y1));
|
|
|
|
vst1q_u8(x16 + 2, veorq_u8(x2, y2));
|
|
|
|
vst1q_u8(x16 + 3, veorq_u8(x3, y3));
|
|
|
|
|
|
|
|
bytes -= 64, x16 += 4, y16 += 4;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Handle multiples of 16 bytes
|
|
|
|
while (bytes >= 16)
|
|
|
|
{
|
2017-05-25 09:24:15 +00:00
|
|
|
LEO_M128 x0 = vld1q_u8(x16);
|
|
|
|
LEO_M128 y0 = vld1q_u8(y16);
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
vst1q_u8(x16, veorq_u8(x0, y0));
|
|
|
|
|
|
|
|
bytes -= 16, ++x16, ++y16;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
2017-05-25 09:24:15 +00:00
|
|
|
# endif // LEO_TRY_NEON
|
2017-05-18 03:06:13 +00:00
|
|
|
{
|
2017-05-25 09:24:15 +00:00
|
|
|
uint64_t * LEO_RESTRICT x8 = reinterpret_cast<uint64_t *>(x16);
|
|
|
|
const uint64_t * LEO_RESTRICT y8 = reinterpret_cast<const uint64_t *>(y16);
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
const unsigned count = (unsigned)bytes / 8;
|
|
|
|
for (unsigned ii = 0; ii < count; ++ii)
|
|
|
|
x8[ii] ^= y8[ii];
|
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
x16 = reinterpret_cast<LEO_M128 *>(x8 + count);
|
|
|
|
y16 = reinterpret_cast<const LEO_M128 *>(y8 + count);
|
2017-05-18 03:06:13 +00:00
|
|
|
}
|
2017-05-25 09:24:15 +00:00
|
|
|
#else // LEO_TARGET_MOBILE
|
|
|
|
# if defined(LEO_TRY_AVX2)
|
2017-05-18 03:06:13 +00:00
|
|
|
if (CpuHasAVX2)
|
|
|
|
{
|
2017-05-25 09:24:15 +00:00
|
|
|
LEO_M256 * LEO_RESTRICT x32 = reinterpret_cast<LEO_M256 *>(x16);
|
|
|
|
const LEO_M256 * LEO_RESTRICT y32 = reinterpret_cast<const LEO_M256 *>(y16);
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
while (bytes >= 128)
|
|
|
|
{
|
2017-05-25 09:24:15 +00:00
|
|
|
LEO_M256 x0 = _mm256_loadu_si256(x32);
|
|
|
|
LEO_M256 y0 = _mm256_loadu_si256(y32);
|
2017-05-18 03:06:13 +00:00
|
|
|
x0 = _mm256_xor_si256(x0, y0);
|
2017-05-25 09:24:15 +00:00
|
|
|
LEO_M256 x1 = _mm256_loadu_si256(x32 + 1);
|
|
|
|
LEO_M256 y1 = _mm256_loadu_si256(y32 + 1);
|
2017-05-18 03:06:13 +00:00
|
|
|
x1 = _mm256_xor_si256(x1, y1);
|
2017-05-25 09:24:15 +00:00
|
|
|
LEO_M256 x2 = _mm256_loadu_si256(x32 + 2);
|
|
|
|
LEO_M256 y2 = _mm256_loadu_si256(y32 + 2);
|
2017-05-18 03:06:13 +00:00
|
|
|
x2 = _mm256_xor_si256(x2, y2);
|
2017-05-25 09:24:15 +00:00
|
|
|
LEO_M256 x3 = _mm256_loadu_si256(x32 + 3);
|
|
|
|
LEO_M256 y3 = _mm256_loadu_si256(y32 + 3);
|
2017-05-18 03:06:13 +00:00
|
|
|
x3 = _mm256_xor_si256(x3, y3);
|
|
|
|
|
|
|
|
_mm256_storeu_si256(x32, x0);
|
|
|
|
_mm256_storeu_si256(x32 + 1, x1);
|
|
|
|
_mm256_storeu_si256(x32 + 2, x2);
|
|
|
|
_mm256_storeu_si256(x32 + 3, x3);
|
|
|
|
|
|
|
|
bytes -= 128, x32 += 4, y32 += 4;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Handle multiples of 32 bytes
|
|
|
|
while (bytes >= 32)
|
|
|
|
{
|
|
|
|
// x[i] = x[i] xor y[i]
|
|
|
|
_mm256_storeu_si256(x32,
|
|
|
|
_mm256_xor_si256(
|
|
|
|
_mm256_loadu_si256(x32),
|
|
|
|
_mm256_loadu_si256(y32)));
|
|
|
|
|
|
|
|
bytes -= 32, ++x32, ++y32;
|
|
|
|
}
|
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
x16 = reinterpret_cast<LEO_M128 *>(x32);
|
|
|
|
y16 = reinterpret_cast<const LEO_M128 *>(y32);
|
2017-05-18 03:06:13 +00:00
|
|
|
}
|
|
|
|
else
|
2017-05-25 09:24:15 +00:00
|
|
|
# endif // LEO_TRY_AVX2
|
2017-05-18 03:06:13 +00:00
|
|
|
{
|
|
|
|
while (bytes >= 64)
|
|
|
|
{
|
2017-05-25 09:24:15 +00:00
|
|
|
LEO_M128 x0 = _mm_loadu_si128(x16);
|
|
|
|
LEO_M128 y0 = _mm_loadu_si128(y16);
|
2017-05-18 03:06:13 +00:00
|
|
|
x0 = _mm_xor_si128(x0, y0);
|
2017-05-25 09:24:15 +00:00
|
|
|
LEO_M128 x1 = _mm_loadu_si128(x16 + 1);
|
|
|
|
LEO_M128 y1 = _mm_loadu_si128(y16 + 1);
|
2017-05-18 03:06:13 +00:00
|
|
|
x1 = _mm_xor_si128(x1, y1);
|
2017-05-25 09:24:15 +00:00
|
|
|
LEO_M128 x2 = _mm_loadu_si128(x16 + 2);
|
|
|
|
LEO_M128 y2 = _mm_loadu_si128(y16 + 2);
|
2017-05-18 03:06:13 +00:00
|
|
|
x2 = _mm_xor_si128(x2, y2);
|
2017-05-25 09:24:15 +00:00
|
|
|
LEO_M128 x3 = _mm_loadu_si128(x16 + 3);
|
|
|
|
LEO_M128 y3 = _mm_loadu_si128(y16 + 3);
|
2017-05-18 03:06:13 +00:00
|
|
|
x3 = _mm_xor_si128(x3, y3);
|
|
|
|
|
|
|
|
_mm_storeu_si128(x16, x0);
|
|
|
|
_mm_storeu_si128(x16 + 1, x1);
|
|
|
|
_mm_storeu_si128(x16 + 2, x2);
|
|
|
|
_mm_storeu_si128(x16 + 3, x3);
|
|
|
|
|
|
|
|
bytes -= 64, x16 += 4, y16 += 4;
|
|
|
|
}
|
|
|
|
}
|
2017-05-25 09:24:15 +00:00
|
|
|
#endif // LEO_TARGET_MOBILE
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
// Handle multiples of 16 bytes
|
|
|
|
while (bytes >= 16)
|
|
|
|
{
|
|
|
|
// x[i] = x[i] xor y[i]
|
|
|
|
_mm_storeu_si128(x16,
|
|
|
|
_mm_xor_si128(
|
|
|
|
_mm_loadu_si128(x16),
|
|
|
|
_mm_loadu_si128(y16)));
|
|
|
|
|
|
|
|
bytes -= 16, ++x16, ++y16;
|
|
|
|
}
|
|
|
|
|
2017-05-25 09:24:15 +00:00
|
|
|
uint8_t * LEO_RESTRICT x1 = reinterpret_cast<uint8_t *>(x16);
|
|
|
|
const uint8_t * LEO_RESTRICT y1 = reinterpret_cast<const uint8_t *>(y16);
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
// Handle a block of 8 bytes
|
|
|
|
const unsigned eight = bytes & 8;
|
|
|
|
if (eight)
|
|
|
|
{
|
2017-05-25 09:24:15 +00:00
|
|
|
uint64_t * LEO_RESTRICT x8 = reinterpret_cast<uint64_t *>(x1);
|
|
|
|
const uint64_t * LEO_RESTRICT y8 = reinterpret_cast<const uint64_t *>(y1);
|
2017-05-18 03:06:13 +00:00
|
|
|
*x8 ^= *y8;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Handle a block of 4 bytes
|
|
|
|
const unsigned four = bytes & 4;
|
|
|
|
if (four)
|
|
|
|
{
|
2017-05-25 09:24:15 +00:00
|
|
|
uint32_t * LEO_RESTRICT x4 = reinterpret_cast<uint32_t *>(x1 + eight);
|
|
|
|
const uint32_t * LEO_RESTRICT y4 = reinterpret_cast<const uint32_t *>(y1 + eight);
|
2017-05-18 03:06:13 +00:00
|
|
|
*x4 ^= *y4;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Handle final bytes
|
|
|
|
const unsigned offset = eight + four;
|
|
|
|
switch (bytes & 3)
|
|
|
|
{
|
|
|
|
case 3: x1[offset + 2] ^= y1[offset + 2];
|
|
|
|
case 2: x1[offset + 1] ^= y1[offset + 1];
|
|
|
|
case 1: x1[offset] ^= y1[offset];
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
// Formal Derivative
|
|
|
|
|
|
|
|
// Formal derivative of polynomial in the new basis
|
|
|
|
static void formal_derivative(GFSymbol* cos, const unsigned size)
|
|
|
|
{
|
|
|
|
for (unsigned i = 1; i < size; ++i)
|
|
|
|
{
|
|
|
|
const unsigned leng = ((i ^ (i - 1)) + 1) >> 1;
|
|
|
|
|
|
|
|
// If a large number of values are being XORed:
|
|
|
|
if (leng >= 8)
|
|
|
|
xor_mem(cos + i - leng, cos + i, leng * sizeof(GFSymbol));
|
|
|
|
else
|
|
|
|
for (unsigned j = i - leng; j < i; j++)
|
|
|
|
cos[j] ^= cos[j + leng];
|
|
|
|
}
|
|
|
|
|
|
|
|
for (unsigned i = size; i < kFieldSize; i <<= 1)
|
|
|
|
xor_mem(cos, cos + i, size * sizeof(GFSymbol));
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
// Fast Fourier Transform
|
|
|
|
|
|
|
|
// Skew factor table with kFieldModulus entries.  FLT() and IFLT() read it as
// skewVec[j + index - 1] and skip the multiply-add when the stored value
// equals kFieldModulus.  InitFieldOperations() builds the table and then
// replaces each entry with its GFLog value.
static GFSymbol skewVec[kFieldModulus]; // twisted factors used in FFT
|
|
|
|
|
|
|
|
// IFFT in the proposed basis
|
|
|
|
static void IFLT(GFSymbol* data, const unsigned size, const unsigned index)
|
|
|
|
{
|
|
|
|
for (unsigned depart_no = 1; depart_no < size; depart_no <<= 1)
|
|
|
|
{
|
|
|
|
for (unsigned j = depart_no; j < size; j += (depart_no << 1))
|
|
|
|
{
|
|
|
|
// If a large number of values are being XORed:
|
|
|
|
if (depart_no >= 8)
|
|
|
|
xor_mem(data + j, data + j - depart_no, depart_no * sizeof(GFSymbol));
|
|
|
|
else
|
|
|
|
for (unsigned i = j - depart_no; i < j; ++i)
|
|
|
|
data[i + depart_no] ^= data[i];
|
|
|
|
|
|
|
|
const GFSymbol skew = skewVec[j + index - 1];
|
|
|
|
|
|
|
|
if (skew != kFieldModulus)
|
|
|
|
muladd_mem(data + j - depart_no, data + j, skew, depart_no);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// FFT in the proposed basis
|
|
|
|
static void FLT(GFSymbol* data, const unsigned size, const unsigned index)
|
|
|
|
{
|
|
|
|
for (unsigned depart_no = (size >> 1); depart_no > 0; depart_no >>= 1)
|
|
|
|
{
|
|
|
|
for (unsigned j = depart_no; j < size; j += (depart_no << 1))
|
|
|
|
{
|
|
|
|
const GFSymbol skew = skewVec[j + index - 1];
|
|
|
|
|
|
|
|
if (skew != kFieldModulus)
|
|
|
|
muladd_mem(data + j - depart_no, data + j, skew, depart_no);
|
|
|
|
|
|
|
|
// If a large number of values are being XORed:
|
|
|
|
if (depart_no >= 8)
|
|
|
|
xor_mem(data + j, data + j - depart_no, depart_no * sizeof(GFSymbol));
|
|
|
|
else
|
|
|
|
for (unsigned i = j - depart_no; i < j; ++i)
|
|
|
|
data[i + depart_no] ^= data[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
// FFT Initialization
|
|
|
|
|
|
|
|
// Scale factors applied by decode() before and after formal_derivative():
// codeword[i] and codeword[i + 1] share the entry B[i >> 1].
// Filled by InitFieldOperations().
static GFSymbol B[kFieldSize >> 1]; // factors used in formal derivative
|
2017-05-24 08:23:19 +00:00
|
|
|
// FWHT() of the GFLog table (with entry 0 forced to 0), built once by
// InitFieldOperations().  decode() multiplies it element-wise with the
// transformed erasure flags.
static fwht_t log_walsh[kFieldSize]; // factors used in the evaluation of the error locator polynomial
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
// Initialize skewVec[], B[], log_walsh[]
|
|
|
|
static void InitFieldOperations()
|
|
|
|
{
|
2017-05-24 08:23:19 +00:00
|
|
|
GFSymbol temp[kGFBits - 1];
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
for (unsigned i = 1; i < kGFBits; ++i)
|
|
|
|
temp[i - 1] = (GFSymbol)((unsigned)1 << i);
|
|
|
|
|
|
|
|
for (unsigned m = 0; m < (kGFBits - 1); ++m)
|
|
|
|
{
|
|
|
|
const unsigned step = (unsigned)1 << (m + 1);
|
|
|
|
|
|
|
|
skewVec[((unsigned)1 << m) - 1] = 0;
|
|
|
|
|
|
|
|
for (unsigned i = m; i < (kGFBits - 1); ++i)
|
|
|
|
{
|
|
|
|
const unsigned s = ((unsigned)1 << (i + 1));
|
|
|
|
|
|
|
|
for (unsigned j = ((unsigned)1 << m) - 1; j < s; j += step)
|
|
|
|
skewVec[j + s] = skewVec[j] ^ temp[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
temp[m] = kFieldModulus - GFLog[mulE(temp[m], GFLog[temp[m] ^ 1])];
|
|
|
|
|
|
|
|
for (unsigned i = m + 1; i < (kGFBits - 1); ++i)
|
|
|
|
temp[i] = mulE(temp[i], (GFLog[temp[i] ^ 1] + temp[m]) % kFieldModulus);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < kFieldSize; ++i)
|
|
|
|
skewVec[i] = GFLog[skewVec[i]];
|
|
|
|
|
|
|
|
temp[0] = kFieldModulus - temp[0];
|
|
|
|
|
|
|
|
for (unsigned i = 1; i < (kGFBits - 1); ++i)
|
|
|
|
temp[i] = (kFieldModulus - temp[i] + temp[i - 1]) % kFieldModulus;
|
|
|
|
|
2017-05-24 08:23:19 +00:00
|
|
|
B[0] = 0;
|
2017-05-18 03:06:13 +00:00
|
|
|
for (unsigned i = 0; i < (kGFBits - 1); ++i)
|
|
|
|
{
|
|
|
|
const unsigned depart = ((unsigned)1 << i);
|
|
|
|
|
|
|
|
for (unsigned j = 0; j < depart; ++j)
|
|
|
|
B[j + depart] = (B[j] + temp[i]) % kFieldModulus;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < kFieldSize; ++i)
|
|
|
|
log_walsh[i] = GFLog[i];
|
|
|
|
|
2017-05-24 08:23:19 +00:00
|
|
|
log_walsh[0] = 0;
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
FWHT(log_walsh, kGFBits);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
// Encoder
|
|
|
|
|
|
|
|
// Encoding alg for k/n<0.5: message is a power of two
|
|
|
|
static void encodeL(GFSymbol* data, const unsigned k, GFSymbol* codeword)
|
|
|
|
{
|
2017-05-24 08:23:19 +00:00
|
|
|
memcpy(codeword, data, sizeof(GFSymbol) * k);
|
2017-05-18 03:06:13 +00:00
|
|
|
|
2017-05-24 08:23:19 +00:00
|
|
|
IFLT(codeword, k, 0);
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
for (unsigned i = k; i < kFieldSize; i += k)
|
|
|
|
{
|
|
|
|
memcpy(&codeword[i], codeword, sizeof(GFSymbol) * k);
|
|
|
|
|
|
|
|
FLT(&codeword[i], k, i);
|
|
|
|
}
|
|
|
|
|
|
|
|
memcpy(codeword, data, sizeof(GFSymbol) * k);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Encoding alg for k/n>0.5: parity is a power of two.
|
|
|
|
// data: message array. parity: parity array. mem: buffer(size>= n-k)
|
|
|
|
static void encodeH(const GFSymbol* data, const unsigned k, GFSymbol* parity, GFSymbol* mem)
|
|
|
|
{
|
2017-05-24 08:23:19 +00:00
|
|
|
const unsigned t = kFieldSize - k;
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
memset(parity, 0, sizeof(GFSymbol) * t);
|
|
|
|
|
|
|
|
for (unsigned i = t; i < kFieldSize; i += t)
|
|
|
|
{
|
|
|
|
memcpy(mem, &data[i - t], sizeof(GFSymbol) * t);
|
|
|
|
|
|
|
|
IFLT(mem, t, i);
|
|
|
|
|
|
|
|
xor_mem(parity, mem, t * sizeof(GFSymbol));
|
|
|
|
}
|
|
|
|
|
|
|
|
FLT(parity, t, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
// Decoder
|
|
|
|
|
2017-05-24 08:23:19 +00:00
|
|
|
// Erasure decoder, operating in place on codeword[].
//
// codeword: kFieldSize symbols; on return the entries flagged in erasure[]
//           hold the values computed by the decoder
// k:        message length (currently unused: the whole field is processed)
// erasure:  kFieldSize flags, true where the corresponding symbol was lost
static void decode(GFSymbol* codeword, unsigned k, const bool* erasure)
{
    (void)k;

    fwht_t locator[kFieldSize];

    // Compute the evaluations of the error locator polynomial
    for (unsigned i = 0; i < kFieldSize; ++i)
        locator[i] = erasure[i] ? 1 : 0;

    FWHT(locator, kGFBits);

    for (unsigned i = 0; i < kFieldSize; ++i)
        locator[i] = ((unsigned)locator[i] * (unsigned)log_walsh[i]) % kFieldModulus;

    FWHT(locator, kGFBits);

    // The derivative and the steps after it run over the whole field.
    // Simply substituting k here does not work; it is an open question what
    // else would need to change.
    const unsigned span = kFieldSize;

    // Clear the erased symbols and scale the received ones
    for (unsigned i = 0; i < kFieldSize; ++i)
    {
        if (!erasure[i])
            codeword[i] = mulE(codeword[i], locator[i]);
        else
            codeword[i] = 0;
    }

    IFLT(codeword, kFieldSize, 0);

    // formal derivative: each adjacent pair of symbols shares one B[] factor
    for (unsigned i = 0; i < kFieldSize; i += 2)
    {
        const GFSymbol b = B[i >> 1];

        codeword[i] = mulE(codeword[i], kFieldModulus - b);
        codeword[i + 1] = mulE(codeword[i + 1], kFieldModulus - b);
    }

    formal_derivative(codeword, span);

    for (unsigned i = 0; i < span; i += 2)
    {
        const GFSymbol b = B[i >> 1];

        codeword[i] = mulE(codeword[i], b);
        codeword[i + 1] = mulE(codeword[i + 1], b);
    }

    FLT(codeword, span, 0);

    // Only the erased positions receive a final rescale
    for (unsigned i = 0; i < span; ++i)
    {
        if (!erasure[i])
            continue;

        codeword[i] = mulE(codeword[i], kFieldModulus - locator[i]);
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
// Test Application
|
|
|
|
|
2017-05-24 08:23:19 +00:00
|
|
|
void test(unsigned k, unsigned seed)
|
2017-05-18 03:06:13 +00:00
|
|
|
{
|
2017-05-24 08:23:19 +00:00
|
|
|
srand(seed);
|
|
|
|
|
|
|
|
//-----------Generating message----------
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
// Message array
|
2017-05-24 08:23:19 +00:00
|
|
|
GFSymbol data[kFieldSize] = {0};
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
// Filled with random numbers
|
|
|
|
for (unsigned i = kFieldSize - k; i < kFieldSize; ++i)
|
|
|
|
data[i] = (GFSymbol)rand();
|
|
|
|
|
|
|
|
|
2017-05-24 08:23:19 +00:00
|
|
|
//---------encoding----------
|
2017-05-18 03:06:13 +00:00
|
|
|
|
2017-05-24 08:23:19 +00:00
|
|
|
GFSymbol codeword[kFieldSize];
|
|
|
|
encodeH(&data[kFieldSize - k], k, data, codeword);
|
|
|
|
//encodeL(data, k, codeword); // does not seem to work with any input? what else needs to change?
|
2017-05-18 03:06:13 +00:00
|
|
|
|
2017-05-24 08:23:19 +00:00
|
|
|
memcpy(codeword, data, sizeof(GFSymbol) * kFieldSize);
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
|
2017-05-24 08:23:19 +00:00
|
|
|
//--------erasure simulation---------
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
// Array indicating erasures
|
2017-05-24 08:23:19 +00:00
|
|
|
bool erasure[kFieldSize] = {
|
2017-05-18 03:06:13 +00:00
|
|
|
false
|
|
|
|
};
|
|
|
|
|
|
|
|
for (unsigned i = k; i < kFieldSize; ++i)
|
|
|
|
erasure[i] = true;
|
|
|
|
|
|
|
|
// permuting the erasure array
|
|
|
|
for (unsigned i = kFieldSize - 1; i > 0; --i)
|
|
|
|
{
|
|
|
|
unsigned pos = rand() % (i + 1);
|
|
|
|
|
|
|
|
if (i != pos)
|
|
|
|
{
|
|
|
|
bool tmp = erasure[i];
|
|
|
|
erasure[i] = erasure[pos];
|
|
|
|
erasure[pos] = tmp;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// erasure codeword symbols
|
|
|
|
for (unsigned i = 0; i < kFieldSize; ++i)
|
|
|
|
if (erasure[i])
|
|
|
|
codeword[i] = 0;
|
|
|
|
|
|
|
|
|
2017-05-24 08:23:19 +00:00
|
|
|
//---------main processing----------
|
|
|
|
decode(codeword, k, erasure);
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
// Check the correctness of the result
|
|
|
|
for (unsigned i = 0; i < kFieldSize; ++i)
|
|
|
|
{
|
|
|
|
if (erasure[i] == 1)
|
|
|
|
{
|
|
|
|
if (data[i] != codeword[i])
|
|
|
|
{
|
2017-05-24 08:23:19 +00:00
|
|
|
printf("Decoding Error with seed = %d!\n", seed);
|
2017-05-25 09:24:15 +00:00
|
|
|
LEO_DEBUG_BREAK;
|
2017-05-18 03:06:13 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-24 08:23:19 +00:00
|
|
|
//printf("Decoding is successful!\n");
|
2017-05-18 03:06:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
// Entrypoint
|
|
|
|
|
|
|
|
int main(int argc, char **argv)
|
|
|
|
{
|
|
|
|
// Initialize architecture-specific code
|
2017-05-25 09:24:15 +00:00
|
|
|
leo_architecture_init();
|
2017-05-18 03:06:13 +00:00
|
|
|
|
|
|
|
// Fill GFLog table and GFExp table
|
|
|
|
InitField();
|
|
|
|
|
|
|
|
// Compute factors used in erasure decoder
|
|
|
|
InitFieldOperations();
|
|
|
|
|
2017-05-24 08:23:19 +00:00
|
|
|
unsigned seed = (unsigned)time(NULL);
|
2017-05-18 03:06:13 +00:00
|
|
|
for (;;)
|
|
|
|
{
|
|
|
|
// test(int k), k: message size
|
2017-05-24 08:23:19 +00:00
|
|
|
/*
|
|
|
|
EncodeH works for kFieldSize / 2 and kFieldSize * 3 / 4, etc,
|
|
|
|
s.t. the number of recovery pieces is a power of two
|
|
|
|
*/
|
|
|
|
test(kFieldSize / 2, seed);
|
|
|
|
|
|
|
|
++seed;
|
2017-05-18 03:06:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|