mirror of
https://github.com/status-im/status-go.git
synced 2025-01-10 22:56:40 +00:00
1742 lines
50 KiB
Go
1742 lines
50 KiB
Go
/**
|
|
* Reed-Solomon Coding over 8-bit values.
|
|
*
|
|
* Copyright 2015, Klaus Post
|
|
* Copyright 2015, Backblaze, Inc.
|
|
*/
|
|
|
|
// Package reedsolomon enables Erasure Coding in Go
|
|
//
|
|
// For usage and examples, see https://github.com/klauspost/reedsolomon
|
|
package reedsolomon
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"runtime"
|
|
"sync"
|
|
|
|
"github.com/klauspost/cpuid/v2"
|
|
)
|
|
|
|
// Encoder is an interface to encode Reed-Salomon parity sets for your data.
|
|
type Encoder interface {
|
|
// Encode parity for a set of data shards.
|
|
// Input is 'shards' containing data shards followed by parity shards.
|
|
// The number of shards must match the number given to New().
|
|
// Each shard is a byte array, and they must all be the same size.
|
|
// The parity shards will always be overwritten and the data shards
|
|
// will remain the same, so it is safe for you to read from the
|
|
// data shards while this is running.
|
|
Encode(shards [][]byte) error
|
|
|
|
// EncodeIdx will add parity for a single data shard.
|
|
// Parity shards should start out as 0. The caller must zero them.
|
|
// Data shards must be delivered exactly once. There is no check for this.
|
|
// The parity shards will always be updated and the data shards will remain the same.
|
|
EncodeIdx(dataShard []byte, idx int, parity [][]byte) error
|
|
|
|
// Verify returns true if the parity shards contain correct data.
|
|
// The data is the same format as Encode. No data is modified, so
|
|
// you are allowed to read from data while this is running.
|
|
Verify(shards [][]byte) (bool, error)
|
|
|
|
// Reconstruct will recreate the missing shards if possible.
|
|
//
|
|
// Given a list of shards, some of which contain data, fills in the
|
|
// ones that don't have data.
|
|
//
|
|
// The length of the array must be equal to the total number of shards.
|
|
// You indicate that a shard is missing by setting it to nil or zero-length.
|
|
// If a shard is zero-length but has sufficient capacity, that memory will
|
|
// be used, otherwise a new []byte will be allocated.
|
|
//
|
|
// If there are too few shards to reconstruct the missing
|
|
// ones, ErrTooFewShards will be returned.
|
|
//
|
|
// The reconstructed shard set is complete, but integrity is not verified.
|
|
// Use the Verify function to check if data set is ok.
|
|
Reconstruct(shards [][]byte) error
|
|
|
|
// ReconstructData will recreate any missing data shards, if possible.
|
|
//
|
|
// Given a list of shards, some of which contain data, fills in the
|
|
// data shards that don't have data.
|
|
//
|
|
// The length of the array must be equal to Shards.
|
|
// You indicate that a shard is missing by setting it to nil or zero-length.
|
|
// If a shard is zero-length but has sufficient capacity, that memory will
|
|
// be used, otherwise a new []byte will be allocated.
|
|
//
|
|
// If there are too few shards to reconstruct the missing
|
|
// ones, ErrTooFewShards will be returned.
|
|
//
|
|
// As the reconstructed shard set may contain missing parity shards,
|
|
// calling the Verify function is likely to fail.
|
|
ReconstructData(shards [][]byte) error
|
|
|
|
// ReconstructSome will recreate only requested shards, if possible.
|
|
//
|
|
// Given a list of shards, some of which contain data, fills in the
|
|
// shards indicated by true values in the "required" parameter.
|
|
// The length of the "required" array must be equal to either Shards or DataShards.
|
|
// If the length is equal to DataShards, the reconstruction of parity shards will be ignored.
|
|
//
|
|
// The length of "shards" array must be equal to Shards.
|
|
// You indicate that a shard is missing by setting it to nil or zero-length.
|
|
// If a shard is zero-length but has sufficient capacity, that memory will
|
|
// be used, otherwise a new []byte will be allocated.
|
|
//
|
|
// If there are too few shards to reconstruct the missing
|
|
// ones, ErrTooFewShards will be returned.
|
|
//
|
|
// As the reconstructed shard set may contain missing parity shards,
|
|
// calling the Verify function is likely to fail.
|
|
ReconstructSome(shards [][]byte, required []bool) error
|
|
|
|
// Update parity is use for change a few data shards and update it's parity.
|
|
// Input 'newDatashards' containing data shards changed.
|
|
// Input 'shards' containing old data shards (if data shard not changed, it can be nil) and old parity shards.
|
|
// new parity shards will in shards[DataShards:]
|
|
// Update is very useful if DataShards much larger than ParityShards and changed data shards is few. It will
|
|
// faster than Encode and not need read all data shards to encode.
|
|
Update(shards [][]byte, newDatashards [][]byte) error
|
|
|
|
// Split a data slice into the number of shards given to the encoder,
|
|
// and create empty parity shards if necessary.
|
|
//
|
|
// The data will be split into equally sized shards.
|
|
// If the data size isn't divisible by the number of shards,
|
|
// the last shard will contain extra zeros.
|
|
//
|
|
// If there is extra capacity on the provided data slice
|
|
// it will be used instead of allocating parity shards.
|
|
// It will be zeroed out.
|
|
//
|
|
// There must be at least 1 byte otherwise ErrShortData will be
|
|
// returned.
|
|
//
|
|
// The data will not be copied, except for the last shard, so you
|
|
// should not modify the data of the input slice afterwards.
|
|
Split(data []byte) ([][]byte, error)
|
|
|
|
// Join the shards and write the data segment to dst.
|
|
//
|
|
// Only the data shards are considered.
|
|
// You must supply the exact output size you want.
|
|
// If there are to few shards given, ErrTooFewShards will be returned.
|
|
// If the total data size is less than outSize, ErrShortData will be returned.
|
|
Join(dst io.Writer, shards [][]byte, outSize int) error
|
|
}
|
|
|
|
// Extensions is an optional interface.
|
|
// All returned instances will support this interface.
|
|
type Extensions interface {
|
|
// ShardSizeMultiple will return the size the shard sizes must be a multiple of.
|
|
ShardSizeMultiple() int
|
|
|
|
// DataShards will return the number of data shards.
|
|
DataShards() int
|
|
|
|
// ParityShards will return the number of parity shards.
|
|
ParityShards() int
|
|
|
|
// TotalShards will return the total number of shards.
|
|
TotalShards() int
|
|
|
|
// AllocAligned will allocate TotalShards number of slices,
|
|
// aligned to reasonable memory sizes.
|
|
// Provide the size of each shard.
|
|
AllocAligned(each int) [][]byte
|
|
}
|
|
|
|
const (
|
|
avx2CodeGenMinSize = 64
|
|
avx2CodeGenMinShards = 3
|
|
avx2CodeGenMaxGoroutines = 8
|
|
gfniCodeGenMaxGoroutines = 4
|
|
|
|
intSize = 32 << (^uint(0) >> 63) // 32 or 64
|
|
maxInt = 1<<(intSize-1) - 1
|
|
)
|
|
|
|
// reedSolomon contains a matrix for a specific
|
|
// distribution of datashards and parity shards.
|
|
// Construct if using New()
|
|
type reedSolomon struct {
|
|
dataShards int // Number of data shards, should not be modified.
|
|
parityShards int // Number of parity shards, should not be modified.
|
|
totalShards int // Total number of shards. Calculated, and should not be modified.
|
|
m matrix
|
|
tree *inversionTree
|
|
parity [][]byte
|
|
o options
|
|
mPoolSz int
|
|
mPool sync.Pool // Pool for temp matrices, etc
|
|
}
|
|
|
|
var _ = Extensions(&reedSolomon{})
|
|
|
|
func (r *reedSolomon) ShardSizeMultiple() int {
|
|
return 1
|
|
}
|
|
|
|
func (r *reedSolomon) DataShards() int {
|
|
return r.dataShards
|
|
}
|
|
|
|
func (r *reedSolomon) ParityShards() int {
|
|
return r.parityShards
|
|
}
|
|
|
|
func (r *reedSolomon) TotalShards() int {
|
|
return r.totalShards
|
|
}
|
|
|
|
func (r *reedSolomon) AllocAligned(each int) [][]byte {
|
|
return AllocAligned(r.totalShards, each)
|
|
}
|
|
|
|
// ErrInvShardNum will be returned by New, if you attempt to create
|
|
// an Encoder with less than one data shard or less than zero parity
|
|
// shards.
|
|
var ErrInvShardNum = errors.New("cannot create Encoder with less than one data shard or less than zero parity shards")
|
|
|
|
// ErrMaxShardNum will be returned by New, if you attempt to create an
|
|
// Encoder where data and parity shards are bigger than the order of
|
|
// GF(2^8).
|
|
var ErrMaxShardNum = errors.New("cannot create Encoder with more than 256 data+parity shards")
|
|
|
|
// ErrNotSupported is returned when an operation is not supported.
|
|
var ErrNotSupported = errors.New("operation not supported")
|
|
|
|
// buildMatrix creates the matrix to use for encoding, given the
|
|
// number of data shards and the number of total shards.
|
|
//
|
|
// The top square of the matrix is guaranteed to be an identity
|
|
// matrix, which means that the data shards are unchanged after
|
|
// encoding.
|
|
func buildMatrix(dataShards, totalShards int) (matrix, error) {
|
|
// Start with a Vandermonde matrix. This matrix would work,
|
|
// in theory, but doesn't have the property that the data
|
|
// shards are unchanged after encoding.
|
|
vm, err := vandermonde(totalShards, dataShards)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Multiply by the inverse of the top square of the matrix.
|
|
// This will make the top square be the identity matrix, but
|
|
// preserve the property that any square subset of rows is
|
|
// invertible.
|
|
top, err := vm.SubMatrix(0, 0, dataShards, dataShards)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
topInv, err := top.Invert()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return vm.Multiply(topInv)
|
|
}
|
|
|
|
// buildMatrixJerasure creates the same encoding matrix as Jerasure library
|
|
//
|
|
// The top square of the matrix is guaranteed to be an identity
|
|
// matrix, which means that the data shards are unchanged after
|
|
// encoding.
|
|
func buildMatrixJerasure(dataShards, totalShards int) (matrix, error) {
|
|
// Start with a Vandermonde matrix. This matrix would work,
|
|
// in theory, but doesn't have the property that the data
|
|
// shards are unchanged after encoding.
|
|
vm, err := vandermonde(totalShards, dataShards)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Jerasure does this:
|
|
// first row is always 100..00
|
|
vm[0][0] = 1
|
|
for i := 1; i < dataShards; i++ {
|
|
vm[0][i] = 0
|
|
}
|
|
// last row is always 000..01
|
|
for i := 0; i < dataShards-1; i++ {
|
|
vm[totalShards-1][i] = 0
|
|
}
|
|
vm[totalShards-1][dataShards-1] = 1
|
|
|
|
for i := 0; i < dataShards; i++ {
|
|
// Find the row where i'th col is not 0
|
|
r := i
|
|
for ; r < totalShards && vm[r][i] == 0; r++ {
|
|
}
|
|
if r != i {
|
|
// Swap it with i'th row if not already
|
|
t := vm[r]
|
|
vm[r] = vm[i]
|
|
vm[i] = t
|
|
}
|
|
// Multiply by the inverted matrix (same as vm.Multiply(vm[0:dataShards].Invert()))
|
|
if vm[i][i] != 1 {
|
|
// Make vm[i][i] = 1 by dividing the column by vm[i][i]
|
|
tmp := galOneOver(vm[i][i])
|
|
for j := 0; j < totalShards; j++ {
|
|
vm[j][i] = galMultiply(vm[j][i], tmp)
|
|
}
|
|
}
|
|
for j := 0; j < dataShards; j++ {
|
|
// Make vm[i][j] = 0 where j != i by adding vm[i][j]*vm[.][i] to each column
|
|
tmp := vm[i][j]
|
|
if j != i && tmp != 0 {
|
|
for r := 0; r < totalShards; r++ {
|
|
vm[r][j] = galAdd(vm[r][j], galMultiply(tmp, vm[r][i]))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Make vm[dataShards] row all ones - divide each column j by vm[dataShards][j]
|
|
for j := 0; j < dataShards; j++ {
|
|
tmp := vm[dataShards][j]
|
|
if tmp != 1 {
|
|
tmp = galOneOver(tmp)
|
|
for i := dataShards; i < totalShards; i++ {
|
|
vm[i][j] = galMultiply(vm[i][j], tmp)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Make vm[dataShards...totalShards-1][0] column all ones - divide each row
|
|
for i := dataShards + 1; i < totalShards; i++ {
|
|
tmp := vm[i][0]
|
|
if tmp != 1 {
|
|
tmp = galOneOver(tmp)
|
|
for j := 0; j < dataShards; j++ {
|
|
vm[i][j] = galMultiply(vm[i][j], tmp)
|
|
}
|
|
}
|
|
}
|
|
|
|
return vm, nil
|
|
}
|
|
|
|
// buildMatrixPAR1 creates the matrix to use for encoding according to
|
|
// the PARv1 spec, given the number of data shards and the number of
|
|
// total shards. Note that the method they use is buggy, and may lead
|
|
// to cases where recovery is impossible, even if there are enough
|
|
// parity shards.
|
|
//
|
|
// The top square of the matrix is guaranteed to be an identity
|
|
// matrix, which means that the data shards are unchanged after
|
|
// encoding.
|
|
func buildMatrixPAR1(dataShards, totalShards int) (matrix, error) {
|
|
result, err := newMatrix(totalShards, dataShards)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
for r, row := range result {
|
|
// The top portion of the matrix is the identity
|
|
// matrix, and the bottom is a transposed Vandermonde
|
|
// matrix starting at 1 instead of 0.
|
|
if r < dataShards {
|
|
result[r][r] = 1
|
|
} else {
|
|
for c := range row {
|
|
result[r][c] = galExp(byte(c+1), r-dataShards)
|
|
}
|
|
}
|
|
}
|
|
return result, nil
|
|
}
|
|
|
|
func buildMatrixCauchy(dataShards, totalShards int) (matrix, error) {
|
|
result, err := newMatrix(totalShards, dataShards)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
for r, row := range result {
|
|
// The top portion of the matrix is the identity
|
|
// matrix, and the bottom is a transposed Cauchy matrix.
|
|
if r < dataShards {
|
|
result[r][r] = 1
|
|
} else {
|
|
for c := range row {
|
|
result[r][c] = invTable[(byte(r ^ c))]
|
|
}
|
|
}
|
|
}
|
|
return result, nil
|
|
}
|
|
|
|
// buildXorMatrix can be used to build a matrix with pure XOR
|
|
// operations if there is only one parity shard.
|
|
func buildXorMatrix(dataShards, totalShards int) (matrix, error) {
|
|
if dataShards+1 != totalShards {
|
|
return nil, errors.New("internal error")
|
|
}
|
|
result, err := newMatrix(totalShards, dataShards)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
for r, row := range result {
|
|
// The top portion of the matrix is the identity
|
|
// matrix.
|
|
if r < dataShards {
|
|
result[r][r] = 1
|
|
} else {
|
|
// Set all values to 1 (XOR)
|
|
for c := range row {
|
|
result[r][c] = 1
|
|
}
|
|
}
|
|
}
|
|
return result, nil
|
|
}
|
|
|
|
// New creates a new encoder and initializes it to
|
|
// the number of data shards and parity shards that
|
|
// you want to use. You can reuse this encoder.
|
|
// Note that the maximum number of total shards is 65536, with some
|
|
// restrictions for a total larger than 256:
|
|
//
|
|
// - Shard sizes must be multiple of 64
|
|
// - The methods Join/Split/Update/EncodeIdx are not supported
|
|
//
|
|
// If no options are supplied, default options are used.
|
|
func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
|
|
o := defaultOptions
|
|
for _, opt := range opts {
|
|
opt(&o)
|
|
}
|
|
|
|
totShards := dataShards + parityShards
|
|
switch {
|
|
case o.withLeopard == leopardGF16 && parityShards > 0 || totShards > 256:
|
|
return newFF16(dataShards, parityShards, o)
|
|
case o.withLeopard == leopardAlways && parityShards > 0:
|
|
return newFF8(dataShards, parityShards, o)
|
|
}
|
|
if totShards > 256 {
|
|
return nil, ErrMaxShardNum
|
|
}
|
|
|
|
r := reedSolomon{
|
|
dataShards: dataShards,
|
|
parityShards: parityShards,
|
|
totalShards: dataShards + parityShards,
|
|
o: o,
|
|
}
|
|
|
|
if dataShards <= 0 || parityShards < 0 {
|
|
return nil, ErrInvShardNum
|
|
}
|
|
|
|
if parityShards == 0 {
|
|
return &r, nil
|
|
}
|
|
|
|
var err error
|
|
switch {
|
|
case r.o.customMatrix != nil:
|
|
if len(r.o.customMatrix) < parityShards {
|
|
return nil, errors.New("coding matrix must contain at least parityShards rows")
|
|
}
|
|
r.m = make([][]byte, r.totalShards)
|
|
for i := 0; i < dataShards; i++ {
|
|
r.m[i] = make([]byte, dataShards)
|
|
r.m[i][i] = 1
|
|
}
|
|
for k, row := range r.o.customMatrix {
|
|
if len(row) < dataShards {
|
|
return nil, errors.New("coding matrix must contain at least dataShards columns")
|
|
}
|
|
r.m[dataShards+k] = make([]byte, dataShards)
|
|
copy(r.m[dataShards+k], row)
|
|
}
|
|
case r.o.fastOneParity && parityShards == 1:
|
|
r.m, err = buildXorMatrix(dataShards, r.totalShards)
|
|
case r.o.useCauchy:
|
|
r.m, err = buildMatrixCauchy(dataShards, r.totalShards)
|
|
case r.o.usePAR1Matrix:
|
|
r.m, err = buildMatrixPAR1(dataShards, r.totalShards)
|
|
case r.o.useJerasureMatrix:
|
|
r.m, err = buildMatrixJerasure(dataShards, r.totalShards)
|
|
default:
|
|
r.m, err = buildMatrix(dataShards, r.totalShards)
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Calculate what we want per round
|
|
r.o.perRound = cpuid.CPU.Cache.L2
|
|
if r.o.perRound < 128<<10 {
|
|
r.o.perRound = 128 << 10
|
|
}
|
|
|
|
divide := parityShards + 1
|
|
if avx2CodeGen && r.o.useAVX2 && (dataShards > maxAvx2Inputs || parityShards > maxAvx2Outputs) {
|
|
// Base on L1 cache if we have many inputs.
|
|
r.o.perRound = cpuid.CPU.Cache.L1D
|
|
if r.o.perRound < 32<<10 {
|
|
r.o.perRound = 32 << 10
|
|
}
|
|
divide = 0
|
|
if dataShards > maxAvx2Inputs {
|
|
divide += maxAvx2Inputs
|
|
} else {
|
|
divide += dataShards
|
|
}
|
|
if parityShards > maxAvx2Inputs {
|
|
divide += maxAvx2Outputs
|
|
} else {
|
|
divide += parityShards
|
|
}
|
|
}
|
|
|
|
if cpuid.CPU.ThreadsPerCore > 1 && r.o.maxGoroutines > cpuid.CPU.PhysicalCores {
|
|
// If multiple threads per core, make sure they don't contend for cache.
|
|
r.o.perRound /= cpuid.CPU.ThreadsPerCore
|
|
}
|
|
|
|
// 1 input + parity must fit in cache, and we add one more to be safer.
|
|
r.o.perRound = r.o.perRound / divide
|
|
// Align to 64 bytes.
|
|
r.o.perRound = ((r.o.perRound + 63) / 64) * 64
|
|
|
|
// Final sanity check...
|
|
if r.o.perRound < 1<<10 {
|
|
r.o.perRound = 1 << 10
|
|
}
|
|
|
|
if r.o.minSplitSize <= 0 {
|
|
// Set minsplit as high as we can, but still have parity in L1.
|
|
cacheSize := cpuid.CPU.Cache.L1D
|
|
if cacheSize <= 0 {
|
|
cacheSize = 32 << 10
|
|
}
|
|
|
|
r.o.minSplitSize = cacheSize / (parityShards + 1)
|
|
// Min 1K
|
|
if r.o.minSplitSize < 1024 {
|
|
r.o.minSplitSize = 1024
|
|
}
|
|
}
|
|
|
|
if r.o.shardSize > 0 {
|
|
p := runtime.GOMAXPROCS(0)
|
|
if p == 1 || r.o.shardSize <= r.o.minSplitSize*2 {
|
|
// Not worth it.
|
|
r.o.maxGoroutines = 1
|
|
} else {
|
|
g := r.o.shardSize / r.o.perRound
|
|
|
|
// Overprovision by a factor of 2.
|
|
if g < p*2 && r.o.perRound > r.o.minSplitSize*2 {
|
|
g = p * 2
|
|
r.o.perRound /= 2
|
|
}
|
|
|
|
// Have g be multiple of p
|
|
g += p - 1
|
|
g -= g % p
|
|
|
|
r.o.maxGoroutines = g
|
|
}
|
|
}
|
|
|
|
// Generated AVX2 does not need data to stay in L1 cache between runs.
|
|
// We will be purely limited by RAM speed.
|
|
if r.canAVX2C(avx2CodeGenMinSize, maxAvx2Inputs, maxAvx2Outputs) && r.o.maxGoroutines > avx2CodeGenMaxGoroutines {
|
|
r.o.maxGoroutines = avx2CodeGenMaxGoroutines
|
|
}
|
|
|
|
if r.canGFNI(avx2CodeGenMinSize, maxAvx2Inputs, maxAvx2Outputs) && r.o.maxGoroutines > gfniCodeGenMaxGoroutines {
|
|
r.o.maxGoroutines = gfniCodeGenMaxGoroutines
|
|
}
|
|
|
|
// Inverted matrices are cached in a tree keyed by the indices
|
|
// of the invalid rows of the data to reconstruct.
|
|
// The inversion root node will have the identity matrix as
|
|
// its inversion matrix because it implies there are no errors
|
|
// with the original data.
|
|
if r.o.inversionCache {
|
|
r.tree = newInversionTree(dataShards, parityShards)
|
|
}
|
|
|
|
r.parity = make([][]byte, parityShards)
|
|
for i := range r.parity {
|
|
r.parity[i] = r.m[dataShards+i]
|
|
}
|
|
|
|
if avx2CodeGen && r.o.useAVX2 {
|
|
sz := r.dataShards * r.parityShards * 2 * 32
|
|
r.mPool.New = func() interface{} {
|
|
return AllocAligned(1, sz)[0]
|
|
}
|
|
r.mPoolSz = sz
|
|
}
|
|
return &r, err
|
|
}
|
|
|
|
func (r *reedSolomon) getTmpSlice() []byte {
|
|
return r.mPool.Get().([]byte)
|
|
}
|
|
|
|
func (r *reedSolomon) putTmpSlice(b []byte) {
|
|
if b != nil && cap(b) >= r.mPoolSz {
|
|
r.mPool.Put(b[:r.mPoolSz])
|
|
return
|
|
}
|
|
if false {
|
|
// Sanity check
|
|
panic(fmt.Sprintf("got short tmp returned, want %d, got %d", r.mPoolSz, cap(b)))
|
|
}
|
|
}
|
|
|
|
// ErrTooFewShards is returned if too few shards where given to
|
|
// Encode/Verify/Reconstruct/Update. It will also be returned from Reconstruct
|
|
// if there were too few shards to reconstruct the missing data.
|
|
var ErrTooFewShards = errors.New("too few shards given")
|
|
|
|
// Encode parity for a set of data shards.
|
|
// An array 'shards' containing data shards followed by parity shards.
|
|
// The number of shards must match the number given to New.
|
|
// Each shard is a byte array, and they must all be the same size.
|
|
// The parity shards will always be overwritten and the data shards
|
|
// will remain the same.
|
|
func (r *reedSolomon) Encode(shards [][]byte) error {
|
|
if len(shards) != r.totalShards {
|
|
return ErrTooFewShards
|
|
}
|
|
|
|
err := checkShards(shards, false)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Get the slice of output buffers.
|
|
output := shards[r.dataShards:]
|
|
|
|
// Do the coding.
|
|
r.codeSomeShards(r.parity, shards[0:r.dataShards], output[:r.parityShards], len(shards[0]))
|
|
return nil
|
|
}
|
|
|
|
// EncodeIdx will add parity for a single data shard.
|
|
// Parity shards should start out zeroed. The caller must zero them before first call.
|
|
// Data shards should only be delivered once. There is no check for this.
|
|
// The parity shards will always be updated and the data shards will remain the unchanged.
|
|
func (r *reedSolomon) EncodeIdx(dataShard []byte, idx int, parity [][]byte) error {
|
|
if len(parity) != r.parityShards {
|
|
return ErrTooFewShards
|
|
}
|
|
if len(parity) == 0 {
|
|
return nil
|
|
}
|
|
if idx < 0 || idx >= r.dataShards {
|
|
return ErrInvShardNum
|
|
}
|
|
err := checkShards(parity, false)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if len(parity[0]) != len(dataShard) {
|
|
return ErrShardSize
|
|
}
|
|
|
|
if avx2CodeGen && len(dataShard) >= r.o.perRound && len(parity) >= avx2CodeGenMinShards && ((pshufb && r.o.useAVX2) || r.o.useAvx512GFNI || r.o.useAvxGNFI) {
|
|
m := make([][]byte, r.parityShards)
|
|
for iRow := range m {
|
|
m[iRow] = r.parity[iRow][idx : idx+1]
|
|
}
|
|
if r.o.useAvx512GFNI || r.o.useAvxGNFI {
|
|
r.codeSomeShardsGFNI(m, [][]byte{dataShard}, parity, len(dataShard), false)
|
|
} else {
|
|
r.codeSomeShardsAVXP(m, [][]byte{dataShard}, parity, len(dataShard), false)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Process using no goroutines for now.
|
|
start, end := 0, r.o.perRound
|
|
if end > len(dataShard) {
|
|
end = len(dataShard)
|
|
}
|
|
|
|
for start < len(dataShard) {
|
|
in := dataShard[start:end]
|
|
for iRow := 0; iRow < r.parityShards; iRow++ {
|
|
galMulSliceXor(r.parity[iRow][idx], in, parity[iRow][start:end], &r.o)
|
|
}
|
|
start = end
|
|
end += r.o.perRound
|
|
if end > len(dataShard) {
|
|
end = len(dataShard)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// ErrInvalidInput is returned if invalid input parameter of Update.
|
|
var ErrInvalidInput = errors.New("invalid input")
|
|
|
|
func (r *reedSolomon) Update(shards [][]byte, newDatashards [][]byte) error {
|
|
if len(shards) != r.totalShards {
|
|
return ErrTooFewShards
|
|
}
|
|
|
|
if len(newDatashards) != r.dataShards {
|
|
return ErrTooFewShards
|
|
}
|
|
|
|
err := checkShards(shards, true)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
err = checkShards(newDatashards, true)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for i := range newDatashards {
|
|
if newDatashards[i] != nil && shards[i] == nil {
|
|
return ErrInvalidInput
|
|
}
|
|
}
|
|
for _, p := range shards[r.dataShards:] {
|
|
if p == nil {
|
|
return ErrInvalidInput
|
|
}
|
|
}
|
|
|
|
shardSize := shardSize(shards)
|
|
|
|
// Get the slice of output buffers.
|
|
output := shards[r.dataShards:]
|
|
|
|
// Do the coding.
|
|
r.updateParityShards(r.parity, shards[0:r.dataShards], newDatashards[0:r.dataShards], output, r.parityShards, shardSize)
|
|
return nil
|
|
}
|
|
|
|
func (r *reedSolomon) updateParityShards(matrixRows, oldinputs, newinputs, outputs [][]byte, outputCount, byteCount int) {
|
|
if len(outputs) == 0 {
|
|
return
|
|
}
|
|
|
|
if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
|
|
r.updateParityShardsP(matrixRows, oldinputs, newinputs, outputs, outputCount, byteCount)
|
|
return
|
|
}
|
|
|
|
for c := 0; c < r.dataShards; c++ {
|
|
in := newinputs[c]
|
|
if in == nil {
|
|
continue
|
|
}
|
|
oldin := oldinputs[c]
|
|
// oldinputs data will be changed
|
|
sliceXor(in, oldin, &r.o)
|
|
for iRow := 0; iRow < outputCount; iRow++ {
|
|
galMulSliceXor(matrixRows[iRow][c], oldin, outputs[iRow], &r.o)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (r *reedSolomon) updateParityShardsP(matrixRows, oldinputs, newinputs, outputs [][]byte, outputCount, byteCount int) {
|
|
var wg sync.WaitGroup
|
|
do := byteCount / r.o.maxGoroutines
|
|
if do < r.o.minSplitSize {
|
|
do = r.o.minSplitSize
|
|
}
|
|
start := 0
|
|
for start < byteCount {
|
|
if start+do > byteCount {
|
|
do = byteCount - start
|
|
}
|
|
wg.Add(1)
|
|
go func(start, stop int) {
|
|
for c := 0; c < r.dataShards; c++ {
|
|
in := newinputs[c]
|
|
if in == nil {
|
|
continue
|
|
}
|
|
oldin := oldinputs[c]
|
|
// oldinputs data will be change
|
|
sliceXor(in[start:stop], oldin[start:stop], &r.o)
|
|
for iRow := 0; iRow < outputCount; iRow++ {
|
|
galMulSliceXor(matrixRows[iRow][c], oldin[start:stop], outputs[iRow][start:stop], &r.o)
|
|
}
|
|
}
|
|
wg.Done()
|
|
}(start, start+do)
|
|
start += do
|
|
}
|
|
wg.Wait()
|
|
}
|
|
|
|
// Verify returns true if the parity shards contain the right data.
|
|
// The data is the same format as Encode. No data is modified.
|
|
func (r *reedSolomon) Verify(shards [][]byte) (bool, error) {
|
|
if len(shards) != r.totalShards {
|
|
return false, ErrTooFewShards
|
|
}
|
|
err := checkShards(shards, false)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
// Slice of buffers being checked.
|
|
toCheck := shards[r.dataShards:]
|
|
|
|
// Do the checking.
|
|
return r.checkSomeShards(r.parity, shards[:r.dataShards], toCheck[:r.parityShards], len(shards[0])), nil
|
|
}
|
|
|
|
func (r *reedSolomon) canAVX2C(byteCount int, inputs, outputs int) bool {
|
|
return avx2CodeGen && pshufb && r.o.useAVX2 &&
|
|
byteCount >= avx2CodeGenMinSize && inputs+outputs >= avx2CodeGenMinShards &&
|
|
inputs <= maxAvx2Inputs && outputs <= maxAvx2Outputs
|
|
}
|
|
|
|
func (r *reedSolomon) canGFNI(byteCount int, inputs, outputs int) bool {
|
|
return avx2CodeGen && (r.o.useAvx512GFNI || r.o.useAvxGNFI) &&
|
|
byteCount >= avx2CodeGenMinSize && inputs+outputs >= avx2CodeGenMinShards &&
|
|
inputs <= maxAvx2Inputs && outputs <= maxAvx2Outputs
|
|
}
|
|
|
|
// Multiplies a subset of rows from a coding matrix by a full set of
|
|
// input totalShards to produce some output totalShards.
|
|
// 'matrixRows' is The rows from the matrix to use.
|
|
// 'inputs' An array of byte arrays, each of which is one input shard.
|
|
// The number of inputs used is determined by the length of each matrix row.
|
|
// outputs Byte arrays where the computed totalShards are stored.
|
|
// The number of outputs computed, and the
|
|
// number of matrix rows used, is determined by
|
|
// outputCount, which is the number of outputs to compute.
|
|
func (r *reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, byteCount int) {
|
|
if len(outputs) == 0 {
|
|
return
|
|
}
|
|
if byteCount > r.o.minSplitSize {
|
|
r.codeSomeShardsP(matrixRows, inputs, outputs, byteCount)
|
|
return
|
|
}
|
|
|
|
// Process using no goroutines
|
|
start, end := 0, r.o.perRound
|
|
if end > len(inputs[0]) {
|
|
end = len(inputs[0])
|
|
}
|
|
if r.canGFNI(byteCount, len(inputs), len(outputs)) {
|
|
var gfni [maxAvx2Inputs * maxAvx2Outputs]uint64
|
|
m := genGFNIMatrix(matrixRows, len(inputs), 0, len(outputs), gfni[:])
|
|
if r.o.useAvx512GFNI {
|
|
start += galMulSlicesGFNI(m, inputs, outputs, 0, byteCount)
|
|
} else {
|
|
start += galMulSlicesAvxGFNI(m, inputs, outputs, 0, byteCount)
|
|
}
|
|
end = len(inputs[0])
|
|
} else if r.canAVX2C(byteCount, len(inputs), len(outputs)) {
|
|
m := genAvx2Matrix(matrixRows, len(inputs), 0, len(outputs), r.getTmpSlice())
|
|
start += galMulSlicesAvx2(m, inputs, outputs, 0, byteCount)
|
|
r.putTmpSlice(m)
|
|
end = len(inputs[0])
|
|
} else if len(inputs)+len(outputs) > avx2CodeGenMinShards && r.canAVX2C(byteCount, maxAvx2Inputs, maxAvx2Outputs) {
|
|
var gfni [maxAvx2Inputs * maxAvx2Outputs]uint64
|
|
end = len(inputs[0])
|
|
inIdx := 0
|
|
m := r.getTmpSlice()
|
|
defer r.putTmpSlice(m)
|
|
ins := inputs
|
|
for len(ins) > 0 {
|
|
inPer := ins
|
|
if len(inPer) > maxAvx2Inputs {
|
|
inPer = inPer[:maxAvx2Inputs]
|
|
}
|
|
outs := outputs
|
|
outIdx := 0
|
|
for len(outs) > 0 {
|
|
outPer := outs
|
|
if len(outPer) > maxAvx2Outputs {
|
|
outPer = outPer[:maxAvx2Outputs]
|
|
}
|
|
if r.o.useAvx512GFNI {
|
|
m := genGFNIMatrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), gfni[:])
|
|
if inIdx == 0 {
|
|
start = galMulSlicesGFNI(m, inPer, outPer, 0, byteCount)
|
|
} else {
|
|
start = galMulSlicesGFNIXor(m, inPer, outPer, 0, byteCount)
|
|
}
|
|
} else if r.o.useAvxGNFI {
|
|
m := genGFNIMatrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), gfni[:])
|
|
if inIdx == 0 {
|
|
start = galMulSlicesAvxGFNI(m, inPer, outPer, 0, byteCount)
|
|
} else {
|
|
start = galMulSlicesAvxGFNIXor(m, inPer, outPer, 0, byteCount)
|
|
}
|
|
} else {
|
|
m = genAvx2Matrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), m)
|
|
if inIdx == 0 {
|
|
start = galMulSlicesAvx2(m, inPer, outPer, 0, byteCount)
|
|
} else {
|
|
start = galMulSlicesAvx2Xor(m, inPer, outPer, 0, byteCount)
|
|
}
|
|
}
|
|
outIdx += len(outPer)
|
|
outs = outs[len(outPer):]
|
|
}
|
|
inIdx += len(inPer)
|
|
ins = ins[len(inPer):]
|
|
}
|
|
if start >= end {
|
|
return
|
|
}
|
|
}
|
|
for start < len(inputs[0]) {
|
|
for c := 0; c < len(inputs); c++ {
|
|
in := inputs[c][start:end]
|
|
for iRow := 0; iRow < len(outputs); iRow++ {
|
|
if c == 0 {
|
|
galMulSlice(matrixRows[iRow][c], in, outputs[iRow][start:end], &r.o)
|
|
} else {
|
|
galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][start:end], &r.o)
|
|
}
|
|
}
|
|
}
|
|
start = end
|
|
end += r.o.perRound
|
|
if end > len(inputs[0]) {
|
|
end = len(inputs[0])
|
|
}
|
|
}
|
|
}
|
|
|
|
// Perform the same as codeSomeShards, but split the workload into
|
|
// several goroutines.
|
|
func (r *reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, byteCount int) {
|
|
var wg sync.WaitGroup
|
|
gor := r.o.maxGoroutines
|
|
|
|
var avx2Matrix []byte
|
|
var gfniMatrix []uint64
|
|
useAvx2 := r.canAVX2C(byteCount, len(inputs), len(outputs))
|
|
useGFNI := r.canGFNI(byteCount, len(inputs), len(outputs))
|
|
if useGFNI {
|
|
var tmp [maxAvx2Inputs * maxAvx2Outputs]uint64
|
|
gfniMatrix = genGFNIMatrix(matrixRows, len(inputs), 0, len(outputs), tmp[:])
|
|
} else if useAvx2 {
|
|
avx2Matrix = genAvx2Matrix(matrixRows, len(inputs), 0, len(outputs), r.getTmpSlice())
|
|
defer r.putTmpSlice(avx2Matrix)
|
|
} else if (r.o.useAvx512GFNI || r.o.useAvxGNFI) && byteCount < 10<<20 && len(inputs)+len(outputs) > avx2CodeGenMinShards &&
|
|
r.canGFNI(byteCount/4, maxAvx2Inputs, maxAvx2Outputs) {
|
|
// It appears there is a switchover point at around 10MB where
|
|
// Regular processing is faster...
|
|
r.codeSomeShardsGFNI(matrixRows, inputs, outputs, byteCount, true)
|
|
return
|
|
} else if r.o.useAVX2 && byteCount < 10<<20 && len(inputs)+len(outputs) > avx2CodeGenMinShards &&
|
|
r.canAVX2C(byteCount/4, maxAvx2Inputs, maxAvx2Outputs) {
|
|
// It appears there is a switchover point at around 10MB where
|
|
// Regular processing is faster...
|
|
r.codeSomeShardsAVXP(matrixRows, inputs, outputs, byteCount, true)
|
|
return
|
|
}
|
|
|
|
do := byteCount / gor
|
|
if do < r.o.minSplitSize {
|
|
do = r.o.minSplitSize
|
|
}
|
|
|
|
exec := func(start, stop int) {
|
|
if stop-start >= 64 {
|
|
if useGFNI {
|
|
if r.o.useAvx512GFNI {
|
|
start += galMulSlicesGFNI(gfniMatrix, inputs, outputs, start, stop)
|
|
} else {
|
|
start += galMulSlicesAvxGFNI(gfniMatrix, inputs, outputs, start, stop)
|
|
}
|
|
} else if useAvx2 {
|
|
start += galMulSlicesAvx2(avx2Matrix, inputs, outputs, start, stop)
|
|
}
|
|
}
|
|
|
|
lstart, lstop := start, start+r.o.perRound
|
|
if lstop > stop {
|
|
lstop = stop
|
|
}
|
|
for lstart < stop {
|
|
for c := 0; c < len(inputs); c++ {
|
|
in := inputs[c][lstart:lstop]
|
|
for iRow := 0; iRow < len(outputs); iRow++ {
|
|
if c == 0 {
|
|
galMulSlice(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
|
|
} else {
|
|
galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
|
|
}
|
|
}
|
|
}
|
|
lstart = lstop
|
|
lstop += r.o.perRound
|
|
if lstop > stop {
|
|
lstop = stop
|
|
}
|
|
}
|
|
wg.Done()
|
|
}
|
|
if gor <= 1 {
|
|
wg.Add(1)
|
|
exec(0, byteCount)
|
|
return
|
|
}
|
|
|
|
// Make sizes divisible by 64
|
|
do = (do + 63) & (^63)
|
|
start := 0
|
|
for start < byteCount {
|
|
if start+do > byteCount {
|
|
do = byteCount - start
|
|
}
|
|
|
|
wg.Add(1)
|
|
go exec(start, start+do)
|
|
start += do
|
|
}
|
|
wg.Wait()
|
|
}
|
|
|
|
// Perform the same as codeSomeShards, but split the workload into
|
|
// several goroutines.
|
|
// If clear is set, the first write will overwrite the output.
|
|
func (r *reedSolomon) codeSomeShardsAVXP(matrixRows, inputs, outputs [][]byte, byteCount int, clear bool) {
|
|
var wg sync.WaitGroup
|
|
gor := r.o.maxGoroutines
|
|
|
|
type state struct {
|
|
input [][]byte
|
|
output [][]byte
|
|
m []byte
|
|
first bool
|
|
}
|
|
// Make a plan...
|
|
plan := make([]state, 0, ((len(inputs)+maxAvx2Inputs-1)/maxAvx2Inputs)*((len(outputs)+maxAvx2Outputs-1)/maxAvx2Outputs))
|
|
|
|
tmp := r.getTmpSlice()
|
|
defer r.putTmpSlice(tmp)
|
|
|
|
// Flips between input first to output first.
|
|
// We put the smallest data load in the inner loop.
|
|
if len(inputs) > len(outputs) {
|
|
inIdx := 0
|
|
ins := inputs
|
|
for len(ins) > 0 {
|
|
inPer := ins
|
|
if len(inPer) > maxAvx2Inputs {
|
|
inPer = inPer[:maxAvx2Inputs]
|
|
}
|
|
outs := outputs
|
|
outIdx := 0
|
|
for len(outs) > 0 {
|
|
outPer := outs
|
|
if len(outPer) > maxAvx2Outputs {
|
|
outPer = outPer[:maxAvx2Outputs]
|
|
}
|
|
// Generate local matrix
|
|
m := genAvx2Matrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), tmp)
|
|
tmp = tmp[len(m):]
|
|
plan = append(plan, state{
|
|
input: inPer,
|
|
output: outPer,
|
|
m: m,
|
|
first: inIdx == 0 && clear,
|
|
})
|
|
outIdx += len(outPer)
|
|
outs = outs[len(outPer):]
|
|
}
|
|
inIdx += len(inPer)
|
|
ins = ins[len(inPer):]
|
|
}
|
|
} else {
|
|
outs := outputs
|
|
outIdx := 0
|
|
for len(outs) > 0 {
|
|
outPer := outs
|
|
if len(outPer) > maxAvx2Outputs {
|
|
outPer = outPer[:maxAvx2Outputs]
|
|
}
|
|
|
|
inIdx := 0
|
|
ins := inputs
|
|
for len(ins) > 0 {
|
|
inPer := ins
|
|
if len(inPer) > maxAvx2Inputs {
|
|
inPer = inPer[:maxAvx2Inputs]
|
|
}
|
|
// Generate local matrix
|
|
m := genAvx2Matrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), tmp)
|
|
tmp = tmp[len(m):]
|
|
//fmt.Println("bytes:", len(inPer)*r.o.perRound, "out:", len(outPer)*r.o.perRound)
|
|
plan = append(plan, state{
|
|
input: inPer,
|
|
output: outPer,
|
|
m: m,
|
|
first: inIdx == 0 && clear,
|
|
})
|
|
inIdx += len(inPer)
|
|
ins = ins[len(inPer):]
|
|
}
|
|
outIdx += len(outPer)
|
|
outs = outs[len(outPer):]
|
|
}
|
|
}
|
|
|
|
do := byteCount / gor
|
|
if do < r.o.minSplitSize {
|
|
do = r.o.minSplitSize
|
|
}
|
|
|
|
exec := func(start, stop int) {
|
|
defer wg.Done()
|
|
lstart, lstop := start, start+r.o.perRound
|
|
if lstop > stop {
|
|
lstop = stop
|
|
}
|
|
for lstart < stop {
|
|
if lstop-lstart >= minAvx2Size {
|
|
// Execute plan...
|
|
var n int
|
|
for _, p := range plan {
|
|
if p.first {
|
|
n = galMulSlicesAvx2(p.m, p.input, p.output, lstart, lstop)
|
|
} else {
|
|
n = galMulSlicesAvx2Xor(p.m, p.input, p.output, lstart, lstop)
|
|
}
|
|
}
|
|
lstart += n
|
|
if lstart == lstop {
|
|
lstop += r.o.perRound
|
|
if lstop > stop {
|
|
lstop = stop
|
|
}
|
|
continue
|
|
}
|
|
}
|
|
|
|
for c := range inputs {
|
|
in := inputs[c][lstart:lstop]
|
|
for iRow := 0; iRow < len(outputs); iRow++ {
|
|
if c == 0 && clear {
|
|
galMulSlice(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
|
|
} else {
|
|
galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
|
|
}
|
|
}
|
|
}
|
|
lstart = lstop
|
|
lstop += r.o.perRound
|
|
if lstop > stop {
|
|
lstop = stop
|
|
}
|
|
}
|
|
}
|
|
if gor == 1 {
|
|
wg.Add(1)
|
|
exec(0, byteCount)
|
|
return
|
|
}
|
|
|
|
// Make sizes divisible by 64
|
|
do = (do + 63) & (^63)
|
|
start := 0
|
|
for start < byteCount {
|
|
if start+do > byteCount {
|
|
do = byteCount - start
|
|
}
|
|
|
|
wg.Add(1)
|
|
go exec(start, start+do)
|
|
start += do
|
|
}
|
|
wg.Wait()
|
|
}
|
|
|
|
// Perform the same as codeSomeShards, but split the workload into
|
|
// several goroutines.
|
|
// If clear is set, the first write will overwrite the output.
|
|
func (r *reedSolomon) codeSomeShardsGFNI(matrixRows, inputs, outputs [][]byte, byteCount int, clear bool) {
|
|
var wg sync.WaitGroup
|
|
gor := r.o.maxGoroutines
|
|
|
|
type state struct {
|
|
input [][]byte
|
|
output [][]byte
|
|
m []uint64
|
|
first bool
|
|
}
|
|
// Make a plan...
|
|
plan := make([]state, 0, ((len(inputs)+maxAvx2Inputs-1)/maxAvx2Inputs)*((len(outputs)+maxAvx2Outputs-1)/maxAvx2Outputs))
|
|
|
|
// Flips between input first to output first.
|
|
// We put the smallest data load in the inner loop.
|
|
if len(inputs) > len(outputs) {
|
|
inIdx := 0
|
|
ins := inputs
|
|
for len(ins) > 0 {
|
|
inPer := ins
|
|
if len(inPer) > maxAvx2Inputs {
|
|
inPer = inPer[:maxAvx2Inputs]
|
|
}
|
|
outs := outputs
|
|
outIdx := 0
|
|
for len(outs) > 0 {
|
|
outPer := outs
|
|
if len(outPer) > maxAvx2Outputs {
|
|
outPer = outPer[:maxAvx2Outputs]
|
|
}
|
|
// Generate local matrix
|
|
m := genGFNIMatrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), make([]uint64, len(inPer)*len(outPer)))
|
|
plan = append(plan, state{
|
|
input: inPer,
|
|
output: outPer,
|
|
m: m,
|
|
first: inIdx == 0 && clear,
|
|
})
|
|
outIdx += len(outPer)
|
|
outs = outs[len(outPer):]
|
|
}
|
|
inIdx += len(inPer)
|
|
ins = ins[len(inPer):]
|
|
}
|
|
} else {
|
|
outs := outputs
|
|
outIdx := 0
|
|
for len(outs) > 0 {
|
|
outPer := outs
|
|
if len(outPer) > maxAvx2Outputs {
|
|
outPer = outPer[:maxAvx2Outputs]
|
|
}
|
|
|
|
inIdx := 0
|
|
ins := inputs
|
|
for len(ins) > 0 {
|
|
inPer := ins
|
|
if len(inPer) > maxAvx2Inputs {
|
|
inPer = inPer[:maxAvx2Inputs]
|
|
}
|
|
// Generate local matrix
|
|
m := genGFNIMatrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), make([]uint64, len(inPer)*len(outPer)))
|
|
//fmt.Println("bytes:", len(inPer)*r.o.perRound, "out:", len(outPer)*r.o.perRound)
|
|
plan = append(plan, state{
|
|
input: inPer,
|
|
output: outPer,
|
|
m: m,
|
|
first: inIdx == 0 && clear,
|
|
})
|
|
inIdx += len(inPer)
|
|
ins = ins[len(inPer):]
|
|
}
|
|
outIdx += len(outPer)
|
|
outs = outs[len(outPer):]
|
|
}
|
|
}
|
|
|
|
do := byteCount / gor
|
|
if do < r.o.minSplitSize {
|
|
do = r.o.minSplitSize
|
|
}
|
|
|
|
exec := func(start, stop int) {
|
|
defer wg.Done()
|
|
lstart, lstop := start, start+r.o.perRound
|
|
if lstop > stop {
|
|
lstop = stop
|
|
}
|
|
for lstart < stop {
|
|
if lstop-lstart >= minAvx2Size {
|
|
// Execute plan...
|
|
var n int
|
|
if r.o.useAvx512GFNI {
|
|
for _, p := range plan {
|
|
if p.first {
|
|
n = galMulSlicesGFNI(p.m, p.input, p.output, lstart, lstop)
|
|
} else {
|
|
n = galMulSlicesGFNIXor(p.m, p.input, p.output, lstart, lstop)
|
|
}
|
|
}
|
|
} else {
|
|
for _, p := range plan {
|
|
if p.first {
|
|
n = galMulSlicesAvxGFNI(p.m, p.input, p.output, lstart, lstop)
|
|
} else {
|
|
n = galMulSlicesAvxGFNIXor(p.m, p.input, p.output, lstart, lstop)
|
|
}
|
|
}
|
|
}
|
|
lstart += n
|
|
if lstart == lstop {
|
|
lstop += r.o.perRound
|
|
if lstop > stop {
|
|
lstop = stop
|
|
}
|
|
continue
|
|
}
|
|
}
|
|
|
|
for c := range inputs {
|
|
in := inputs[c][lstart:lstop]
|
|
for iRow := 0; iRow < len(outputs); iRow++ {
|
|
if c == 0 && clear {
|
|
galMulSlice(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
|
|
} else {
|
|
galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
|
|
}
|
|
}
|
|
}
|
|
lstart = lstop
|
|
lstop += r.o.perRound
|
|
if lstop > stop {
|
|
lstop = stop
|
|
}
|
|
}
|
|
}
|
|
|
|
if gor == 1 {
|
|
wg.Add(1)
|
|
exec(0, byteCount)
|
|
return
|
|
}
|
|
|
|
// Make sizes divisible by 64
|
|
do = (do + 63) & (^63)
|
|
start := 0
|
|
for start < byteCount {
|
|
if start+do > byteCount {
|
|
do = byteCount - start
|
|
}
|
|
|
|
wg.Add(1)
|
|
go exec(start, start+do)
|
|
start += do
|
|
}
|
|
wg.Wait()
|
|
}
|
|
|
|
// checkSomeShards is mostly the same as codeSomeShards,
|
|
// except this will check values and return
|
|
// as soon as a difference is found.
|
|
func (r *reedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, byteCount int) bool {
|
|
if len(toCheck) == 0 {
|
|
return true
|
|
}
|
|
|
|
outputs := AllocAligned(len(toCheck), byteCount)
|
|
r.codeSomeShards(matrixRows, inputs, outputs, byteCount)
|
|
|
|
for i, calc := range outputs {
|
|
if !bytes.Equal(calc, toCheck[i]) {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
// ErrShardNoData will be returned if there are no shards,
|
|
// or if the length of all shards is zero.
|
|
var ErrShardNoData = errors.New("no shard data")
|
|
|
|
// ErrShardSize is returned if shard length isn't the same for all
|
|
// shards.
|
|
var ErrShardSize = errors.New("shard sizes do not match")
|
|
|
|
// ErrInvalidShardSize is returned if shard length doesn't meet the requirements,
|
|
// typically a multiple of N.
|
|
var ErrInvalidShardSize = errors.New("invalid shard size")
|
|
|
|
// checkShards will check if shards are the same size
|
|
// or 0, if allowed. An error is returned if this fails.
|
|
// An error is also returned if all shards are size 0.
|
|
func checkShards(shards [][]byte, nilok bool) error {
|
|
size := shardSize(shards)
|
|
if size == 0 {
|
|
return ErrShardNoData
|
|
}
|
|
for _, shard := range shards {
|
|
if len(shard) != size {
|
|
if len(shard) != 0 || !nilok {
|
|
return ErrShardSize
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// shardSize return the size of a single shard.
|
|
// The first non-zero size is returned,
|
|
// or 0 if all shards are size 0.
|
|
func shardSize(shards [][]byte) int {
|
|
for _, shard := range shards {
|
|
if len(shard) != 0 {
|
|
return len(shard)
|
|
}
|
|
}
|
|
return 0
|
|
}
|
|
|
|
// Reconstruct will recreate the missing shards, if possible.
|
|
//
|
|
// Given a list of shards, some of which contain data, fills in the
|
|
// ones that don't have data.
|
|
//
|
|
// The length of the array must be equal to shards.
|
|
// You indicate that a shard is missing by setting it to nil or zero-length.
|
|
// If a shard is zero-length but has sufficient capacity, that memory will
|
|
// be used, otherwise a new []byte will be allocated.
|
|
//
|
|
// If there are too few shards to reconstruct the missing
|
|
// ones, ErrTooFewShards will be returned.
|
|
//
|
|
// The reconstructed shard set is complete, but integrity is not verified.
|
|
// Use the Verify function to check if data set is ok.
|
|
func (r *reedSolomon) Reconstruct(shards [][]byte) error {
|
|
return r.reconstruct(shards, false, nil)
|
|
}
|
|
|
|
// ReconstructData will recreate any missing data shards, if possible.
|
|
//
|
|
// Given a list of shards, some of which contain data, fills in the
|
|
// data shards that don't have data.
|
|
//
|
|
// The length of the array must be equal to shards.
|
|
// You indicate that a shard is missing by setting it to nil or zero-length.
|
|
// If a shard is zero-length but has sufficient capacity, that memory will
|
|
// be used, otherwise a new []byte will be allocated.
|
|
//
|
|
// If there are too few shards to reconstruct the missing
|
|
// ones, ErrTooFewShards will be returned.
|
|
//
|
|
// As the reconstructed shard set may contain missing parity shards,
|
|
// calling the Verify function is likely to fail.
|
|
func (r *reedSolomon) ReconstructData(shards [][]byte) error {
|
|
return r.reconstruct(shards, true, nil)
|
|
}
|
|
|
|
// ReconstructSome will recreate only requested shards, if possible.
|
|
//
|
|
// Given a list of shards, some of which contain data, fills in the
|
|
// shards indicated by true values in the "required" parameter.
|
|
// The length of the "required" array must be equal to either Shards or DataShards.
|
|
// If the length is equal to DataShards, the reconstruction of parity shards will be ignored.
|
|
//
|
|
// The length of "shards" array must be equal to Shards.
|
|
// You indicate that a shard is missing by setting it to nil or zero-length.
|
|
// If a shard is zero-length but has sufficient capacity, that memory will
|
|
// be used, otherwise a new []byte will be allocated.
|
|
//
|
|
// If there are too few shards to reconstruct the missing
|
|
// ones, ErrTooFewShards will be returned.
|
|
//
|
|
// As the reconstructed shard set may contain missing parity shards,
|
|
// calling the Verify function is likely to fail.
|
|
func (r *reedSolomon) ReconstructSome(shards [][]byte, required []bool) error {
|
|
if len(required) == r.totalShards {
|
|
return r.reconstruct(shards, false, required)
|
|
}
|
|
return r.reconstruct(shards, true, required)
|
|
}
|
|
|
|
// reconstruct will recreate the missing data totalShards, and unless
|
|
// dataOnly is true, also the missing parity totalShards
|
|
//
|
|
// The length of "shards" array must be equal to totalShards.
|
|
// You indicate that a shard is missing by setting it to nil.
|
|
//
|
|
// If there are too few totalShards to reconstruct the missing
|
|
// ones, ErrTooFewShards will be returned.
|
|
func (r *reedSolomon) reconstruct(shards [][]byte, dataOnly bool, required []bool) error {
|
|
if len(shards) != r.totalShards || required != nil && len(required) < r.dataShards {
|
|
return ErrTooFewShards
|
|
}
|
|
// Check arguments.
|
|
err := checkShards(shards, true)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
shardSize := shardSize(shards)
|
|
|
|
// Quick check: are all of the shards present? If so, there's
|
|
// nothing to do.
|
|
numberPresent := 0
|
|
dataPresent := 0
|
|
missingRequired := 0
|
|
for i := 0; i < r.totalShards; i++ {
|
|
if len(shards[i]) != 0 {
|
|
numberPresent++
|
|
if i < r.dataShards {
|
|
dataPresent++
|
|
}
|
|
} else if required != nil && required[i] {
|
|
missingRequired++
|
|
}
|
|
}
|
|
if numberPresent == r.totalShards || dataOnly && dataPresent == r.dataShards ||
|
|
required != nil && missingRequired == 0 {
|
|
// Cool. All of the shards have data. We don't
|
|
// need to do anything.
|
|
return nil
|
|
}
|
|
|
|
// More complete sanity check
|
|
if numberPresent < r.dataShards {
|
|
return ErrTooFewShards
|
|
}
|
|
|
|
// Pull out an array holding just the shards that
|
|
// correspond to the rows of the submatrix. These shards
|
|
// will be the input to the decoding process that re-creates
|
|
// the missing data shards.
|
|
//
|
|
// Also, create an array of indices of the valid rows we do have
|
|
// and the invalid rows we don't have up until we have enough valid rows.
|
|
subShards := make([][]byte, r.dataShards)
|
|
validIndices := make([]int, r.dataShards)
|
|
invalidIndices := make([]int, 0)
|
|
subMatrixRow := 0
|
|
for matrixRow := 0; matrixRow < r.totalShards && subMatrixRow < r.dataShards; matrixRow++ {
|
|
if len(shards[matrixRow]) != 0 {
|
|
subShards[subMatrixRow] = shards[matrixRow]
|
|
validIndices[subMatrixRow] = matrixRow
|
|
subMatrixRow++
|
|
} else {
|
|
invalidIndices = append(invalidIndices, matrixRow)
|
|
}
|
|
}
|
|
|
|
// Attempt to get the cached inverted matrix out of the tree
|
|
// based on the indices of the invalid rows.
|
|
dataDecodeMatrix := r.tree.GetInvertedMatrix(invalidIndices)
|
|
|
|
// If the inverted matrix isn't cached in the tree yet we must
|
|
// construct it ourselves and insert it into the tree for the
|
|
// future. In this way the inversion tree is lazily loaded.
|
|
if dataDecodeMatrix == nil {
|
|
// Pull out the rows of the matrix that correspond to the
|
|
// shards that we have and build a square matrix. This
|
|
// matrix could be used to generate the shards that we have
|
|
// from the original data.
|
|
subMatrix, _ := newMatrix(r.dataShards, r.dataShards)
|
|
for subMatrixRow, validIndex := range validIndices {
|
|
for c := 0; c < r.dataShards; c++ {
|
|
subMatrix[subMatrixRow][c] = r.m[validIndex][c]
|
|
}
|
|
}
|
|
// Invert the matrix, so we can go from the encoded shards
|
|
// back to the original data. Then pull out the row that
|
|
// generates the shard that we want to decode. Note that
|
|
// since this matrix maps back to the original data, it can
|
|
// be used to create a data shard, but not a parity shard.
|
|
dataDecodeMatrix, err = subMatrix.Invert()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Cache the inverted matrix in the tree for future use keyed on the
|
|
// indices of the invalid rows.
|
|
err = r.tree.InsertInvertedMatrix(invalidIndices, dataDecodeMatrix, r.totalShards)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
// Re-create any data shards that were missing.
|
|
//
|
|
// The input to the coding is all of the shards we actually
|
|
// have, and the output is the missing data shards. The computation
|
|
// is done using the special decode matrix we just built.
|
|
outputs := make([][]byte, r.parityShards)
|
|
matrixRows := make([][]byte, r.parityShards)
|
|
outputCount := 0
|
|
|
|
for iShard := 0; iShard < r.dataShards; iShard++ {
|
|
if len(shards[iShard]) == 0 && (required == nil || required[iShard]) {
|
|
if cap(shards[iShard]) >= shardSize {
|
|
shards[iShard] = shards[iShard][0:shardSize]
|
|
} else {
|
|
shards[iShard] = AllocAligned(1, shardSize)[0]
|
|
}
|
|
outputs[outputCount] = shards[iShard]
|
|
matrixRows[outputCount] = dataDecodeMatrix[iShard]
|
|
outputCount++
|
|
}
|
|
}
|
|
r.codeSomeShards(matrixRows, subShards, outputs[:outputCount], shardSize)
|
|
|
|
if dataOnly {
|
|
// Exit out early if we are only interested in the data shards
|
|
return nil
|
|
}
|
|
|
|
// Now that we have all of the data shards intact, we can
|
|
// compute any of the parity that is missing.
|
|
//
|
|
// The input to the coding is ALL of the data shards, including
|
|
// any that we just calculated. The output is whichever of the
|
|
// data shards were missing.
|
|
outputCount = 0
|
|
for iShard := r.dataShards; iShard < r.totalShards; iShard++ {
|
|
if len(shards[iShard]) == 0 && (required == nil || required[iShard]) {
|
|
if cap(shards[iShard]) >= shardSize {
|
|
shards[iShard] = shards[iShard][0:shardSize]
|
|
} else {
|
|
shards[iShard] = AllocAligned(1, shardSize)[0]
|
|
}
|
|
outputs[outputCount] = shards[iShard]
|
|
matrixRows[outputCount] = r.parity[iShard-r.dataShards]
|
|
outputCount++
|
|
}
|
|
}
|
|
r.codeSomeShards(matrixRows, shards[:r.dataShards], outputs[:outputCount], shardSize)
|
|
return nil
|
|
}
|
|
|
|
// ErrShortData will be returned by Split(), if there isn't enough data
|
|
// to fill the number of shards.
|
|
var ErrShortData = errors.New("not enough data to fill the number of requested shards")
|
|
|
|
// Split a data slice into the number of shards given to the encoder,
|
|
// and create empty parity shards if necessary.
|
|
//
|
|
// The data will be split into equally sized shards.
|
|
// If the data size isn't divisible by the number of shards,
|
|
// the last shard will contain extra zeros.
|
|
//
|
|
// If there is extra capacity on the provided data slice
|
|
// it will be used instead of allocating parity shards.
|
|
// It will be zeroed out.
|
|
//
|
|
// There must be at least 1 byte otherwise ErrShortData will be
|
|
// returned.
|
|
//
|
|
// The data will not be copied, except for the last shard, so you
|
|
// should not modify the data of the input slice afterwards.
|
|
func (r *reedSolomon) Split(data []byte) ([][]byte, error) {
|
|
if len(data) == 0 {
|
|
return nil, ErrShortData
|
|
}
|
|
if r.totalShards == 1 {
|
|
return [][]byte{data}, nil
|
|
}
|
|
|
|
dataLen := len(data)
|
|
// Calculate number of bytes per data shard.
|
|
perShard := (len(data) + r.dataShards - 1) / r.dataShards
|
|
needTotal := r.totalShards * perShard
|
|
|
|
if cap(data) > len(data) {
|
|
if cap(data) > needTotal {
|
|
data = data[:needTotal]
|
|
} else {
|
|
data = data[:cap(data)]
|
|
}
|
|
clear := data[dataLen:]
|
|
for i := range clear {
|
|
clear[i] = 0
|
|
}
|
|
}
|
|
|
|
// Only allocate memory if necessary
|
|
var padding [][]byte
|
|
if len(data) < needTotal {
|
|
// calculate maximum number of full shards in `data` slice
|
|
fullShards := len(data) / perShard
|
|
padding = AllocAligned(r.totalShards-fullShards, perShard)
|
|
|
|
if dataLen > perShard*fullShards {
|
|
// Copy partial shards
|
|
copyFrom := data[perShard*fullShards : dataLen]
|
|
for i := range padding {
|
|
if len(copyFrom) == 0 {
|
|
break
|
|
}
|
|
copyFrom = copyFrom[copy(padding[i], copyFrom):]
|
|
}
|
|
}
|
|
}
|
|
|
|
// Split into equal-length shards.
|
|
dst := make([][]byte, r.totalShards)
|
|
i := 0
|
|
for ; i < len(dst) && len(data) >= perShard; i++ {
|
|
dst[i] = data[:perShard:perShard]
|
|
data = data[perShard:]
|
|
}
|
|
|
|
for j := 0; i+j < len(dst); j++ {
|
|
dst[i+j] = padding[0]
|
|
padding = padding[1:]
|
|
}
|
|
|
|
return dst, nil
|
|
}
|
|
|
|
// ErrReconstructRequired is returned if too few data shards are intact and a
|
|
// reconstruction is required before you can successfully join the shards.
|
|
var ErrReconstructRequired = errors.New("reconstruction required as one or more required data shards are nil")
|
|
|
|
// Join the shards and write the data segment to dst.
|
|
//
|
|
// Only the data shards are considered.
|
|
// You must supply the exact output size you want.
|
|
//
|
|
// If there are to few shards given, ErrTooFewShards will be returned.
|
|
// If the total data size is less than outSize, ErrShortData will be returned.
|
|
// If one or more required data shards are nil, ErrReconstructRequired will be returned.
|
|
func (r *reedSolomon) Join(dst io.Writer, shards [][]byte, outSize int) error {
|
|
// Do we have enough shards?
|
|
if len(shards) < r.dataShards {
|
|
return ErrTooFewShards
|
|
}
|
|
shards = shards[:r.dataShards]
|
|
|
|
// Do we have enough data?
|
|
size := 0
|
|
for _, shard := range shards {
|
|
if shard == nil {
|
|
return ErrReconstructRequired
|
|
}
|
|
size += len(shard)
|
|
|
|
// Do we have enough data already?
|
|
if size >= outSize {
|
|
break
|
|
}
|
|
}
|
|
if size < outSize {
|
|
return ErrShortData
|
|
}
|
|
|
|
// Copy data to dst
|
|
write := outSize
|
|
for _, shard := range shards {
|
|
if write < len(shard) {
|
|
_, err := dst.Write(shard[:write])
|
|
return err
|
|
}
|
|
n, err := dst.Write(shard)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
write -= n
|
|
}
|
|
return nil
|
|
}
|