status-go/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.go
Dmitry eeca435064 Add rendezvous implementation for discovery interface
Update vendor

Integrate rendezvous into status node

Add a test with failover using rendezvous

Use multiple servers in client

Use discovery V5 by default and test that node can be started with rendezvous discovet

Fix linter

Update rendezvous client to one with instrumented stream

Address feedback

Fix test with updated topic limits

Apply several suggestions

Change log to debug for request errors because we continue execution

Remove web3js after rebase

Update rendezvous package
2018-07-25 15:10:57 +03:00

498 lines
20 KiB
Go

//+build !noasm
/*
* Minio Cloud Storage, (C) 2017 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package sha256
import (
"encoding/binary"
"errors"
"hash"
"sort"
"sync/atomic"
"time"
)
//go:noescape
func sha256_x16_avx512(digests *[512]byte, scratch *[512]byte, table *[512]uint64, mask []uint64, inputs [16][]byte)
// Do not start at 0 but next multiple of 16 so as to be able to
// differentiate with default initialiation value of 0
const Avx512ServerUid = 16
var uidCounter uint64
func NewAvx512(a512srv *Avx512Server) hash.Hash {
uid := atomic.AddUint64(&uidCounter, 1)
return &Avx512Digest{uid: uid, a512srv: a512srv}
}
// Type for computing SHA256 using AVX51
type Avx512Digest struct {
uid uint64
a512srv *Avx512Server
x [chunk]byte
nx int
len uint64
final bool
result [Size]byte
}
// Return size of checksum
func (d *Avx512Digest) Size() int { return Size }
// Return blocksize of checksum
func (d Avx512Digest) BlockSize() int { return BlockSize }
func (d *Avx512Digest) Reset() {
d.a512srv.blocksCh <- blockInput{uid: d.uid, reset: true}
d.nx = 0
d.len = 0
d.final = false
}
// Write to digest
func (d *Avx512Digest) Write(p []byte) (nn int, err error) {
if d.final {
return 0, errors.New("Avx512Digest already finalized. Reset first before writing again.")
}
nn = len(p)
d.len += uint64(nn)
if d.nx > 0 {
n := copy(d.x[d.nx:], p)
d.nx += n
if d.nx == chunk {
d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: d.x[:]}
d.nx = 0
}
p = p[n:]
}
if len(p) >= chunk {
n := len(p) &^ (chunk - 1)
d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: p[:n]}
p = p[n:]
}
if len(p) > 0 {
d.nx = copy(d.x[:], p)
}
return
}
// Return sha256 sum in bytes
func (d *Avx512Digest) Sum(in []byte) (result []byte) {
if d.final {
return append(in, d.result[:]...)
}
trail := make([]byte, 0, 128)
len := d.len
// Padding. Add a 1 bit and 0 bits until 56 bytes mod 64.
var tmp [64]byte
tmp[0] = 0x80
if len%64 < 56 {
trail = append(d.x[:d.nx], tmp[0:56-len%64]...)
} else {
trail = append(d.x[:d.nx], tmp[0:64+56-len%64]...)
}
d.nx = 0
// Length in bits.
len <<= 3
for i := uint(0); i < 8; i++ {
tmp[i] = byte(len >> (56 - 8*i))
}
trail = append(trail, tmp[0:8]...)
sumCh := make(chan [Size]byte)
d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: trail, final: true, sumCh: sumCh}
d.result = <-sumCh
d.final = true
return append(in, d.result[:]...)
}
var table = [512]uint64{
0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98,
0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98,
0x7137449171374491, 0x7137449171374491, 0x7137449171374491, 0x7137449171374491,
0x7137449171374491, 0x7137449171374491, 0x7137449171374491, 0x7137449171374491,
0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf,
0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf,
0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5,
0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5,
0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b,
0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b,
0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1,
0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1,
0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4,
0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4,
0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5,
0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5,
0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98,
0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98,
0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01,
0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01,
0x243185be243185be, 0x243185be243185be, 0x243185be243185be, 0x243185be243185be,
0x243185be243185be, 0x243185be243185be, 0x243185be243185be, 0x243185be243185be,
0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3,
0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3,
0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74,
0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74,
0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe,
0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe,
0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7,
0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7,
0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174,
0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174,
0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1,
0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1,
0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786,
0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786,
0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6,
0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6,
0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc,
0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc,
0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f,
0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f,
0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa,
0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa,
0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc,
0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc,
0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da,
0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da,
0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152,
0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152,
0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d,
0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d,
0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8,
0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8,
0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7,
0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7,
0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3,
0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3,
0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147,
0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147,
0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351,
0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351,
0x1429296714292967, 0x1429296714292967, 0x1429296714292967, 0x1429296714292967,
0x1429296714292967, 0x1429296714292967, 0x1429296714292967, 0x1429296714292967,
0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85,
0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85,
0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138,
0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138,
0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc,
0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc,
0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13,
0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13,
0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354,
0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354,
0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb,
0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb,
0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e,
0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e,
0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85,
0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85,
0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1,
0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1,
0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b,
0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b,
0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70,
0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70,
0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3,
0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3,
0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819,
0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819,
0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624,
0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624,
0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585,
0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585,
0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070,
0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070,
0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116,
0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116,
0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08,
0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08,
0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c,
0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c,
0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5,
0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5,
0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3,
0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3,
0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a,
0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a,
0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f,
0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f,
0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3,
0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3,
0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee,
0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee,
0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f,
0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f,
0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814,
0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814,
0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208,
0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208,
0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa,
0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa,
0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb,
0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb,
0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7,
0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7,
0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2,
0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2}
// Interface function to assembly ode
func blockAvx512(digests *[512]byte, input [16][]byte, mask []uint64) [16][Size]byte {
scratch := [512]byte{}
sha256_x16_avx512(digests, &scratch, &table, mask, input)
output := [16][Size]byte{}
for i := 0; i < 16; i++ {
output[i] = getDigest(i, digests[:])
}
return output
}
func getDigest(index int, state []byte) (sum [Size]byte) {
for j := 0; j < 16; j += 2 {
for i := index*4 + j*Size; i < index*4+(j+1)*Size; i += Size {
binary.BigEndian.PutUint32(sum[j*2:], binary.LittleEndian.Uint32(state[i:i+4]))
}
}
return
}
// Message to send across input channel
type blockInput struct {
uid uint64
msg []byte
reset bool
final bool
sumCh chan [Size]byte
}
// Type to implement 16x parallel handling of SHA256 invocations
type Avx512Server struct {
blocksCh chan blockInput // Input channel
totalIn int // Total number of inputs waiting to be processed
lanes [16]Avx512LaneInfo // Array with info per lane (out of 16)
digests map[uint64][Size]byte // Map of uids to (interim) digest results
}
// Info for each lane
type Avx512LaneInfo struct {
uid uint64 // unique identification for this SHA processing
block []byte // input block to be processed
outputCh chan [Size]byte // channel for output result
}
// Create new object for parallel processing handling
func NewAvx512Server() *Avx512Server {
a512srv := &Avx512Server{}
a512srv.digests = make(map[uint64][Size]byte)
a512srv.blocksCh = make(chan blockInput)
// Start a single thread for reading from the input channel
go a512srv.Process()
return a512srv
}
// Sole handler for reading from the input channel
func (a512srv *Avx512Server) Process() {
for {
select {
case block := <-a512srv.blocksCh:
if block.reset {
a512srv.reset(block.uid)
continue
}
index := block.uid & 0xf
// fmt.Println("Adding message:", block.uid, index)
if a512srv.lanes[index].block != nil { // If slot is already filled, process all inputs
//fmt.Println("Invoking Blocks()")
a512srv.blocks()
}
a512srv.totalIn++
a512srv.lanes[index] = Avx512LaneInfo{uid: block.uid, block: block.msg}
if block.final {
a512srv.lanes[index].outputCh = block.sumCh
}
if a512srv.totalIn == len(a512srv.lanes) {
// fmt.Println("Invoking Blocks() while FULL: ")
a512srv.blocks()
}
// TODO: test with larger timeout
case <-time.After(1 * time.Microsecond):
for _, lane := range a512srv.lanes {
if lane.block != nil { // check if there is any input to process
// fmt.Println("Invoking Blocks() on TIMEOUT: ")
a512srv.blocks()
break // we are done
}
}
}
}
}
// Do a reset for this calculation
func (a512srv *Avx512Server) reset(uid uint64) {
// Check if there is a message still waiting to be processed (and remove if so)
for i, lane := range a512srv.lanes {
if lane.uid == uid {
if lane.block != nil {
a512srv.lanes[i] = Avx512LaneInfo{} // clear message
a512srv.totalIn -= 1
}
}
}
// Delete entry from hash map
delete(a512srv.digests, uid)
}
// Invoke assembly and send results back
func (a512srv *Avx512Server) blocks() (err error) {
inputs := [16][]byte{}
for i := range inputs {
inputs[i] = a512srv.lanes[i].block
}
mask := expandMask(genMask(inputs))
outputs := blockAvx512(a512srv.getDigests(), inputs, mask)
a512srv.totalIn = 0
for i := 0; i < len(outputs); i++ {
uid, outputCh := a512srv.lanes[i].uid, a512srv.lanes[i].outputCh
a512srv.digests[uid] = outputs[i]
a512srv.lanes[i] = Avx512LaneInfo{}
if outputCh != nil {
// Send back result
outputCh <- outputs[i]
delete(a512srv.digests, uid) // Delete entry from hashmap
}
}
return
}
func (a512srv *Avx512Server) Write(uid uint64, p []byte) (nn int, err error) {
a512srv.blocksCh <- blockInput{uid: uid, msg: p}
return len(p), nil
}
func (a512srv *Avx512Server) Sum(uid uint64, p []byte) [32]byte {
sumCh := make(chan [32]byte)
a512srv.blocksCh <- blockInput{uid: uid, msg: p, final: true, sumCh: sumCh}
return <-sumCh
}
func (a512srv *Avx512Server) getDigests() *[512]byte {
digests := [512]byte{}
for i, lane := range a512srv.lanes {
a, ok := a512srv.digests[lane.uid]
if ok {
binary.BigEndian.PutUint32(digests[(i+0*16)*4:], binary.LittleEndian.Uint32(a[0:4]))
binary.BigEndian.PutUint32(digests[(i+1*16)*4:], binary.LittleEndian.Uint32(a[4:8]))
binary.BigEndian.PutUint32(digests[(i+2*16)*4:], binary.LittleEndian.Uint32(a[8:12]))
binary.BigEndian.PutUint32(digests[(i+3*16)*4:], binary.LittleEndian.Uint32(a[12:16]))
binary.BigEndian.PutUint32(digests[(i+4*16)*4:], binary.LittleEndian.Uint32(a[16:20]))
binary.BigEndian.PutUint32(digests[(i+5*16)*4:], binary.LittleEndian.Uint32(a[20:24]))
binary.BigEndian.PutUint32(digests[(i+6*16)*4:], binary.LittleEndian.Uint32(a[24:28]))
binary.BigEndian.PutUint32(digests[(i+7*16)*4:], binary.LittleEndian.Uint32(a[28:32]))
} else {
binary.LittleEndian.PutUint32(digests[(i+0*16)*4:], init0)
binary.LittleEndian.PutUint32(digests[(i+1*16)*4:], init1)
binary.LittleEndian.PutUint32(digests[(i+2*16)*4:], init2)
binary.LittleEndian.PutUint32(digests[(i+3*16)*4:], init3)
binary.LittleEndian.PutUint32(digests[(i+4*16)*4:], init4)
binary.LittleEndian.PutUint32(digests[(i+5*16)*4:], init5)
binary.LittleEndian.PutUint32(digests[(i+6*16)*4:], init6)
binary.LittleEndian.PutUint32(digests[(i+7*16)*4:], init7)
}
}
return &digests
}
// Helper struct for sorting blocks based on length
type lane struct {
len uint
pos uint
}
type lanes []lane
func (lns lanes) Len() int { return len(lns) }
func (lns lanes) Swap(i, j int) { lns[i], lns[j] = lns[j], lns[i] }
func (lns lanes) Less(i, j int) bool { return lns[i].len < lns[j].len }
// Helper struct for
type maskRounds struct {
mask uint64
rounds uint64
}
func genMask(input [16][]byte) [16]maskRounds {
// Sort on blocks length small to large
var sorted [16]lane
for c, inpt := range input {
sorted[c] = lane{uint(len(inpt)), uint(c)}
}
sort.Sort(lanes(sorted[:]))
// Create mask array including 'rounds' between masks
m, round, index := uint64(0xffff), uint64(0), 0
var mr [16]maskRounds
for _, s := range sorted {
if s.len > 0 {
if uint64(s.len)>>6 > round {
mr[index] = maskRounds{m, (uint64(s.len) >> 6) - round}
index++
}
round = uint64(s.len) >> 6
}
m = m & ^(1 << uint(s.pos))
}
return mr
}
// TODO: remove function
func expandMask(mr [16]maskRounds) []uint64 {
size := uint64(0)
for _, r := range mr {
size += r.rounds
}
result, index := make([]uint64, size), 0
for _, r := range mr {
for j := uint64(0); j < r.rounds; j++ {
result[index] = r.mask
index++
}
}
return result
}