// File: status-go/vendor/lukechampine.com/blake3/guts/compress_generic.go
// (146 lines, 5.6 KiB, Go)

package guts
import (
"bytes"
"math/bits"
)
// CompressNode runs the seven-round compression function over n and returns
// the full 16-word output.
//
// The 16-word working state s0..s15 is seeded from n during round 1: s0-s7
// come from n.CV, s8-s11 from IV[0..3], s12 from the low 32 bits of n.Counter,
// s13 from n.Counter>>32, s14 from n.BlockLen and s15 from n.Flags. n is
// received by value and is only read.
//
// The named result out is never assigned; the function returns a composite
// literal built in the finalization step.
func CompressNode(n Node) (out [16]uint32) {
// g is the mixing step applied to four state words (a, b, c, d) together with
// two message words (mx, my). bits.RotateLeft32 with a negative count rotates
// right, so the rotation amounts are 16, 12, 8 and 7 bits to the right.
g := func(a, b, c, d, mx, my uint32) (uint32, uint32, uint32, uint32) {
a += b + mx
d = bits.RotateLeft32(d^a, -16)
c += d
b = bits.RotateLeft32(b^c, -12)
a += b + my
d = bits.RotateLeft32(d^a, -8)
c += d
b = bits.RotateLeft32(b^c, -7)
return a, b, c, d
}
// NOTE: we unroll all of the rounds, as well as the permutations that occur
// between rounds.
//
// Every round is eight calls to g: four over the columns of the 4x4 state
// (s0,s4,s8,s12 ...) followed by four over its diagonals (s0,s5,s10,s15 ...).
// Rather than permuting the message words between rounds, each round indexes
// n.Block directly with the already-permuted positions, so the index order in
// the calls below must not be changed.
// round 1 (also initializes state)
// columns
s0, s4, s8, s12 := g(n.CV[0], n.CV[4], IV[0], uint32(n.Counter), n.Block[0], n.Block[1])
s1, s5, s9, s13 := g(n.CV[1], n.CV[5], IV[1], uint32(n.Counter>>32), n.Block[2], n.Block[3])
s2, s6, s10, s14 := g(n.CV[2], n.CV[6], IV[2], n.BlockLen, n.Block[4], n.Block[5])
s3, s7, s11, s15 := g(n.CV[3], n.CV[7], IV[3], n.Flags, n.Block[6], n.Block[7])
// diagonals
s0, s5, s10, s15 = g(s0, s5, s10, s15, n.Block[8], n.Block[9])
s1, s6, s11, s12 = g(s1, s6, s11, s12, n.Block[10], n.Block[11])
s2, s7, s8, s13 = g(s2, s7, s8, s13, n.Block[12], n.Block[13])
s3, s4, s9, s14 = g(s3, s4, s9, s14, n.Block[14], n.Block[15])
// round 2 (columns, then diagonals; same layout for all remaining rounds)
s0, s4, s8, s12 = g(s0, s4, s8, s12, n.Block[2], n.Block[6])
s1, s5, s9, s13 = g(s1, s5, s9, s13, n.Block[3], n.Block[10])
s2, s6, s10, s14 = g(s2, s6, s10, s14, n.Block[7], n.Block[0])
s3, s7, s11, s15 = g(s3, s7, s11, s15, n.Block[4], n.Block[13])
s0, s5, s10, s15 = g(s0, s5, s10, s15, n.Block[1], n.Block[11])
s1, s6, s11, s12 = g(s1, s6, s11, s12, n.Block[12], n.Block[5])
s2, s7, s8, s13 = g(s2, s7, s8, s13, n.Block[9], n.Block[14])
s3, s4, s9, s14 = g(s3, s4, s9, s14, n.Block[15], n.Block[8])
// round 3
s0, s4, s8, s12 = g(s0, s4, s8, s12, n.Block[3], n.Block[4])
s1, s5, s9, s13 = g(s1, s5, s9, s13, n.Block[10], n.Block[12])
s2, s6, s10, s14 = g(s2, s6, s10, s14, n.Block[13], n.Block[2])
s3, s7, s11, s15 = g(s3, s7, s11, s15, n.Block[7], n.Block[14])
s0, s5, s10, s15 = g(s0, s5, s10, s15, n.Block[6], n.Block[5])
s1, s6, s11, s12 = g(s1, s6, s11, s12, n.Block[9], n.Block[0])
s2, s7, s8, s13 = g(s2, s7, s8, s13, n.Block[11], n.Block[15])
s3, s4, s9, s14 = g(s3, s4, s9, s14, n.Block[8], n.Block[1])
// round 4
s0, s4, s8, s12 = g(s0, s4, s8, s12, n.Block[10], n.Block[7])
s1, s5, s9, s13 = g(s1, s5, s9, s13, n.Block[12], n.Block[9])
s2, s6, s10, s14 = g(s2, s6, s10, s14, n.Block[14], n.Block[3])
s3, s7, s11, s15 = g(s3, s7, s11, s15, n.Block[13], n.Block[15])
s0, s5, s10, s15 = g(s0, s5, s10, s15, n.Block[4], n.Block[0])
s1, s6, s11, s12 = g(s1, s6, s11, s12, n.Block[11], n.Block[2])
s2, s7, s8, s13 = g(s2, s7, s8, s13, n.Block[5], n.Block[8])
s3, s4, s9, s14 = g(s3, s4, s9, s14, n.Block[1], n.Block[6])
// round 5
s0, s4, s8, s12 = g(s0, s4, s8, s12, n.Block[12], n.Block[13])
s1, s5, s9, s13 = g(s1, s5, s9, s13, n.Block[9], n.Block[11])
s2, s6, s10, s14 = g(s2, s6, s10, s14, n.Block[15], n.Block[10])
s3, s7, s11, s15 = g(s3, s7, s11, s15, n.Block[14], n.Block[8])
s0, s5, s10, s15 = g(s0, s5, s10, s15, n.Block[7], n.Block[2])
s1, s6, s11, s12 = g(s1, s6, s11, s12, n.Block[5], n.Block[3])
s2, s7, s8, s13 = g(s2, s7, s8, s13, n.Block[0], n.Block[1])
s3, s4, s9, s14 = g(s3, s4, s9, s14, n.Block[6], n.Block[4])
// round 6
s0, s4, s8, s12 = g(s0, s4, s8, s12, n.Block[9], n.Block[14])
s1, s5, s9, s13 = g(s1, s5, s9, s13, n.Block[11], n.Block[5])
s2, s6, s10, s14 = g(s2, s6, s10, s14, n.Block[8], n.Block[12])
s3, s7, s11, s15 = g(s3, s7, s11, s15, n.Block[15], n.Block[1])
s0, s5, s10, s15 = g(s0, s5, s10, s15, n.Block[13], n.Block[3])
s1, s6, s11, s12 = g(s1, s6, s11, s12, n.Block[0], n.Block[10])
s2, s7, s8, s13 = g(s2, s7, s8, s13, n.Block[2], n.Block[6])
s3, s4, s9, s14 = g(s3, s4, s9, s14, n.Block[4], n.Block[7])
// round 7
s0, s4, s8, s12 = g(s0, s4, s8, s12, n.Block[11], n.Block[15])
s1, s5, s9, s13 = g(s1, s5, s9, s13, n.Block[5], n.Block[0])
s2, s6, s10, s14 = g(s2, s6, s10, s14, n.Block[1], n.Block[9])
s3, s7, s11, s15 = g(s3, s7, s11, s15, n.Block[8], n.Block[6])
s0, s5, s10, s15 = g(s0, s5, s10, s15, n.Block[14], n.Block[10])
s1, s6, s11, s12 = g(s1, s6, s11, s12, n.Block[2], n.Block[12])
s2, s7, s8, s13 = g(s2, s7, s8, s13, n.Block[3], n.Block[4])
s3, s4, s9, s14 = g(s3, s4, s9, s14, n.Block[7], n.Block[13])
// finalization: output words 0-7 are the upper state half XORed into the
// lower half; output words 8-15 are the upper state half XORed with the
// input chaining value n.CV.
return [16]uint32{
s0 ^ s8, s1 ^ s9, s2 ^ s10, s3 ^ s11,
s4 ^ s12, s5 ^ s13, s6 ^ s14, s7 ^ s15,
s8 ^ n.CV[0], s9 ^ n.CV[1], s10 ^ n.CV[2], s11 ^ n.CV[3],
s12 ^ n.CV[4], s13 ^ n.CV[5], s14 ^ n.CV[6], s15 ^ n.CV[7],
}
}
// ChainingValue runs CompressNode on n and returns only the leading 8 words
// of its 16-word output; the trailing 8 words are discarded.
func ChainingValue(n Node) (cv [8]uint32) {
	words := CompressNode(n)
	// copy would stop at len(cv) anyway; the bound is spelled out for clarity.
	copy(cv[:], words[:len(cv)])
	return cv
}
// compressBufferGeneric is the portable (non-SIMD) routine for compressing
// the first buflen bytes of buf.
//
// When the data fits in a single chunk (buflen <= ChunkSize) the result of
// CompressChunk is returned directly. Otherwise the data is consumed in
// ChunkSize-byte pieces (the last piece may be shorter), each piece is
// compressed with a chunk counter of counter plus its index, its chaining
// value is recorded, and the recorded chaining values are handed to
// mergeSubtrees to produce the returned node.
//
// key and flags are forwarded unchanged to CompressChunk and mergeSubtrees.
// buflen must not exceed len(buf); slicing panics otherwise.
func compressBufferGeneric(buf *[MaxSIMD * ChunkSize]byte, buflen int, key *[8]uint32, counter uint64, flags uint32) (n Node) {
	if buflen <= ChunkSize {
		return CompressChunk(buf[:buflen], key, counter, flags)
	}

	var (
		cvs   [MaxSIMD][8]uint32
		count uint64
	)
	pending := bytes.NewBuffer(buf[:buflen])
	for pending.Len() > 0 {
		// Next returns at most ChunkSize bytes and advances past them.
		chunk := pending.Next(ChunkSize)
		cvs[count] = ChainingValue(CompressChunk(chunk, key, counter+count, flags))
		count++
	}
	return mergeSubtrees(&cvs, count, key, flags)
}
// compressBlocksGeneric is the portable (non-SIMD) routine that fills every
// entry of outs with the byte form of CompressNode(n), incrementing n.Counter
// by one after each entry. n is a by-value copy, so the caller's node is left
// untouched.
func compressBlocksGeneric(outs *[MaxSIMD][64]byte, n Node) {
	for i := 0; i < len(outs); i++ {
		words := CompressNode(n)
		outs[i] = WordsToBytes(words)
		n.Counter++
	}
}
// mergeSubtreesGeneric is the portable (non-SIMD) routine that reduces the
// first numCVs chaining values in cvs to a single parent node.
//
// While more than two values remain, adjacent pairs are combined through
// ParentNode and replaced by the resulting chaining value; an unpaired final
// value is carried over to the next level unchanged. The node built from the
// last two values is returned as-is, without being reduced to a chaining
// value.
//
// cvs is overwritten in place. key and flags are forwarded unchanged to
// ParentNode. NOTE(review): the final step always reads cvs[0] and cvs[1], so
// callers presumably guarantee numCVs >= 2 — confirm against call sites.
func mergeSubtreesGeneric(cvs *[MaxSIMD][8]uint32, numCVs uint64, key *[8]uint32, flags uint32) Node {
	for numCVs > 2 {
		pairs := numCVs / 2
		for i := range cvs[:pairs] {
			left, right := cvs[2*i], cvs[2*i+1]
			cvs[i] = ChainingValue(ParentNode(left, right, key, flags))
		}

		next := pairs
		if numCVs&1 == 1 {
			// Odd count: the last value has no partner, so move it up
			// directly behind the freshly merged ones.
			cvs[pairs] = cvs[numCVs-1]
			next++
		}
		numCVs = next
	}
	return ParentNode(cvs[0], cvs[1], key, flags)
}