hash_ssz: updates (#13)

* rename from tree_ssz
* add special cases for some types based on beacon chain code
* add smoke test
* avoids some trivial allocations, but the big one remains — the temporary
  chunk buffer is still heap-allocated
* update to handle zero-length buffer case same as latest spec
This commit is contained in:
Jacek Sieka 2018-11-16 11:04:21 -06:00 committed by GitHub
parent 8493949456
commit 949b24702c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 175 additions and 112 deletions

151
beacon_chain/hash_ssz.nim Normal file
View File

@ -0,0 +1,151 @@
import
nimcrypto, eth_common, endians, sequtils, algorithm, ./datatypes,
milagro_crypto
# Sample hashSSZ implementation based on:
# https://github.com/ethereum/eth2.0-specs/pull/120
# and
# https://github.com/ethereum/beacon_chain/blob/e32464d9c1c82a2b46f2eb83c383654ea1d1ebe6/hash_ssz.py
# Probably wrong - the spec is pretty bare-bones and no test vectors yet
const CHUNK_SIZE = 128
template withHash(body: untyped): untyped =
  ## Spec defines hash as BLAKE2b-512(x)[0:32]
  ## Injects a fresh hash context `h`, runs `body` against it and yields the
  ## first 32 bytes of the finished digest:
  ##   let hashOfData = withHash: h.update(data)
  var h {.inject.}: blake2_512
  h.init()
  body
  var fullDigest = h.finish().data
  var sliced: array[32, byte]
  copyMem(sliced.addr, fullDigest.addr, 32)
  sliced
# XXX varargs openarray, anyone?
func hash(a: openArray[byte]): array[32, byte] =
  ## Truncated BLAKE2b-512 of a single byte buffer (see `withHash`).
  withHash:
    h.update(a)

func hash(a, b: openArray[byte]): array[32, byte] =
  ## Hash of the concatenation of two buffers, computed without
  ## materializing the concatenation - both feed the same digest.
  withHash:
    h.update(a)
    h.update(b)
func nextPowerOf2(v: uint32): uint32 =
  ## Round `v` up to the nearest power of two (returns `v` unchanged when it
  ## already is one). Classic bit-smearing trick: propagate the highest set
  ## bit into every lower position, then add one.
  if v == 0:
    # Without this guard `v - 1` wraps to 0xFFFFFFFF and the function
    # returns 0; treat an empty length as the smallest power of two.
    return 1
  result = v - 1
  result = result or (result shr 1)
  result = result or (result shr 2)
  result = result or (result shr 4)
  result = result or (result shr 8)
  result = result or (result shr 16)
  inc result
func roundUpTo(v, to: int): int =
  ## Round up `v` to an even boundary of `to`
  ## (the smallest multiple of `to` that is >= `v`, for non-negative `v`).
  let wholeBlocks = (v + to - 1) div to
  wholeBlocks * to
# Forward declaration - `listToGlob` is defined after the generic `hashSSZ`
# it depends on, but `merkleHash` below already needs to call it.
func listToGlob[T](lst: seq[T]): seq[byte]

# XXX: er, how is this _actually_ done?
# Zero value of any type - Nim zero-initializes `result` by default.
func empty(T: typedesc): T = discard
const emptyChunk = @(empty(array[CHUNK_SIZE, byte]))  # all-zero padding chunk
func merkleHash[T](lst: seq[T]): array[32, byte] =
  ## Merkle tree hash of a list of items: flatten the list with some padding,
  ## divide it into CHUNK_SIZE sized chunks, reduce pairwise to a single root
  ## chunk, and finally hash that root together with the list's length.

  # Turn list into padded data
  # XXX: the heap allocations here can be avoided by computing the merkle tree
  # recursively, but for now keep things simple and aligned with upstream
  var data = listToGlob(lst)

  # Store length of list (to compensate for non-bijectiveness of padding)
  var dataLen: array[32, byte]
  var lstLen = uint64(len(lst))
  bigEndian64(dataLen[32-8].addr, lstLen.addr)  # length in the last 8 bytes

  # Divide into chunks - `listToGlob` pads to a CHUNK_SIZE multiple, so the
  # slice below never runs past the end of `data`.
  var chunkz: seq[seq[byte]]
  for i in countup(0, data.len - 1, CHUNK_SIZE):
    chunkz.add data[i..<i + CHUNK_SIZE]

  # Pairwise reduction; odd layers get an all-zero chunk appended.
  while chunkz.len() > 1:
    if chunkz.len() mod 2 == 1:
      chunkz.add emptyChunk
    for i in 0..<(chunkz.len div 2):
      # As tradition dictates - one feature, at least one nim bug:
      # https://github.com/nim-lang/Nim/issues/9684
      let tmp = @(hash(chunkz[i * 2], chunkz[i * 2 + 1]))
      chunkz[i] = tmp
    chunkz.setLen(chunkz.len div 2)

  if chunkz.len == 0:
    # Zero-length list: hash 32 zero bytes with the length, matching the
    # latest spec's handling of the empty-buffer case.
    const empty32 = empty(array[32, byte])
    result = hash(empty32, dataLen)
    return

  result = hash(chunkz[0], dataLen)
func hashSSZ*(x: SomeInteger): array[sizeof(x), byte] =
  ## Integers are all encoded as big-endian and not padded.
  # Copy the native-order bytes out first, then byte-swap into `result`.
  var native: array[x.sizeof, byte]
  copyMem(native.addr, x.unsafeAddr, x.sizeof)
  when x.sizeof == 1:
    result = native
  elif x.sizeof == 2:
    bigEndian16(result.addr, native.addr)
  elif x.sizeof == 4:
    bigEndian32(result.addr, native.addr)
  elif x.sizeof == 8:
    bigEndian64(result.addr, native.addr)
  else:
    {.fatal: "boink: " & $x.sizeof .}
func hashSSZ*(x: Uint24): array[3, byte] =
  ## A Uint24 is encoded as its 3 low-order bytes, big-endian.
  var tmp = hashSSZ(x.uint32)  # 4-byte big-endian encoding
  # Take the 3 least-significant bytes (tmp[1..3]). The previous code copied
  # tmp[0..2], which dropped the LOW byte and kept the always-zero high byte
  # (the "XXX broken endian!" it flagged).
  copyMem(result.addr, tmp[1].addr, 3)
# Addresses are already fixed-size byte arrays and are used verbatim.
func hashSSZ*(x: EthAddress): array[sizeof(x), byte] = x
# A 256-bit digest is its own hash.
func hashSSZ*(x: MDigest[32*8]): array[32, byte] = x.data
# Arbitrary byte blobs get hashed down to 32 bytes (truncated BLAKE2b).
func hashSSZ*(x: openArray[byte]): array[32, byte] = hash(x)
func hashSSZ*(x: ValidatorRecord): array[32, byte] =
  ## Hash of a validator record: selected fields fed to the digest in this
  ## fixed order. NOTE(review): order mirrors the upstream hash_ssz.py
  ## special case rather than the generic sorted-field path below - confirm
  ## against the spec.
  # XXX hash_ssz.py code contains special cases for some types, why?
  withHash:
    # tmp.add(x.pubkey) # XXX our code vs spec!
    h.update hashSSZ(x.withdrawal_shard)
    h.update hashSSZ(x.withdrawal_address)
    h.update hashSSZ(x.randao_commitment)
    h.update hashSSZ(x.balance.data.lo) # XXX our code vs spec!
    h.update hashSSZ(x.start_dynasty)
    h.update hashSSZ(x.end_dynasty)
func hashSSZ*(x: ShardAndCommittee): array[32, byte] =
  ## Hash of a shard/committee assignment: the shard id followed by the
  ## merkle root of the committee list.
  return withHash:
    h.update hashSSZ(x.shard_id)
    h.update merkleHash(x.committee)
func hashSSZ*[T](x: T): array[32, byte] =
  ## Generic fallback: seqs are merkle-hashed; objects hash their fields
  ## sorted by field name (making the result independent of declaration
  ## order).
  when T is seq:
    return merkleHash(x)
  else:
    # XXX could probably compile-time-macro-sort fields...
    var fields: seq[tuple[name: string, value: seq[byte]]]
    for name, field in x.fieldPairs:
      # NOTE(review): hashSSZ(field) yields a fixed-size array; storing it
      # in a seq[byte] tuple field relies on an array->seq conversion -
      # verify this compiles for every field type.
      fields.add (name, hashSSZ(field))
    return withHash:
      for name, value in fields.sortedByIt(it.name):
        # NOTE(review): value.value is already a field hash; hashSSZ(...)
        # hashes it a second time before updating - confirm vs spec.
        h.update hashSSZ(value.value)
func listToGlob[T](lst: seq[T]): seq[byte] =
  ## Concatenate a list of homogeneous objects into data and pad it
  for item in lst:
    let encoded = hashSSZ(item)
    result.add(encoded)
    # Pad each element out to a power-of-two boundary..
    let targetLen = nextPowerOf2(len(encoded).uint32).int
    if targetLen != len(encoded):
      result.setLen(result.len.roundUpTo(targetLen))
  # ..and the whole glob to a whole number of chunks
  result.setLen(result.len().roundUpTo(CHUNK_SIZE))

View File

@ -1,112 +0,0 @@
import nimcrypto, eth_common, endians, sequtils, algorithm
# Sample treehash implementation based on:
# https://github.com/ethereum/eth2.0-specs/pull/120
# Probably wrong - the spec is pretty bare-bones and no test vectors yet
const CHUNK_SIZE = 128
# XXX varargs openarray, anyone?
# Truncated BLAKE2b-512: the spec hash is BLAKE2b-512(x)[0:32].
func hash(a: openArray[byte]): array[32, byte] =
  var h: blake2_512
  h.init()
  h.update(a)
  var tmp = h.finish().data
  copyMem(result.addr, tmp.addr, 32)

# Same digest over the concatenation of two buffers, without
# materializing the concatenation.
func hash(a, b: openArray[byte]): array[32, byte] =
  var h: blake2_512
  h.init()
  h.update(a)
  h.update(b)
  var tmp = h.finish().data
  copyMem(result.addr, tmp.addr, 32)
func nextPowerOf2(v: uint32): uint32 =
  ## Round `v` up to the nearest power of two via bit smearing.
  ## NOTE(review): wraps for v == 0 (returns 0) - callers here only pass
  ## non-zero lengths.
  result = v - 1
  result = result or (result shr 1)
  result = result or (result shr 2)
  result = result or (result shr 4)
  result = result or (result shr 8)
  result = result or (result shr 16)
  inc result

func roundUpTo(v, to: int): int =
  ## Round up to an even boundary of `to`
  ((v + to - 1) div to) * to
# Concatenate a list of homogeneous objects into data and pad it
proc listToGlob(lst: seq[seq[byte]]): seq[byte] =
  for x in lst:
    var y = x
    # Pad each element out to a power-of-two length..
    # NOTE(review): setLen takes an int; the uint32 from nextPowerOf2 is
    # converted here - verify the conversion compiles/behaves on all targets.
    y.setLen(nextPowerOf2(len(x).uint32))
    result.add(y)
  # ..and pad the whole result to chunksize
  result.setLen(result.len().roundUpTo(CHUNK_SIZE))

# XXX: er, how is this _actually_ done?
# Zero value of any type - Nim zero-initializes `result` by default.
func empty(T: typedesc): T = discard
const emptyChunk = @(empty(array[CHUNK_SIZE, byte]))  # all-zero padding chunk
proc merkleHash(lst: seq[seq[byte]]): array[32, byte] =
  ## Merkle tree hash of a list of items
  # XXX: seq-of-seq looks weird...

  # Turn list into padded data
  var data = listToGlob(lst)

  # Store length of list (to compensate for non-bijectiveness of padding)
  var dataLen: array[32, byte]
  var lstLen = uint64(len(lst))
  bigEndian64(dataLen[32-8].addr, lstLen.addr)

  # Divide into chunks
  var chunkz: seq[seq[byte]]
  for i in countup(0, data.len - 1, CHUNK_SIZE):
    chunkz.add data[i..<i + CHUNK_SIZE]

  # Pairwise reduction; odd layers get an all-zero chunk appended.
  while chunkz.len() > 1:
    if chunkz.len() mod 2 == 1:
      chunkz.add emptyChunk
    for i in 0..<(chunkz.len div 2):
      # As tradition dictates - one feature, at least one nim bug:
      # https://github.com/nim-lang/Nim/issues/9684
      let tmp = @(hash(chunkz[i * 2], chunkz[i * 2 + 1]))
      chunkz[i] = tmp
    chunkz.setLen(chunkz.len div 2)

  # NOTE(review): an empty `lst` leaves `chunkz` empty and the index below
  # raises - the hash_ssz.nim replacement guards this case explicitly.
  result = hash(chunkz[0], dataLen)
# Integers: copy out the native bytes, then byte-swap to big-endian.
proc treeHash*(x: SomeInteger): seq[byte] =
  var v: array[x.sizeof, byte]
  copyMem(v.addr, x.unsafeAddr, x.sizeof)
  var res: array[x.sizeof, byte]
  when x.sizeof == 8: bigEndian64(res.addr, v.addr)
  elif x.sizeof == 4: bigEndian32(res.addr, v.addr)
  elif x.sizeof == 2: bigEndian16(res.addr, v.addr)
  elif x.sizeof == 1: res = v
  else: {.fatal: "boink: " & $x.sizeof .}
  result = @res

# Fixed-size byte types are used verbatim (converted to seqs).
proc treeHash*(x: EthAddress): seq[byte] = @x
proc treeHash*(x: MDigest): seq[byte] = @(x.data)
# Raw blobs are hashed down to 32 bytes first.
proc treeHash*(x: seq[byte]): seq[byte] = @(hash(x)) # XXX: hash96 also!
# Lists: tree-hash each element, then merkle-hash the element hashes.
proc treeHash*[T: seq](x: T): seq[byte] =
  var tmp: seq[seq[byte]]
  for v in x:
    tmp.add treeHash(v)
  result = merkleHash(tmp)

# Objects: hash fields sorted by name so declaration order doesn't matter.
proc treeHash*[T](x: T): seq[byte] =
  # XXX: could probably compile-time-macro-sort fields...
  var fields: seq[tuple[name: string, value: seq[byte]]]
  for name, field in x.fieldPairs:
    fields.add (name, treeHash(field))
  var tmp: seq[byte]
  for name, value in fields.sortedByIt(it.name):
    tmp.add value.value
  result = @(hash(tmp))

24
tests/test_hash_ssz.nim Normal file
View File

@ -0,0 +1,24 @@
# beacon_chain
# Copyright (c) 2018 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
unittest,
../beacon_chain/[datatypes, hash_ssz]
suite "Tree hashing":
  # XXX Nothing but smoke tests for now..
  test "Hash ValidatorRecord":
    # Default-initialized record - only checks that hashing doesn't crash.
    let vr = ValidatorRecord()
    check: hashSSZ(vr).len > 0

  test "Hash ShardAndCommittee":
    # Same smoke test for the other special-cased type.
    let sc = ShardAndCommittee()
    check: hashSSZ(sc).len > 0

  test "Hash integer":
    # The one value-level check: 32-bit ints serialize big-endian, unpadded.
    check: hashSSZ(0x01'u32) == [0'u8, 0, 0, 1] # big endian!