hash_ssz: updates (#13)

* rename from tree_ssz
* add special cases for some types based on beacon chain code
* add smoke test
* avoids some trivial allocations, but the big one remains — the temporary
  chunk buffer is still heap-allocated
* update to handle zero-length buffer case same as latest spec
This commit is contained in:
Jacek Sieka 2018-11-16 11:04:21 -06:00 committed by GitHub
parent 8493949456
commit 949b24702c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 175 additions and 112 deletions

151
beacon_chain/hash_ssz.nim Normal file
View File

@ -0,0 +1,151 @@
import
nimcrypto, eth_common, endians, sequtils, algorithm, ./datatypes,
milagro_crypto
# Sample hashSSZ implementation based on:
# https://github.com/ethereum/eth2.0-specs/pull/120
# and
# https://github.com/ethereum/beacon_chain/blob/e32464d9c1c82a2b46f2eb83c383654ea1d1ebe6/hash_ssz.py
# Probably wrong - the spec is pretty bare-bones and no test vectors yet
const CHUNK_SIZE = 128
template withHash(body: untyped): untyped =
  ## Spec defines hash as BLAKE2b-512(x)[0:32]
  ## Injects a fresh hash context `h`, runs `body` against it and yields the
  ## first 32 bytes of the finished digest:
  ##   let hashOfData = withHash: h.update(data)
  var h {.inject.}: blake2_512
  h.init()
  body
  var fullDigest = h.finish().data
  var sliced: array[32, byte]
  copyMem(sliced.addr, fullDigest.addr, 32)
  sliced
# XXX varargs openarray, anyone?
func hash(a: openArray[byte]): array[32, byte] =
  ## Truncated BLAKE2b-512 of a single byte buffer (see `withHash`).
  withHash:
    h.update(a)

func hash(a, b: openArray[byte]): array[32, byte] =
  ## Hash of the concatenation of two buffers, computed without
  ## materializing the concatenation - both feed the same digest.
  withHash:
    h.update(a)
    h.update(b)
func nextPowerOf2(v: uint32): uint32 =
  ## Round `v` up to the nearest power of two (returns `v` unchanged when it
  ## already is one). Classic bit-smearing trick: propagate the highest set
  ## bit into every lower position, then add one.
  if v == 0:
    # Without this guard `v - 1` wraps to 0xFFFFFFFF and the function
    # returns 0; treat an empty length as the smallest power of two.
    return 1
  result = v - 1
  result = result or (result shr 1)
  result = result or (result shr 2)
  result = result or (result shr 4)
  result = result or (result shr 8)
  result = result or (result shr 16)
  inc result
func roundUpTo(v, to: int): int =
  ## Round up `v` to an even boundary of `to`
  ## (the smallest multiple of `to` that is >= `v`, for non-negative `v`).
  let wholeBlocks = (v + to - 1) div to
  wholeBlocks * to
# Forward declaration - `listToGlob` is defined after the generic `hashSSZ`
# it depends on, but `merkleHash` below already needs to call it.
func listToGlob[T](lst: seq[T]): seq[byte]

# XXX: er, how is this _actually_ done?
# Zero value of any type - Nim zero-initializes `result` by default.
func empty(T: typedesc): T = discard
const emptyChunk = @(empty(array[CHUNK_SIZE, byte]))  # all-zero padding chunk
func merkleHash[T](lst: seq[T]): array[32, byte] =
  ## Merkle tree hash of a list of items: flatten the list with some padding,
  ## divide it into CHUNK_SIZE sized chunks, reduce pairwise to a single root
  ## chunk, and finally hash that root together with the list's length.

  # Turn list into padded data
  # XXX: the heap allocations here can be avoided by computing the merkle tree
  # recursively, but for now keep things simple and aligned with upstream
  var data = listToGlob(lst)

  # Store length of list (to compensate for non-bijectiveness of padding)
  var dataLen: array[32, byte]
  var lstLen = uint64(len(lst))
  bigEndian64(dataLen[32-8].addr, lstLen.addr)  # length in the last 8 bytes

  # Divide into chunks - `listToGlob` pads to a CHUNK_SIZE multiple, so the
  # slice below never runs past the end of `data`.
  var chunkz: seq[seq[byte]]
  for i in countup(0, data.len - 1, CHUNK_SIZE):
    chunkz.add data[i..<i + CHUNK_SIZE]

  # Pairwise reduction; odd layers get an all-zero chunk appended.
  while chunkz.len() > 1:
    if chunkz.len() mod 2 == 1:
      chunkz.add emptyChunk
    for i in 0..<(chunkz.len div 2):
      # As tradition dictates - one feature, at least one nim bug:
      # https://github.com/nim-lang/Nim/issues/9684
      let tmp = @(hash(chunkz[i * 2], chunkz[i * 2 + 1]))
      chunkz[i] = tmp
    chunkz.setLen(chunkz.len div 2)

  if chunkz.len == 0:
    # Zero-length list: hash 32 zero bytes with the length, matching the
    # latest spec's handling of the empty-buffer case.
    const empty32 = empty(array[32, byte])
    result = hash(empty32, dataLen)
    return

  result = hash(chunkz[0], dataLen)
func hashSSZ*(x: SomeInteger): array[sizeof(x), byte] =
  ## Integers are all encoded as big-endian and not padded.
  # Copy the native-order bytes out first, then byte-swap into `result`.
  var native: array[x.sizeof, byte]
  copyMem(native.addr, x.unsafeAddr, x.sizeof)
  when x.sizeof == 1:
    result = native
  elif x.sizeof == 2:
    bigEndian16(result.addr, native.addr)
  elif x.sizeof == 4:
    bigEndian32(result.addr, native.addr)
  elif x.sizeof == 8:
    bigEndian64(result.addr, native.addr)
  else:
    {.fatal: "boink: " & $x.sizeof .}
func hashSSZ*(x: Uint24): array[3, byte] =
  ## A Uint24 is encoded as its 3 low-order bytes, big-endian.
  var tmp = hashSSZ(x.uint32)  # 4-byte big-endian encoding
  # Take the 3 least-significant bytes (tmp[1..3]). The previous code copied
  # tmp[0..2], which dropped the LOW byte and kept the always-zero high byte
  # (the "XXX broken endian!" it flagged).
  copyMem(result.addr, tmp[1].addr, 3)
# Addresses are already fixed-size byte arrays and are used verbatim.
func hashSSZ*(x: EthAddress): array[sizeof(x), byte] = x
# A 256-bit digest is its own hash.
func hashSSZ*(x: MDigest[32*8]): array[32, byte] = x.data
# Arbitrary byte blobs get hashed down to 32 bytes (truncated BLAKE2b).
func hashSSZ*(x: openArray[byte]): array[32, byte] = hash(x)
func hashSSZ*(x: ValidatorRecord): array[32, byte] =
  ## Hash of a validator record: selected fields fed to the digest in this
  ## fixed order. NOTE(review): order mirrors the upstream hash_ssz.py
  ## special case rather than the generic sorted-field path below - confirm
  ## against the spec.
  # XXX hash_ssz.py code contains special cases for some types, why?
  withHash:
    # tmp.add(x.pubkey) # XXX our code vs spec!
    h.update hashSSZ(x.withdrawal_shard)
    h.update hashSSZ(x.withdrawal_address)
    h.update hashSSZ(x.randao_commitment)
    h.update hashSSZ(x.balance.data.lo) # XXX our code vs spec!
    h.update hashSSZ(x.start_dynasty)
    h.update hashSSZ(x.end_dynasty)
func hashSSZ*(x: ShardAndCommittee): array[32, byte] =
  ## Hash of a shard/committee assignment: the shard id followed by the
  ## merkle root of the committee list.
  return withHash:
    h.update hashSSZ(x.shard_id)
    h.update merkleHash(x.committee)
func hashSSZ*[T](x: T): array[32, byte] =
  ## Generic fallback: seqs are merkle-hashed; objects hash their fields
  ## sorted by field name (making the result independent of declaration
  ## order).
  when T is seq:
    return merkleHash(x)
  else:
    # XXX could probably compile-time-macro-sort fields...
    var fields: seq[tuple[name: string, value: seq[byte]]]
    for name, field in x.fieldPairs:
      # NOTE(review): hashSSZ(field) yields a fixed-size array; storing it
      # in a seq[byte] tuple field relies on an array->seq conversion -
      # verify this compiles for every field type.
      fields.add (name, hashSSZ(field))
    return withHash:
      for name, value in fields.sortedByIt(it.name):
        # NOTE(review): value.value is already a field hash; hashSSZ(...)
        # hashes it a second time before updating - confirm vs spec.
        h.update hashSSZ(value.value)
func listToGlob[T](lst: seq[T]): seq[byte] =
  ## Concatenate a list of homogeneous objects into data and pad it
  for item in lst:
    let encoded = hashSSZ(item)
    result.add(encoded)
    # Pad each element out to a power-of-two boundary..
    let targetLen = nextPowerOf2(len(encoded).uint32).int
    if targetLen != len(encoded):
      result.setLen(result.len.roundUpTo(targetLen))
  # ..and the whole glob to a whole number of chunks
  result.setLen(result.len().roundUpTo(CHUNK_SIZE))

View File

@ -1,112 +0,0 @@
import nimcrypto, eth_common, endians, sequtils, algorithm
# Sample treehash implementation based on:
# https://github.com/ethereum/eth2.0-specs/pull/120
# Probably wrong - the spec is pretty bare-bones and no test vectors yet
const CHUNK_SIZE = 128
# XXX varargs openarray, anyone?
# Truncated BLAKE2b-512: the spec hash is BLAKE2b-512(x)[0:32].
func hash(a: openArray[byte]): array[32, byte] =
  var h: blake2_512
  h.init()
  h.update(a)
  var tmp = h.finish().data
  copyMem(result.addr, tmp.addr, 32)

# Same digest over the concatenation of two buffers, without
# materializing the concatenation.
func hash(a, b: openArray[byte]): array[32, byte] =
  var h: blake2_512
  h.init()
  h.update(a)
  h.update(b)
  var tmp = h.finish().data
  copyMem(result.addr, tmp.addr, 32)
func nextPowerOf2(v: uint32): uint32 =
  ## Round `v` up to the nearest power of two via bit smearing.
  ## NOTE(review): wraps for v == 0 (returns 0) - callers here only pass
  ## non-zero lengths.
  result = v - 1
  result = result or (result shr 1)
  result = result or (result shr 2)
  result = result or (result shr 4)
  result = result or (result shr 8)
  result = result or (result shr 16)
  inc result

func roundUpTo(v, to: int): int =
  ## Round up to an even boundary of `to`
  ((v + to - 1) div to) * to
# Concatenate a list of homogeneous objects into data and pad it
proc listToGlob(lst: seq[seq[byte]]): seq[byte] =
  for x in lst:
    var y = x
    # Pad each element out to a power-of-two length..
    # NOTE(review): setLen takes an int; the uint32 from nextPowerOf2 is
    # converted here - verify the conversion compiles/behaves on all targets.
    y.setLen(nextPowerOf2(len(x).uint32))
    result.add(y)
  # ..and pad the whole result to chunksize
  result.setLen(result.len().roundUpTo(CHUNK_SIZE))

# XXX: er, how is this _actually_ done?
# Zero value of any type - Nim zero-initializes `result` by default.
func empty(T: typedesc): T = discard
const emptyChunk = @(empty(array[CHUNK_SIZE, byte]))  # all-zero padding chunk
proc merkleHash(lst: seq[seq[byte]]): array[32, byte] =
  ## Merkle tree hash of a list of items
  # XXX: seq-of-seq looks weird...

  # Turn list into padded data
  var data = listToGlob(lst)

  # Store length of list (to compensate for non-bijectiveness of padding)
  var dataLen: array[32, byte]
  var lstLen = uint64(len(lst))
  bigEndian64(dataLen[32-8].addr, lstLen.addr)

  # Divide into chunks
  var chunkz: seq[seq[byte]]
  for i in countup(0, data.len - 1, CHUNK_SIZE):
    chunkz.add data[i..<i + CHUNK_SIZE]

  # Pairwise reduction; odd layers get an all-zero chunk appended.
  while chunkz.len() > 1:
    if chunkz.len() mod 2 == 1:
      chunkz.add emptyChunk
    for i in 0..<(chunkz.len div 2):
      # As tradition dictates - one feature, at least one nim bug:
      # https://github.com/nim-lang/Nim/issues/9684
      let tmp = @(hash(chunkz[i * 2], chunkz[i * 2 + 1]))
      chunkz[i] = tmp
    chunkz.setLen(chunkz.len div 2)

  # NOTE(review): an empty `lst` leaves `chunkz` empty and the index below
  # raises - the hash_ssz.nim replacement guards this case explicitly.
  result = hash(chunkz[0], dataLen)
# Integers: copy out the native bytes, then byte-swap to big-endian.
proc treeHash*(x: SomeInteger): seq[byte] =
  var v: array[x.sizeof, byte]
  copyMem(v.addr, x.unsafeAddr, x.sizeof)
  var res: array[x.sizeof, byte]
  when x.sizeof == 8: bigEndian64(res.addr, v.addr)
  elif x.sizeof == 4: bigEndian32(res.addr, v.addr)
  elif x.sizeof == 2: bigEndian16(res.addr, v.addr)
  elif x.sizeof == 1: res = v
  else: {.fatal: "boink: " & $x.sizeof .}
  result = @res

# Fixed-size byte types are used verbatim (converted to seqs).
proc treeHash*(x: EthAddress): seq[byte] = @x
proc treeHash*(x: MDigest): seq[byte] = @(x.data)
# Raw blobs are hashed down to 32 bytes first.
proc treeHash*(x: seq[byte]): seq[byte] = @(hash(x)) # XXX: hash96 also!
# Lists: tree-hash each element, then merkle-hash the element hashes.
proc treeHash*[T: seq](x: T): seq[byte] =
  var tmp: seq[seq[byte]]
  for v in x:
    tmp.add treeHash(v)
  result = merkleHash(tmp)

# Objects: hash fields sorted by name so declaration order doesn't matter.
proc treeHash*[T](x: T): seq[byte] =
  # XXX: could probably compile-time-macro-sort fields...
  var fields: seq[tuple[name: string, value: seq[byte]]]
  for name, field in x.fieldPairs:
    fields.add (name, treeHash(field))
  var tmp: seq[byte]
  for name, value in fields.sortedByIt(it.name):
    tmp.add value.value
  result = @(hash(tmp))

24
tests/test_hash_ssz.nim Normal file
View File

@ -0,0 +1,24 @@
# beacon_chain
# Copyright (c) 2018 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
unittest,
../beacon_chain/[datatypes, hash_ssz]
suite "Tree hashing":
  # XXX Nothing but smoke tests for now..
  test "Hash ValidatorRecord":
    # Default-initialized record - only checks that hashing doesn't crash.
    let vr = ValidatorRecord()
    check: hashSSZ(vr).len > 0

  test "Hash ShardAndCommittee":
    # Same smoke test for the other special-cased type.
    let sc = ShardAndCommittee()
    check: hashSSZ(sc).len > 0

  test "Hash integer":
    # The one value-level check: 32-bit ints serialize big-endian, unpadded.
    check: hashSSZ(0x01'u32) == [0'u8, 0, 0, 1] # big endian!