From 6df3ec952bdb29cc2ad00660c83b600b9fb1e62b Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Wed, 14 Nov 2018 14:06:04 -0600 Subject: [PATCH 1/2] hash_ssz: simplify per latest spec update * update per https://github.com/ethereum/eth2.0-specs/pull/140 * join with ssz to form a single ssz module (like spec) --- beacon_chain/hash_ssz.nim | 151 ------------------------ beacon_chain/ssz.nim | 239 +++++++++++++++++++++++++++++++------- tests/test_hash_ssz.nim | 24 ---- tests/test_ssz.nim | 17 ++- 4 files changed, 212 insertions(+), 219 deletions(-) delete mode 100644 beacon_chain/hash_ssz.nim delete mode 100644 tests/test_hash_ssz.nim diff --git a/beacon_chain/hash_ssz.nim b/beacon_chain/hash_ssz.nim deleted file mode 100644 index ff5efa506..000000000 --- a/beacon_chain/hash_ssz.nim +++ /dev/null @@ -1,151 +0,0 @@ -import - nimcrypto, eth_common, endians, sequtils, algorithm, ./datatypes, - milagro_crypto - -# Sample hashSSZ implementation based on: -# https://github.com/ethereum/eth2.0-specs/pull/120 -# and -# https://github.com/ethereum/beacon_chain/blob/e32464d9c1c82a2b46f2eb83c383654ea1d1ebe6/hash_ssz.py -# Probably wrong - the spec is pretty bare-bones and no test vectors yet - -const CHUNK_SIZE = 128 - -template withHash(body: untyped): untyped = - ## Spec defines hash as BLAKE2b-512(x)[0:32] - ## This little helper will init the hash function and return the sliced - ## hash: - ## let hashOfData = withHash: h.update(data) - var h {.inject.}: blake2_512 - h.init() - body - var res: array[32, byte] - var tmp = h.finish().data - copyMem(res.addr, tmp.addr, 32) - res - -# XXX varargs openarray, anyone? -func hash(a: openArray[byte]): array[32, byte] = - withHash: - h.update(a) - -func hash(a, b: openArray[byte]): array[32, byte] = - withHash: - h.update(a) - h.update(b) - -func nextPowerOf2(v: uint32): uint32 = - result = v - 1 - result = result or (result shr 1) - result = result or (result shr 2) - result = result or (result shr 4) - result = result or (result shr 8) - result = result or (result shr 16) - inc result - -func roundUpTo(v, to: int): int = - ## Round up `v` to an even boundary of `to` - ((v + to - 1) div to) * to - -func listToGlob[T](lst: seq[T]): seq[byte] - -# XXX: er, how is this _actually_ done? -func empty(T: typedesc): T = discard -const emptyChunk = @(empty(array[CHUNK_SIZE, byte])) - -func merkleHash[T](lst: seq[T]): array[32, byte] = - ## Merkle tree hash of a list of items flattening list with some padding, - ## then dividing the list into CHUNK_SIZE sized chunks - - # Turn list into padded data - # XXX: the heap allocations here can be avoided by computing the merkle tree - # recursively, but for now keep things simple and aligned with upstream - var data = listToGlob(lst) - - # Store length of list (to compensate for non-bijectiveness of padding) - var dataLen: array[32, byte] - var lstLen = uint64(len(lst)) - bigEndian64(dataLen[32-8].addr, lstLen.addr) - - # Divide into chunks - var chunkz: seq[seq[byte]] - for i in countup(0, data.len - 1, CHUNK_SIZE): - chunkz.add data[i.. 1: - if chunkz.len() mod 2 == 1: - chunkz.add emptyChunk - for i in 0..<(chunkz.len div 2): - # As tradition dictates - one feature, at least one nim bug: - # https://github.com/nim-lang/Nim/issues/9684 - let tmp = @(hash(chunkz[i * 2], chunkz[i * 2 + 1])) - chunkz[i] = tmp - - chunkz.setLen(chunkz.len div 2) - - if chunkz.len == 0: - const empty32 = empty(array[32, byte]) - result = hash(empty32, dataLen) - return - - result = hash(chunkz[0], dataLen) - -func hashSSZ*(x: SomeInteger): array[sizeof(x), byte] = - ## Integers area all encoded as bigendian and not padded - var v: array[x.sizeof, byte] - copyMem(v.addr, x.unsafeAddr, x.sizeof) - - when x.sizeof == 8: bigEndian64(result.addr, v.addr) - elif x.sizeof == 4: bigEndian32(result.addr, v.addr) - elif x.sizeof == 2: bigEndian16(result.addr, v.addr) - elif x.sizeof == 1: result = v - else: {.fatal: "boink: " & $x.sizeof .} - -func hashSSZ*(x: Uint24): array[3, byte] = - var tmp = hashSSZ(x.uint32) # XXX broken endian! - copyMem(result.addr, tmp.addr, 3) - -func hashSSZ*(x: EthAddress): array[sizeof(x), byte] = x -func hashSSZ*(x: MDigest[32*8]): array[32, byte] = x.data -func hashSSZ*(x: openArray[byte]): array[32, byte] = hash(x) - -func hashSSZ*(x: ValidatorRecord): array[32, byte] = - # XXX hash_ssz.py code contains special cases for some types, why? - withHash: - # tmp.add(x.pubkey) # XXX our code vs spec! - h.update hashSSZ(x.withdrawal_shard) - h.update hashSSZ(x.withdrawal_address) - h.update hashSSZ(x.randao_commitment) - h.update hashSSZ(x.balance.data.lo) # XXX our code vs spec! - h.update hashSSZ(x.start_dynasty) - h.update hashSSZ(x.end_dynasty) - -func hashSSZ*(x: ShardAndCommittee): array[32, byte] = - return withHash: - h.update hashSSZ(x.shard_id) - h.update merkleHash(x.committee) - -func hashSSZ*[T](x: T): array[32, byte] = - when T is seq: - return merkleHash(x) - else: - # XXX could probaby compile-time-macro-sort fields... - var fields: seq[tuple[name: string, value: seq[byte]]] - for name, field in x.fieldPairs: - fields.add (name, hashSSZ(field)) - - return withHash: - for name, value in fields.sortedByIt(it.name): - h.update hashSSZ(value.value) - -func listToGlob[T](lst: seq[T]): seq[byte] = - ## Concatenate a list of homogeneous objects into data and pad it - for x in lst: - let - y = hashSSZ(x) - paddedLen = nextPowerOf2(len(y).uint32).int - result.add(y) - if paddedLen != len(y): - result.setLen(result.len.roundUpTo(paddedLen)) - - # Pad to chunksize - result.setLen(result.len().roundUpTo(CHUNK_SIZE)) diff --git a/beacon_chain/ssz.nim b/beacon_chain/ssz.nim index 133e3c72e..fb7e36a6c 100644 --- a/beacon_chain/ssz.nim +++ b/beacon_chain/ssz.nim @@ -12,7 +12,45 @@ import ./datatypes, eth_common, endians, typetraits, options, nimcrypto # ################### Helper functions ################################### -func `+`[T](p: ptr T, offset: int): ptr T {.inline.}= + +func len(x: Uint24): int = 3 + +func toBytesSSZ(x: SomeInteger): array[sizeof(x), byte] = + ## Integers are all encoded as bigendian and not padded + + when x.sizeof == 8: bigEndian64(result.addr, x.unsafeAddr) + elif x.sizeof == 4: bigEndian32(result.addr, x.unsafeAddr) + elif x.sizeof == 2: bigEndian16(result.addr, x.unsafeAddr) + elif x.sizeof == 1: copyMem(result.addr, x.unsafeAddr, sizeof(result)) + else: {.fatal: "Unsupported type serialization: " & $(type(x)).name.} + +func toBytesSSZ(x: Uint24): array[3, byte] = + ## Integers are all encoded as bigendian and not padded + let v = x.uint32 + result[2] = byte(v and 0xff) + result[1] = byte((v shr 8) and 0xff) + result[0] = byte((v shr 16) and 0xff) + +func toBytesSSZ(x: EthAddress): array[sizeof(x), byte] = x +func toBytesSSZ(x: MDigest[32*8]): array[32, byte] = x.data + +func fromBytesSSZUnsafe(T: typedesc, data: ptr byte): T = + ## Integers are all encoded as bigendian and not padded + ## Assumes no buffer overruns! + + # XXX: any better way to get a suitably aligned buffer in nim??? + # see also: https://github.com/nim-lang/Nim/issues/9206 + var tmp: uint64 + var alignedBuf = cast[ptr byte](tmp.addr) + copyMem(alignedBuf, data, result.sizeof) + + when result.sizeof == 8: bigEndian64(result.addr, alignedBuf) + elif result.sizeof == 4: bigEndian32(result.addr, alignedBuf) + elif result.sizeof == 2: bigEndian16(result.addr, alignedBuf) + elif result.sizeof == 1: copyMem(result.addr, alignedBuf, sizeof(result)) + else: {.fatal: "Unsupported type deserialization: " & $(type(result)).name.} + +func `+`[T](p: ptr T, offset: int): ptr T = ## Pointer arithmetic: Addition const size = sizeof T cast[ptr T](cast[ByteAddress](p) +% offset * size) @@ -23,31 +61,16 @@ func eat(x: var auto, data: ptr byte, pos: var int, len: int): bool = inc pos, x.sizeof return true -func eatInt[T: SomeInteger or byte](x: var T, data: ptr byte, pos: var int, len: int): +func eatInt[T: SomeInteger](x: var T, data: ptr byte, pos: var int, len: int): bool = if pos + x.sizeof > len: return - # XXX: any better way to get a suitably aligned buffer in nim??? - # see also: https://github.com/nim-lang/Nim/issues/9206 - var tmp: uint64 - var alignedBuf = cast[ptr byte](tmp.addr) - copyMem(alignedBuf, data + pos, x.sizeof) - - when x.sizeof == 8: - bigEndian64(x.addr, alignedBuf) - elif x.sizeof == 4: - bigEndian32(x.addr, alignedBuf) - elif x.sizeof == 2: - bigEndian16(x.addr, alignedBuf) - elif x.sizeof == 1: - x = cast[ptr type x](alignedBuf)[] - else: - {.fatal: "Unsupported type deserialization: " & $(type(x)).name.} + x = T.fromBytesSSZUnsafe(data + pos) inc pos, x.sizeof return true -func eatSeq[T: SomeInteger or byte](x: var seq[T], data: ptr byte, pos: var int, +func eatSeq[T: SomeInteger](x: var seq[T], data: ptr byte, pos: var int, len: int): bool = var items: int32 if not eatInt(items, data, pos, len): return @@ -58,27 +81,13 @@ func eatSeq[T: SomeInteger or byte](x: var seq[T], data: ptr byte, pos: var int, discard eatInt(val, data, pos, len) # Bounds-checked above return true -func serInt[T: SomeInteger or byte](dest: var seq[byte], src: T) {.inline.}= - # XXX: any better way to get a suitably aligned buffer in nim??? - var tmp: T - var alignedBuf = cast[ptr array[src.sizeof, byte]](tmp.addr) - when src.sizeof == 8: - bigEndian64(alignedBuf, src.unsafeAddr) - elif src.sizeof == 4: - bigEndian32(alignedBuf, src.unsafeAddr) - elif src.sizeof == 2: - bigEndian16(alignedBuf, src.unsafeAddr) - elif src.sizeof == 1: - copyMem(alignedBuf, src.unsafeAddr, src.sizeof) # careful, aliasing.. - else: - {.fatal: "Unsupported type deserialization: " & $(type(x)).name.} +func serInt(dest: var seq[byte], x: SomeInteger) = + dest.add x.toBytesSSZ() - dest.add alignedBuf[] - -func serSeq[T: SomeInteger or byte](dest: var seq[byte], src: seq[T]) = +func serSeq(dest: var seq[byte], src: seq[SomeInteger]) = dest.serInt src.len.uint32 for val in src: - dest.serInt(val) + dest.add val.toBytesSSZ() # ################### Core functions ################################### func deserialize(data: ptr byte, pos: var int, len: int, typ: typedesc[object]): @@ -105,13 +114,157 @@ func deserialize*( func serialize*[T](value: T): seq[byte] = for field in value.fields: - when field is EthAddress: - result.add field - elif field is MDigest: - result.add field.data - elif field is (SomeInteger or byte): - result.serInt field + when field is (EthAddress | MDigest | SomeInteger): + result.add field.toBytesSSZ() elif field is seq[SomeInteger or byte]: result.serSeq field else: # TODO: Serializing subtypes (?, depends on final spec) {.fatal: "Unsupported type serialization: " & $typ.name.} + +# ################### Hashing ################################### + +# Sample hashSSZ implementation based on: +# https://github.com/ethereum/eth2.0-specs/blob/98312f40b5742de6aa73f24e6225ee68277c4614/specs/simple-serialize.md +# and +# https://github.com/ethereum/beacon_chain/pull/134 +# Probably wrong - the spec is pretty bare-bones and no test vectors yet + +const CHUNK_SIZE = 128 + +# ################### Hashing helpers ################################### + +template withHash(body: untyped): untyped = + ## Spec defines hash as BLAKE2b-512(x)[0:32] + ## This little helper will init the hash function and return the sliced + ## hash: + ## let hashOfData = withHash: h.update(data) + var h {.inject.}: blake2_512 + h.init() + body + var res: array[32, byte] + var tmp = h.finish().data + copyMem(res.addr, tmp.addr, 32) + res + +# XXX varargs openarray, anyone? +func hash(a: openArray[byte]): array[32, byte] = + withHash: + h.update(a) + +func hash(a, b: openArray[byte]): array[32, byte] = + withHash: + h.update(a) + h.update(b) + +# XXX: er, how is this _actually_ done? +func empty(T: typedesc): T = discard +const emptyChunk = @(empty(array[CHUNK_SIZE, byte])) + +func merkleHash[T](lst: seq[T]): array[32, byte] + +# ################### Hashing interface ################################### + +func hashSSZ*(x: SomeInteger): array[sizeof(x), byte] = + ## Integers area all encoded as bigendian and not padded + toBytesSSZ(x) + +func hashSSZ*(x: Uint24): array[3, byte] = + ## Integers area all encoded as bigendian and not padded + toBytesSSZ(x) + +func hashSSZ*(x: EthAddress): array[sizeof(x), byte] = + ## Addresses copied as-is + toBytesSSZ(x) + +func hashSSZ*(x: MDigest[32*8]): array[32, byte] = + ## Hash32 copied as-is + toBytesSSZ(x) + +func hashSSZ*(x: openArray[byte]): array[32, byte] = + ## Blobs are hashed + hash(x) + +func hashSSZ*(x: ValidatorRecord): array[32, byte] = + ## Containers have their fields recursivel hashed, concatenated and hashed + # XXX hash_ssz.py code contains special cases for some types, why? + withHash: + # tmp.add(x.pubkey) # XXX our code vs spec! + h.update hashSSZ(x.withdrawal_shard) + h.update hashSSZ(x.withdrawal_address) + h.update hashSSZ(x.randao_commitment) + h.update hashSSZ(x.randao_last_change) + h.update hashSSZ(x.balance) # XXX our code vs spec! + # h.update hashSSZ(x.status) # XXX it's an enum, deal with it + h.update hashSSZ(x.exit_slot) + +func hashSSZ*(x: ShardAndCommittee): array[32, byte] = + return withHash: + h.update hashSSZ(x.shard_id) + h.update merkleHash(x.committee) + +func hashSSZ*[T](x: T): array[32, byte] = + when T is seq: + ## Sequences are tree-hashed + return merkleHash(x) + else: + ## Containers have their fields recursivel hashed, concatenated and hashed + # XXX could probaby compile-time-macro-sort fields... + var fields: seq[tuple[name: string, value: seq[byte]]] + for name, field in x.fieldPairs: + fields.add (name, hashSSZ(field)) + + return withHash: + for name, value in fields.sortedByIt(it.name): + h.update hashSSZ(value.value) + +# ################### Tree hash ################################### + +func merkleHash[T](lst: seq[T]): array[32, byte] = + ## Merkle tree hash of a list of homogenous, non-empty items + + # XXX: the heap allocations here can be avoided by computing the merkle tree + # recursively, but for now keep things simple and aligned with upstream + + # Store length of list (to compensate for non-bijectiveness of padding) + var dataLen: array[32, byte] + var lstLen = uint64(len(lst)) + bigEndian64(dataLen[32-8].addr, lstLen.addr) + + # Divide into chunks + var chunkz: seq[seq[byte]] + + if len(lst) == 0: + chunkz.add emptyChunk + elif sizeof(hashSSZ(lst[0])) < CHUNK_SIZE: + # See how many items fit in a chunk + let itemsPerChunk = CHUNK_SIZE div sizeof(hashSSZ(lst[0])) + + chunkz.setLen((len(lst) + itemsPerChunk - 1) div itemsPerChunk) + + # Build a list of chunks based on the number of items in the chunk + for i in 0.. 1: + if chunkz.len() mod 2 == 1: + chunkz.add emptyChunk + for i in 0..<(chunkz.len div 2): + # As tradition dictates - one feature, at least one nim bug: + # https://github.com/nim-lang/Nim/issues/9684 + let tmp = @(hash(chunkz[i * 2], chunkz[i * 2 + 1])) + chunkz[i] = tmp + + chunkz.setLen(chunkz.len div 2) + + if chunkz.len == 0: + const empty32 = empty(array[32, byte]) + result = hash(empty32, dataLen) + return + + result = hash(chunkz[0], dataLen) diff --git a/tests/test_hash_ssz.nim b/tests/test_hash_ssz.nim deleted file mode 100644 index 541ebbcfe..000000000 --- a/tests/test_hash_ssz.nim +++ /dev/null @@ -1,24 +0,0 @@ -# beacon_chain -# Copyright (c) 2018 Status Research & Development GmbH -# Licensed and distributed under either of -# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). -# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). -# at your option. This file may not be copied, modified, or distributed except according to those terms. - -import - unittest, - ../beacon_chain/[datatypes, hash_ssz] - -suite "Tree hashing": - # XXX Nothing but smoke tests for now.. - - test "Hash ValidatorRecord": - let vr = ValidatorRecord() - check: hashSSZ(vr).len > 0 - - test "Hash ShardAndCommittee": - let sc = ShardAndCommittee() - check: hashSSZ(sc).len > 0 - - test "Hash integer": - check: hashSSZ(0x01'u32) == [0'u8, 0, 0, 1] # big endian! diff --git a/tests/test_ssz.nim b/tests/test_ssz.nim index 453499603..e301dc95a 100644 --- a/tests/test_ssz.nim +++ b/tests/test_ssz.nim @@ -7,7 +7,7 @@ import unittest, nimcrypto, eth_common, sequtils, options, - ../beacon_chain/ssz + ../beacon_chain/[datatypes, ssz] func filled[N: static[int], T](typ: type array[N, T], value: T): array[N, T] = for val in result.mitems: @@ -58,3 +58,18 @@ suite "Simple serialization": check: expected_ser[0..^2].deserialize(Foo).isNone() expected_ser[1..^1].deserialize(Foo).isNone() + +suite "Tree hashing": + # XXX Nothing but smoke tests for now.. + + test "Hash ValidatorRecord": + let vr = ValidatorRecord() + check: hashSSZ(vr).len > 0 + + test "Hash ShardAndCommittee": + let sc = ShardAndCommittee() + check: hashSSZ(sc).len > 0 + + test "Hash integer": + check: hashSSZ(0x01'u32) == [0'u8, 0, 0, 1] # big endian! + check: hashSSZ(Uint24(0x01)) == [0'u8, 0, 1] # big endian! From 7e243d57cda58c369a1e3e19d15297fc0b04f0f4 Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Tue, 20 Nov 2018 11:35:11 -0600 Subject: [PATCH 2/2] ssz: documentation updates, fix fromBytesSSZ type constraint --- beacon_chain/ssz.nim | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/beacon_chain/ssz.nim b/beacon_chain/ssz.nim index fb7e36a6c..344a7cdf8 100644 --- a/beacon_chain/ssz.nim +++ b/beacon_chain/ssz.nim @@ -16,7 +16,8 @@ import ./datatypes, eth_common, endians, typetraits, options, nimcrypto func len(x: Uint24): int = 3 func toBytesSSZ(x: SomeInteger): array[sizeof(x), byte] = - ## Integers are all encoded as bigendian and not padded + ## Convert directly to bytes the size of the int. (e.g. ``uint16 = 2 bytes``) + ## All integers are serialized as **big endian**. when x.sizeof == 8: bigEndian64(result.addr, x.unsafeAddr) elif x.sizeof == 4: bigEndian32(result.addr, x.unsafeAddr) @@ -34,9 +35,10 @@ func toBytesSSZ(x: Uint24): array[3, byte] = func toBytesSSZ(x: EthAddress): array[sizeof(x), byte] = x func toBytesSSZ(x: MDigest[32*8]): array[32, byte] = x.data -func fromBytesSSZUnsafe(T: typedesc, data: ptr byte): T = - ## Integers are all encoded as bigendian and not padded - ## Assumes no buffer overruns! +func fromBytesSSZUnsafe(T: typedesc[SomeInteger], data: ptr byte): T = + ## Convert directly to bytes the size of the int. (e.g. ``uint16 = 2 bytes``) + ## All integers are serialized as **big endian**. + ## XXX: Assumes data points to a sufficiently large buffer # XXX: any better way to get a suitably aligned buffer in nim??? # see also: https://github.com/nim-lang/Nim/issues/9206 @@ -165,11 +167,13 @@ func merkleHash[T](lst: seq[T]): array[32, byte] # ################### Hashing interface ################################### func hashSSZ*(x: SomeInteger): array[sizeof(x), byte] = - ## Integers area all encoded as bigendian and not padded + ## Convert directly to bytes the size of the int. (e.g. ``uint16 = 2 bytes``) + ## All integers are serialized as **big endian**. toBytesSSZ(x) func hashSSZ*(x: Uint24): array[3, byte] = - ## Integers area all encoded as bigendian and not padded + ## Convert directly to bytes the size of the int. (e.g. ``uint16 = 2 bytes``) + ## All integers are serialized as **big endian**. toBytesSSZ(x) func hashSSZ*(x: EthAddress): array[sizeof(x), byte] = @@ -185,15 +189,15 @@ func hashSSZ*(x: openArray[byte]): array[32, byte] = hash(x) func hashSSZ*(x: ValidatorRecord): array[32, byte] = - ## Containers have their fields recursivel hashed, concatenated and hashed + ## Containers have their fields recursively hashed, concatenated and hashed # XXX hash_ssz.py code contains special cases for some types, why? withHash: - # tmp.add(x.pubkey) # XXX our code vs spec! + # tmp.add(x.pubkey) # XXX uncertain future of public key format h.update hashSSZ(x.withdrawal_shard) h.update hashSSZ(x.withdrawal_address) h.update hashSSZ(x.randao_commitment) h.update hashSSZ(x.randao_last_change) - h.update hashSSZ(x.balance) # XXX our code vs spec! + h.update hashSSZ(x.balance) # h.update hashSSZ(x.status) # XXX it's an enum, deal with it h.update hashSSZ(x.exit_slot) @@ -207,7 +211,7 @@ func hashSSZ*[T](x: T): array[32, byte] = ## Sequences are tree-hashed return merkleHash(x) else: - ## Containers have their fields recursivel hashed, concatenated and hashed + ## Containers have their fields recursively hashed, concatenated and hashed # XXX could probaby compile-time-macro-sort fields... var fields: seq[tuple[name: string, value: seq[byte]]] for name, field in x.fieldPairs: