ssz: finish implementation (#42)

* ssz: finish implementation

* add object support, simplify implementation
* fix extra round of hashing in tree_hash_root

* ssz: cleanups

* work around Nim range bug for Uint24, cleanups
This commit is contained in:
Jacek Sieka 2018-12-17 12:03:53 -06:00 committed by Mamy Ratsimbazafy
parent 5dc06f4496
commit 142aa8ca8e
6 changed files with 223 additions and 77 deletions

View File

@ -277,7 +277,7 @@ type
latest_crosslinks*: array[SHARD_COUNT, CrosslinkRecord]
latest_state_recalculation_slot*: uint64
latest_block_roots*: array[LATEST_BLOCK_ROOTS_COUNT, Eth2Digest] ##\
latest_block_roots*: array[LATEST_BLOCK_ROOTS_COUNT.int, Eth2Digest] ##\
## Needed to process attestations, older to newer
latest_penalized_exit_balances*: seq[uint64] ##\
## Balances penalized in the current withdrawal period

View File

@ -6,20 +6,18 @@
# at your option. This file may not be copied, modified, or distributed except according to those terms.
# SSZ Serialization (simple serialize)
# See https://github.com/ethereum/beacon_chain/issues/100
# and https://github.com/ethereum/beacon_chain/tree/master/ssz
# See https://github.com/ethereum/eth2.0-specs/blob/master/specs/simple-serialize.md
import
endians, typetraits, options, algorithm,
eth_common, nimcrypto/blake2,
./spec/[crypto, datatypes, digest]
from milagro_crypto import getRaw
from milagro_crypto import getRaw, fromRaw
# ################### Helper functions ###################################
func len(x: Uint24): int =
  ## Width of a ``Uint24`` in bytes; the value itself is not inspected.
  3
# toBytesSSZ convert simple fixed-length types to their SSZ wire representation
func toBytesSSZ(x: SomeInteger): array[sizeof(x), byte] =
## Convert directly to bytes the size of the int. (e.g. ``uint16 = 2 bytes``)
## All integers are serialized as **big endian**.
@ -40,7 +38,36 @@ func toBytesSSZ(x: Uint24): array[3, byte] =
func toBytesSSZ(x: EthAddress): array[sizeof(x), byte] =
  ## The wire form of an address is the address value itself, returned as-is.
  result = x
func toBytesSSZ(x: Eth2Digest): array[32, byte] =
  ## The wire form of a digest is its 32-byte ``data`` field.
  result = x.data
func fromBytesSSZUnsafe(T: typedesc[SomeInteger], data: ptr byte): T =
# TODO these two are still being debated:
# https://github.com/ethereum/eth2.0-specs/issues/308#issuecomment-447026815
func toBytesSSZ(x: ValidatorPubKey|ValidatorSig): auto =
  ## Keys and signatures serialize to whatever ``getRaw`` returns for them.
  getRaw(x)
type TrivialTypes =
# Types that serialize down to a fixed-length array - most importantly, these
# values don't carry a length prefix in the final encoding. toBytesSSZ
# provides the actual Nim-type-to-bytes conversion.
# TODO think about this for a bit - depends on where the serialization of
# validator keys ends up going.
# TODO can't put ranges like Uint24 in here:
# https://github.com/nim-lang/Nim/issues/10027
SomeInteger | EthAddress | Eth2Digest | ValidatorPubKey | ValidatorSig
func sszLen(v: TrivialTypes): int =
  ## Encoded size of ``v`` in bytes: the length of its ``toBytesSSZ`` output.
  let encoded = toBytesSSZ(v)
  encoded.len
func sszLen(v: Uint24): int =
  ## Encoded size of a ``Uint24`` in bytes: the length of its ``toBytesSSZ``
  ## output.
  let encoded = toBytesSSZ(v)
  encoded.len
func sszLen(v: object | tuple): int =
  ## Encoded size of an object or tuple in bytes: a 4-byte length prefix plus
  ## the encoded size of every field.
  result = 4 # Length
  for field in v.fields:
    # Measure the field value itself: the ``sszLen`` overloads all take a
    # value, so passing ``type field`` (a typedesc) would not resolve.
    result += sszLen(field)
func sszLen(v: seq | array): int =
  ## Encoded size of a sequence or array in bytes: a 4-byte length prefix plus
  ## the encoded size of each element.
  const prefixLen = 4 # Length
  result = prefixLen
  for item in v:
    inc result, sszLen(item)
# fromBytesSSZUnsafe copy wire representation to a Nim variable, assuming
# there's enough data in the buffer
func fromBytesSSZUnsafe(T: typedesc[SomeInteger], data: pointer): T =
## Convert directly to bytes the size of the int. (e.g. ``uint16 = 2 bytes``)
## All integers are serialized as **big endian**.
## TODO: Assumes data points to a sufficiently large buffer
@ -57,76 +84,141 @@ func fromBytesSSZUnsafe(T: typedesc[SomeInteger], data: ptr byte): T =
elif result.sizeof == 1: copyMem(result.addr, alignedBuf, sizeof(result))
else: {.fatal: "Unsupported type deserialization: " & $(type(result)).name.}
func `+`[T](p: ptr T, offset: int): ptr T =
  ## Pointer arithmetic: returns ``p`` advanced by ``offset`` elements of
  ## type ``T`` (i.e. ``offset * sizeof(T)`` bytes).
  let
    base = cast[ByteAddress](p)
    byteOffset = offset * sizeof(T)
  result = cast[ptr T](base +% byteOffset)
func fromBytesSSZUnsafe(T: typedesc[Uint24], data: pointer): T =
  ## Integers are all encoded as bigendian and not padded: the three bytes at
  ## ``data`` are combined most-significant first.
  let bytes = cast[ptr UncheckedArray[byte]](data)
  let value =
    (uint32(bytes[0]) shl 16) or
    (uint32(bytes[1]) shl 8) or
    uint32(bytes[2])
  result = value.Uint24
func eat(x: var auto, data: ptr byte, pos: var int, len: int): bool =
  ## Copies ``sizeof(x)`` raw bytes from ``data`` at ``pos`` into ``x`` and
  ## advances ``pos``. Returns false, touching nothing, when fewer than
  ## ``sizeof(x)`` bytes remain before ``len``.
  let size = x.sizeof
  if pos + size <= len:
    copyMem(x.addr, data + pos, size)
    inc pos, size
    result = true
func fromBytesSSZUnsafe(T: typedesc[EthAddress], data: pointer): T =
  ## Fills the result by copying ``sizeof(T)`` raw bytes from ``data``; no
  ## bounds check is performed here.
  const width = sizeof(T)
  copyMem(result.addr, data, width)
func eatInt[T: SomeInteger](x: var T, data: ptr byte, pos: var int, len: int):
bool =
if pos + x.sizeof > len: return
func fromBytesSSZUnsafe(T: typedesc[Eth2Digest], data: pointer): T =
  ## Fills the digest's ``data`` field by copying its size in raw bytes from
  ## ``data``; no bounds check is performed here.
  let width = sizeof(result.data)
  copyMem(result.data.addr, data, width)
x = T.fromBytesSSZUnsafe(data + pos)
proc deserialize[T: TrivialTypes](
    dest: var T, offset: var int, data: openArray[byte]): bool =
  ## Reads one fixed-length value from ``data`` at ``offset`` into ``dest``.
  ## On success ``offset`` is advanced by ``sszLen(dest)`` and true is
  ## returned; on failure false is returned and ``offset`` is left untouched.
  # TODO proc because milagro is problematic
  if offset + sszLen(dest) > data.len():
    return false

  when T is (ValidatorPubKey|ValidatorSig):
    # Keys and signatures are decoded by ``fromRaw`` from the remaining bytes
    if not T.fromRaw(data[offset..data.len-1], dest):
      return false
  else:
    dest = fromBytesSSZUnsafe(T, data[offset].unsafeAddr)

  offset += sszLen(dest)
  true
inc pos, x.sizeof
return true
func deserialize(
    dest: var Uint24, offset: var int, data: openArray[byte]): bool =
  ## Reads a ``Uint24`` from ``data`` at ``offset``, advancing ``offset`` past
  ## it. Returns false, changing nothing, when too few bytes remain.
  if offset + sszLen(dest) <= data.len():
    dest = fromBytesSSZUnsafe(Uint24, data[offset].unsafeAddr)
    offset += sszLen(dest)
    result = true
func eatSeq[T: SomeInteger](x: var seq[T], data: ptr byte, pos: var int,
len: int): bool =
var items: int32
if not eatInt(items, data, pos, len): return
if pos + T.sizeof * items > len: return
func deserialize[T: enum](dest: var T, offset: var int, data: openArray[byte]): bool =
  ## Reads an enum that was written as a ``uint64``; ``dest`` is only assigned
  ## when the integer itself could be read.
  # TODO er, verify the size here, probably an uint64 but...
  var raw: uint64
  result = deserialize(raw, offset, data)
  if result:
    # TODO what to do with out-of-range values?? rejecting means breaking
    #      forwards compatibility..
    dest = cast[T](raw)
x = newSeqUninitialized[T](items)
for val in x.mitems:
discard eatInt(val, data, pos, len) # Bounds-checked above
return true
proc deserialize[T: not (enum|TrivialTypes|Uint24)](
    dest: var T, offset: var int, data: openArray[byte]): bool =
  ## Reads a length-prefixed container (seq, array or object) from ``data``
  ## at ``offset`` into ``dest``.
  ##
  ## The encoding is a ``uint32`` byte length followed by the items. Returns
  ## false when the prefix cannot be read, when the declared length runs past
  ## the end of ``data``, when any item fails to decode, when an array payload
  ## holds more items than the array has slots, or when the items do not
  ## consume exactly the declared number of bytes. ``offset`` and ``dest`` may
  ## have been partially updated when false is returned.
  # Length in bytes, followed by each item
  var totalLen: uint32
  if not deserialize(totalLen, offset, data): return false

  if offset + totalLen.int > data.len(): return false

  let itemEnd = offset + totalLen.int
  when T is seq:
    # Items are of homogeneous type, but not necessarily homogeneous length,
    # cannot pre-allocate item list generically
    while offset < itemEnd:
      dest.setLen dest.len + 1
      if not deserialize(dest[^1], offset, data): return false
  elif T is array:
    var i = 0
    while offset < itemEnd:
      # Check the slot exists *before* touching it: surplus payload must be
      # rejected rather than indexing one past the end of the array.
      if i >= dest.len: return false
      if not deserialize(dest[i], offset, data): return false
      i += 1
  else:
    for field in dest.fields:
      if not deserialize(field, offset, data): return false

  # The items must account for exactly the declared length; an item that ran
  # past ``itemEnd`` means the length prefix and the payload disagree.
  if offset != itemEnd: return false

  true

func serInt(dest: var seq[byte], x: SomeInteger) =
  dest.add x.toBytesSSZ()

func serSeq(dest: var seq[byte], src: seq[SomeInteger]) =
  dest.serInt src.len.uint32
  for val in src:
    dest.add val.toBytesSSZ()
func serialize(dest: var seq[byte], src: TrivialTypes) =
  ## Appends the ``toBytesSSZ`` encoding of ``src`` to ``dest``; no length
  ## prefix is written.
  let encoded = src.toBytesSSZ()
  dest.add encoded
func serialize(dest: var seq[byte], src: Uint24) =
  ## Appends the ``toBytesSSZ`` encoding of a ``Uint24`` to ``dest``; no
  ## length prefix is written.
  let encoded = src.toBytesSSZ()
  dest.add encoded
func serialize(dest: var seq[byte], x: enum) =
  ## Enums are written as the ``uint64`` conversion of their value.
  # TODO er, verify the size here, probably an uint64 but...
  let asInt = uint64(x)
  dest.serialize asInt
func serialize[T: not enum](dest: var seq[byte], src: T) =
  ## Appends the length-prefixed encoding of a container (seq, array, object
  ## or tuple) to ``dest``: a big-endian ``uint32`` byte count followed by the
  ## encoding of every item or field, in iteration order.
  let lenPos = dest.len()

  # Length is a prefix, so we'll put a dummy 0 here and fill it after
  # serializing
  dest.add toBytesSSZ(0'u32)

  when T is seq|array:
    # If you get an error here that looks like:
    # type mismatch: got <type range 0..8191(uint64)>
    # you just used an unsigned int for an array index thinking you'd get
    # away with it (surprise, surprise: you can't, uints are crippled!)
    # https://github.com/nim-lang/Nim/issues/9984
    for val in src:
      serialize dest, val
  else:
    # TODO to sort, or not to sort, that is the question:
    # TODO or.. https://github.com/ethereum/eth2.0-specs/issues/275
    when defined(debugFieldSizes) and T is (BeaconState | BeaconBlock):
      # for research/serialized_sizes, remove when appropriate
      for name, field in src.fieldPairs:
        let start = dest.len()
        serialize dest, field
        let sz = dest.len() - start
        debugEcho(name, ": ", sz)
    else:
      for field in src.fields:
        serialize dest, field

  # Write size (we only know it once we've serialized the object!)
  # Narrow to uint32 before taking the address: bigEndian32 reads exactly
  # four bytes from its source pointer, and the first four bytes of a wider
  # ``int`` are only the low-order ones on little-endian hosts.
  var objLen = uint32(dest.len() - lenPos - 4)
  bigEndian32(dest[lenPos].addr, objLen.addr)
# ################### Core functions ###################################
func deserialize(data: ptr byte, pos: var int, len: int, typ: typedesc[object]):
auto =
## Decodes the fields of a ``typ`` one after another from ``data``, with
## ``pos`` and ``len`` passed through to the per-field readers. Returns
## ``some(t)`` once every field has been read.
## A failed read exits through a bare ``return``, i.e. with the
## default-initialized result - presumably an empty ``Option``; TODO confirm.
var t: typ
for field in t.fields:
# Compile-time dispatch on the field type selects the matching reader
when field is EthAddress | Eth2Digest:
if not eat(field, data, pos, len): return
elif field is (SomeInteger or byte):
if not eatInt(field, data, pos, len): return
elif field is seq[SomeInteger or byte]:
if not eatSeq(field, data, pos, len): return
else: # TODO: deserializing subtypes (?, depends on final spec)
# Any other field type is rejected at compile time
{.fatal: "Unsupported type deserialization: " & $typ.name.}
return some(t)
func deserialize*(
data: seq[byte or uint8] or openarray[byte or uint8] or string,
typ: typedesc[object]): auto {.inline.} =
proc deserialize*(data: openArray[byte],
typ: typedesc): auto {.inline.} =
# TODO: returns Option[typ]: https://github.com/nim-lang/Nim/issues/9195
var pos = 0
return deserialize((ptr byte)(data[0].unsafeAddr), pos, data.len, typ)
var ret: typ
var offset: int
if not deserialize(ret, offset, data): none(typ)
else: some(ret)
func serialize*[T](value: T): seq[byte] =
for field in value.fields:
when field is (EthAddress | Eth2Digest | SomeInteger):
result.add field.toBytesSSZ()
elif field is seq[SomeInteger or byte]:
result.serSeq field
else: # TODO: Serializing subtypes (?, depends on final spec)
{.fatal: "Unsupported type serialization: " & $typ.name.}
# TODO Fields should be sorted, but...
serialize(result, value)
# ################### Hashing ###################################
@ -199,7 +291,7 @@ func hash_tree_root*[T: not enum](x: T): array[32, byte] =
withHash:
for name, value in fields.sortedByIt(it.name):
h.update hash_tree_root(value.value)
h.update value.value
# #################################
# hash_tree_root not part of official spec
@ -271,9 +363,4 @@ func merkleHash[T](lst: openArray[T]): array[32, byte] =
chunkz.setLen(chunkz.len div 2)
if chunkz.len == 0:
const empty32 = empty(array[32, byte])
result = hash(empty32, dataLen)
return
result = hash(chunkz[0], dataLen)

View File

@ -0,0 +1,11 @@
import
../beacon_chain/[ssz],
../beacon_chain/spec/[beaconstate, digest],
../tests/testutil
proc stateSize(deposits: int) =
  ## Builds a startup state from ``deposits`` initial deposits and prints the
  ## length of its serialized form.
  let
    state = on_startup(makeInitialDeposits(deposits), 0, Eth2Digest())
    encoded = state.serialize()
  echo "Validators: ", deposits, ", total: ", encoded.len
stateSize(1000)

View File

@ -0,0 +1 @@
-d:debugFieldSizes

View File

@ -6,7 +6,7 @@
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
unittest, nimcrypto, eth_common, sequtils, options,
unittest, nimcrypto, eth_common, sequtils, options, milagro_crypto,
../beacon_chain/ssz, ../beacon_chain/spec/datatypes
func filled[N: static[int], T](typ: type array[N, T], value: T): array[N, T] =
@ -20,8 +20,6 @@ func filled(T: type MDigest, value: byte): T =
suite "Simple serialization":
# pending spec updates in
# - https://github.com/ethereum/eth2.0-specs
# - https://github.com/ethereum/beacon_chain/blob/master/tests/ssz/test_deserialize.py
# - https://github.com/ethereum/beacon_chain/tree/master/ssz
type
Foo = object
f0: uint8
@ -29,36 +27,81 @@ suite "Simple serialization":
f2: EthAddress
f3: MDigest[256]
f4: seq[byte]
f5: Uint24
let expected_deser = Foo(
f0: 5,
f1: 0'u32 - 3,
f2: EthAddress.filled(byte 35),
f3: MDigest[256].filled(byte 35),
f4: @[byte 'c'.ord, 'o'.ord, 'w'.ord]
f4: @[byte 'c'.ord, 'o'.ord, 'w'.ord],
f5: Uint24(79)
)
var expected_ser = @[
byte 5,
byte 0, 0, 0, 67, # length
5,
'\xFF'.ord, '\xFF'.ord, '\xFF'.ord, '\xFD'.ord,
]
expected_ser &= EthAddress.filled(byte 35)
expected_ser &= MDigest[256].filled(byte 35).data
expected_ser &= [byte 0, 0, 0, 3, 'c'.ord, 'o'.ord, 'w'.ord]
expected_ser &= [byte 0, 0, 79]
test "Deserialization":
test "Object deserialization":
let deser = expected_ser.deserialize(Foo).get()
check: expected_deser == deser
test "Serialization":
test "Object serialization":
let ser = expected_deser.serialize()
check: expected_ser == ser
test "Overflow":
test "Not enough data":
check:
expected_ser[0..^2].deserialize(Foo).isNone()
expected_ser[1..^1].deserialize(Foo).isNone()
test "Uint24 roundtrip":
# https://github.com/nim-lang/Nim/issues/10027
let v = 79.Uint24
let ser = v.serialize()
check:
ser.len() == 3
deserialize(ser, type(v)).get() == v
test "Array roundtrip":
let v = [1, 2, 3]
let ser = v.serialize()
check:
deserialize(ser, type(v)).get() == v
test "Seq roundtrip":
let v = @[1, 2, 3]
let ser = v.serialize()
check:
deserialize(ser, type(v)).get() == v
test "Key roundtrip":
let v = newSigKey().fromSigKey()
let ser = v.serialize()
check:
deserialize(ser, type(v)).get() == v
# Just to see that we can serialize stuff at all
test "Roundtrip main eth2 types":
let
bb = BeaconBlock(
slot: 42,
signature: signMessage(newSigKey(), "")
)
bs = BeaconState(slot: 42)
check:
bb.serialize().deserialize(BeaconBlock).get() == bb
bs.serialize().deserialize(BeaconState).get() == bs
suite "Tree hashing":
# TODO Nothing but smoke tests for now..

View File

@ -73,6 +73,10 @@ func makeGenesisBlock*(state: BeaconState): BeaconBlock =
func makeBlock*(
state: BeaconState, latest_block: BeaconBlock): BeaconBlock =
# TODO: this works but looks wrong - we update the slot in the state without
# updating corresponding data - this works because the state update
# code does the same - updates slot, then uses that slot when calling
# beacon_proposer_index, then finally updates the shuffling at the end!
var next_state = state
next_state.slot += 1
let