# beacon_chain # Copyright (c) 2018 Status Research & Development GmbH # Licensed and distributed under either of # * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). # * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). # at your option. This file may not be copied, modified, or distributed except according to those terms. # SSZ Serialization (simple serialize) # See https://github.com/ethereum/eth2.0-specs/blob/master/specs/simple-serialize.md import endians, typetraits, options, algorithm, math, faststreams/input_stream, serialization, eth/common, nimcrypto/keccak, ./spec/[bitfield, crypto, datatypes, digest] # ################### Helper functions ################################### export serialization type SszReader* = object stream: ByteStreamVar SszWriter* = object stream: OutputStreamVar SszError* = object of SerializationError CorruptedDataError* = object of SszError RecordWritingMemo = object initialStreamPos: int sizePrefixCursor: DelayedWriteCursor serializationFormat SSZ, Reader = SszReader, Writer = SszWriter, PreferedOutput = seq[byte] proc init*(T: type SszReader, stream: ByteStreamVar): T = result.stream = stream func toSSZType(x: Slot|Epoch): auto = x.uint64 func toSSZType(x: auto): auto = x # toBytesSSZ convert simple fixed-length types to their SSZ wire representation func toBytesSSZ(x: SomeInteger): array[sizeof(x), byte] = ## Convert directly to bytes the size of the int. (e.g. ``uint16 = 2 bytes``) ## All integers are serialized as **little endian**. when x.sizeof == 8: littleEndian64(result.addr, x.unsafeAddr) elif x.sizeof == 4: littleEndian32(result.addr, x.unsafeAddr) elif x.sizeof == 2: littleEndian16(result.addr, x.unsafeAddr) elif x.sizeof == 1: copyMem(result.addr, x.unsafeAddr, sizeof(result)) else: {.fatal: "Unsupported type serialization: " & $(type(x)).name.} func toBytesSSZ(x: ValidatorIndex): array[3, byte] = ## Integers are all encoded as little endian and not padded let v = x.uint32 result[0] = byte(v and 0xff) result[1] = byte((v shr 8) and 0xff) result[2] = byte((v shr 16) and 0xff) func toBytesSSZ(x: bool): array[1, byte] = [if x: 1'u8 else: 0'u8] func toBytesSSZ(x: EthAddress): array[sizeof(x), byte] = x func toBytesSSZ(x: Eth2Digest): array[32, byte] = x.data # TODO these two are still being debated: # https://github.com/ethereum/eth2.0-specs/issues/308#issuecomment-447026815 func toBytesSSZ(x: ValidatorPubKey|ValidatorSig): auto = x.getBytes() type BasicType = # Types that serialize down to a fixed-length array - most importantly, # these values don't carry a length prefix in the final encoding. toBytesSSZ # provides the actual nim-type-to-bytes conversion. # TODO think about this for a bit - depends where the serialization of # validator keys ends up going.. # TODO can't put ranges like ValidatorIndex in here: # https://github.com/nim-lang/Nim/issues/10027 SomeInteger | EthAddress | Eth2Digest | ValidatorPubKey | ValidatorSig | bool | Slot | Epoch func sszLen(v: BasicType): int = toBytesSSZ(v.toSSZType()).len func sszLen(v: ValidatorIndex): int = toBytesSSZ(v).len func sszLen(v: object | tuple): int = result = 4 # Length for field in v.fields: result += sszLen(type field) func sszLen(v: seq | array): int = result = 4 # Length for i in v: result += sszLen(i) func sszLen(v: BitField): int = sszLen(v.bits) # fromBytesSSZ copies the wire representation to a Nim variable, # assuming there's enough data in the buffer func fromBytesSSZ(T: type SomeInteger, data: openarray[byte]): T = ## Convert directly to bytes the size of the int. (e.g. ``uint16 = 2 bytes``) ## All integers are serialized as **little endian**. ## TODO: Assumes data points to a sufficiently large buffer doAssert data.len == sizeof(result) # TODO: any better way to get a suitably aligned buffer in nim??? # see also: https://github.com/nim-lang/Nim/issues/9206 var tmp: uint64 var alignedBuf = cast[ptr byte](tmp.addr) copyMem(alignedBuf, unsafeAddr data[0], result.sizeof) when result.sizeof == 8: littleEndian64(result.addr, alignedBuf) elif result.sizeof == 4: littleEndian32(result.addr, alignedBuf) elif result.sizeof == 2: littleEndian16(result.addr, alignedBuf) elif result.sizeof == 1: copyMem(result.addr, alignedBuf, sizeof(result)) else: {.fatal: "Unsupported type deserialization: " & $(type(result)).name.} func fromBytesSSZ(T: type bool, data: openarray[byte]): T = # TODO: spec doesn't say what to do if the value is >1 - we'll use the C # definition for now, but maybe this should be a parse error instead? fromBytesSSZ(uint8, data) != 0 func fromBytesSSZ(T: type ValidatorIndex, data: openarray[byte]): T = ## Integers are all encoded as littleendian and not padded doAssert data.len == 3 var tmp: uint32 tmp = tmp or uint32(data[0]) tmp = tmp or uint32(data[1]) shl 8 tmp = tmp or uint32(data[2]) shl 16 result = tmp.ValidatorIndex func fromBytesSSZ(T: type EthAddress, data: openarray[byte]): T = doAssert data.len == sizeof(result) copyMem(result.addr, unsafeAddr data[0], sizeof(result)) func fromBytesSSZ(T: type Eth2Digest, data: openarray[byte]): T = doAssert data.len == sizeof(result.data) copyMem(result.data.addr, unsafeAddr data[0], sizeof(result.data)) proc init*(T: type SszWriter, stream: OutputStreamVar): T = result.stream = stream proc writeValue*(w: var SszWriter, obj: auto) # This is an alternative lower-level API useful for RPC # frameworks that can simulate the serialization of an # object without constructing an actual instance: proc beginRecord*(w: var SszWriter, T: type): RecordWritingMemo = result.initialStreamPos = w.stream.pos result.sizePrefixCursor = w.stream.delayFixedSizeWrite sizeof(uint32) template writeField*(w: var SszWriter, name: string, value: auto) = w.writeValue(value) proc endRecord*(w: var SszWriter, memo: RecordWritingMemo) = let finalSize = uint32(w.stream.pos - memo.initialStreamPos - 4) memo.sizePrefixCursor.endWrite(finalSize.toBytesSSZ) proc writeValue*(w: var SszWriter, obj: auto) = # We are not using overloads here, because this leads to # slightly better error messages when the user provides # additional overloads for `writeValue`. mixin writeValue when obj is ValidatorIndex|BasicType: w.stream.append obj.toSSZType().toBytesSSZ elif obj is enum: w.stream.append uint64(obj).toBytesSSZ else: let memo = w.beginRecord(obj.type) when obj is seq|array|openarray: # If you get an error here that looks like: # type mismatch: got # you just used an unsigned int for an array index thinking you'd get # away with it (surprise, surprise: you can't, uints are crippled!) # https://github.com/nim-lang/Nim/issues/9984 for elem in obj: w.writeValue elem elif obj is BitField: for elem in obj.bits: w.writeValue elem else: obj.serializeFields(fieldName, field): # for research/serialized_sizes, remove when appropriate when defined(debugFieldSizes) and obj is (BeaconState|BeaconBlock): let start = w.stream.pos w.writeValue field.toSSZType debugEcho fieldName, ": ", w.stream.pos - start else: w.writeValue field.toSSZType w.endRecord(memo) proc readValue*(r: var SszReader, result: var auto) = # We are not using overloads here, because this leads to # slightly better error messages when the user provides # additional overloads for `readValue`. type T = result.type mixin readValue template checkEof(n: int) = if not r.stream[].ensureBytes(n): raise newException(UnexpectedEofError, "SSZ has insufficient number of bytes") when result is ValidatorIndex|BasicType: let bytesToRead = result.sszLen; checkEof bytesToRead when result is ValidatorPubKey|ValidatorSig: if not result.init(r.stream.readBytes(bytesToRead)): raise newException(CorruptedDataError, "Failed to load a BLS key or signature") else: result = T.fromBytesSSZ(r.stream.readBytes(bytesToRead)) elif result is enum: # TODO what to do with out-of-range values?? rejecting means breaking # forwards compatibility.. result = cast[T](r.readValue(uint64)) elif result is string: {.error: "The SSZ format doesn't support the string type yet".} else: let totalLen = int r.readValue(uint32) checkEof totalLen let endPos = r.stream[].pos + totalLen when T is seq: type ElemType = type(result[0]) # Items are of homogenous type, but not necessarily homogenous length, # cannot pre-allocate item list generically while r.stream[].pos < endPos: result.add r.readValue(ElemType) elif T is BitField: type ElemType = type(result.bits[0]) while r.stream[].pos < endPos: result.bits.add r.readValue(ElemType) elif T is array: type ElemType = type(result[0]) var i = 0 while r.stream[].pos < endPos: if i > result.len: raise newException(CorruptedDataError, "SSZ includes unexpected bytes past the end of an array") result[i] = r.readValue(ElemType) i += 1 else: result.deserializeFields(fieldName, field): # TODO This hardcoding's ugly; generalize & abstract. when field is Slot: field = r.readValue(uint64).Slot elif field is Epoch: field = r.readValue(uint64).Epoch else: field = r.readValue(field.type) if r.stream[].pos != endPos: raise newException(CorruptedDataError, "SSZ includes unexpected bytes past the end of the deserialized object") # ################### Hashing ################################### # Sample hash_tree_root implementation based on: # https://github.com/ethereum/eth2.0-specs/blob/v0.5.1/specs/simple-serialize.md # https://github.com/ethereum/eth2.0-specs/blob/v0.5.1/utils/phase0/minimal_ssz.py # TODO Probably wrong - the spec is pretty bare-bones and no test vectors yet const BYTES_PER_CHUNK = 32 # ################### Hashing helpers ################################### # TODO varargs openarray, anyone? template withHash(body: untyped): array[32, byte] = let tmp = withEth2Hash: body toBytesSSZ tmp func hash(a, b: openArray[byte]): array[32, byte] = withHash: h.update(a) h.update(b) type Chunk = array[BYTES_PER_CHUNK, byte] # TODO: er, how is this _actually_ done? # Mandatory bug: https://github.com/nim-lang/Nim/issues/9825 func empty(T: type): T = discard const emptyChunk = empty(Chunk) func mix_in_length(root: Chunk, length: int): Chunk = var dataLen: array[32, byte] var lstLen = uint64(length) littleEndian64(dataLen[32-8].addr, lstLen.addr) hash(root, dataLen) proc pack(values: seq|array): iterator(): Chunk = result = iterator (): Chunk = # TODO should be trivial to avoid this seq also.. # TODO I get a feeling a copy of the array is taken to the closure, which # also needs fixing # TODO avoid closure iterators that involve GC var tmp = newSeqOfCap[byte](values.len() * sizeof(toBytesSSZ(values[0].toSSZType()))) for v in values: tmp.add toBytesSSZ(v.toSSZType) for v in 0.. 1 and stack[^1].height == stack[^2].height: # As tradition dictates - one feature, at least one nim bug: # https://github.com/nim-lang/Nim/issues/9684 let tmp = hash(stack[^2].chunk, stack[^1].chunk) stack[^2].height += 1 stack[^2].chunk = tmp discard stack.pop doAssert stack.len == 1, "With power-of-two leaves, we should end up with a single root" stack[0].chunk template elementType[T, N](_: type array[N, T]): typedesc = T template elementType[T](_: type seq[T]): typedesc = T func hash_tree_root*[T](value: T): Eth2Digest = # Merkle tree Eth2Digest(data: when T is BasicType: merkleize(pack([value])) elif T is array|seq: when T.elementType() is BasicType: mix_in_length(merkleize(pack(value)), len(value)) else: var roots = iterator(): Chunk = for v in value: yield hash_tree_root(v).data mix_in_length(merkleize(roots), len(value)) elif T is object: var roots = iterator(): Chunk = for v in value.fields: yield hash_tree_root(v).data merkleize(roots) else: static: doAssert false, "Unexpected type: " & T.name ) # https://github.com/ethereum/eth2.0-specs/blob/0.4.0/specs/simple-serialize.md#signed-roots func signed_root*[T: object](x: T): Eth2Digest = # TODO write tests for this (check vs hash_tree_root) var found_field_name = false var roots = iterator(): Chunk = for name, field in x.fieldPairs: # TODO we should truncate the last field, regardless of its name.. this # hack works for now - how to skip the last fieldPair though?? if name == "signature": found_field_name = true break yield hash_tree_root(field).data let root = merkleize(roots) doAssert found_field_name Eth2Digest(data: root)