# beacon_chain
# Copyright (c) 2018 Status Research & Development GmbH
# Licensed and distributed under either of
#   * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
#   * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

# SSZ Serialization (simple serialize)
# See https://github.com/ethereum/eth2.0-specs/blob/master/specs/simple-serialize.md

import
  endians, stew/shims/macros, options, algorithm, options,
  stew/[bitops2, bitseqs, objects, varints, ptrops, ranges/ptr_arith], stint,
  faststreams/input_stream, serialization, serialization/testing/tracing,
  nimcrypto/sha2, blscurve, eth/common,
  ./spec/[crypto, datatypes, digest],
  ./ssz/[types, bytes_reader]

# ################### Helper functions ###################################

export
  serialization, types, bytes_reader

when defined(serialization_tracing):
  import
    typetraits, stew/ranges/ptr_arith

const
  bytesPerChunk = 32
  bitsPerChunk = bytesPerChunk * 8
  maxChunkTreeDepth = 25
  defaultMaxObjectSize = 1 * 1024 * 1024

type
  SszReader* = object
    stream: ByteStreamVar
    maxObjectSize: int

  SszWriter* = object
    stream: OutputStreamVar

  BasicType = char|bool|SomeUnsignedInt|StUint|ValidatorIndex

  SszChunksMerkelizer = ref object of RootObj
    # `combinedChunks[i]` holds a pending node at tree level `i`; bit `i` of
    # `totalChunks` tells whether that slot is currently occupied.
    combinedChunks: array[maxChunkTreeDepth, Eth2Digest]
    totalChunks: uint64
    # Maximum number of chunks; 0 is treated as "no limit" by getFinalHash.
    limit: uint64

  TypeWithMaxLen*[T; maxLen: static int64] = distinct T

  SizePrefixed*[T] = distinct T
  SszMaxSizeExceeded* = object of SerializationError

  VarSizedWriterCtx = object
    fixedParts: WriteCursor
    offset: int

  FixedSizedWriterCtx = object

  Bytes = seq[byte]

serializationFormat SSZ,
                    Reader = SszReader,
                    Writer = SszWriter,
                    PreferedOutput = seq[byte]

# Wraps `x` so that `writeValue`/`readValue` handle it with a varint
# length prefix.
template sizePrefixed*[TT](x: TT): untyped =
  type T = TT
  SizePrefixed[T](x)

proc init*(T: type SszReader,
           stream: ByteStreamVar,
           maxObjectSize = defaultMaxObjectSize): T =
  ## Creates a reader over `stream`. `maxObjectSize` bounds the length
  ## accepted when reading a `SizePrefixed` value.
  T(stream: stream, maxObjectSize: maxObjectSize)

proc mount*(F: type SSZ, stream: ByteStreamVar, T: type): T =
  ## Reads a value of type `T` from `stream` using a reader with the
  ## default maximum object size.
  mixin readValue
  var reader = init(SszReader, stream)
  reader.readValue(T)

method formatMsg*(err: ref SszSizeMismatchError, filename: string): string {.gcsafe.} =
  # TODO: implement proper error string
  "Serialisation error while processing " & filename

when false:
  # TODO: Nim can't handle yet this simpler definition. File an issue.
  template valueOf[T; N](x: TypeWithMaxLen[T, N]): auto = T(x)
else:
  proc unwrapImpl[T; N: static int64](x: ptr TypeWithMaxLen[T, N]): ptr T =
    cast[ptr T](x)

  # Views a `TypeWithMaxLen[T, N]` as its underlying `T`.
  template valueOf(x: TypeWithMaxLen): auto =
    let xaddr = unsafeAddr x
    unwrapImpl(xaddr)[]

# Tags `x` with a static maximum length.
template sszList*(x: seq|array, maxLen: static int64): auto =
  TypeWithMaxLen[type(x), maxLen](x)

# Converts `x` to the representation that is actually serialized/hashed.
# Types not listed here are passed through unchanged.
template toSszType*(x: auto): auto =
  mixin toSszType

  when x is Slot|Epoch|ValidatorIndex|enum: uint64(x)
  elif x is Eth2Digest: x.data
  elif x is BlsValue|BlsCurveType: getBytes(x)
  elif x is BitSeq|BitList: Bytes(x)
  elif x is ref|ptr: toSszType x[]
  elif x is Option: toSszType x.get
  elif x is TypeWithMaxLen: toSszType valueOf(x)
  elif useListType and x is List: seq[x.T](x)
  else: x

func writeFixedSized(c: var WriteCursor, x: auto) =
  ## Appends the encoding of the fixed-size value `x` to the cursor `c`.
  ## Containers are written element by element / field by field.
  mixin toSszType

  when x is byte:
    c.append x
  elif x is bool|char:
    c.append byte(ord(x))
  elif x is SomeUnsignedInt:
    when system.cpuEndian == bigEndian:
      # Convert directly to bytes the size of the int. (e.g. ``uint16 = 2 bytes``)
      # All integers are serialized as **little endian**.
      var bytes: array[sizeof(x), byte]
      when x.sizeof == 8: littleEndian64(addr bytes[0], x.unsafeAddr)
      elif x.sizeof == 4: littleEndian32(addr bytes[0], x.unsafeAddr)
      elif x.sizeof == 2: littleEndian16(addr bytes[0], x.unsafeAddr)
      elif x.sizeof == 1: copyMem(addr bytes[0], x.unsafeAddr, sizeof(x))
      else: unsupported x.type
      c.append bytes
    else:
      let valueAddr {.used.} = unsafeAddr x
      trs "APPENDING INT ", x, " = ", makeOpenArray(cast[ptr byte](valueAddr), sizeof(x))
      c.appendMemCopy x
  elif x is StUint:
    c.appendMemCopy x # TODO: Is this always correct?
  elif x is array|string|seq|openarray:
    when x[0] is byte:
      trs "APPENDING FIXED SIZE BYTES", x
      c.append x
    else:
      for elem in x:
        trs "WRITING FIXED SIZE ARRAY ELEMENT"
        c.writeFixedSized toSszType(elem)
  elif x is tuple|object:
    enumInstanceSerializedFields(x, fieldName, field):
      trs "WRITING FIXED SIZE FIELD", fieldName
      c.writeFixedSized toSszType(field)
  else:
    unsupported x.type

# Convenience overload writing through the stream's own cursor.
template writeFixedSized(s: OutputStreamVar, x: auto) =
  writeFixedSized(s.cursor, x)

# True when `fixedPortionSize toSszType(default(T))` compiles for `T`.
template supports*(_: type SSZ, T: type): bool =
  mixin toSszType
  anonConst compiles(fixedPortionSize toSszType(default(T)))

func init*(T: type SszWriter, stream: OutputStreamVar): T =
  ## Creates a writer that emits to `stream`.
  result.stream = stream

# Runs `body` once per element (for sequences/arrays/strings) or once per
# serialized field (for anything else), binding the item to `fieldVar`.
template enumerateSubFields(holder, fieldVar, body: untyped) =
  when holder is array|string|seq|openarray:
    for fieldVar in holder: body
  else:
    enumInstanceSerializedFields(holder, _, fieldVar): body

func writeVarSizeType(w: var SszWriter, value: auto) {.gcsafe.}

func beginRecord*(w: var SszWriter, TT: type): auto =
  ## Starts writing a record of type `TT` and returns the writing context.
  ## For variable-size types, room for the fixed portion is reserved up
  ## front via `delayFixedSizeWrite`, and `offset` starts right after it.
  type T = TT
  when isFixedSize(T):
    FixedSizedWriterCtx()
  else:
    const offset = when T is array: len(T) * offsetSize
                   else: fixedPortionSize(T)
    VarSizedWriterCtx(offset: offset,
                      fixedParts: w.stream.delayFixedSizeWrite(offset))

# Writes one field of a record started with `beginRecord`.
# In a variable-size record, fixed-size fields go straight into the reserved
# fixed portion; for variable-size fields a uint32 offset is stored there and
# the payload is appended to the stream, advancing `ctx.offset` by its size.
template writeField*(w: var SszWriter,
                     ctx: var auto,
                     fieldName: string,
                     field: auto) =
  mixin toSszType
  when ctx is FixedSizedWriterCtx:
    writeFixedSized(w.stream, toSszType(field))
  else:
    type FieldType = type toSszType(field)

    when isFixedSize(FieldType):
      ctx.fixedParts.writeFixedSized toSszType(field)
    else:
      trs "WRITING OFFSET ", ctx.offset, " FOR ", fieldName
      ctx.fixedParts.writeFixedSized uint32(ctx.offset)
      let initPos = w.stream.pos
      trs "WRITING VAR SIZE VALUE OF TYPE ", name(FieldType)
      when FieldType is BitSeq:
        trs "BIT SEQ ", Bytes(field)
      writeVarSizeType(w, toSszType(field))
      ctx.offset += w.stream.pos - initPos

# Completes a record; only variable-size contexts have a delayed write
# that needs finalizing.
template endRecord*(w: var SszWriter, ctx: var auto) =
  when ctx is VarSizedWriterCtx:
    finalize ctx.fixedParts

func writeVarSizeType(w: var SszWriter, value: auto) =
  ## Writes a variable-size value: either a list (fixed-size elements are
  ## written back to back, variable-size elements get an offset table
  ## followed by the payloads) or a record written field by field.
  trs "STARTING VAR SIZE TYPE"
  mixin toSszType
  type T = type toSszType(value)

  when T is seq|string|openarray:
    type E = ElemType(T)
    const isFixed = when E is Option: false
                    else: isFixedSize(E)
    when isFixed:
      trs "WRITING LIST WITH FIXED SIZE ELEMENTS"
      for elem in value:
        w.stream.writeFixedSized toSszType(elem)
      trs "DONE"
    else:
      trs "WRITING LIST WITH VAR SIZE ELEMENTS"
      var offset = value.len * offsetSize
      var cursor = w.stream.delayFixedSizeWrite offset
      for elem in value:
        cursor.writeFixedSized uint32(offset)
        # Absent elements still get an offset entry but no payload.
        when elem is Option:
          if not isSome(elem): continue
        elif elem is ptr|ref:
          if isNil(elem): continue
        let initPos = w.stream.pos
        w.writeVarSizeType toSszType(elem)
        offset += w.stream.pos - initPos
      finalize cursor
      trs "DONE"

  elif T is object|tuple|array:
    trs "WRITING OBJECT OR ARRAY"
    var ctx = beginRecord(w, T)
    enumerateSubFields(value, field):
      writeField w, ctx, astToStr(field), field
    endRecord w, ctx

func writeValue*(w: var SszWriter, x: auto) {.gcsafe.} =
  ## Serializes `x`, dispatching on whether its SSZ type is fixed-size.
  mixin toSszType
  type T = type toSszType(x)

  when isFixedSize(T):
    w.stream.writeFixedSized toSszType(x)
  elif T is array|seq|openarray|string|object|tuple:
    w.writeVarSizeType toSszType(x)
  else:
    unsupported type(x)

func writeValue*[T](w: var SszWriter, x: SizePrefixed[T]) =
  ## Serializes the wrapped value preceded by its byte length as a varint.
  ## The prefix slot is reserved first and filled in once the length is known.
  var cursor = w.stream.delayVarSizeWrite(10)
  let initPos = w.stream.pos
  w.writeValue T(x)
  let length = uint64(w.stream.pos - initPos)
  when false:
    discard
    # TODO varintBytes is sub-optimal at the moment
    # cursor.writeAndFinalize length.varintBytes
  else:
    var buf: VarintBuffer
    buf.appendVarint length
    cursor.writeAndFinalize buf.writtenBytes

# Decoding ignores the max-length tag and defers to the underlying type.
template fromSszBytes*[T; N](_: type TypeWithMaxLen[T, N],
                             bytes: openarray[byte]): auto =
  mixin fromSszBytes
  fromSszBytes(T, bytes)

proc readValue*(r: var SszReader, val: var auto) =
  ## Decodes `val` from `r.stream.readBytes(r.stream.endPos)`.
  val = readSszValue(r.stream.readBytes(r.stream.endPos), val.type)

proc readValue*[T](r: var SszReader, val: var SizePrefixed[T]) =
  ## Reads a varint length followed by that many bytes and decodes them
  ## as `T`. Raises `SszMaxSizeExceeded` when the announced length is
  ## larger than the reader's `maxObjectSize`.
  let length = r.stream.readVarint(uint64)
  if length > r.maxObjectSize:
    raise newException(SszMaxSizeExceeded,
                       "Maximum SSZ object size exceeded: " & $length)
  val = readSszValue(r.stream.readBytes(length), T)

const
  zeroChunk = default array[32, byte]

func hash(a, b: openArray[byte]): Eth2Digest =
  ## Hashes the concatenation of `a` and `b`.
  result = withEth2Hash:
    trs "MERGING BRANCHES "
    trs a
    trs b

    h.update a
    h.update b
  trs "HASH RESULT ", result

func mergeBranches(existing: Eth2Digest, newData: openarray[byte]): Eth2Digest =
  ## Hashes `existing` followed by `newData`, zero-padding `newData`
  ## up to `bytesPerChunk` bytes when it is shorter.
  result = withEth2Hash:
    trs "MERGING BRANCHES OPEN ARRAY"
    trs existing.data
    trs newData

    h.update existing.data
    h.update newData

    let paddingBytes = bytesPerChunk - newData.len
    if paddingBytes > 0:
      trs "USING ", paddingBytes, " PADDING BYTES"
      h.update zeroChunk[0 ..< paddingBytes]
  trs "HASH RESULT ", result

template mergeBranches(a, b: Eth2Digest): Eth2Digest =
  hash(a.data, b.data)

func computeZeroHashes: array[100, Eth2Digest] =
  ## Entry 0 is the all-zero chunk; entry `i` is the hash of two copies of
  ## entry `i - 1`.
  result[0] = Eth2Digest(data: zeroChunk)
  for i in 1 .. result.high:
    result[i] = mergeBranches(result[i - 1], result[i - 1])

let zeroHashes = computeZeroHashes()

func getZeroHashWithoutSideEffect(idx: int): Eth2Digest =
  # TODO this is a work-around for the somewhat broken side
  # effects analysis of Nim - reading from global let variables
  # is considered a side-effect.
  {.noSideEffect.}:
    zeroHashes[idx]

func addChunk(merkelizer: SszChunksMerkelizer, data: openarray[byte]) =
  ## Feeds one chunk of 1..`bytesPerChunk` bytes into the merkelizer.
  ## An even-numbered chunk is stored (zero-padded) in slot 0. An
  ## odd-numbered chunk is merged with slot 0 and the result is carried
  ## upwards through every occupied slot until a free one is found.
  doAssert data.len > 0 and data.len <= bytesPerChunk

  if not getBitLE(merkelizer.totalChunks, 0):
    let chunkStartAddr = addr merkelizer.combinedChunks[0].data[0]
    copyMem(chunkStartAddr, unsafeAddr data[0], data.len)
    zeroMem(chunkStartAddr.offset(data.len), bytesPerChunk - data.len)
    trs "WROTE BASE CHUNK ", merkelizer.combinedChunks[0]
  else:
    var hash = mergeBranches(merkelizer.combinedChunks[0], data)

    for i in 1 .. high(merkelizer.combinedChunks):
      trs "ITERATING"
      if getBitLE(merkelizer.totalChunks, i):
        trs "CALLING MERGE BRANCHES"
        hash = mergeBranches(merkelizer.combinedChunks[i], hash)
      else:
        trs "WRITING FRESH CHUNK AT ", i, " = ", hash
        merkelizer.combinedChunks[i] = hash
        break

  inc merkelizer.totalChunks

func getFinalHash(merkelizer: SszChunksMerkelizer): Eth2Digest =
  ## Returns the root over all chunks added so far, padding with zero
  ## hashes up to the height required by `limit` (when it asks for a taller
  ## tree than the submitted chunks do).
  let limit = merkelizer.limit

  if merkelizer.totalChunks == 0:
    let limitHeight = if limit != 0: bitWidth(limit - 1) else: 0
    return getZeroHashWithoutSideEffect(limitHeight)

  let
    bottomHashIdx = firstOne(merkelizer.totalChunks) - 1
    submittedChunksHeight = bitWidth(merkelizer.totalChunks - 1)
    topHashIdx = if limit <= 1: submittedChunksHeight
                 else: max(submittedChunksHeight, bitWidth(limit - 1))

  trs "BOTTOM HASH ", bottomHashIdx
  trs "SUBMITTED HEIGHT ", submittedChunksHeight
  trs "LIMIT ", limit

  if bottomHashIdx != submittedChunksHeight:
    # Our tree is not finished. We must complete the work in progress
    # branches and then extend the tree to the right height.
    result = mergeBranches(merkelizer.combinedChunks[bottomHashIdx],
                           getZeroHashWithoutSideEffect(bottomHashIdx))

    for i in bottomHashIdx + 1 ..< topHashIdx:
      if getBitLE(merkelizer.totalChunks, i):
        result = mergeBranches(merkelizer.combinedChunks[i], result)
        trs "COMBINED"
      else:
        result = mergeBranches(result, getZeroHashWithoutSideEffect(i))
        trs "COMBINED WITH ZERO"

  elif bottomHashIdx == topHashIdx:
    # We have a perfect tree (chunks == 2**n) at just the right height!
    result = merkelizer.combinedChunks[bottomHashIdx]
  else:
    # We have a perfect tree of user chunks, but we have more work to
    # do - we must extend it to reach the desired height
    result = mergeBranches(merkelizer.combinedChunks[bottomHashIdx],
                           getZeroHashWithoutSideEffect(bottomHashIdx))

    for i in bottomHashIdx + 1 ..< topHashIdx:
      result = mergeBranches(result, getZeroHashWithoutSideEffect(i))

# Output stream vtable that forwards every written page to the
# `SszChunksMerkelizer` stored in the stream's `outputDevice`.
let HashingStreamVTable = OutputStreamVTable(
  writePage: proc (s: OutputStreamVar, data: openarray[byte]) {.nimcall, gcsafe, raises: [IOError].} =
    trs "ADDING STREAM CHUNK ", data
    SszChunksMerkelizer(s.outputDevice).addChunk(data)
  ,
  flush: proc (s: OutputStreamVar) {.nimcall, gcsafe.} =
    discard
)

func getVtableAddresWithoutSideEffect: ptr OutputStreamVTable =
  # TODO this is a work-around for the somewhat broken side
  # effects analysis of Nim - reading from global let variables
  # is considered a side-effect.
  {.noSideEffect.}:
    unsafeAddr HashingStreamVTable

func newSszHashingStream(merkelizer: SszChunksMerkelizer): ref OutputStream =
  ## Creates an output stream with `bytesPerChunk`-sized pages whose pages
  ## are delivered to `merkelizer` through `HashingStreamVTable`.
  new result
  result.initWithSinglePage(pageSize = bytesPerChunk,
                            maxWriteSize = bytesPerChunk,
                            minWriteSize = bytesPerChunk)
  result.outputDevice = merkelizer
  result.vtable = getVtableAddresWithoutSideEffect()

func mixInLength(root: Eth2Digest, length: int): Eth2Digest =
  ## Hashes `root` together with `length` encoded as a little-endian
  ## uint64 in the first 8 bytes of an otherwise zero 32-byte chunk.
  var dataLen: array[32, byte]
  var lstLen = uint64(length)
  littleEndian64(addr dataLen[0], addr lstLen)
  hash(root.data, dataLen)

func merkelizeSerializedChunks(merkelizer: SszChunksMerkelizer,
                               obj: auto): Eth2Digest =
  ## Serializes `obj` into a hashing stream attached to `merkelizer` and
  ## returns the resulting root.
  var hashingStream = newSszHashingStream merkelizer
  hashingStream.writeFixedSized obj
  hashingStream.flush
  merkelizer.getFinalHash

func merkelizeSerializedChunks(obj: auto): Eth2Digest =
  merkelizeSerializedChunks(SszChunksMerkelizer(), obj)

func hash_tree_root*(x: auto): Eth2Digest {.gcsafe.}

# Evaluates `body` with a fresh `merkelizer` in scope and yields its final
# hash. Inside `body`, `addField f` adds `hash_tree_root(f)` as one chunk.
template merkelizeFields(body: untyped): Eth2Digest {.dirty.} =
  var merkelizer {.inject.} = SszChunksMerkelizer()

  template addField(field) =
    let hash = hash_tree_root(field)
    trs "MERKLEIZING FIELD ", astToStr(field), " = ", hash
    addChunk(merkelizer, hash.data)
    trs "CHUNK ADDED"

  template addField2(field) {.used.}=
    const maxLen = fieldMaxLen(field)
    when maxLen > 0:
      type FieldType = type field
      addField TypeWithMaxLen[FieldType, maxLen](field)
    else:
      addField field

  body

  merkelizer.getFinalHash

func bitlistHashTreeRoot(merkelizer: SszChunksMerkelizer, x: BitSeq): Eth2Digest =
  ## Computes the hash tree root of a bit list. The highest set bit of the
  ## last byte is treated as a length marker: it is cleared (or the byte is
  ## dropped entirely when it holds nothing but the marker) before the
  ## remaining bytes are chunked, merkelized and mixed with `x.len`.
  ## `merkelizer` is expected to be fresh (no chunks added yet).
  trs "CHUNKIFYING BIT SEQ WITH LIMIT ", merkelizer.limit

  var
    totalBytes = Bytes(x).len
    lastCorrectedByte = Bytes(x)[^1]

  if lastCorrectedByte == byte(1):
    if totalBytes == 1:
      # This is an empty bit list.
      # It should be hashed as a tree containing all zeros. The height is
      # computed exactly as getFinalHash does for zero chunks, so that
      # limits which are not powers of two are rounded up, not down.
      let treeHeight = if merkelizer.limit == 0: 0
                       else: bitWidth(merkelizer.limit - 1)
      return mergeBranches(getZeroHashWithoutSideEffect(treeHeight),
                           getZeroHashWithoutSideEffect(0)) # this is the mixed length

    # The last byte carries only the marker bit - drop it.
    totalBytes -= 1
    lastCorrectedByte = Bytes(x)[^2]
  else:
    let markerPos = log2trunc(lastCorrectedByte)
    lastCorrectedByte.lowerBit(markerPos)

  var
    bytesInLastChunk = totalBytes mod bytesPerChunk
    fullChunks = totalBytes div bytesPerChunk

  if bytesInLastChunk == 0:
    # The data ends on a chunk boundary: the final full chunk is handled
    # as the "last chunk" below because its last byte must be corrected.
    fullChunks -= 1
    bytesInLastChunk = bytesPerChunk

  for i in 0 ..< fullChunks:
    let
      chunkStartPos = i * bytesPerChunk
      chunkEndPos = chunkStartPos + bytesPerChunk - 1

    # Feed the whole 32-byte chunk (both bounds are inclusive).
    merkelizer.addChunk Bytes(x).toOpenArray(chunkStartPos, chunkEndPos)

  var
    lastChunk: array[bytesPerChunk, byte]
    chunkStartPos = fullChunks * bytesPerChunk

  for i in 0 .. bytesInLastChunk - 2:
    lastChunk[i] = Bytes(x)[chunkStartPos + i]

  lastChunk[bytesInLastChunk - 1] = lastCorrectedByte

  merkelizer.addChunk lastChunk.toOpenArray(0, bytesInLastChunk - 1)
  let contentsHash = merkelizer.getFinalHash
  mixInLength contentsHash, x.len

func hashTreeRootImpl[T](x: T): Eth2Digest =
  ## Computes the hash tree root of a value that has already been passed
  ## through `toSszType` and carries no max-length tag.
  when T is uint64:
    trs "UINT64; LITTLE-ENDIAN IDENTITY MAPPING"
    when system.cpuEndian == bigEndian:
      littleEndian64(addr result.data[0], x.unsafeAddr)
    else:
      let valueAddr = unsafeAddr x
      result.data[0..7] = makeOpenArray(cast[ptr byte](valueAddr), 8)
  elif (when T is array: ElemType(T) is byte and sizeof(T) == sizeof(Eth2Digest) else: false):
    # TODO is this sizeof comparison guaranteed? it's whole structure vs field
    trs "ETH2DIGEST; IDENTITY MAPPING"
    Eth2Digest(data: x)
  elif (T is BasicType) or (when T is array: ElemType(T) is BasicType else: false):
    trs "FIXED TYPE; USE CHUNK STREAM"
    merkelizeSerializedChunks x
  elif T is string or (when T is (seq|openarray): ElemType(T) is BasicType else: false):
    trs "TYPE WITH LENGTH"
    mixInLength merkelizeSerializedChunks(x), x.len
  elif T is array|object|tuple:
    trs "MERKELIZING FIELDS"
    merkelizeFields:
      x.enumerateSubFields(f):
        const maxLen = fieldMaxLen(f)
        when maxLen > 0:
          type FieldType = type f
          addField TypeWithMaxLen[FieldType, maxLen](f)
        else:
          addField f
  elif T is seq:
    trs "SEQ WITH VAR SIZE"
    let hash = merkelizeFields(for e in x: addField e)
    mixInLength hash, x.len
  #elif isCaseObject(T):
  #  # TODO implement this
  else:
    unsupported T

func maxChunksCount(T: type, maxLen: static int64): int64 {.compileTime.} =
  ## Number of chunks needed for `maxLen` items of the list type `T`:
  ## bits for `BitList`, packed bytes for basic element types, and one
  ## chunk per element otherwise.
  when T is BitList:
    (maxLen + bitsPerChunk - 1) div bitsPerChunk
  elif T is seq:
    type E = ElemType(T)
    when E is BasicType:
      (maxLen * sizeof(E) + bytesPerChunk - 1) div bytesPerChunk
    else:
      maxLen
  else:
    unsupported T # This should never happen

func hash_tree_root*(x: auto): Eth2Digest =
  ## Computes the SSZ hash tree root of `x`. Values tagged with
  ## `TypeWithMaxLen` are merkelized against the chunk limit derived from
  ## their maximum length; everything else goes through `hashTreeRootImpl`.
  trs "STARTING HASH TREE ROOT FOR TYPE ", name(type(x))
  mixin toSszType
  when x is SignedBeaconBlock:
    doassert false
  when x is TypeWithMaxLen:
    const maxLen = x.maxLen
    type T = type valueOf(x)
    const limit = maxChunksCount(T, maxLen)
    var merkelizer = SszChunksMerkelizer(limit: uint64(limit))

    when T is BitList:
      result = merkelizer.bitlistHashTreeRoot(BitSeq valueOf(x))
    elif T is seq:
      type E = ElemType(T)
      let contentsHash = when E is BasicType:
        merkelizeSerializedChunks(merkelizer, valueOf(x))
      else:
        for elem in valueOf(x):
          let elemHash = hash_tree_root(elem)
          merkelizer.addChunk(elemHash.data)
        merkelizer.getFinalHash()
      result = mixInLength(contentsHash, valueOf(x).len)
    else:
      unsupported T # This should never happen
  else:
    result = hashTreeRootImpl toSszType(x)

  trs "HASH TREE ROOT FOR ", name(type x), " = ", "0x", $result

iterator hash_tree_roots_prefix*[T](lst: openarray[T], limit: auto): Eth2Digest =
  ## Yields, for every `i`, the hash tree root of the first `i + 1`
  ## elements of `lst` treated as a list with chunk limit `limit`.
  # This is a particular type's instantiation of a general fold, reduce,
  # accumulation, prefix sums, etc family of operations. As long as that
  # Eth1 deposit case is the only notable example -- the usual uses of a
  # list involve, at some point, tree-hashing it -- finalized hashes are
  # the only abstraction that escapes from this module this way.
  var merkelizer = SszChunksMerkelizer(limit: uint64(limit))
  for i, elem in lst:
    merkelizer.addChunk(hash_tree_root(elem).data)
    yield mixInLength(merkelizer.getFinalHash(), i + 1)