2020-05-13 12:18:42 +03:00

663 lines
22 KiB
Nim

# beacon_chain
# Copyright (c) 2018 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
# SSZ Serialization (simple serialize)
# See https://github.com/ethereum/eth2.0-specs/blob/master/specs/simple-serialize.md
# TODO Cannot override push, even though the function is annotated
# nim-beacon-chain/beacon_chain/ssz.nim(212, 18) Error: can raise an unlisted exception: IOError
#{.push raises: [Defect].}
import
stew/shims/macros, options, algorithm, options, strformat,
stew/[bitops2, bitseqs, endians2, objects, varints, ptrops, ranges/ptr_arith], stint,
faststreams/[inputs, outputs, buffers], serialization, serialization/testing/tracing,
./spec/[crypto, datatypes, digest],
./ssz/[types, bytes_reader]
# ################### Helper functions ###################################
export
serialization, types, bytes_reader
when defined(serialization_tracing):
import
typetraits, stew/ranges/ptr_arith
const
bytesPerChunk = 32
bitsPerChunk = bytesPerChunk * 8
maxChunkTreeDepth = 25
defaultMaxObjectSize = 1 * 1024 * 1024
type
SszReader* = object
stream: InputStream
maxObjectSize: int
SszWriter* = object
stream: OutputStream
BasicType = char|bool|SomeUnsignedInt|StUint|ValidatorIndex
SszChunksMerkleizer = ref object
combinedChunks: array[maxChunkTreeDepth, Eth2Digest]
totalChunks: uint64
limit: uint64
SszHashingStream = ref object of OutputStream
merkleizer: SszChunksMerkleizer
TypeWithMaxLen*[T; maxLen: static int64] = distinct T
SizePrefixed*[T] = distinct T
SszMaxSizeExceeded* = object of SerializationError
VarSizedWriterCtx = object
fixedParts: WriteCursor
offset: int
FixedSizedWriterCtx = object
ByteList = seq[byte]
serializationFormat SSZ,
Reader = SszReader,
Writer = SszWriter,
PreferedOutput = seq[byte]
template sizePrefixed*[TT](x: TT): untyped =
type T = TT
SizePrefixed[T](x)
proc init*(T: type SszReader,
stream: InputStream,
maxObjectSize = defaultMaxObjectSize): T {.raises: [Defect].} =
T(stream: stream, maxObjectSize: maxObjectSize)
proc mount*(F: type SSZ, stream: InputStream, T: type): T {.raises: [Defect].} =
mixin readValue
var reader = init(SszReader, stream)
reader.readValue(T)
method formatMsg*(err: ref SszSizeMismatchError, filename: string): string {.gcsafe, raises: [Defect].} =
# TODO: implement proper error string
try:
&"SSZ size mismatch, element {err.elementSize}, actual {err.actualSszSize}, type {err.deserializedType}, file {filename}"
except CatchableError:
"SSZ size mismatch"
when false:
# TODO: Nim can't handle yet this simpler definition. File an issue.
template valueOf[T; N](x: TypeWithMaxLen[T, N]): auto = T(x)
else:
proc unwrapImpl[T; N: static int64](x: ptr TypeWithMaxLen[T, N]): ptr T =
cast[ptr T](x)
template valueOf(x: TypeWithMaxLen): auto =
let xaddr = unsafeAddr x
unwrapImpl(xaddr)[]
template sszList*(x: seq|array, maxLen: static int64): auto =
TypeWithMaxLen[type(x), maxLen](x)
template toSszType*(x: auto): auto =
mixin toSszType
when x is Slot|Epoch|ValidatorIndex|enum: uint64(x)
elif x is Eth2Digest: x.data
elif x is BlsCurveType: toRaw(x)
elif x is BitSeq|BitList: ByteList(x)
elif x is TypeWithMaxLen: toSszType valueOf(x)
elif x is ForkDigest|Version: array[4, byte](x)
elif useListType and x is List: seq[x.T](x)
else: x
proc writeFixedSized(s: var (OutputStream|WriteCursor), x: auto) {.raises: [Defect, IOError].} =
mixin toSszType
when x is byte:
s.write x
elif x is bool|char:
s.write byte(ord(x))
elif x is SomeUnsignedInt:
let value = x.toBytesLE()
trs "APPENDING INT ", x, " = ", value
s.writeMemCopy value
elif x is StUint:
s.writeMemCopy x # TODO: Is this always correct?
elif x is array|string|seq|openarray:
when x[0] is byte:
trs "APPENDING FIXED SIZE BYTES", x
s.write x
else:
for elem in x:
trs "WRITING FIXED SIZE ARRAY ELEMENT"
s.writeFixedSized toSszType(elem)
elif x is tuple|object:
enumInstanceSerializedFields(x, fieldName, field):
trs "WRITING FIXED SIZE FIELD", fieldName
s.writeFixedSized toSszType(field)
else:
unsupported x.type
template writeOffset(cursor: var WriteCursor, offset: int) =
write cursor, toBytesLE(uint32 offset)
template supports*(_: type SSZ, T: type): bool =
mixin toSszType
anonConst compiles(fixedPortionSize toSszType(declval T))
func init*(T: type SszWriter, stream: OutputStream): T {.raises: [Defect].} =
result.stream = stream
template enumerateSubFields(holder, fieldVar, body: untyped) =
when holder is array|string|seq|openarray:
for fieldVar in holder: body
else:
enumInstanceSerializedFields(holder, _, fieldVar): body
proc writeVarSizeType(w: var SszWriter, value: auto) {.gcsafe.}
proc beginRecord*(w: var SszWriter, TT: type): auto {.raises: [Defect].} =
type T = TT
when isFixedSize(T):
FixedSizedWriterCtx()
else:
const offset = when T is array: len(T) * offsetSize
else: fixedPortionSize(T)
VarSizedWriterCtx(offset: offset,
fixedParts: w.stream.delayFixedSizeWrite(offset))
template writeField*(w: var SszWriter,
ctx: var auto,
fieldName: string,
field: auto) =
mixin toSszType
when ctx is FixedSizedWriterCtx:
writeFixedSized(w.stream, toSszType(field))
else:
type FieldType = type toSszType(field)
when isFixedSize(FieldType):
writeFixedSized(ctx.fixedParts, toSszType(field))
else:
trs "WRITING OFFSET ", ctx.offset, " FOR ", fieldName
writeOffset(ctx.fixedParts, ctx.offset)
let initPos = w.stream.pos
trs "WRITING VAR SIZE VALUE OF TYPE ", name(FieldType)
when FieldType is BitSeq:
trs "BIT SEQ ", ByteList(field)
writeVarSizeType(w, toSszType(field))
ctx.offset += w.stream.pos - initPos
template endRecord*(w: var SszWriter, ctx: var auto) =
when ctx is VarSizedWriterCtx:
finalize ctx.fixedParts
proc writeVarSizeType(w: var SszWriter, value: auto) {.raises: [Defect, IOError].} =
trs "STARTING VAR SIZE TYPE"
mixin toSszType
type T = type toSszType(value)
when T is seq|string|openarray:
type E = ElemType(T)
const isFixed = isFixedSize(E)
when isFixed:
trs "WRITING LIST WITH FIXED SIZE ELEMENTS"
for elem in value:
w.stream.writeFixedSized toSszType(elem)
trs "DONE"
else:
trs "WRITING LIST WITH VAR SIZE ELEMENTS"
var offset = value.len * offsetSize
var cursor = w.stream.delayFixedSizeWrite offset
for elem in value:
cursor.writeFixedSized uint32(offset)
let initPos = w.stream.pos
w.writeVarSizeType toSszType(elem)
offset += w.stream.pos - initPos
finalize cursor
trs "DONE"
elif T is object|tuple|array:
trs "WRITING OBJECT OR ARRAY"
var ctx = beginRecord(w, T)
enumerateSubFields(value, field):
writeField w, ctx, astToStr(field), field
endRecord w, ctx
proc writeValue*(w: var SszWriter, x: auto) {.gcsafe, raises: [Defect, IOError].} =
mixin toSszType
type T = type toSszType(x)
when isFixedSize(T):
w.stream.writeFixedSized toSszType(x)
elif T is array|seq|openarray|string|object|tuple:
w.writeVarSizeType toSszType(x)
else:
unsupported type(x)
func sszSize*(value: auto): int =
mixin toSszType
type T = type toSszType(value)
when isFixedSize(T):
anonConst fixedPortionSize(T)
elif T is seq|string|array|openarray:
type E = ElemType(T)
when isFixedSize(E):
len(value) * anonConst(fixedPortionSize(E))
else:
result = len(value) * offsetSize
for elem in value:
result += sszSize(toSszType elem)
elif T is object|tuple:
result = anonConst fixedPortionSize(T)
enumInstanceSerializedFields(value, _, field):
type FieldType = type toSszType(field)
when not isFixedSize(FieldType):
result += sszSize(toSszType field)
else:
unsupported T
proc writeValue*[T](w: var SszWriter, x: SizePrefixed[T]) {.raises: [Defect, IOError].} =
var cursor = w.stream.delayVarSizeWrite(10)
let initPos = w.stream.pos
w.writeValue T(x)
let length = uint64(w.stream.pos - initPos)
when false:
discard
# TODO varintBytes is sub-optimal at the moment
# cursor.writeAndFinalize length.varintBytes
else:
var buf: VarintBuffer
buf.writeVarint length
cursor.finalWrite buf.writtenBytes
template fromSszBytes*[T; N](_: type TypeWithMaxLen[T, N],
bytes: openarray[byte]): auto =
mixin fromSszBytes
fromSszBytes(T, bytes)
proc readValue*[T](r: var SszReader, val: var T) {.raises: [Defect, MalformedSszError, SszSizeMismatchError, IOError].} =
when isFixedSize(T):
const minimalSize = fixedPortionSize(T)
if r.stream.readable(minimalSize):
val = readSszValue(r.stream.read(minimalSize), T)
else:
raise newException(MalformedSszError, "SSZ input of insufficient size")
else:
# TODO Read the fixed portion first and precisely measure the size of
# the dynamic portion to consume the right number of bytes.
val = readSszValue(r.stream.read(r.stream.len.get), T)
proc readValue*[T](r: var SszReader, val: var SizePrefixed[T]) {.raises: [Defect].} =
let length = r.stream.readVarint(uint64)
if length > r.maxObjectSize:
raise newException(SszMaxSizeExceeded,
"Maximum SSZ object size exceeded: " & $length)
val = readSszValue(r.stream.read(length), T)
const
zeroChunk = default array[32, byte]
func hash(a, b: openArray[byte]): Eth2Digest =
result = withEth2Hash:
trs "MERGING BRANCHES "
trs a
trs b
h.update a
h.update b
trs "HASH RESULT ", result
func mergeBranches(existing: Eth2Digest, newData: openarray[byte]): Eth2Digest =
result = withEth2Hash:
trs "MERGING BRANCHES OPEN ARRAY"
trs existing.data
trs newData
h.update existing.data
h.update newData
let paddingBytes = bytesPerChunk - newData.len
if paddingBytes > 0:
trs "USING ", paddingBytes, " PADDING BYTES"
h.update zeroChunk[0 ..< paddingBytes]
trs "HASH RESULT ", result
template mergeBranches(a, b: Eth2Digest): Eth2Digest =
hash(a.data, b.data)
func computeZeroHashes: array[100, Eth2Digest] =
result[0] = Eth2Digest(data: zeroChunk)
for i in 1 .. result.high:
result[i] = mergeBranches(result[i - 1], result[i - 1])
let zeroHashes = computeZeroHashes()
func getZeroHashWithoutSideEffect(idx: int): Eth2Digest =
# TODO this is a work-around for the somewhat broken side
# effects analysis of Nim - reading from global let variables
# is considered a side-effect.
{.noSideEffect.}:
zeroHashes[idx]
func addChunk(merkleizer: SszChunksMerkleizer, data: openarray[byte]) =
doAssert data.len > 0 and data.len <= bytesPerChunk
if not getBitLE(merkleizer.totalChunks, 0):
let chunkStartAddr = addr merkleizer.combinedChunks[0].data[0]
copyMem(chunkStartAddr, unsafeAddr data[0], data.len)
zeroMem(chunkStartAddr.offset(data.len), bytesPerChunk - data.len)
trs "WROTE BASE CHUNK ", merkleizer.combinedChunks[0]
else:
var hash = mergeBranches(merkleizer.combinedChunks[0], data)
for i in 1 .. high(merkleizer.combinedChunks):
trs "ITERATING"
if getBitLE(merkleizer.totalChunks, i):
trs "CALLING MERGE BRANCHES"
hash = mergeBranches(merkleizer.combinedChunks[i], hash)
else:
trs "WRITING FRESH CHUNK AT ", i, " = ", hash
merkleizer.combinedChunks[i] = hash
break
inc merkleizer.totalChunks
func getFinalHash(merkleizer: SszChunksMerkleizer): Eth2Digest =
let limit = merkleizer.limit
if merkleizer.totalChunks == 0:
let limitHeight = if limit != 0: bitWidth(limit - 1) else: 0
return getZeroHashWithoutSideEffect(limitHeight)
let
bottomHashIdx = firstOne(merkleizer.totalChunks) - 1
submittedChunksHeight = bitWidth(merkleizer.totalChunks - 1)
topHashIdx = if limit <= 1: submittedChunksHeight
else: max(submittedChunksHeight, bitWidth(limit - 1))
trs "BOTTOM HASH ", bottomHashIdx
trs "SUBMITTED HEIGHT ", submittedChunksHeight
trs "LIMIT ", limit
if bottomHashIdx != submittedChunksHeight:
# Our tree is not finished. We must complete the work in progress
# branches and then extend the tree to the right height.
result = mergeBranches(merkleizer.combinedChunks[bottomHashIdx],
getZeroHashWithoutSideEffect(bottomHashIdx))
for i in bottomHashIdx + 1 ..< topHashIdx:
if getBitLE(merkleizer.totalChunks, i):
result = mergeBranches(merkleizer.combinedChunks[i], result)
trs "COMBINED"
else:
result = mergeBranches(result, getZeroHashWithoutSideEffect(i))
trs "COMBINED WITH ZERO"
elif bottomHashIdx == topHashIdx:
# We have a perfect tree (chunks == 2**n) at just the right height!
result = merkleizer.combinedChunks[bottomHashIdx]
else:
# We have a perfect tree of user chunks, but we have more work to
# do - we must extend it to reach the desired height
result = mergeBranches(merkleizer.combinedChunks[bottomHashIdx],
getZeroHashWithoutSideEffect(bottomHashIdx))
for i in bottomHashIdx + 1 ..< topHashIdx:
result = mergeBranches(result, getZeroHashWithoutSideEffect(i))
let SszHashingStreamVTable = OutputStreamVTable(
# The SSZ Hashing stream is a way to simplify the implementation
# of `hash_tree_root` for variable-sized objects.
#
# Since the hash result is defined over the serialized representation
# of the objects and not over their memory contents, computing the
# hash naturally involves executing the serialization routines.
#
# It would be wasteful if we need to allocate memory for this, so we
# take a short-cut by defining a "virtual" output stream that will
# hash the serialized output as it is being produced.
#
# Since SSZ uses delayed writes, sometimes the buffering mechanism
# of FastStreams will allocate just enough memory to resolve the
# delayed writes, but our expectation is that most of the time
# the stream will be drained with just a single reusable memory
# page (currently set to 4000 bytes which equals 125 chunks).
writeSync: proc (s: OutputStream, src: pointer, srcLen: Natural)
{.nimcall, gcsafe, raises: [Defect, IOError].} =
let merkleizer = SszHashingStream(s).merkleizer
# This drains the FastStreams buffers in the usual way, simulating
# writing to an external device, but in our case, we just divide
# the content in chunks and feed them to the merkleizer:
implementWrites(s.buffers, src, srcLen, "Hashing Stream",
writeStartAddr, writeLen):
var
remainingBytes = writeLen
pos = cast[ptr byte](writeStartAddr)
while remainingBytes >= bytesPerChunk:
merkleizer.addChunk(makeOpenArray(pos, bytesPerChunk))
pos = offset(pos, bytesPerChunk)
remainingBytes -= bytesPerChunk
if remainingBytes > 0:
merkleizer.addChunk(makeOpenArray(pos, remainingBytes))
writeLen
,
flushSync: proc (s: OutputStream) {.nimcall, gcsafe.} =
discard
)
func newSszHashingStream(merkleizer: SszChunksMerkleizer): OutputStream =
# This is very close to the optimal page size.
const pageSize = 4000
# What's important for us is that it will hold complete chunks:
static: assert(pageSize mod bytesPerChunk == 0)
var
buffers = initPageBuffers(pageSize)
span = buffers.getWritableSpan
SszHashingStream(vtable: vtableAddr SszHashingStreamVTable,
buffers: buffers,
span: span,
spanEndPos: span.len,
merkleizer: merkleizer)
func mixInLength(root: Eth2Digest, length: int): Eth2Digest =
var dataLen: array[32, byte]
dataLen[0..<8] = uint64(length).toBytesLE()
hash(root.data, dataLen)
func merkleizeSerializedChunks(merkleizer: SszChunksMerkleizer,
obj: auto): Eth2Digest =
try:
var hashingStream = newSszHashingStream merkleizer
{.noSideEffect.}:
# We assume there are no side-effects here, because the
# SszHashingStream is keeping all of its output in memory.
hashingStream.writeFixedSized obj
close hashingStream
merkleizer.getFinalHash
except IOError as e:
# Hashing shouldn't raise in theory but because of abstraction
# tax in the faststreams library, we have to do this at runtime
raiseAssert($e.msg)
func merkleizeSerializedChunks(obj: auto): Eth2Digest =
merkleizeSerializedChunks(SszChunksMerkleizer(), obj)
func hash_tree_root*(x: auto): Eth2Digest {.gcsafe, raises: [Defect].}
template merkleizeFields(body: untyped): Eth2Digest {.dirty.} =
var merkleizer {.inject.} = SszChunksMerkleizer()
template addField(field) =
let hash = hash_tree_root(field)
trs "MERKLEIZING FIELD ", astToStr(field), " = ", hash
addChunk(merkleizer, hash.data)
trs "CHUNK ADDED"
template addField2(field) {.used.}=
const maxLen = fieldMaxLen(field)
when maxLen > 0:
type FieldType = type field
addField TypeWithMaxLen[FieldType, maxLen](field)
else:
addField field
body
merkleizer.getFinalHash
func bitlistHashTreeRoot(merkleizer: SszChunksMerkleizer, x: BitSeq): Eth2Digest =
trs "CHUNKIFYING BIT SEQ WITH LIMIT ", merkleizer.limit
var
totalBytes = ByteList(x).len
lastCorrectedByte = ByteList(x)[^1]
if lastCorrectedByte == byte(1):
if totalBytes == 1:
# This is an empty bit list.
# It should be hashed as a tree containing all zeros:
let treeHeight = if merkleizer.limit == 0: 0
else: log2trunc(merkleizer.limit)
return mergeBranches(getZeroHashWithoutSideEffect(treeHeight),
getZeroHashWithoutSideEffect(0)) # this is the mixed length
totalBytes -= 1
lastCorrectedByte = ByteList(x)[^2]
else:
let markerPos = log2trunc(lastCorrectedByte)
lastCorrectedByte.clearBit(markerPos)
var
bytesInLastChunk = totalBytes mod bytesPerChunk
fullChunks = totalBytes div bytesPerChunk
if bytesInLastChunk == 0:
fullChunks -= 1
bytesInLastChunk = 32
for i in 0 ..< fullChunks:
let
chunkStartPos = i * bytesPerChunk
chunkEndPos = chunkStartPos + bytesPerChunk - 1
merkleizer.addChunk ByteList(x).toOpenArray(chunkEndPos, chunkEndPos)
var
lastChunk: array[bytesPerChunk, byte]
chunkStartPos = fullChunks * bytesPerChunk
for i in 0 .. bytesInLastChunk - 2:
lastChunk[i] = ByteList(x)[chunkStartPos + i]
lastChunk[bytesInLastChunk - 1] = lastCorrectedByte
merkleizer.addChunk lastChunk.toOpenArray(0, bytesInLastChunk - 1)
let contentsHash = merkleizer.getFinalHash
mixInLength contentsHash, x.len
func hashTreeRootImpl[T](x: T): Eth2Digest =
when T is SignedBeaconBlock:
unsupported T # Blocks are identified by htr(BeaconBlock) so we avoid these
elif T is uint64:
trs "UINT64; LITTLE-ENDIAN IDENTITY MAPPING"
result.data[0..<8] = x.toBytesLE()
elif (when T is array: ElemType(T) is byte and
sizeof(T) == sizeof(Eth2Digest) else: false):
# TODO is this sizeof comparison guranteed? it's whole structure vs field
trs "ETH2DIGEST; IDENTITY MAPPING"
Eth2Digest(data: x)
elif (T is BasicType) or (when T is array: ElemType(T) is BasicType else: false):
trs "FIXED TYPE; USE CHUNK STREAM"
merkleizeSerializedChunks x
elif T is string or (when T is (seq|openarray): ElemType(T) is BasicType else: false):
trs "TYPE WITH LENGTH"
mixInLength merkleizeSerializedChunks(x), x.len
elif T is array|object|tuple:
trs "MERKLEIZING FIELDS"
merkleizeFields:
x.enumerateSubFields(f):
const maxLen = fieldMaxLen(f)
when maxLen > 0:
type FieldType = type f
addField TypeWithMaxLen[FieldType, maxLen](f)
else:
addField f
elif T is seq:
trs "SEQ WITH VAR SIZE"
let hash = merkleizeFields(for e in x: addField e)
mixInLength hash, x.len
#elif isCaseObject(T):
# # TODO implement this
else:
unsupported T
func maxChunksCount(T: type, maxLen: static int64): int64 {.compileTime.} =
when T is BitList:
(maxLen + bitsPerChunk - 1) div bitsPerChunk
elif T is seq:
type E = ElemType(T)
when E is BasicType:
(maxLen * sizeof(E) + bytesPerChunk - 1) div bytesPerChunk
else:
maxLen
else:
unsupported T # This should never happen
func hash_tree_root*(x: auto): Eth2Digest {.raises: [Defect].} =
trs "STARTING HASH TREE ROOT FOR TYPE ", name(type(x))
mixin toSszType
when x is TypeWithMaxLen:
const maxLen = x.maxLen
type T = type valueOf(x)
const limit = maxChunksCount(T, maxLen)
var merkleizer = SszChunksMerkleizer(limit: uint64(limit))
when T is BitList:
result = merkleizer.bitlistHashTreeRoot(BitSeq valueOf(x))
elif T is seq:
type E = ElemType(T)
let contentsHash = when E is BasicType:
merkleizeSerializedChunks(merkleizer, valueOf(x))
else:
for elem in valueOf(x):
let elemHash = hash_tree_root(elem)
merkleizer.addChunk(elemHash.data)
merkleizer.getFinalHash()
result = mixInLength(contentsHash, valueOf(x).len)
else:
unsupported T # This should never happen
else:
result = hashTreeRootImpl toSszType(x)
trs "HASH TREE ROOT FOR ", name(type x), " = ", "0x", $result
iterator hash_tree_roots_prefix*[T](lst: openarray[T], limit: auto):
Eth2Digest =
# This is a particular type's instantiation of a general fold, reduce,
# accumulation, prefix sums, etc family of operations. As long as that
# Eth1 deposit case is the only notable example -- the usual uses of a
# list involve, at some point, tree-hashing it -- finalized hashes are
# the only abstraction that escapes from this module this way.
var merkleizer = SszChunksMerkleizer(limit: uint64(limit))
for i, elem in lst:
merkleizer.addChunk(hash_tree_root(elem).data)
yield mixInLength(merkleizer.getFinalHash(), i + 1)