Jacek Sieka 2a67ac3c05 trie -> kv store
* simplify data storage to key-value, tries are not relevant for NBC
* locked-down version of lmdb dependency
* easier to build / maintain on various platforms
2020-01-20 13:39:37 +00:00

598 lines
20 KiB
Nim

# beacon_chain
# Copyright (c) 2018 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
# SSZ Serialization (simple serialize)
# See https://github.com/ethereum/eth2.0-specs/blob/master/specs/simple-serialize.md
import
endians, stew/shims/macros, options, algorithm, options,
stew/[bitops2, bitseqs, objects, varints, ptrops, ranges/ptr_arith], stint,
faststreams/input_stream, serialization, serialization/testing/tracing,
nimcrypto/sha2, blscurve,
./spec/[crypto, datatypes, digest],
./ssz/[types, bytes_reader]
# ################### Helper functions ###################################
export
serialization, types, bytes_reader
when defined(serialization_tracing):
import
typetraits, stew/ranges/ptr_arith
const
bytesPerChunk = 32
bitsPerChunk = bytesPerChunk * 8
maxChunkTreeDepth = 25
defaultMaxObjectSize = 1 * 1024 * 1024
type
SszReader* = object
stream: ByteStreamVar
maxObjectSize: int
SszWriter* = object
stream: OutputStreamVar
BasicType = char|bool|SomeUnsignedInt|StUint|ValidatorIndex
SszChunksMerkelizer = ref object of RootObj
combinedChunks: array[maxChunkTreeDepth, Eth2Digest]
totalChunks: uint64
limit: uint64
TypeWithMaxLen*[T; maxLen: static int64] = distinct T
SizePrefixed*[T] = distinct T
SszMaxSizeExceeded* = object of SerializationError
VarSizedWriterCtx = object
fixedParts: WriteCursor
offset: int
FixedSizedWriterCtx = object
Bytes = seq[byte]
serializationFormat SSZ,
Reader = SszReader,
Writer = SszWriter,
PreferedOutput = seq[byte]
template sizePrefixed*[TT](x: TT): untyped =
type T = TT
SizePrefixed[T](x)
proc init*(T: type SszReader,
stream: ByteStreamVar,
maxObjectSize = defaultMaxObjectSize): T =
T(stream: stream, maxObjectSize: maxObjectSize)
proc mount*(F: type SSZ, stream: ByteStreamVar, T: type): T =
mixin readValue
var reader = init(SszReader, stream)
reader.readValue(T)
method formatMsg*(err: ref SszSizeMismatchError, filename: string): string {.gcsafe.} =
# TODO: implement proper error string
"Serialisation error while processing " & filename
when false:
# TODO: Nim can't handle yet this simpler definition. File an issue.
template valueOf[T; N](x: TypeWithMaxLen[T, N]): auto = T(x)
else:
proc unwrapImpl[T; N: static int64](x: ptr TypeWithMaxLen[T, N]): ptr T =
cast[ptr T](x)
template valueOf(x: TypeWithMaxLen): auto =
let xaddr = unsafeAddr x
unwrapImpl(xaddr)[]
template sszList*(x: seq|array, maxLen: static int64): auto =
TypeWithMaxLen[type(x), maxLen](x)
template toSszType*(x: auto): auto =
mixin toSszType
when x is Slot|Epoch|ValidatorIndex|enum: uint64(x)
elif x is Eth2Digest: x.data
elif x is BlsValue|BlsCurveType: getBytes(x)
elif x is BitSeq|BitList: Bytes(x)
elif x is ref|ptr: toSszType x[]
elif x is Option: toSszType x.get
elif x is TypeWithMaxLen: toSszType valueOf(x)
elif useListType and x is List: seq[x.T](x)
else: x
func writeFixedSized(c: var WriteCursor, x: auto) =
mixin toSszType
when x is byte:
c.append x
elif x is bool|char:
c.append byte(ord(x))
elif x is SomeUnsignedInt:
when system.cpuEndian == bigEndian:
## Convert directly to bytes the size of the int. (e.g. ``uint16 = 2 bytes``)
## All integers are serialized as **little endian**.
var bytes: array[sizeof(x), byte]
when x.sizeof == 8: littleEndian64(addr bytes[0], x.unsafeAddr)
elif x.sizeof == 4: littleEndian32(addr bytes[0], x.unsafeAddr)
elif x.sizeof == 2: littleEndian16(addr bytes[0], x.unsafeAddr)
elif x.sizeof == 1: copyMem(addr bytes[0], x.unsafeAddr, sizeof(x))
else: unsupported x.type
c.append bytes
else:
let valueAddr {.used.} = unsafeAddr x
trs "APPENDING INT ", x, " = ", makeOpenArray(cast[ptr byte](valueAddr), sizeof(x))
c.appendMemCopy x
elif x is StUint:
c.appendMemCopy x # TODO: Is this always correct?
elif x is array|string|seq|openarray:
when x[0] is byte:
trs "APPENDING FIXED SIZE BYTES", x
c.append x
else:
for elem in x:
trs "WRITING FIXED SIZE ARRAY ELEMENT"
c.writeFixedSized toSszType(elem)
elif x is tuple|object:
enumInstanceSerializedFields(x, fieldName, field):
trs "WRITING FIXED SIZE FIELD", fieldName
c.writeFixedSized toSszType(field)
else:
unsupported x.type
template writeFixedSized(s: OutputStreamVar, x: auto) =
writeFixedSized(s.cursor, x)
template supports*(_: type SSZ, T: type): bool =
mixin toSszType
anonConst compiles(fixedPortionSize toSszType(default(T)))
func init*(T: type SszWriter, stream: OutputStreamVar): T =
result.stream = stream
template enumerateSubFields(holder, fieldVar, body: untyped) =
when holder is array|string|seq|openarray:
for fieldVar in holder: body
else:
enumInstanceSerializedFields(holder, _, fieldVar): body
func writeVarSizeType(w: var SszWriter, value: auto) {.gcsafe.}
func beginRecord*(w: var SszWriter, TT: type): auto =
type T = TT
when isFixedSize(T):
FixedSizedWriterCtx()
else:
const offset = when T is array: len(T) * offsetSize
else: fixedPortionSize(T)
VarSizedWriterCtx(offset: offset,
fixedParts: w.stream.delayFixedSizeWrite(offset))
template writeField*(w: var SszWriter,
ctx: var auto,
fieldName: string,
field: auto) =
mixin toSszType
when ctx is FixedSizedWriterCtx:
writeFixedSized(w.stream, toSszType(field))
else:
type FieldType = type toSszType(field)
when isFixedSize(FieldType):
ctx.fixedParts.writeFixedSized toSszType(field)
else:
trs "WRITING OFFSET ", ctx.offset, " FOR ", fieldName
ctx.fixedParts.writeFixedSized uint32(ctx.offset)
let initPos = w.stream.pos
trs "WRITING VAR SIZE VALUE OF TYPE ", name(FieldType)
when FieldType is BitSeq:
trs "BIT SEQ ", Bytes(field)
writeVarSizeType(w, toSszType(field))
ctx.offset += w.stream.pos - initPos
template endRecord*(w: var SszWriter, ctx: var auto) =
when ctx is VarSizedWriterCtx:
finalize ctx.fixedParts
func writeVarSizeType(w: var SszWriter, value: auto) =
trs "STARTING VAR SIZE TYPE"
mixin toSszType
type T = type toSszType(value)
when T is seq|string|openarray:
type E = ElemType(T)
const isFixed = when E is Option: false
else: isFixedSize(E)
when isFixed:
trs "WRITING LIST WITH FIXED SIZE ELEMENTS"
for elem in value:
w.stream.writeFixedSized toSszType(elem)
trs "DONE"
else:
trs "WRITING LIST WITH VAR SIZE ELEMENTS"
var offset = value.len * offsetSize
var cursor = w.stream.delayFixedSizeWrite offset
for elem in value:
cursor.writeFixedSized uint32(offset)
when elem is Option:
if not isSome(elem): continue
elif elem is ptr|ref:
if isNil(elem): continue
let initPos = w.stream.pos
w.writeVarSizeType toSszType(elem)
offset += w.stream.pos - initPos
finalize cursor
trs "DONE"
elif T is object|tuple|array:
trs "WRITING OBJECT OR ARRAY"
var ctx = beginRecord(w, T)
enumerateSubFields(value, field):
writeField w, ctx, astToStr(field), field
endRecord w, ctx
func writeValue*(w: var SszWriter, x: auto) {.gcsafe.} =
mixin toSszType
type T = type toSszType(x)
when isFixedSize(T):
w.stream.writeFixedSized toSszType(x)
elif T is array|seq|openarray|string|object|tuple:
w.writeVarSizeType toSszType(x)
else:
unsupported type(x)
func writeValue*[T](w: var SszWriter, x: SizePrefixed[T]) =
var cursor = w.stream.delayVarSizeWrite(10)
let initPos = w.stream.pos
w.writeValue T(x)
let length = uint64(w.stream.pos - initPos)
when false:
discard
# TODO varintBytes is sub-optimal at the moment
# cursor.writeAndFinalize length.varintBytes
else:
var buf: VarintBuffer
buf.appendVarint length
cursor.writeAndFinalize buf.writtenBytes
template fromSszBytes*[T; N](_: type TypeWithMaxLen[T, N],
bytes: openarray[byte]): auto =
mixin fromSszBytes
fromSszBytes(T, bytes)
proc readValue*(r: var SszReader, val: var auto) =
val = readSszValue(r.stream.readBytes(r.stream.endPos), val.type)
proc readValue*[T](r: var SszReader, val: var SizePrefixed[T]) =
let length = r.stream.readVarint(uint64)
if length > r.maxObjectSize:
raise newException(SszMaxSizeExceeded,
"Maximum SSZ object size exceeded: " & $length)
val = readSszValue(r.stream.readBytes(length), T)
const
zeroChunk = default array[32, byte]
func hash(a, b: openArray[byte]): Eth2Digest =
result = withEth2Hash:
trs "MERGING BRANCHES "
trs a
trs b
h.update a
h.update b
trs "HASH RESULT ", result
func mergeBranches(existing: Eth2Digest, newData: openarray[byte]): Eth2Digest =
result = withEth2Hash:
trs "MERGING BRANCHES OPEN ARRAY"
trs existing.data
trs newData
h.update existing.data
h.update newData
let paddingBytes = bytesPerChunk - newData.len
if paddingBytes > 0:
trs "USING ", paddingBytes, " PADDING BYTES"
h.update zeroChunk[0 ..< paddingBytes]
trs "HASH RESULT ", result
template mergeBranches(a, b: Eth2Digest): Eth2Digest =
hash(a.data, b.data)
func computeZeroHashes: array[100, Eth2Digest] =
result[0] = Eth2Digest(data: zeroChunk)
for i in 1 .. result.high:
result[i] = mergeBranches(result[i - 1], result[i - 1])
let zeroHashes = computeZeroHashes()
func getZeroHashWithoutSideEffect(idx: int): Eth2Digest =
# TODO this is a work-around for the somewhat broken side
# effects analysis of Nim - reading from global let variables
# is considered a side-effect.
{.noSideEffect.}:
zeroHashes[idx]
func addChunk(merkelizer: SszChunksMerkelizer, data: openarray[byte]) =
doAssert data.len > 0 and data.len <= bytesPerChunk
if not getBitLE(merkelizer.totalChunks, 0):
let chunkStartAddr = addr merkelizer.combinedChunks[0].data[0]
copyMem(chunkStartAddr, unsafeAddr data[0], data.len)
zeroMem(chunkStartAddr.offset(data.len), bytesPerChunk - data.len)
trs "WROTE BASE CHUNK ", merkelizer.combinedChunks[0]
else:
var hash = mergeBranches(merkelizer.combinedChunks[0], data)
for i in 1 .. high(merkelizer.combinedChunks):
trs "ITERATING"
if getBitLE(merkelizer.totalChunks, i):
trs "CALLING MERGE BRANCHES"
hash = mergeBranches(merkelizer.combinedChunks[i], hash)
else:
trs "WRITING FRESH CHUNK AT ", i, " = ", hash
merkelizer.combinedChunks[i] = hash
break
inc merkelizer.totalChunks
func getFinalHash(merkelizer: SszChunksMerkelizer): Eth2Digest =
let limit = merkelizer.limit
if merkelizer.totalChunks == 0:
let limitHeight = if limit != 0: bitWidth(limit - 1) else: 0
return getZeroHashWithoutSideEffect(limitHeight)
let
bottomHashIdx = firstOne(merkelizer.totalChunks) - 1
submittedChunksHeight = bitWidth(merkelizer.totalChunks - 1)
topHashIdx = if limit <= 1: submittedChunksHeight
else: max(submittedChunksHeight, bitWidth(limit - 1))
trs "BOTTOM HASH ", bottomHashIdx
trs "SUBMITTED HEIGHT ", submittedChunksHeight
trs "LIMIT ", limit
if bottomHashIdx != submittedChunksHeight:
# Our tree is not finished. We must complete the work in progress
# branches and then extend the tree to the right height.
result = mergeBranches(merkelizer.combinedChunks[bottomHashIdx],
getZeroHashWithoutSideEffect(bottomHashIdx))
for i in bottomHashIdx + 1 ..< topHashIdx:
if getBitLE(merkelizer.totalChunks, i):
result = mergeBranches(merkelizer.combinedChunks[i], result)
trs "COMBINED"
else:
result = mergeBranches(result, getZeroHashWithoutSideEffect(i))
trs "COMBINED WITH ZERO"
elif bottomHashIdx == topHashIdx:
# We have a perfect tree (chunks == 2**n) at just the right height!
result = merkelizer.combinedChunks[bottomHashIdx]
else:
# We have a perfect tree of user chunks, but we have more work to
# do - we must extend it to reach the desired height
result = mergeBranches(merkelizer.combinedChunks[bottomHashIdx],
getZeroHashWithoutSideEffect(bottomHashIdx))
for i in bottomHashIdx + 1 ..< topHashIdx:
result = mergeBranches(result, getZeroHashWithoutSideEffect(i))
let HashingStreamVTable = OutputStreamVTable(
writePage: proc (s: OutputStreamVar, data: openarray[byte])
{.nimcall, gcsafe, raises: [IOError].} =
trs "ADDING STREAM CHUNK ", data
SszChunksMerkelizer(s.outputDevice).addChunk(data)
,
flush: proc (s: OutputStreamVar) {.nimcall, gcsafe.} =
discard
)
func getVtableAddresWithoutSideEffect: ptr OutputStreamVTable =
# TODO this is a work-around for the somewhat broken side
# effects analysis of Nim - reading from global let variables
# is considered a side-effect.
{.noSideEffect.}:
unsafeAddr HashingStreamVTable
func newSszHashingStream(merkelizer: SszChunksMerkelizer): ref OutputStream =
new result
result.initWithSinglePage(pageSize = bytesPerChunk,
maxWriteSize = bytesPerChunk,
minWriteSize = bytesPerChunk)
result.outputDevice = merkelizer
result.vtable = getVtableAddresWithoutSideEffect()
func mixInLength(root: Eth2Digest, length: int): Eth2Digest =
var dataLen: array[32, byte]
var lstLen = uint64(length)
littleEndian64(addr dataLen[0], addr lstLen)
hash(root.data, dataLen)
func merkelizeSerializedChunks(merkelizer: SszChunksMerkelizer,
obj: auto): Eth2Digest =
var hashingStream = newSszHashingStream merkelizer
hashingStream.writeFixedSized obj
hashingStream.flush
merkelizer.getFinalHash
func merkelizeSerializedChunks(obj: auto): Eth2Digest =
merkelizeSerializedChunks(SszChunksMerkelizer(), obj)
func hash_tree_root*(x: auto): Eth2Digest {.gcsafe.}
template merkelizeFields(body: untyped): Eth2Digest {.dirty.} =
var merkelizer {.inject.} = SszChunksMerkelizer()
template addField(field) =
let hash = hash_tree_root(field)
trs "MERKLEIZING FIELD ", astToStr(field), " = ", hash
addChunk(merkelizer, hash.data)
trs "CHUNK ADDED"
template addField2(field) {.used.}=
const maxLen = fieldMaxLen(field)
when maxLen > 0:
type FieldType = type field
addField TypeWithMaxLen[FieldType, maxLen](field)
else:
addField field
body
merkelizer.getFinalHash
func bitlistHashTreeRoot(merkelizer: SszChunksMerkelizer, x: BitSeq): Eth2Digest =
trs "CHUNKIFYING BIT SEQ WITH LIMIT ", merkelizer.limit
var
totalBytes = Bytes(x).len
lastCorrectedByte = Bytes(x)[^1]
if lastCorrectedByte == byte(1):
if totalBytes == 1:
# This is an empty bit list.
# It should be hashed as a tree containing all zeros:
let treeHeight = if merkelizer.limit == 0: 0
else: log2trunc(merkelizer.limit)
return mergeBranches(getZeroHashWithoutSideEffect(treeHeight),
getZeroHashWithoutSideEffect(0)) # this is the mixed length
totalBytes -= 1
lastCorrectedByte = Bytes(x)[^2]
else:
let markerPos = log2trunc(lastCorrectedByte)
lastCorrectedByte.clearBit(markerPos)
var
bytesInLastChunk = totalBytes mod bytesPerChunk
fullChunks = totalBytes div bytesPerChunk
if bytesInLastChunk == 0:
fullChunks -= 1
bytesInLastChunk = 32
for i in 0 ..< fullChunks:
let
chunkStartPos = i * bytesPerChunk
chunkEndPos = chunkStartPos + bytesPerChunk - 1
merkelizer.addChunk Bytes(x).toOpenArray(chunkEndPos, chunkEndPos)
var
lastChunk: array[bytesPerChunk, byte]
chunkStartPos = fullChunks * bytesPerChunk
for i in 0 .. bytesInLastChunk - 2:
lastChunk[i] = Bytes(x)[chunkStartPos + i]
lastChunk[bytesInLastChunk - 1] = lastCorrectedByte
merkelizer.addChunk lastChunk.toOpenArray(0, bytesInLastChunk - 1)
let contentsHash = merkelizer.getFinalHash
mixInLength contentsHash, x.len
func hashTreeRootImpl[T](x: T): Eth2Digest =
when T is uint64:
trs "UINT64; LITTLE-ENDIAN IDENTITY MAPPING"
when system.cpuEndian == bigEndian:
littleEndian64(addr result.data[0], x.unsafeAddr)
else:
let valueAddr = unsafeAddr x
result.data[0..7] = makeOpenArray(cast[ptr byte](valueAddr), 8)
elif (when T is array: ElemType(T) is byte and
sizeof(T) == sizeof(Eth2Digest) else: false):
# TODO is this sizeof comparison guranteed? it's whole structure vs field
trs "ETH2DIGEST; IDENTITY MAPPING"
Eth2Digest(data: x)
elif (T is BasicType) or (when T is array: ElemType(T) is BasicType else: false):
trs "FIXED TYPE; USE CHUNK STREAM"
merkelizeSerializedChunks x
elif T is string or (when T is (seq|openarray): ElemType(T) is BasicType else: false):
trs "TYPE WITH LENGTH"
mixInLength merkelizeSerializedChunks(x), x.len
elif T is array|object|tuple:
trs "MERKELIZING FIELDS"
merkelizeFields:
x.enumerateSubFields(f):
const maxLen = fieldMaxLen(f)
when maxLen > 0:
type FieldType = type f
addField TypeWithMaxLen[FieldType, maxLen](f)
else:
addField f
elif T is seq:
trs "SEQ WITH VAR SIZE"
let hash = merkelizeFields(for e in x: addField e)
mixInLength hash, x.len
#elif isCaseObject(T):
# # TODO implement this
else:
unsupported T
func maxChunksCount(T: type, maxLen: static int64): int64 {.compileTime.} =
when T is BitList:
(maxLen + bitsPerChunk - 1) div bitsPerChunk
elif T is seq:
type E = ElemType(T)
when E is BasicType:
(maxLen * sizeof(E) + bytesPerChunk - 1) div bytesPerChunk
else:
maxLen
else:
unsupported T # This should never happen
func hash_tree_root*(x: auto): Eth2Digest =
trs "STARTING HASH TREE ROOT FOR TYPE ", name(type(x))
mixin toSszType
when x is SignedBeaconBlock:
doassert false
when x is TypeWithMaxLen:
const maxLen = x.maxLen
type T = type valueOf(x)
const limit = maxChunksCount(T, maxLen)
var merkelizer = SszChunksMerkelizer(limit: uint64(limit))
when T is BitList:
result = merkelizer.bitlistHashTreeRoot(BitSeq valueOf(x))
elif T is seq:
type E = ElemType(T)
let contentsHash = when E is BasicType:
merkelizeSerializedChunks(merkelizer, valueOf(x))
else:
for elem in valueOf(x):
let elemHash = hash_tree_root(elem)
merkelizer.addChunk(elemHash.data)
merkelizer.getFinalHash()
result = mixInLength(contentsHash, valueOf(x).len)
else:
unsupported T # This should never happen
else:
result = hashTreeRootImpl toSszType(x)
trs "HASH TREE ROOT FOR ", name(type x), " = ", "0x", $result
iterator hash_tree_roots_prefix*[T](lst: openarray[T], limit: auto):
Eth2Digest =
# This is a particular type's instantiation of a general fold, reduce,
# accumulation, prefix sums, etc family of operations. As long as that
# Eth1 deposit case is the only notable example -- the usual uses of a
# list involve, at some point, tree-hashing it -- finalized hashes are
# the only abstraction that escapes from this module this way.
var merkelizer = SszChunksMerkelizer(limit: uint64(limit))
for i, elem in lst:
merkelizer.addChunk(hash_tree_root(elem).data)
yield mixInLength(merkelizer.getFinalHash(), i + 1)