mirror of
synced 2025-02-08 20:33:41 +00:00
Root encoding is on the hot path for block verification both in the consensus (when syncing) and execution clients and, oddly, constitutes a significant part of resource usage even though it is not that much work. While the trie code is capable of producing a transaction root and similar feats, it turns out that it is quite inefficient - even for small workloads. This PR brings in a helper for the specific use case of building tries of lists of values whose key is the RLP-encoded index of the item. As it happens, such keys follow a particular structure where items end up "almost" sorted, with the exception of the item at index 0, which gets encoded as `[0x80]`, i.e. the empty byte string, thus moving it to a new location. Armed with this knowledge and the understanding that inserting ordered items into a trie can easily be done with a simple recursion, this PR brings a ~100x improvement in CPU usage (360 ms vs 33 s) and a ~50x reduction in memory usage (70 MB vs >3 GB!) for the simple test of encoding 1000000 keys. In part, the memory usage reduction is due to a trick where the hash of the item is computed as the item is being added instead of storing it in the value. Further reductions are possible, such as maintaining a hasher per level instead of storing hash values, as well as using a direct-to-hash RLP encoder.
355 lines
11 KiB
355 lines
11 KiB
stew/[arraybuf, assign2, bitops2, shims/macros],
export arraybuf
# State of an incremental RLP encoder.
RlpWriter* = object
# One entry per list opened by `startList` that is still waiting for items:
# the number of items still expected and the `output` offset recorded when
# the list was opened.
pendingLists: seq[tuple[remainingItems, outBytes: int]]
# Encoded bytes produced so far.
output: seq[byte]
RlpIntBuf* = ArrayBuf[9, byte]
## Small buffer for holding a single RLP-encoded integer
# Default value of the `wrapInList` parameter of `appendRecordType`.
wrapObjsInList* = true
func bytesNeeded(num: SomeUnsignedInt): int =
  ## Size of `num` in bytes once whole leading zero bytes of its big-endian
  ## form are left out.
  let leadingZeroBytes = num.leadingZeros() shr 3
  result = sizeof(num) - leadingZeroBytes
func writeBigEndian(outStream: var auto, number: SomeUnsignedInt,
                    lastByteIdx: int, numberOfBytes: int) =
  ## Stores the low `numberOfBytes` bytes of `number` into `outStream` in
  ## big-endian order: the least significant byte lands at `lastByteIdx` and
  ## each more significant byte at the index before it. `outStream` is
  ## indexed in place and never resized; nothing is written when
  ## `numberOfBytes` is 0.
  let firstByteIdx = lastByteIdx - numberOfBytes + 1
  var
    remaining = number
    pos = lastByteIdx
  while pos >= firstByteIdx:
    outStream[pos] = byte(remaining and 0xff)
    remaining = remaining shr 8
    dec pos
func writeBigEndian(outStream: var auto, number: SomeUnsignedInt,
                    numberOfBytes: int) {.inline.} =
  ## Grows `outStream` by `numberOfBytes` and writes `number` in big-endian
  ## order into the newly added tail.
  let grownLen = outStream.len + numberOfBytes
  outStream.setLen(grownLen)
  writeBigEndian(outStream, number, grownLen - 1, numberOfBytes)
func writeCount(bytes: var auto, count: int, baseMarker: byte) =
# Appends a length prefix for `count` to `bytes`, derived from `baseMarker`.
# NOTE(review): the `if`/`else:` lines selecting between the two forms below
# and the `let` introducing the locals are not visible in this view —
# confirm against the full file.
# Short form: a single byte, `baseMarker + count`.
bytes.add(baseMarker + byte(count))
# Long form: one marker byte that carries the number of length bytes,
# followed by `count` itself written in big-endian order.
origLen = bytes.len
lenPrefixBytes = uint64(count).bytesNeeded
bytes.setLen(origLen + int(lenPrefixBytes) + 1)
bytes[origLen] = baseMarker + (THRESHOLD_LIST_LEN - 1) + byte(lenPrefixBytes)
bytes.writeBigEndian(uint64(count), bytes.len - 1, lenPrefixBytes)
func writeInt(outStream: var auto, i: SomeUnsignedInt) =
# Appends the encoding of the unsigned integer `i` to `outStream`.
# NOTE(review): the body of the zero branch and the final `else:` line are
# not visible in this view — confirm against the full file.
if i == typeof(i)(0):
elif i < typeof(i)(BLOB_START_MARKER):
# Values below BLOB_START_MARKER are emitted as one raw byte.
outStream.add byte(i)
# Remaining case: a length prefix for the byte count, then the integer's
# big-endian bytes without leading zero bytes.
let bytesNeeded = i.bytesNeeded
outStream.writeCount(bytesNeeded, BLOB_START_MARKER)
outStream.writeBigEndian(i, bytesNeeded)
proc initRlpWriter*: RlpWriter =
  ## Creates an empty writer whose output buffer has capacity reserved up
  ## front.
  # Writers are expected to be short-lived, so reserving 2000 bytes does no
  # harm and spares reallocations while the first small items are written.
  RlpWriter(output: newSeqOfCap[byte](2000))
proc decRet(n: var int, delta: int): int =
# Subtracts `delta` from `n` in place.
# NOTE(review): the line producing the return value is not visible in this
# view; the caller compares the result against 0, so this presumably returns
# the updated `n` — confirm against the full file.
n -= delta
proc maybeClosePendingLists(self: var RlpWriter) =
# Counts one finished item against the innermost open list. When that
# completes the list, its length prefix is inserted in front of the list
# body and the loop continues with the enclosing list, if any.
# NOTE(review): several lines of this proc are not visible in this view (the
# last `moveMem` argument, the `if`/`else:` choosing between the two prefix
# forms, and the loop exit for an unfinished list) — confirm against the
# full file.
while self.pendingLists.len > 0:
let lastListIdx = self.pendingLists.len - 1
doAssert self.pendingLists[lastListIdx].remainingItems >= 1
if decRet(self.pendingLists[lastListIdx].remainingItems, 1) == 0:
# A list has just been finished. It was started in `startList`.
let listStartPos = self.pendingLists[lastListIdx].outBytes
self.pendingLists.setLen lastListIdx
# How many bytes were written since the start?
let listLen = self.output.len - listStartPos
# Compute the number of bytes required to write down the list length
let totalPrefixBytes = if listLen < int(THRESHOLD_LIST_LEN): 1
else: int(uint64(listLen).bytesNeeded) + 1
# Shift the written data to make room for the prefix length
self.output.setLen(self.output.len + totalPrefixBytes)
moveMem(addr self.output[listStartPos + totalPrefixBytes],
unsafeAddr self.output[listStartPos],
# Write out the prefix length
# Short form: a single byte, LIST_START_MARKER + listLen.
self.output[listStartPos] = LIST_START_MARKER + byte(listLen)
# Long form: a marker byte carrying the number of length bytes, followed by
# `listLen` in big-endian order.
let listLenBytes = totalPrefixBytes - 1
self.output[listStartPos] = LEN_PREFIXED_LIST_MARKER + byte(listLenBytes)
self.output.writeBigEndian(uint64(listLen), listStartPos + listLenBytes, listLenBytes)
# The currently open list is not finished yet. Nothing to do.
proc appendRawBytes*(self: var RlpWriter, bytes: openArray[byte]) =
# Grows the output by `bytes.len`; the fragment below addresses the newly
# added tail of `output` together with `bytes`.
# NOTE(review): the beginning of the second statement is cut off in this
# view, and any follow-up statements are not visible — confirm against the
# full file.
self.output.setLen(self.output.len + bytes.len)
self.output.len - bytes.len, self.output.len - 1), bytes)
proc appendRawList(self: var RlpWriter, bytes: openArray[byte]) =
# Writes a list length prefix for `bytes.len` to the output.
# NOTE(review): whatever follows the prefix (presumably appending `bytes`
# itself) is not visible in this view — confirm against the full file.
self.output.writeCount(bytes.len, LIST_START_MARKER)
proc startList*(self: var RlpWriter, listSize: int) =
# Opens a list that expects `listSize` items.
# NOTE(review): the body of the `listSize == 0` branch and the `else:` line
# are not visible in this view — confirm against the full file.
if listSize == 0:
# Remember the expected item count and the current output offset; the
# prefix is written once all items have arrived
# (see `maybeClosePendingLists`).
self.pendingLists.add((listSize, self.output.len))
proc appendBlob(self: var RlpWriter, data: openArray[byte], startMarker: byte) =
# Appends `data` as a blob whose length prefix is based on `startMarker`.
# NOTE(review): the `else:` line and the statements after the prefix are not
# visible in this view — confirm against the full file.
if data.len == 1 and byte(data[0]) < BLOB_START_MARKER:
# A single byte below BLOB_START_MARKER is written as-is, without a prefix.
self.output.add byte(data[0])
# Any other input starts with a length prefix for `data.len`.
self.output.writeCount(data.len, startMarker)
proc appendImpl(self: var RlpWriter, data: string) =
  ## Encodes the bytes of the string `data` as a blob.
  self.appendBlob(data.toOpenArrayByte(0, data.len - 1), BLOB_START_MARKER)
proc appendBlob(self: var RlpWriter, data: openArray[byte]) =
  ## Appends `data` as a blob using `BLOB_START_MARKER` as the prefix base.
  self.appendBlob(data, BLOB_START_MARKER)
proc appendBlob(self: var RlpWriter, data: openArray[char]) =
  ## Appends the bytes of the character sequence `data` as a blob using
  ## `BLOB_START_MARKER` as the prefix base.
  self.appendBlob(data.toOpenArrayByte(0, data.len - 1), BLOB_START_MARKER)
proc appendInt(self: var RlpWriter, i: SomeUnsignedInt) =
# Appends the unsigned integer `i` to the writer.
# NOTE(review): the statements of this proc are not visible in this view —
# confirm against the full file.
# this is created as a separate proc as an extra precaution against
# any overloading resolution problems when matching the IntLike concept.
template appendImpl(self: var RlpWriter, i: SomeUnsignedInt) =
  ## Routes unsigned integers to `appendInt`.
  self.appendInt(i)
template appendImpl(self: var RlpWriter, e: enum) =
  ## Encodes an enum through its ordinal value.
  # Converted to `uint64` rather than `int`: only unsigned integers have an
  # `appendImpl` overload in this module, and `append` rejects signed
  # integers with a compile-time error.
  appendImpl(self, uint64(e))
template appendImpl(self: var RlpWriter, b: bool) =
  ## Encodes a bool through its ordinal value (`false` = 0, `true` = 1).
  # Converted to `uint64` rather than `int`: only unsigned integers have an
  # `appendImpl` overload in this module, and `append` rejects signed
  # integers with a compile-time error.
  appendImpl(self, uint64(b))
proc appendImpl[T](self: var RlpWriter, listOrBlob: openArray[T]) =
# Appends an array either as a blob (byte/char elements) or as a list with
# one `append` call per element.
# NOTE(review): the body of the byte/char branch and the `else:` line are
# not visible in this view — confirm against the full file.
mixin append
# TODO: This append proc should be overloaded by `openArray[byte]` after
# nim bug #7416 is fixed.
when T is (byte or char):
# Other element types: open a list of `listOrBlob.len` items and append each
# element in order.
self.startList listOrBlob.len
for i in 0 ..< listOrBlob.len:
self.append listOrBlob[i]
proc hasOptionalFields(T: type): bool =
  ## Reports whether any RLP field of `T` is an `Option` or `Opt`. The answer
  ## is evaluated into a constant.
  mixin enumerateRlpFields

  proc scanFields: bool =
    var sample: T
    result = false
    template flagOptional(RecordType, fieldName, field) {.used.} =
      # Leaves `scanFields` as soon as one optional field is found.
      when field is Option or field is Opt:
        return true
    enumerateRlpFields(sample, flagOptional)

  const anyOptional = scanFields()
  return anyOptional
proc optionalFieldsNum(x: openArray[bool]): int =
  ## Returns the length of the run of `true` flags at the end of `x`.
  # Walk backwards and stop at the first `false`.
  var idx = x.len - 1
  while idx >= 0 and x[idx]:
    inc result
    dec idx
proc checkedOptionalFields(T: type, FC: static[int]): int =
# Flags each of the `FC` RLP fields of `T` as optional (`Option`/`Opt`) or
# not, then returns the length of the trailing run of optional fields minus
# one.
# NOTE(review): the `var` line introducing the locals below is not visible
# in this view — confirm against the full file.
mixin enumerateRlpFields
i = 0
dummy: T
res: array[FC, bool]
# Invoked once per field: records whether the field is optional and moves on
# to the next slot of `res`.
template op(RT, fN, f) =
res[i] = f is Option or f is Opt
inc i
enumerateRlpFields(dummy, op)
# ignoring first optional fields
optionalFieldsNum(res) - 1
proc genPrevFields(obj: NimNode, fd: openArray[FieldDescription], hi, lo: int): NimNode =
  ## Builds a statement list holding, for each entry of `fd` from index `hi`
  ## down to `lo`, a `doAssert` that the matching field of `obj` `isSome`,
  ## with the message "<field name> expected".
  result = newStmtList()
  var idx = hi
  while idx >= lo:
    let
      fieldIdent = fd[idx].name
      failMsg = fieldIdent.strVal & " expected"
    result.add quote do:
      doAssert(`obj`.`fieldIdent`.isSome, `failMsg`)
    dec idx
macro genOptionalFieldsValidation(obj: untyped, T: type, num: static[int]): untyped =
# Generates run-time checks over the last `num` fields of `T`: for each such
# field, an `if obj.<field>.isSome:` statement is emitted, and `prevFields`
# holds `doAssert`s that the fields from the one before it down to index
# `loidx - 1` are also set.
# NOTE(review): the `let` line introducing the locals, the body of the
# generated `if`, and the structure around the example below are not fully
# visible in this view — confirm against the full file.
Tresolved = getType(T)[1]
fd = recordFields(Tresolved.getImpl)
loidx = fd.len-num
result = newStmtList()
for i in countdown(fd.high, loidx):
let fieldName = fd[i].name
let prevFields = genPrevFields(obj, fd, i-1, loidx-1)
result.add quote do:
if `obj`.`fieldName`.isSome:
# generate something like
# (the `when false:` section below is never compiled; it is illustration
# only)
when false:
if obj.fee.isNone:
doAssert(obj.withdrawalsRoot.isNone, "withdrawalsRoot needs fee")
doAssert(obj.blobGasUsed.isNone, "blobGasUsed needs fee")
doAssert(obj.excessBlobGas.isNone, "excessBlobGas needs fee")
if obj.withdrawalsRoot.isNone:
doAssert(obj.blobGasUsed.isNone, "blobGasUsed needs withdrawalsRoot")
doAssert(obj.excessBlobGas.isNone, "excessBlobGas needs withdrawalsRoot")
doAssert obj.blobGasUsed.isSome == obj.excessBlobGas.isSome,
"blobGasUsed and excessBlobGas must both be present or absent"
macro countFieldsRuntimeImpl(obj: untyped, T: type, num: static[int]): untyped =
# Generates code that sets the caller's `result` to the number of fields of
# `T` that precede its last `num` fields, then adds 1 for each of those last
# `num` fields of `obj` whose value `isSome`.
# NOTE(review): the `let` line introducing the locals below is not visible
# in this view — confirm against the full file.
Tresolved = getType(T)[1]
fd = recordFields(Tresolved.getImpl)
# Refers to the `result` variable of the routine the generated code is
# placed in.
res = ident("result")
mlen = fd.len - num
result = newStmtList()
result.add quote do:
`res` = `mlen`
for i in countdown(fd.high, fd.len-num):
let fieldName = fd[i].name
result.add quote do:
`res` += `obj`.`fieldName`.isSome.ord
proc countFieldsRuntime(obj: object|tuple): int =
# count mandatory fields and non empty optional fields
# NOTE(review): the line introducing the two values below (presumably
# `const`) is not visible in this view — confirm against the full file.
type ObjType = type obj
fieldsCount = ObjType.rlpFieldsCount
# include first optional fields
cof = checkedOptionalFields(ObjType, fieldsCount) + 1
# The generated code assigns this proc's `result`.
countFieldsRuntimeImpl(obj, ObjType, cof)
proc appendRecordType*(self: var RlpWriter, obj: object|tuple, wrapInList = wrapObjsInList) =
# Appends every RLP field of `obj` in enumeration order, optionally wrapped
# in a list. Optional (`Option`/`Opt`) fields are unwrapped when set.
# NOTE(review): several lines are not visible in this view (the `const`
# lines introducing the values below, the `startList` calls under
# `wrapInList`, and the `else:` lines of the field template) — confirm
# against the full file.
mixin enumerateRlpFields, append
type ObjType = type obj
hasOptional = hasOptionalFields(ObjType)
fieldsCount = ObjType.rlpFieldsCount
when hasOptional:
cof = checkedOptionalFields(ObjType, fieldsCount)
# With more than one trailing optional field, emit run-time checks on which
# of them are set.
when cof > 0:
genOptionalFieldsValidation(obj, ObjType, cof)
if wrapInList:
when hasOptional:
# Per-field handler passed to `enumerateRlpFields`.
template op(RecordType, fieldName, field) {.used.} =
# Fields tagged with `rlpCustomSerialization` go through the three-argument
# `append(self, obj, field)` overload.
when hasCustomPragmaFixed(RecordType, fieldName, rlpCustomSerialization):
append(self, obj, field)
elif (field is Option or field is Opt) and hasOptional:
# this works for optional fields at the end of an object/tuple
# if the optional field is followed by a mandatory field,
# custom serialization for a field or for the parent object
# will be better
if field.isSome:
append(self, field.unsafeGet)
append(self, field)
enumerateRlpFields(obj, op)
# `appendImpl` overloads for object and tuple values.
# NOTE(review): the bodies of both procs are not visible in this view; they
# presumably forward to `appendRecordType` — confirm against the full file.
proc appendImpl(self: var RlpWriter, data: object) {.inline.} =
proc appendImpl(self: var RlpWriter, data: tuple) {.inline.} =
# We define a single `append` template with a pretty low specificity
# score in order to facilitate easier overloading with user types:
# Public entry point for appending a value of any supported type to `w`.
# NOTE(review): the `else:` line separating the two calls below is not
# visible in this view — confirm against the full file.
template append*[T](w: var RlpWriter; data: T) =
when data is (enum|bool):
# TODO detect negative enum values at compile time?
# Enums and bools are encoded through their value converted to `uint64`.
appendImpl(w, uint64(data))
# Every other type is dispatched to the matching `appendImpl` overload.
appendImpl(w, data)
# Signed integers have no defined RLP encoding, so any attempt to append one
# is turned into a compile-time error.
template append*(w: var RlpWriter; data: SomeSignedInt) =
  {.error: "Signed integer encoding is not defined for rlp".}
proc initRlpList*(listSize: int): RlpWriter =
  ## Returns a fresh writer on which a list of `listSize` items has already
  ## been started.
  var writer = initRlpWriter()
  writer.startList(listSize)
  writer
# TODO: This should return a lent value
# Ends encoding: asserts that no started list is still waiting for items.
# NOTE(review): the expression yielding the resulting bytes is not visible
# in this view — confirm against the full file.
template finish*(self: RlpWriter): seq[byte] =
doAssert self.pendingLists.len == 0, "Insufficient number of elements written to a started list"
func clear*(w: var RlpWriter) =
# Prepare writer for reuse
# NOTE(review): the statements of this func are not visible in this view —
# confirm against the full file.
proc encode*[T](v: T): seq[byte] =
# Encodes a single value `v` with a fresh writer.
# NOTE(review): the statements that append `v` and return the bytes are not
# visible in this view — confirm against the full file.
mixin append
var writer = initRlpWriter()
func encodeInt*(i: SomeUnsignedInt): RlpIntBuf =
# Encodes the unsigned integer `i` into a fixed-capacity `RlpIntBuf`.
# NOTE(review): the statements that fill and return `buf` are not visible in
# this view — confirm against the full file.
var buf: RlpIntBuf
macro encodeList*(args: varargs[untyped]): seq[byte] =
# Expands to code that creates a list writer sized for `args.len` items and
# appends each argument to it in order.
# NOTE(review): the `let` line introducing the locals below and the tail of
# the final `quote do:` block are not visible in this view — confirm against
# the full file.
listLen = args.len
# A generated symbol is used for the writer variable in the expansion.
writer = genSym(nskVar, "rlpWriter")
body = newStmtList()
# Bound as an open symbol, so `append` overloads are resolved against each
# argument's type.
append = bindSym("append", brForceOpen)
for arg in args:
body.add quote do:
`append`(`writer`, `arg`)
result = quote do:
var `writer` = initRlpList(`listLen`)