mirror of
https://github.com/status-im/nimbus-eth1.git
synced 2025-01-16 23:31:16 +00:00
f034af422a
Each branch node may have up to 16 sub-items - currently, these are given a VertexID when they are first needed, leading to a mostly-random order of vertex ids for each subitem. Here, we pre-allocate all 16 vertex ids such that when a branch subitem is filled, it already has a vertexid waiting for it. This brings several important benefits: * subitems are sorted and "close" in their id sequencing - this means that when rocksdb stores them, they are likely to end up in the same data block thus improving read efficiency * because the ids are consecutive, we can store just the starting id and a bitmap representing which subitems are in use - this reduces disk space usage for branches, allowing more of them to fit into a single disk read, further improving disk read and caching performance - disk usage at block 18M is down from 84 to 78gb! * the in-memory footprint of VertexRef is reduced, allowing more instances to fit into caches and less memory to be used overall. Because of the increased locality of reference, it turns out that we no longer need to iterate over the entire database to efficiently generate the hash key database because the normal computation is now faster - this significantly benefits "live" chain processing as well, where each dirtied key must be accompanied by a read of all branch subitems next to it - most of the performance benefit in this branch comes from this locality-of-reference improvement. On a sample resync, there's already ~20% improvement with later blocks seeing increasing benefit (because the trie is deeper in later blocks, leading to more benefit from branch read perf improvements) ``` blocks: 18729664, baseline: 190h43m49s, contender: 153h59m0s Time (total): -36h44m48s, -19.27% ``` Note: clients need to be resynced as the PR changes the on-disk format. R.I.P. little bloom filter - your life in the repo was short but valuable
350 lines
10 KiB
Nim
350 lines
10 KiB
Nim
# nimbus-eth1
|
|
# Copyright (c) 2023-2024 Status Research & Development GmbH
|
|
# Licensed under either of
|
|
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
|
|
# http://www.apache.org/licenses/LICENSE-2.0)
|
|
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
|
|
# http://opensource.org/licenses/MIT)
|
|
# at your option. This file may not be copied, modified, or distributed
|
|
# except according to those terms.
|
|
|
|
{.push raises: [].}
|
|
|
|
import
|
|
results,
|
|
stew/[arrayops, endians2],
|
|
./aristo_desc
|
|
|
|
export aristo_desc, results
|
|
|
|
# Allocation-free version of a short big-endian encoding that skips the
# leading zeroes
|
|
type
  SbeBuf*[I] = object
    ## Fixed-size buffer for the big-endian bytes of a value of type `I`.
    ## Only the trailing `len` bytes of `buf` are handed out by `data()`.
    buf*: array[sizeof(I), byte]
    len*: byte

  RVidBuf* = object
    ## Buffer for a serialised `RootedVertexID`. The leading `len` bytes of
    ## `buf` are handed out by `data()`.
    buf*: array[2 * sizeof(SbeBuf[VertexID]), byte]
    len*: byte
|
|
|
|
func significantBytesBE(val: openArray[byte]): byte =
  ## Number of bytes that remain once the leading zero bytes of the
  ## big-endian value `val` are dropped. An all-zero (or empty) input still
  ## counts as one byte.
  var skipped = 0
  while skipped < val.len and val[skipped] == 0:
    inc skipped

  if skipped == val.len:
    byte(1)
  else:
    byte(val.len - skipped)
|
|
|
|
func blobify*(v: VertexID|uint64): SbeBuf[typeof(v)] =
  ## Encode `v` as 8 big-endian bytes and record how many of the trailing
  ## bytes are significant (leading zero bytes are skipped by `data()`).
  let bytesBE = toBytesBE(uint64(v))
  result.buf = bytesBE
  result.len = significantBytesBE(bytesBE)
|
|
|
|
func blobify*(v: StUint): SbeBuf[typeof(v)] =
  ## Encode the wide unsigned integer `v` as big-endian bytes and record how
  ## many of the trailing bytes are significant.
  let bytesBE = toBytesBE(v)
  result.buf = bytesBE
  result.len = significantBytesBE(bytesBE)
|
|
|
|
template data*(v: SbeBuf): openArray[byte] =
  ## View of the significant (trailing `len`) bytes of the buffer.
  let sbe = v # evaluate the argument expression only once
  toOpenArray(sbe.buf, sbe.buf.len - int(sbe.len), high(sbe.buf))
|
|
|
|
|
|
func blobify*(rvid: RootedVertexID): RVidBuf =
  ## Serialise `rvid` as `<rootLen> <root bytes> [<vid bytes>]`. The `vid`
  ## part is left out when it equals the root.
  # Length-prefixed root encoding creates a unique and common prefix for all
  # vertices sharing the same root
  # TODO evaluate an encoding that colocates short roots (like VertexID(1)) with
  #      the length
  let rootBuf = blobify(rvid.root)
  result.buf[0] = rootBuf.len
  assign(result.buf.toOpenArray(1, rootBuf.len), rootBuf.data())
  result.len = rootBuf.len + 1

  if rvid.root != rvid.vid:
    # The length of the `vid` can be derived from the total length, so it
    # needs no length prefix of its own
    let vidBuf = blobify(rvid.vid)
    assign(
      result.buf.toOpenArray(rootBuf.len + 1, rootBuf.len + vidBuf.len),
      vidBuf.data())
    result.len += vidBuf.len
|
|
|
|
proc deblobify*[T: uint64|VertexID](data: openArray[byte], _: type T): Result[T,AristoError] =
  ## Decode a big-endian integer of 1 to 8 bytes (leading zero bytes may have
  ## been stripped). Any other length yields `Deblob64LenUnsupported`.
  if data.len notin 1 .. 8:
    return err(Deblob64LenUnsupported)

  # Fold the bytes most-significant first
  var acc = 0'u64
  for b in data:
    acc = (acc shl 8) or uint64(b)

  ok T(acc)
|
|
|
|
proc deblobify*(data: openArray[byte], _: type UInt256): Result[UInt256,AristoError] =
  ## Decode a big-endian `UInt256` from 1 to 32 bytes. Any other length
  ## yields `Deblob256LenUnsupported`.
  if data.len in 1 .. 32:
    ok UInt256.fromBytesBE(data)
  else:
    err(Deblob256LenUnsupported)
|
|
|
|
func deblobify*(data: openArray[byte], T: type RootedVertexID): Result[T, AristoError] =
  ## Decode a `RootedVertexID` from `<rootLen> <root bytes> [<vid bytes>]`.
  ## When no bytes follow the root, the `vid` equals the root.
  ##
  ## Returns `DeblobRVidLenUnsupported` when `data` is shorter than two bytes
  ## or shorter than the announced root length; errors from decoding the
  ## individual IDs are passed through.
  # The length must be verified before the length prefix is read, otherwise
  # empty input raises an index defect instead of returning an error.
  if data.len < 2:
    return err(DeblobRVidLenUnsupported)

  let rlen = int(data[0])
  if data.len < rlen + 1:
    return err(DeblobRVidLenUnsupported)

  let
    root = ?deblobify(data.toOpenArray(1, rlen), VertexID)
    vid =
      if data.len > rlen + 1:
        ?deblobify(data.toOpenArray(rlen + 1, data.high()), VertexID)
      else:
        root
  ok (root, vid)
|
|
|
|
template data*(v: RVidBuf): openArray[byte] =
  ## View of the leading `len` bytes of the buffer.
  let rvidBuf = v # evaluate the argument expression only once
  toOpenArray(rvidBuf.buf, 0, rvidBuf.len - 1)
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Private helper
|
|
# ------------------------------------------------------------------------------
|
|
|
|
proc load64(data: openArray[byte]; start: var int, len: int): Result[uint64,AristoError] =
  ## Decode `len` big-endian bytes of `data` beginning at `start` as `uint64`
  ## and advance `start` past them on success.
  ##
  ## Returns `Deblob256LenUnsupported` when `data` is too short and
  ## `Deblob64LenUnsupported` when `len` is not in `1 .. 8`.
  # NOTE(review): the 256-bit error code for the "too short" case looks like
  # a copy/paste artefact; it is kept so that callers see the same error.
  if data.len < start + len:
    return err(Deblob256LenUnsupported)

  # A negative `len` passes the test above but would then produce an invalid
  # slice range; reject it with the error an empty slice already produces.
  if len < 1:
    return err(Deblob64LenUnsupported)

  let val = ?deblobify(data.toOpenArray(start, start + len - 1), uint64)
  start += len
  ok val
|
|
|
|
proc load256(data: openArray[byte]; start: var int, len: int): Result[UInt256,AristoError] =
  ## Decode `len` big-endian bytes of `data` beginning at `start` as
  ## `UInt256` and advance `start` past them on success.
  let stop = start + len
  if stop > data.len:
    return err(Deblob256LenUnsupported)

  let val = ?deblobify(data.toOpenArray(start, stop - 1), UInt256)
  start = stop
  ok val
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Public functions
|
|
# ------------------------------------------------------------------------------
|
|
|
|
proc blobifyTo*(pyl: LeafPayload, data: var seq[byte]) =
  ## Append the serialised leaf payload `pyl` to `data`.
  ##
  ## Account payload: the fields that are present (nonce, balance, storage
  ## ID, code hash -- in that order), then a big-endian `uint16` holding the
  ## packed field lengths, then a mask byte flagging the present fields.
  ##
  ## Storage payload: the encoded slot value followed by the byte `0x20`.
  case pyl.pType
  of AccountData:
    # Every length is stored as `len-1`: a zero-length field is never
    # written because `present` already marks it as absent, which saves one
    # bit per length.
    var
      packedLens: uint16
      present: byte

    if 0 < pyl.account.nonce:
      let enc = pyl.account.nonce.blobify()
      present = present or 0x01
      packedLens = packedLens or uint16(enc.len - 1) # bits 0..2
      data.add enc.data()

    if 0 < pyl.account.balance:
      let enc = pyl.account.balance.blobify()
      present = present or 0x02
      packedLens = packedLens or (uint16(enc.len - 1) shl 3) # bits 3..7
      data.add enc.data()

    if pyl.stoID.isValid:
      let enc = pyl.stoID.vid.blobify()
      present = present or 0x04
      packedLens = packedLens or (uint16(enc.len - 1) shl 8) # bits 8..10
      data.add enc.data()

    if pyl.account.codeHash != EMPTY_CODE_HASH:
      present = present or 0x08
      data.add pyl.account.codeHash.data

    data.add packedLens.toBytesBE()
    data.add present
  of StoData:
    data.add pyl.stoData.blobify().data
    data.add 0x20.byte
|
|
|
|
proc blobifyTo*(vtx: VertexRef, key: HashKey, data: var seq[byte]) =
  ## Serialise the vertex `vtx` into a database record appended to `data`.
  ## The record layout is
  ## ::
  ##   Branch:
  ##     <HashKey>   -- optional, only written for a valid 32 byte `key`
  ##     VertexID    -- `startVid`, big-endian without leading zero bytes
  ##     uint16      -- `used`, big-endian
  ##     seq[byte]   -- hex-prefix encoded path segment; a default
  ##                    `HexPrefixBuf` when `vtx.pfx` is empty
  ##     marker byte -- ((0b00 or 0b10) shl 6) + pathSegmentLen(6), where
  ##                    0b10 flags that the hash key was written
  ##
  ##   Leaf:
  ##     seq[byte]   -- leaf payload, see `blobifyTo()` for `LeafPayload`
  ##     seq[byte]   -- hex-prefix encoded path segment
  ##     marker byte -- (0b01 shl 6) + pathSegmentLen(6)
  ##
  ## The marker is the last byte of the record so that a reader can work out
  ## the vertex type and the path segment length from the end.
  ##
  ## Asserts that `vtx` is valid.
  doAssert vtx.isValid

  let
    bits =
      case vtx.vType
      of Branch:
        let keyFlag =
          if key.isValid and key.len == 32:
            # Shorter keys can be loaded from the vertex directly
            data.add key.data()
            0b10'u8
          else:
            0b00'u8

        data.add vtx.startVid.blobify().data()
        data.add toBytesBE(vtx.used)
        keyFlag
      of Leaf:
        vtx.lData.blobifyTo(data)
        0b01'u8

    pSegm =
      if vtx.pfx.len > 0:
        vtx.pfx.toHexPrefix(isleaf = vtx.vType == Leaf)
      else:
        default(HexPrefixBuf)
    psLen = pSegm.len.byte

  data &= pSegm.data()
  data &= [(bits shl 6) or psLen]
|
|
|
|
proc blobify*(vtx: VertexRef, key: HashKey): seq[byte] =
  ## Convenience wrapper around `blobifyTo()` that returns the record in a
  ## fresh sequence.
  var record = newSeqOfCap[byte](128)
  vtx.blobifyTo(key, record)
  record
|
|
|
|
proc blobifyTo*(lSst: SavedState; data: var seq[byte]) =
  ## Serialise a last saved state record: the `key` bytes, the big-endian
  ## `serial` and the trailing type marker `0x7f`.
  data.add lSst.key.data
  data.add toBytesBE(lSst.serial)
  data.add 0x7fu8
|
|
|
|
proc blobify*(lSst: SavedState): seq[byte] =
  ## Convenience wrapper around `blobifyTo()` that returns the record in a
  ## fresh sequence.
  lSst.blobifyTo result
|
|
|
|
# -------------
|
|
proc deblobify(
    data: openArray[byte];
    pyl: var LeafPayload;
      ): Result[void,AristoError] =
  ## De-serialise a leaf payload previously written by `blobifyTo()` into
  ## `pyl`. The last byte of `data` is the mask that selects the payload
  ## kind: bit `0x20` marks slot storage data, otherwise the upper four bits
  ## must be clear and the low four bits flag the account fields present.
  ##
  ## Returns `DeblobVtxTooShort` when `data` is empty or too short to hold
  ## the account trailer, `DeblobCodeLenUnsupported` for a truncated code
  ## hash, `DeblobUnknown` for an unrecognised mask, and passes through
  ## errors from decoding the individual fields.
  if data.len == 0:
    return err(DeblobVtxTooShort)

  let mask = data[^1]
  if (mask and 0x20) > 0: # Slot storage data
    pyl = LeafPayload(
      pType: StoData,
      stoData: ?deblobify(data.toOpenArray(0, data.len - 2), UInt256))
    ok()
  elif (mask and 0xf0) == 0: # Only account fields set
    # The account trailer is two length bytes plus the mask byte. Without
    # this check the slice below would use negative indices and raise an
    # index defect on a truncated record.
    if data.len < 3:
      return err(DeblobVtxTooShort)

    pyl = LeafPayload(pType: AccountData)
    var
      start = 0
      lens = uint16.fromBytesBE(data.toOpenArray(data.len - 3, data.len - 2))

    # Lengths are stored as `len-1`, hence the `+ 1` below
    if (mask and 0x01) > 0:
      let len = lens and 0b111
      pyl.account.nonce = ? load64(data, start, int(len + 1))

    if (mask and 0x02) > 0:
      let len = (lens shr 3) and 0b11111
      pyl.account.balance = ? load256(data, start, int(len + 1))

    if (mask and 0x04) > 0:
      let len = (lens shr 8) and 0b111
      pyl.stoID = (true, VertexID(? load64(data, start, int(len + 1))))

    if (mask and 0x08) > 0:
      if data.len() < start + 32:
        return err(DeblobCodeLenUnsupported)
      discard pyl.account.codeHash.data.copyFrom(data.toOpenArray(start, start + 31))
    else:
      pyl.account.codeHash = EMPTY_CODE_HASH

    ok()
  else:
    err(DeblobUnknown)
|
|
|
|
proc deblobifyType*(record: openArray[byte]; T: type VertexRef):
    Result[VertexType, AristoError] =
  ## Determine the vertex type of a serialised record from its trailing
  ## marker byte without decoding the rest of the record.
  if record.len < 3: # minimum `Leaf` record
    err(DeblobVtxTooShort)
  elif (record[^1] and 0x40'u8) != 0: # low bit of the two marker bits
    ok Leaf
  else:
    ok Branch
|
|
|
|
proc deblobify*(
    record: openArray[byte];
    T: type VertexRef;
      ): Result[T,AristoError] =
  ## De-serialise a vertex record encoded with `blobify()`.
  ##
  ## The last byte of the record carries two marker bits (bit 6: leaf,
  ## bit 7: a 32 byte hash key precedes the vertex data) and the length of
  ## the hex-prefix encoded path segment in the low six bits.
  ##
  ## Returns `DeblobVtxTooShort` for records shorter than three bytes and
  ## `DeblobBranchTooShort` when the announced key/path segment (or the
  ## branch trailer) does not fit into the record; errors from decoding the
  ## vertex ID or the leaf payload are passed through.
  if record.len < 3: # minimum `Leaf` record
    return err(DeblobVtxTooShort)

  let
    bits = record[^1] shr 6
    vType = if (bits and 0b01'u8) > 0: Leaf else: Branch
    hasKey = (bits and 0b10'u8) > 0
    psLen = int(record[^1] and 0b00111111)
    start = if hasKey: 32 else: 0

  if psLen > record.len - 2 or start > record.len - 2 - psLen:
    return err(DeblobBranchTooShort)

  let
    psPos = record.len - psLen - 1
    (_, pathSegment) =
      NibblesBuf.fromHexPrefix record.toOpenArray(psPos, record.len - 2)

  case vType
  of Branch:
    var pos = start
    # Whatever precedes the two `used` bytes is the start vertex ID
    let svLen = psPos - pos - 2
    if svLen < 0:
      # The check above only guarantees one byte of vertex data; a negative
      # length must not reach the slicing code. A zero length is rejected
      # by the integer decoder below.
      return err(DeblobBranchTooShort)

    let
      startVid = VertexID(?load64(record, pos, svLen))
      used = uint16.fromBytesBE(record.toOpenArray(pos, pos + 1))

    ok VertexRef(vType: Branch, pfx: pathSegment, startVid: startVid, used: used)
  of Leaf:
    let vtx = VertexRef(vType: Leaf, pfx: pathSegment)

    ?record.toOpenArray(start, psPos - 1).deblobify(vtx.lData)
    ok vtx
|
|
|
|
proc deblobify*(record: openArray[byte], T: type HashKey): Opt[HashKey] =
  ## Extract the hash key stored in the first 32 bytes of a vertex record,
  ## if the record is longer than 33 bytes and its marker byte flags a key.
  if record.len <= 33 or (record[^1] and 0x80'u8) == 0:
    return Opt.none(HashKey)

  HashKey.fromBytes(record.toOpenArray(0, 31))
|
|
|
|
proc deblobify*(
    data: openArray[byte];
    T: type SavedState;
      ): Result[SavedState,AristoError] =
  ## De-serialise the last saved state data record previously encoded with
  ## `blobify()`: 32 key bytes, 8 bytes of big-endian serial number and the
  ## type marker `0x7f`.
  if data.len != 41:
    err(DeblobWrongSize)
  elif data[^1] != 0x7f:
    err(DeblobWrongType)
  else:
    let
      key = Hash32(array[32, byte].initCopyFrom(data.toOpenArray(0, 31)))
      serial = uint64.fromBytesBE data.toOpenArray(32, 39)
    ok(SavedState(key: key, serial: serial))
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# End
|
|
# ------------------------------------------------------------------------------
|