mirror of
https://github.com/status-im/nimbus-eth1.git
synced 2025-01-19 00:31:20 +00:00
6b68ff92d3
This buffer eliminates a large part of the allocations during MPT traversal, reducing overall memory usage and GC pressure. Ideally, we would use it throughout the API instead of `openArray[byte]`, since the built-in length limit appropriately exposes the natural 64-nibble depth constraint that `openArray` fails to capture.
411 lines
14 KiB
Nim
411 lines
14 KiB
Nim
# nimbus-eth1
|
|
# Copyright (c) 2023-2024 Status Research & Development GmbH
|
|
# Licensed under either of
|
|
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
|
|
# http://www.apache.org/licenses/LICENSE-2.0)
|
|
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
|
|
# http://opensource.org/licenses/MIT)
|
|
# at your option. This file may not be copied, modified, or distributed
|
|
# except according to those terms.
|
|
|
|
## Aristo DB -- Identifier types
|
|
## =============================
|
|
##
|
|
|
|
{.push raises: [].}
|
|
|
|
import
|
|
std/[algorithm, sequtils, sets, strutils, hashes],
|
|
eth/common,
|
|
stew/byteutils,
|
|
chronicles,
|
|
results,
|
|
stint,
|
|
./desc_nibbles
|
|
|
|
export
|
|
desc_nibbles
|
|
|
|
type
  VertexID* = distinct uint64
    ## Unique identifier of a vertex in the `Aristo Trie`. A vertex is the
    ## prefix tree (aka `Patricia Trie`) component; once augmented by hash
    ## keys it is called a node. On the persistent backend, the `VertexID`
    ## is the only reference to a node.
    ##
    ## Vertex IDs are generated on the fly and discarded when no longer
    ## needed. They are not recycled. The estimate
    ##
    ##   (2^64) / (100 * 365.25 * 24 * 3600) / 1000 / 1000 / 1000 = 5.85
    ##
    ## shows that the `uint64` scalar space is not exhausted within 100
    ## years as long as the database consumes somewhat less than 6 IDs per
    ## nanosecond.
    ##
    ## A simple recycling mechanism was tested and found to slow the system
    ## down considerably: large swaths of database vertices were regularly
    ## freed, so recycling had to deal with extensive lists of
    ## non-consecutive IDs.

  HashKey* = object
    ## Ethereum MPTs use Keccak hashes as node links when the RLP encoding
    ## of a node is at least 32 bytes long. Otherwise the RLP encoded node
    ## value itself serves as a pseudo node link (rather than a hash.) Such
    ## a node is not stored on the key-value database; its RLP encoding is
    ## stored in the parent node in place of a node link. Only the top
    ## level node is always referred to by its hash (the root hash.)
    ##
    ## This compaction feature needs an abstraction, the `HashKey` object,
    ## which is either a `Hash256` or a `Blob` of at most 31 bytes. This
    ## leaves two ways of representing an empty/void `HashKey`: an empty
    ## `Blob` of zero length, or the `Hash256` of the Keccak hash of an
    ## empty `Blob` (see constant `EMPTY_ROOT_HASH`.)
    ##
    ## For performance, blobs are not stored as `seq`. Instead, the length
    ## is stored and the data "space" is shared.
    ## TODO can we skip one byte of hash and reduce this type to 32 bytes?
    buf: array[32, byte] # Hash256 or blob data, depending on `len`
    len: int8            # blob length, or 32 when holding a hash

  PathID* = object
    ## Path into the `Patricia Trie`: a chain of at most 64 nibbles (i.e.
    ## 32 bytes.) In most cases the length is 64, so the path is encoded as
    ## a numeric value, which is often easier to handle than a chain of
    ## nibbles.
    ##
    ## A path ID should be kept normalised, i.e.
    ## * 0 <= `length` <= 64
    ## * the unused trailing nibbles in `pfx` are set to `0`
    ##
    pfx*: UInt256
    length*: uint8

  # ----------

  LeafTie* = object
    ## Unique access key for a leaf vertex: a root vertex followed by a
    ## nibble path along the `Patricia Trie` down to the leaf vertex. This
    ## implies an obvious injection from the set of `LeafTie` objects
    ## *into* the set of `VertexID` objects (typically *into* only, not a
    ## bijection.)
    ##
    ## `LeafTie` objects have no representation in the `Aristo Trie`. They
    ## are used temporarily, and in caches or backlog tables.
    root*: VertexID ## Root ID for the sub-trie
    path*: PathID   ## Path into the `Patricia Trie`
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Chronicles formatters
|
|
# ------------------------------------------------------------------------------
|
|
|
|
# Have chronicles render `VertexID` log arguments through the `$` operator.
chronicles.formatIt(VertexID): $it
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Public helpers: `VertexID` scalar data model
|
|
# ------------------------------------------------------------------------------
|
|
|
|
# Strict ordering of `VertexID` values, borrowed from the base `uint64`.
func `<`*(a, b: VertexID): bool {.borrow.}
|
|
# Non-strict ordering of `VertexID` values, borrowed from the base `uint64`.
func `<=`*(a, b: VertexID): bool {.borrow.}
|
|
# Equality of `VertexID` values, borrowed from the base `uint64`.
func `==`*(a, b: VertexID): bool {.borrow.}
|
|
# Three-way comparison of `VertexID` values, borrowed from the base `uint64`.
func cmp*(a, b: VertexID): int {.borrow.}
|
|
|
|
func `$`*(vid: VertexID): string =
  ## Pretty print a vertex ID as `$` followed by its lower case hex value
  ## without leading zeros. The zero ID is printed as `$ø`.
  result = "$"
  if vid == VertexID(0):
    result.add "ø"
  else:
    let hex = vid.uint64.toHex
    result.add hex.strip(trailing = false, chars = {'0'}).toLowerAscii
|
|
|
|
func `==`*(a: VertexID; b: static[uint]): bool =
  ## Compare a vertex ID against a compile time unsigned constant.
  a == VertexID(b)
|
|
|
|
# Scalar model extension as in `IntervalSetRef[VertexID,uint64]`
|
|
func `+`*(a: VertexID; b: uint64): VertexID =
  ## Vertex ID advanced by the scalar offset `b`.
  VertexID(a.uint64 + b)
|
|
func `-`*(a: VertexID; b: uint64): VertexID =
  ## Vertex ID moved back by the scalar offset `b`.
  VertexID(a.uint64 - b)
|
|
func `-`*(a, b: VertexID): uint64 =
  ## Scalar distance between two vertex IDs.
  a.uint64 - b.uint64
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Public helpers: `PathID` ordered scalar data model
|
|
# ------------------------------------------------------------------------------
|
|
|
|
func high*(_: type PathID): PathID =
  ## Highest possible `PathID` object: a full 64 nibble path with the
  ## maximal `UInt256` prefix.
  result.pfx = high(UInt256)
  result.length = 64
|
|
|
|
func low*(_: type PathID): PathID =
  ## Lowest possible `PathID` object: the zero-initialised (empty) path.
  default(PathID)
|
|
|
|
func next*(pid: PathID): PathID =
  ## Successor of the argument `pid`. The `length` field of the result may
  ## differ from the one of the argument.
  ##
  ## The argument `pid` is returned unchanged if it is already at its
  ## maximum value `high(PathID)`.
  if pid.length < 64 and pid.pfx.isZero:
    # All-zero prefix: the successor is the zero path one nibble longer.
    return PathID(length: pid.length + 1)
  if pid.pfx < high(UInt256):
    return PathID(pfx: pid.pfx + 1, length: 64)
  pid
|
|
|
|
func prev*(pid: PathID): PathID =
  ## Predecessor of the argument `pid`. The `length` field of the result may
  ## differ from the one of the argument.
  ##
  ## The argument `pid` is returned unchanged if it is already at its
  ## minimum value `low(PathID)`.
  if 0 < pid.pfx:
    return PathID(pfx: pid.pfx - 1, length: 64)
  if 0 < pid.length:
    # Zero prefix: the predecessor is the zero path one nibble shorter.
    return PathID(length: pid.length - 1)
  pid
|
|
|
|
func `<`*(a, b: PathID): bool =
  ## Order by `pfx` first, with `length` as tie breaker. Both arguments
  ## are expected to be normalised (see `normal()`.)
  if a.pfx == b.pfx:
    a.length < b.length
  else:
    a.pfx < b.pfx
|
|
|
|
func `<=`*(a, b: PathID): bool =
  ## Non-strict ordering, defined as the negation of `b < a`.
  if b < a: false else: true
|
|
|
|
func `==`*(a, b: PathID): bool =
  ## Field-wise equality. Both arguments are expected to be normalised
  ## (see `normal()`.)
  a.length == b.length and a.pfx == b.pfx
|
|
|
|
func cmp*(a, b: PathID): int =
  ## Three-way comparison derived from `<`: returns -1, 0, or 1.
  if a < b:
    return -1
  if b < a:
    return 1
  0
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Public helpers: `HashKey` ordered scalar data model
|
|
# ------------------------------------------------------------------------------
|
|
|
|
func len*(lid: HashKey): int =
  ## Number of valid bytes held by `lid` (the stored `len` field as `int`.)
  int(lid.len)
|
|
|
|
template data*(lid: HashKey): openArray[byte] =
  ## View onto the first `len` bytes of the internal buffer (no copy.)
  toOpenArray(lid.buf, 0, lid.len - 1)
|
|
|
|
func to*(lid: HashKey; T: type PathID): T =
  ## Helper to borrow certain properties from `PathID`.
  ##
  ## A 32 byte key maps to a full 64 nibble path. A shorter non-empty key is
  ## copied to the front of a zeroed 32 byte buffer and gets two nibbles of
  ## path length per byte. Anything else yields the empty `PathID`.
  if lid.len == 32:
    return PathID(pfx: UInt256.fromBytesBE lid.data, length: 64)
  if lid.len <= 0:
    return PathID()

  doAssert lid.len < 32
  var padded: array[32,byte]
  copyMem(addr padded[0], unsafeAddr lid.data[0], lid.len)
  PathID(pfx: UInt256.fromBytesBE padded, length: 2 * lid.len.uint8)
|
|
|
|
func fromBytes*(T: type HashKey; data: openArray[byte]): Result[T,void] =
  ## Import the argument `data` of length 0, or between 2 and 32 bytes, as
  ## a `HashKey`.
  ##
  ## An argument `data` of length 32 is used as-is, an empty one gives the
  ## void key.
  ##
  ## For an argument `data` of length between 2 and 31, the first byte must
  ## be the start of an RLP encoded list, i.e. `0xc0 + len` where `len` is
  ## one less than the `data` length.
  ##
  ## Any other input results in an error.
  let size = data.len
  if size == 0:
    return ok HashKey()

  let wellFormed =
    size == 32 or (1 < size and size < 32 and data[0].int == 0xbf + size)
  if not wellFormed:
    return err()

  var key: T
  key.len = int8 size # must be set before `key.data` is accessed
  copyMem(addr key.data[0], unsafeAddr data[0], size)
  ok key
|
|
|
|
func `<`*(a, b: HashKey): bool =
  ## Slow, but useful for debug sorting. Compares the `PathID`
  ## representations of both keys.
  let
    pathA = a.to(PathID)
    pathB = b.to(PathID)
  pathA < pathB
|
|
|
|
func `==`*(a, b: HashKey): bool =
  ## Two keys are equal if their valid data bytes are equal.
  result = (a.data == b.data)
|
|
|
|
func cmp*(a, b: HashKey): int =
  ## Slow, but useful for debug sorting. Delegates to `cmp()` on the valid
  ## data bytes of both keys.
  result = cmp(a.data, b.data)
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Public helpers: `LeafTie` ordered scalar data model
|
|
# ------------------------------------------------------------------------------
|
|
|
|
func high*(_: type LeafTie; root = VertexID(1)): LeafTie =
  ## Highest possible `LeafTie` object for the given root vertex, i.e. the
  ## one with path `high(PathID)`.
  result.root = root
  result.path = high(PathID)
|
|
|
|
func low*(_: type LeafTie; root = VertexID(1)): LeafTie =
  ## Lowest possible `LeafTie` object for the given root vertex, i.e. the
  ## one with path `low(PathID)`.
  result.root = root
  result.path = low(PathID)
|
|
|
|
func next*(lty: LeafTie): LeafTie =
  ## Same root as `lty`, with the path replaced by its `next()` value.
  result.root = lty.root
  result.path = lty.path.next
|
|
|
|
func prev*(lty: LeafTie): LeafTie =
  ## Same root as `lty`, with the path replaced by its `prev()` value.
  result.root = lty.root
  result.path = lty.path.prev
|
|
|
|
func `<`*(a, b: LeafTie): bool =
  ## Order by `root` first, with `path` as tie breaker. The paths of both
  ## arguments are expected to be normalised (see `normal()`.)
  if a.root == b.root:
    a.path < b.path
  else:
    a.root < b.root
|
|
|
|
func `==`*(a, b: LeafTie): bool =
  ## Field-wise equality. The paths of both arguments are expected to be
  ## normalised (see `normal()`.)
  if a.root == b.root:
    a.path == b.path
  else:
    false
|
|
|
|
func cmp*(a, b: LeafTie): int =
  ## Three-way comparison returning -1, 0, or 1. The paths of both arguments
  ## are expected to be normalised (see `normal()`.)
  if a < b:
    return -1
  if a == b:
    return 0
  1
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Public helpers: Reversible conversions between `PathID`, `HashKey`, etc.
|
|
# ------------------------------------------------------------------------------
|
|
|
|
func to*(pid: PathID; T: type NibblesBuf): T =
  ## Representation of a `PathID` as `NibblesBuf` (preserving full
  ## information.) Paths shorter than 64 nibbles are cut to `pid.length`.
  result = NibblesBuf.fromBytes(pid.pfx.toBytesBE)
  if pid.length < 64:
    result = result.slice(0, pid.length.int)
|
|
|
|
func `@`*(pid: PathID): Blob =
  ## Representation of a `PathID` as a `Blob` holding the big-endian bytes
  ## of `pid.pfx`, cut to the number of bytes needed for `pid.length`
  ## nibbles. For an odd path length, the low nibble of the last byte is
  ## padding (zero if `pid` is normalised.)
  result = pid.pfx.toBytesBE.toSeq
  if pid.length < 63:
    # Two nibbles per byte, rounded up. Lengths 63 and 64 need all 32 bytes,
    # so nothing is cut off in that case.
    result.setLen((pid.length.int + 1) shr 1)
|
|
|
|
func to*(lid: HashKey; T: type Hash256): T =
  ## Returns the `Hash256` key if available, otherwise the Keccak hash of
  ## the `Blob` version. The void key maps to `EMPTY_ROOT_HASH`.
  if lid.len == 32:
    return Hash256(data: lid.buf)
  if 0 < lid.len:
    return lid.data.keccakHash
  EMPTY_ROOT_HASH
|
|
|
|
func to*(key: Hash256; T: type HashKey): T =
  ## This is an efficient version of `HashKey.fromBytes(key.data).value`, not
  ## to be confused with `digestTo(HashKey)`.
  ##
  ## The `EMPTY_ROOT_HASH` value is mapped to the void (zero length) key.
  if key != EMPTY_ROOT_HASH:
    result = T(len: 32, buf: key.data)
|
|
|
|
func to*(n: SomeUnsignedInt; T: type PathID): T =
  ## Representation of an unsigned scalar as a full length (64 nibbles)
  ## `PathID` (preserving full information)
  result.pfx = n.u256
  result.length = 64
|
|
|
|
func to*(n: UInt256; T: type PathID): T =
  ## Representation of a `UInt256` scalar as a full length (64 nibbles)
  ## `PathID` (preserving full information)
  result.pfx = n
  result.length = 64
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Public helpers: Miscellaneous mappings
|
|
# ------------------------------------------------------------------------------
|
|
|
|
func digestTo*(data: openArray[byte]; T: type HashKey): T =
  ## For argument `data` with length smaller than 32, import them as-is into
  ## the result. Otherwise import the Keccak hash of the argument `data`.
  if 32 <= data.len:
    result.len = 32
    result.buf = data.keccakHash.data
  elif 0 < data.len:
    # The length must be set first, `result.data` is a view of that size.
    result.len = int8 data.len
    copyMem(addr result.data[0], unsafeAddr data[0], data.len)
  else:
    result.len = 0
|
|
|
|
func normal*(a: PathID): PathID =
  ## Normalise path ID representation:
  ## * a `length` greater than 64 is clamped to 64
  ## * for a `length` smaller than 64, the unused trailing nibbles of `pfx`
  ##   are set to `0`
  result = a
  if 64 < a.length:
    result.length = 64
  elif a.length == 0:
    # No nibble is in use. This case is handled separately so that no shift
    # by the full bit width of `UInt256` is needed.
    result.pfx = 0.u256
  elif a.length < 64:
    # Number of unused low bits. It is computed as `int` as the `uint8`
    # arithmetic of the `length` field type might wrap around.
    let unusedBits = 4 * (64 - a.length.int)
    # The explicit parentheses are needed: a unary `not` binds stronger than
    # the binary `-`, so `not x - 1` would be parsed as `(not x) - 1`.
    result.pfx = a.pfx and not ((1.u256 shl unusedBits) - 1.u256)
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Public helpers: `Tables` and `Rlp` support
|
|
# ------------------------------------------------------------------------------
|
|
|
|
func hash*(a: PathID): Hash =
  ## Table/KeyedQueue mixin. Mixes the hash of the big-endian `pfx` bytes
  ## with the hash of the `length` field.
  let
    pfxHash = hash(a.pfx.toBytesBE)
    lenHash = hash(a.length)
  !$(Hash(0) !& pfxHash !& lenHash)
|
|
|
|
func hash*(a: HashKey): Hash =
  ## Table/KeyedQueue mixin. Hashes the valid data bytes of the key.
  a.data.hash
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Miscellaneous helpers
|
|
# ------------------------------------------------------------------------------
|
|
|
|
func `$`*(vids: seq[VertexID]): string =
  ## Pretty print a list of vertex IDs as `[$<hex>,$<hex>,..]` where each
  ## hex value is printed without leading zeros.
  result = "["
  for n, vid in vids:
    if 0 < n:
      result.add ","
    result.add "$"
    result.add vid.uint64.toHex.strip(trailing = false, chars = {'0'})
  result.add "]"
|
|
|
|
func `$`*(vids: HashSet[VertexID]): string =
  ## Pretty print a set of vertex IDs, sorted in ascending order, as
  ## `{$<hex>,$<hex>,..}` where each hex value is printed without leading
  ## zeros.
  result = "{"
  for n, vid in vids.toSeq.sorted:
    if 0 < n:
      result.add ","
    result.add "$"
    result.add vid.uint64.toHex.strip(trailing = false, chars = {'0'})
  result.add "}"
|
|
|
|
func `$`*(key: Hash256): string =
  ## Pretty print a hash, read as a big-endian `UInt256` scalar. Some
  ## distinguished values are printed symbolically (`0`, `2^256-1`, and the
  ## powers of two `2^252` .. `2^255`), everything else is printed as hex
  ## string.
  let w = UInt256.fromBytesBE key.data
  if w == high(UInt256):
    "2^256-1"
  elif w == 0.u256:
    "0"
  elif w == 2.u256.pow 255:
    "2^255" # 800...
  elif w == 2.u256.pow 254:
    "2^254" # 400..
  elif w == 2.u256.pow 253:
    "2^253" # 200...
  elif w == 2.u256.pow 252:
    "2^252" # 100...
  else:
    w.toHex
|
|
|
|
func `$`*(key: HashKey): string =
  ## Hex string of the valid data bytes of the key.
  key.data.toHex
|
|
|
|
func `$`*(a: PathID): string =
  ## Pretty print a path ID. A non-zero prefix is printed as hex digits
  ## (cut to the first `length` digits for a partial path) without leading
  ## zeros. A zero prefix is printed as `0` unless the path is empty. For a
  ## path shorter than 64 nibbles, the length is appended in parentheses.
  ##
  ## NOTE(review): cutting to `length` digits presumes that `toHex` yields
  ## all 64 hex digits of `pfx` -- to be confirmed.
  if not a.pfx.isZero:
    var digits = $a.pfx.toHex
    if a.length < 64:
      digits = digits[0 ..< a.length]
    result = digits.strip(leading = true, trailing = false, chars = {'0'})
  elif a.length != 0:
    result = "0"

  if a.length < 64:
    result.add "("
    result.add $a.length
    result.add ")"
|
|
|
|
func `$`*(a: LeafTie): string =
  ## Pretty print as `<root>:<path>` where the root ID is printed as hex
  ## value without leading zeros (or `0` for the zero ID.)
  let rootHex =
    if a.root != 0:
      ($a.root.uint64.toHex).strip(
        leading = true, trailing = false, chars = {'0'})
    else:
      "0"
  result = rootHex
  result &= ":" & $a.path
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# End
|
|
# ------------------------------------------------------------------------------
|