nimbus-eth1/nimbus/db/aristo/aristo_desc/desc_identifiers.nim
Jordan Hrycaj 657379f484
Aristo db update merkle hasher (#1925)
* Register paths for added leafs because of trie re-balancing

why:
  While the payload would not change, the prefix in the leaf vertex
  would. So it needs to be flagged for hash recompilation for the
  `hashify()` module.

also:
  Make sure that `Hike` paths which might have vertex links into the
  backend filter are replaced by vertex copies before manipulating.
  Otherwise the vertices on the immutable filter might be involuntarily
  changed.

* Also check for paths where the leaf vertex is on the backend, already

why:
  A a path can have dome vertices on the top layer cache with the
  `Leaf` vertex on  the backend.

* Re-define a void `HashLabel` type.

why:
  A `HashLabel` type is a pair `(root-vertex-ID, Keccak-hash)`. Previously,
  a valid `HashLabel` consisted of a non-empty hash and a non-zero vertex
  ID. This definition leads to a non-unique representation of a void
  `HashLabel` with either root-ID or has void. This has been changed to
  the unique void `HashLabel` exactly if the hash entry is void.

* Update consistency checkers

* Re-org `hashify()` procedure

why:
  Syncing against block chain showed serious deficiencies which produced
  wrong hashes or simply bailed out with error.

  So all fringe cases (mainly due to deleted entries) could be integrated
  into the labelling schedule rather than handling separate fringe cases.
2023-12-04 20:39:26 +00:00

446 lines
15 KiB
Nim

# nimbus-eth1
# Copyright (c) 2023 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.
## Aristo DB -- Identifier types
## =============================
##
{.push raises: [].}
import
std/[sequtils, strutils, hashes],
eth/[common, trie/nibbles],
stew/byteutils,
chronicles,
results,
stint
type
QueueID* = distinct uint64
## Identifier used to tag filter logs stored on the backend.
FilterID* = distinct uint64
## Identifier used to identify a particular filter. It is generatied with
## the filter when stored to database.
VertexID* = distinct uint64
## Unique identifier for a vertex of the `Aristo Trie`. The vertex is the
## prefix tree (aka `Patricia Trie`) component. When augmented by hash
## keys, the vertex component will be called a node. On the persistent
## backend of the database, there is no other reference to the node than
## the very same `VertexID`.
HashKey* = object
## Ethereum MPTs use Keccak hashes as node links if the size of an RLP
## encoded node is of size at least 32 bytes. Otherwise, the RLP encoded
## node value is used as a pseudo node link (rather than a hash.) Such a
## node is nor stored on key-value database. Rather the RLP encoded node
## value is stored instead of a lode link in a parent node instead. Only
## for the root hash, the top level node is always referred to by the
## hash.
##
## This compaction feature needed an abstraction of the `HashKey` object
## which is either a `Hash256` or a `Blob` of length at most 31 bytes.
## This leaves two ways of representing an empty/void `HashKey` type.
## It may be available as an empty `Blob` of zero length, or the
## `Hash256` type of the Keccak hash of an empty `Blob` (see constant
## `EMPTY_ROOT_HASH`.)
##
case isHash: bool
of true:
key: Hash256 ## Merkle hash tacked to a vertex
else:
blob: Blob ## Optionally encoded small node data
PathID* = object
## Path into the `Patricia Trie`. This is a chain of maximal 64 nibbles
## (which is 32 bytes.) In most cases, the length is 64. So the path is
## encoded as a numeric value which is often easier to handle than a
## chain of nibbles.
##
## The path ID should be kept normalised, i.e.
## * 0 <= `length` <= 64
## * the unused trailing nibbles in `pfx` ar set to `0`
##
pfx*: UInt256
length*: uint8
# ----------
LeafTie* = object
## Unique access key for a leaf vertex. It identifies a root vertex
## followed by a nibble path along the `Patricia Trie` down to a leaf
## vertex. So this implies an obvious injection from the set of `LeafTie`
## objects *into* the set of `VertexID` obvious (which is typically *into*
## only, not a bijection.)
##
## Note that `LeafTie` objects have no representation in the `Aristo Trie`.
## They are used temporarily and in caches or backlog tables.
root*: VertexID ## Root ID for the sub-trie
path*: PathID ## Path into the `Patricia Trie`
HashLabel* = object
## Merkle hash key uniquely associated with a vertex ID. As hashes in a
## `Merkle Patricia Tree` are unique only on a particular sub-trie, the
## hash key is paired with the top vertex of the relevant sub-trie. This
## construction is similar to the one of a `LeafTie` object.
##
## Note that `HashLabel` objects have no representation in the
## `Aristo Trie`. They are used temporarily and in caches or backlog
## tables.
root*: VertexID ## Root ID for the sub-trie.
key*: HashKey ## Merkle hash or encoded small node data
# ------------------------------------------------------------------------------
# Chronicles formatters
# ------------------------------------------------------------------------------
chronicles.formatIt(VertexID): $it
chronicles.formatIt(QueueID): $it
# ------------------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------------------
func to(lid: HashKey; T: type PathID): T =
## Helper to bowrrow certain properties from `PathID`
if lid.isHash:
PathID(pfx: UInt256.fromBytesBE lid.key.data, length: 64)
elif 0 < lid.blob.len:
doAssert lid.blob.len < 32
var a32: array[32,byte]
(addr a32[0]).copyMem(unsafeAddr lid.blob[0], lid.blob.len)
PathID(pfx: UInt256.fromBytesBE a32, length: 2 * lid.blob.len.uint8)
else:
PathID()
# ------------------------------------------------------------------------------
# Public helpers: `VertexID` scalar data model
# ------------------------------------------------------------------------------
func `<`*(a, b: VertexID): bool {.borrow.}
func `<=`*(a, b: VertexID): bool {.borrow.}
func `==`*(a, b: VertexID): bool {.borrow.}
func cmp*(a, b: VertexID): int {.borrow.}
func `$`*(a: VertexID): string {.borrow.}
func `==`*(a: VertexID; b: static[uint]): bool = (a == VertexID(b))
# Scalar model extension as in `IntervalSetRef[VertexID,uint64]`
func `+`*(a: VertexID; b: uint64): VertexID = (a.uint64+b).VertexID
func `-`*(a: VertexID; b: uint64): VertexID = (a.uint64-b).VertexID
func `-`*(a, b: VertexID): uint64 = (a.uint64 - b.uint64)
# ------------------------------------------------------------------------------
# Public helpers: `QueueID` scalar data model
# ------------------------------------------------------------------------------
func `<`*(a, b: QueueID): bool {.borrow.}
func `<=`*(a, b: QueueID): bool {.borrow.}
func `==`*(a, b: QueueID): bool {.borrow.}
func cmp*(a, b: QueueID): int {.borrow.}
func `$`*(a: QueueID): string {.borrow.}
func `==`*(a: QueueID; b: static[uint]): bool = (a == QueueID(b))
func `+`*(a: QueueID; b: uint64): QueueID = (a.uint64+b).QueueID
func `-`*(a: QueueID; b: uint64): QueueID = (a.uint64-b).QueueID
func `-`*(a, b: QueueID): uint64 = (a.uint64 - b.uint64)
# ------------------------------------------------------------------------------
# Public helpers: `FilterID` scalar data model
# ------------------------------------------------------------------------------
func `<`*(a, b: FilterID): bool {.borrow.}
func `<=`*(a, b: FilterID): bool {.borrow.}
func `==`*(a, b: FilterID): bool {.borrow.}
func `$`*(a: FilterID): string {.borrow.}
func `==`*(a: FilterID; b: static[uint]): bool = (a == FilterID(b))
func `+`*(a: FilterID; b: uint64): FilterID = (a.uint64+b).FilterID
func `-`*(a: FilterID; b: uint64): FilterID = (a.uint64-b).FilterID
func `-`*(a, b: FilterID): uint64 = (a.uint64 - b.uint64)
# ------------------------------------------------------------------------------
# Public helpers: `PathID` ordered scalar data model
# ------------------------------------------------------------------------------
func high*(_: type PathID): PathID =
## Highest possible `PathID` object for given root vertex.
PathID(pfx: high(UInt256), length: 64)
func low*(_: type PathID): PathID =
## Lowest possible `PathID` object for given root vertex.
PathID()
func next*(pid: PathID): PathID =
## Return a `PathID` object with incremented path field. This function might
## return also a modified `length` field.
##
## The function returns the argument `pid` if it is already at its
## maximum value `high(PathID)`.
if pid.pfx == 0 and pid.length < 64:
PathID(length: pid.length + 1)
elif pid.pfx < high(UInt256):
PathID(pfx: pid.pfx + 1, length: 64)
else:
pid
func prev*(pid: PathID): PathID =
## Return a `PathID` object with decremented path field. This function might
## return also a modified `length` field.
##
## The function returns the argument `pid` if it is already at its
## minimum value `low(PathID)`.
if 0 < pid.pfx:
PathID(pfx: pid.pfx - 1, length: 64)
elif 0 < pid.length:
PathID(length: pid.length - 1)
else:
pid
func `<`*(a, b: PathID): bool =
## This function assumes that the arguments `a` and `b` are normalised
## (see `normal()`.)
a.pfx < b.pfx or (a.pfx == b.pfx and a.length < b.length)
func `<=`*(a, b: PathID): bool =
not (b < a)
func `==`*(a, b: PathID): bool =
## This function assumes that the arguments `a` and `b` are normalised
## (see `normal()`.)
a.pfx == b.pfx and a.length == b.length
func cmp*(a, b: PathID): int =
if a < b: -1 elif b < a: 1 else: 0
# ------------------------------------------------------------------------------
# Public helpers: `HashKey` ordered scalar data model
# ------------------------------------------------------------------------------
func len*(lid: HashKey): int =
if lid.isHash: 32 else: lid.blob.len
func fromBytes*(T: type HashKey; data: openArray[byte]): Result[T,void] =
## Write argument `data` of length 0 or between 2 and 32 bytes as a `HashKey`.
##
## A function argument `data` of length 32 is used as-is.
##
## For a function argument `data` of length between 2 and 31, the first
## byte must be the start of an RLP encoded list, i.e. `0xc0 + len` where
## where `len` is one less as the `data` length.
##
if data.len == 32:
var lid: T
lid.isHash = true
(addr lid.key.data[0]).copyMem(unsafeAddr data[0], data.len)
return ok lid
if data.len == 0:
return ok HashKey()
if 1 < data.len and data.len < 32 and data[0].int == 0xbf + data.len:
return ok T(isHash: false, blob: @data)
err()
func `<`*(a, b: HashKey): bool =
## Slow, but useful for debug sorting
a.to(PathID) < b.to(PathID)
func `==`*(a, b: HashKey): bool =
if a.isHash != b.isHash:
false
elif a.isHash:
a.key == b.key
else:
a.blob == b.blob
func cmp*(a, b: HashKey): int =
## Slow, but useful for debug sorting
if a < b: -1 elif b < a: 1 else: 0
# ------------------------------------------------------------------------------
# Public helpers: `LeafTie` ordered scalar data model
# ------------------------------------------------------------------------------
func high*(_: type LeafTie; root = VertexID(1)): LeafTie =
## Highest possible `LeafTie` object for given root vertex.
LeafTie(root: root, path: high(PathID))
func low*(_: type LeafTie; root = VertexID(1)): LeafTie =
## Lowest possible `LeafTie` object for given root vertex.
LeafTie(root: root, path: low(PathID))
func next*(lty: LeafTie): LeafTie =
## Return a `LeafTie` object with the `next()` path field.
LeafTie(root: lty.root, path: lty.path.next)
func prev*(lty: LeafTie): LeafTie =
## Return a `LeafTie` object with the `prev()` path field.
LeafTie(root: lty.root, path: lty.path.prev)
func `<`*(a, b: LeafTie): bool =
## This function assumes that the arguments `a` and `b` are normalised
## (see `normal()`.)
a.root < b.root or (a.root == b.root and a.path < b.path)
func `==`*(a, b: LeafTie): bool =
## This function assumes that the arguments `a` and `b` are normalised
## (see `normal()`.)
a.root == b.root and a.path == b.path
func cmp*(a, b: LeafTie): int =
## This function assumes that the arguments `a` and `b` are normalised
## (see `normal()`.)
if a < b: -1 elif a == b: 0 else: 1
# ------------------------------------------------------------------------------
# Public helpers: Reversible conversions between `PathID`, `HashKey`, etc.
# ------------------------------------------------------------------------------
func to*(key: HashKey; T: type Blob): T =
## Rewrite `HashKey` argument as `Blob` type of length between 0 and 32. A
## blob of length 32 is taken as a representation of a `HashKey` type while
## samller blobs are expected to represent an RLP encoded small node.
if key.isHash:
@(key.key.data)
else:
key.blob
func `@`*(lid: HashKey): Blob =
## Variant of `to(Blob)`
lid.to(Blob)
func to*(pid: PathID; T: type NibblesSeq): T =
## Representation of a `PathID` as `NibbleSeq` (preserving full information)
let nibbles = pid.pfx.toBytesBE.toSeq.initNibbleRange()
if pid.length < 64:
nibbles.slice(0, pid.length.int)
else:
nibbles
func to*(lid: HashKey; T: type Hash256): T =
## Returns the `Hash236` key if available, otherwise the Keccak hash of
## the `Blob` version.
if lid.isHash:
lid.key
elif 0 < lid.blob.len:
lid.blob.keccakHash
else:
EMPTY_ROOT_HASH
func to*(key: Hash256; T: type HashKey): T =
## This is an efficient version of `HashKey.fromBytes(key.data).value`, not
## to be confused with `digestTo(HashKey)`.
T(isHash: true, key: key)
func to*(n: SomeUnsignedInt|UInt256; T: type PathID): T =
## Representation of a scalar as `PathID` (preserving full information)
T(pfx: n.u256, length: 64)
# ------------------------------------------------------------------------------
# Public helpers: Miscellaneous mappings
# ------------------------------------------------------------------------------
func digestTo*(data: openArray[byte]; T: type HashKey): T =
## For argument `data` with length smaller than 32, import them as-is into
## the result. Otherwise import the Keccak hash of the argument `data`.
if data.len < 32:
result.blob = @data
else:
result.isHash = true
result.key = data.keccakHash
func normal*(a: PathID): PathID =
## Normalise path ID representation
result = a
if 64 < a.length:
result.length = 64
elif a.length < 64:
result.pfx = a.pfx and not (1.u256 shl (4 * (64 - a.length))) - 1.u256
# ------------------------------------------------------------------------------
# Public helpers: `Tables` and `Rlp` support
# ------------------------------------------------------------------------------
func hash*(a: PathID): Hash =
## Table/KeyedQueue mixin
var h: Hash = 0
h = h !& a.pfx.toBytesBE.hash
h = h !& a.length.hash
!$h
func hash*(a: HashKey): Hash =
## Table/KeyedQueue mixin
var h: Hash = 0
if a.isHash:
h = h !& a.key.hash
else:
h = h !& a.blob.hash
!$h
func hash*(lbl: HashLabel): Hash =
## Table/KeyedQueue/HashSet mixin
var h: Hash = 0
h = h !& lbl.root.hash
h = h !& lbl.key.hash
!$h
# ------------------------------------------------------------------------------
# Miscellaneous helpers
# ------------------------------------------------------------------------------
func `$`*(key: Hash256): string =
let w = UInt256.fromBytesBE key.data
if w == high(UInt256):
"2^256-1"
elif w == 0.u256:
"0"
elif w == 2.u256.pow 255:
"2^255" # 800...
elif w == 2.u256.pow 254:
"2^254" # 400..
elif w == 2.u256.pow 253:
"2^253" # 200...
elif w == 2.u256.pow 251:
"2^252" # 100...
else:
w.toHex
func `$`*(key: HashKey): string =
if key.isHash:
$key.key
else:
key.blob.toHex & "[#" & $key.blob.len & "]"
func `$`*(a: PathID): string =
if a.pfx != 0:
var dgts = $a.pfx.toHex
if a.length < 64:
dgts = dgts[0 ..< a.length]
result = dgts.strip(
leading=true, trailing=false, chars={'0'})
elif a.length != 0:
result = "0"
if a.length < 64:
result &= "(" & $a.length & ")"
func `$`*(a: LeafTie): string =
if a.root != 0:
result = ($a.root.uint64.toHex).strip(
leading=true, trailing=false, chars={'0'})
else:
result = "0"
result &= ":" & $a.path
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------