mirror of
https://github.com/status-im/nimbus-eth1.git
synced 2025-01-19 00:31:20 +00:00
9c3de888a4
This PR exploits structural properties of era files to simplify the implementation and in particular remove the need to load all era file indicies at startup which may be slow (due to archival storage residing on slow drives)
324 lines
11 KiB
Nim
324 lines
11 KiB
Nim
# nimbus-eth1
|
|
# Copyright (c) 2023-2024 Status Research & Development GmbH
|
|
# Licensed under either of
|
|
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
|
|
# http://www.apache.org/licenses/LICENSE-2.0)
|
|
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
|
|
# http://opensource.org/licenses/MIT)
|
|
# at your option. This file may not be copied, modified, or distributed
|
|
# except according to those terms.
|
|
|
|
## Aristo DB -- Patricia Trie Merkleisation
|
|
## ========================================
|
|
##
|
|
## For the current state of the `Patricia Trie`, keys (equivalent to hashes)
|
|
## are associated with the vertex IDs. Existing key associations are taken
|
|
## as-is/unchecked unless the ID is marked a proof node. In the latter case,
|
|
## the key is assumed to be correct after re-calculation.
|
|
##
|
|
## The labelling algorithm works roughly as follows:
|
|
##
|
|
## * Given a set of start or root vertices, build the forest (of trees)
|
|
## downwards towards leafs vertices so that none of these vertices has a
|
|
## Merkle hash label.
|
|
##
|
|
## * Starting at the leaf vertices in width-first fashion, calculate the
|
|
## Merkle hashes and label the leaf vertices. Recursively work up labelling
|
|
## vertices up until the root nodes are reached.
|
|
##
|
|
## Note that there are some tweaks for `proof` node vertices which lead to
|
|
## incomplete trees in a way that the algoritm handles existing Merkle hash
|
|
## labels for missing vertices.
|
|
##
|
|
{.push raises: [].}
|
|
|
|
import
|
|
std/[algorithm, sequtils, sets, tables],
|
|
chronicles,
|
|
eth/common,
|
|
results,
|
|
"."/[aristo_desc, aristo_get, aristo_layers, aristo_serialise, aristo_utils]
|
|
|
|
type
|
|
WidthFirstForest = object
|
|
## Collected width first search trees
|
|
root: HashSet[VertexID] ## Top level, root targets
|
|
pool: Table[VertexID,VertexID] ## Upper links pool
|
|
base: Table[VertexID,VertexID] ## Width-first leaf level links
|
|
leaf: HashSet[VertexID] ## Stans-alone leaf to process
|
|
rev: Table[VertexID,HashSet[VertexID]] ## Reverse look up table
|
|
|
|
logScope:
|
|
topics = "aristo-hashify"
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Private helpers
|
|
# ------------------------------------------------------------------------------
|
|
|
|
func getOrVoid(tab: Table[VertexID,VertexID]; vid: VertexID): VertexID =
|
|
tab.getOrDefault(vid, VertexID(0))
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Private functions
|
|
# ------------------------------------------------------------------------------
|
|
|
|
func hasValue(
|
|
wffTable: Table[VertexID,VertexID];
|
|
vid: VertexID;
|
|
wff: WidthFirstForest;
|
|
): bool =
|
|
## Helper for efficient `value` access:
|
|
## ::
|
|
## wffTable.hasValue(wff, vid)
|
|
##
|
|
## instead of
|
|
## ::
|
|
## vid in wffTable.values.toSeq
|
|
##
|
|
for w in wff.rev.getOrVoid vid:
|
|
if w in wffTable:
|
|
return true
|
|
|
|
|
|
proc pedigree(
|
|
db: AristoDbRef; # Database, top layer
|
|
ancestors: HashSet[VertexID]; # Vertex IDs to start connecting from
|
|
proofs: HashSet[VertexID]; # Additional proof nodes to start from
|
|
): Result[WidthFirstForest,(VertexID,AristoError)] =
|
|
## For each vertex ID from the argument set `ancestors` find all un-labelled
|
|
## grand child vertices and build a forest (of trees) starting from the
|
|
## grand child vertices.
|
|
##
|
|
var
|
|
wff: WidthFirstForest
|
|
leafs: HashSet[VertexID]
|
|
|
|
proc register(wff: var WidthFirstForest; fromVid, toVid: VertexID) =
|
|
if toVid in wff.base:
|
|
# * there is `toVid->*` in `base[]`
|
|
# * so ``toVid->*` moved to `pool[]`
|
|
wff.pool[toVid] = wff.base.getOrVoid toVid
|
|
wff.base.del toVid
|
|
if wff.base.hasValue(fromVid, wff):
|
|
# * there is `*->fromVid` in `base[]`
|
|
# * so store `fromVid->toVid` in `pool[]`
|
|
wff.pool[fromVid] = toVid
|
|
else:
|
|
# store `fromVid->toVid` in `base[]`
|
|
wff.base[fromVid] = toVid
|
|
|
|
# Register reverse pair for quick table value lookup
|
|
wff.rev.withValue(toVid, val):
|
|
val[].incl fromVid
|
|
do:
|
|
wff.rev[toVid] = [fromVid].toHashSet
|
|
|
|
# Remove unnecessarey sup-trie roots (e.g. for a storage root)
|
|
wff.root.excl fromVid
|
|
|
|
# Initialise greedy search which will keep a set of current leafs in the
|
|
# `leafs{}` set and follow up links in the `pool[]` table, leading all the
|
|
# way up to the `root{}` set.
|
|
#
|
|
# Process root nodes if they are unlabelled
|
|
var rootWasDeleted = VertexID(0)
|
|
for root in ancestors:
|
|
let vtx = db.getVtx root
|
|
if vtx.isNil:
|
|
if VertexID(LEAST_FREE_VID) <= root:
|
|
# There must be a another root, as well (e.g. `$1` for a storage
|
|
# root). Only the last one of some will be reported with error code.
|
|
rootWasDeleted = root
|
|
elif not db.getKey(root).isValid:
|
|
# Need to process `root` node
|
|
let children = vtx.subVids
|
|
if children.len == 0:
|
|
# This is an isolated leaf node
|
|
wff.leaf.incl root
|
|
else:
|
|
wff.root.incl root
|
|
for child in vtx.subVids:
|
|
if not db.getKey(child).isValid:
|
|
leafs.incl child
|
|
wff.register(child, root)
|
|
if rootWasDeleted.isValid and
|
|
wff.root.len == 0 and
|
|
wff.leaf.len == 0:
|
|
return err((rootWasDeleted,HashifyRootVtxUnresolved))
|
|
|
|
# Initialisation for `proof` nodes which are sort of similar to `root` nodes.
|
|
for proof in proofs:
|
|
let vtx = db.getVtx proof
|
|
if vtx.isNil or not db.getKey(proof).isValid:
|
|
return err((proof,HashifyVtxUnresolved))
|
|
let children = vtx.subVids
|
|
if 0 < children.len:
|
|
# To be treated as a root node
|
|
wff.root.incl proof
|
|
for child in vtx.subVids:
|
|
if not db.getKey(child).isValid:
|
|
leafs.incl child
|
|
wff.register(child, proof)
|
|
|
|
# Recursively step down and collect unlabelled vertices
|
|
while 0 < leafs.len:
|
|
var redo: typeof(leafs)
|
|
|
|
for parent in leafs:
|
|
assert parent.isValid
|
|
assert not db.getKey(parent).isValid
|
|
|
|
let vtx = db.getVtx parent
|
|
if not vtx.isNil:
|
|
let children = vtx.subVids.filterIt(not db.getKey(it).isValid)
|
|
if 0 < children.len:
|
|
for child in children:
|
|
redo.incl child
|
|
wff.register(child, parent)
|
|
continue
|
|
|
|
if parent notin wff.base:
|
|
# The buck stops here:
|
|
# move `(parent,granny)` from `pool[]` to `base[]`
|
|
let granny = wff.pool.getOrVoid parent
|
|
assert granny.isValid
|
|
wff.register(parent, granny)
|
|
wff.pool.del parent
|
|
|
|
redo.swap leafs
|
|
|
|
ok move(wff)
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Private functions, tree traversal
|
|
# ------------------------------------------------------------------------------
|
|
|
|
proc createSched(
|
|
db: AristoDbRef; # Database, top layer
|
|
): Result[WidthFirstForest,(VertexID,AristoError)] =
|
|
## Create width-first search schedule (aka forest)
|
|
##
|
|
var wff = ? db.pedigree(db.dirty, db.pPrf)
|
|
|
|
if 0 < wff.leaf.len:
|
|
for vid in wff.leaf:
|
|
let node = db.getVtx(vid).toNode(db, beKeyOk=false).valueOr:
|
|
# Make sure that all those nodes are reachable
|
|
for needed in error:
|
|
if needed notin wff.base and
|
|
needed notin wff.pool:
|
|
return err((needed,HashifyVtxUnresolved))
|
|
continue
|
|
db.layersPutKey(VertexID(1), vid, node.digestTo(HashKey))
|
|
|
|
ok move(wff)
|
|
|
|
|
|
proc processSched(
|
|
wff: var WidthFirstForest; # Search tree to process
|
|
db: AristoDbRef; # Database, top layer
|
|
): Result[void,(VertexID,AristoError)] =
|
|
## Traverse width-first schedule and update vertex hash labels.
|
|
##
|
|
while 0 < wff.base.len:
|
|
var
|
|
accept = false
|
|
redo: typeof(wff.base)
|
|
|
|
for (vid,toVid) in wff.base.pairs:
|
|
let vtx = db.getVtx vid
|
|
assert vtx.isValid
|
|
|
|
# Try to convert the vertex to a node. This is possible only if all
|
|
# link references have Merkle hash keys, already.
|
|
let node = vtx.toNode(db, stopEarly=false).valueOr:
|
|
# Do this vertex later, again
|
|
if wff.pool.hasValue(vid, wff):
|
|
wff.pool[vid] = toVid
|
|
accept = true # `redo[]` will be fifferent from `base[]`
|
|
else:
|
|
redo[vid] = toVid
|
|
continue
|
|
# End `valueOr` terminates error clause
|
|
|
|
# Could resolve => update Merkle hash
|
|
db.layersPutKey(VertexID(1), vid, node.digestTo HashKey)
|
|
|
|
# Set follow up link for next round
|
|
let toToVid = wff.pool.getOrVoid toVid
|
|
if toToVid.isValid:
|
|
if toToVid in redo:
|
|
# Got predecessor `(toVid,toToVid)` of `(toToVid,xxx)`,
|
|
# so move `(toToVid,xxx)` from `redo[]` to `pool[]`
|
|
wff.pool[toToVid] = redo.getOrVoid toToVid
|
|
redo.del toToVid
|
|
# Move `(toVid,toToVid)` from `pool[]` to `redo[]`
|
|
wff.pool.del toVid
|
|
redo[toVid] = toToVid
|
|
|
|
accept = true # `redo[]` will be fifferent from `base[]`
|
|
# End `for (vid,toVid)..`
|
|
|
|
# Make sure that `base[]` is different from `redo[]`
|
|
if not accept:
|
|
let vid = wff.base.keys.toSeq[0]
|
|
return err((vid,HashifyVtxUnresolved))
|
|
# Restart `wff.base[]`
|
|
wff.base.swap redo
|
|
|
|
ok()
|
|
|
|
|
|
proc finaliseRoots(
|
|
wff: var WidthFirstForest; # Search tree to process
|
|
db: AristoDbRef; # Database, top layer
|
|
): Result[void,(VertexID,AristoError)] =
|
|
## Process root vertices after all other vertices are done.
|
|
##
|
|
# Make sure that the pool has been exhausted
|
|
if 0 < wff.pool.len:
|
|
let vid = wff.pool.keys.toSeq.sorted[0]
|
|
return err((vid,HashifyVtxUnresolved))
|
|
|
|
# Update or verify root nodes
|
|
for vid in wff.root:
|
|
# Calculate hash key
|
|
let
|
|
node = db.getVtx(vid).toNode(db).valueOr:
|
|
return err((vid,HashifyRootVtxUnresolved))
|
|
key = node.digestTo(HashKey)
|
|
if vid notin db.pPrf:
|
|
db.layersPutKey(VertexID(1), vid, key)
|
|
elif key != db.getKey vid:
|
|
return err((vid,HashifyProofHashMismatch))
|
|
|
|
ok()
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Public functions
|
|
# ------------------------------------------------------------------------------
|
|
|
|
proc hashify*(
|
|
db: AristoDbRef; # Database, top layer
|
|
): Result[void,(VertexID,AristoError)] =
|
|
## Add keys to the `Patricia Trie` so that it becomes a `Merkle Patricia
|
|
## Tree`.
|
|
##
|
|
if 0 < db.dirty.len:
|
|
# Set up widh-first traversal schedule
|
|
var wff = ? db.createSched()
|
|
|
|
# Traverse tree spanned by `wff` and label remaining vertices.
|
|
? wff.processSched db
|
|
|
|
# Do/complete state root vertices
|
|
? wff.finaliseRoots db
|
|
|
|
db.top.final.dirty.clear # Mark top layer clean
|
|
|
|
ok()
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# End
|
|
# ------------------------------------------------------------------------------
|