mirror of
https://github.com/status-im/nimbus-eth1.git
synced 2025-01-16 15:25:24 +00:00
6e0397e276
* Fix debug noise in `hashify()` for perfectly normal situation why: Was previously considered a fixable error * Fix test sample file names why: The larger test file `goerli68161.txt.gz` is already in the local archive. So there is no need to use the smaller one from the external repo. * Activate `accounts_cache` module from `db/ledger` why: A copy of the original `accounts_cache.nim` source to be integrated into the `Ledger` module wrapper which allows to switch between different `accounts_cache` implementations unser tha same API. details: At a later state, the `db/accounts_cache.nim` wrapper will be removed so that there is only one access to that module via `db/ledger/accounts_cache.nim`. * Fix copyright headers in source code
466 lines
15 KiB
Nim
466 lines
15 KiB
Nim
# nimbus-eth1
|
|
# Copyright (c) 2023 Status Research & Development GmbH
|
|
# Licensed under either of
|
|
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
|
|
# http://www.apache.org/licenses/LICENSE-2.0)
|
|
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
|
|
# http://opensource.org/licenses/MIT)
|
|
# at your option. This file may not be copied, modified, or distributed
|
|
# except according to those terms.
|
|
|
|
## Aristo DB -- Patricia Trie Merkleisation
|
|
## ========================================
|
|
##
|
|
## For the current state of the `Patricia Trie`, keys (equivalent to hashes)
|
|
## are associated with the vertex IDs. Existing key associations are checked
|
|
## (i.e. recalculated and compared) unless the ID is locked. In the latter
|
|
## case, the key is assumed to be correct without checking.
|
|
##
|
|
## The association algorithm is an optimised version of:
|
|
##
|
|
## * For all leaf vertices, label them with parent vertex so that there are
|
|
## chains from the leafs to the root vertex.
|
|
##
|
|
## * Apply a width-first traversal starting with the set of leafs vertices
|
|
## compiling the keys to associate with by hashing the current vertex.
|
|
##
|
|
## Apperently, keys (aka hashes) can be compiled for leaf vertices. For the
|
|
## other vertices, the keys can be compiled if all the children keys are
|
|
## known which is assured by the nature of the width-first traversal method.
|
|
##
|
|
## For production, this algorithm is slightly optimised:
|
|
##
|
|
## * For each leaf vertex, calculate the chain from the leaf to the root vertex.
|
|
## + Starting at the leaf, calculate the key for each vertex towards the root
|
|
## vertex as long as possible.
|
|
## + Stash the rest of the partial chain to be completed later
|
|
##
|
|
## * While there is a partial chain left, use the ends towards the leaf
|
|
## vertices and calculate the remaining keys (which results in a width-first
|
|
## traversal, again.)
|
|
|
|
{.push raises: [].}
|
|
|
|
import
|
|
std/[algorithm, sequtils, sets, strutils, tables],
|
|
chronicles,
|
|
eth/common,
|
|
results,
|
|
stew/interval_set,
|
|
"."/[aristo_desc, aristo_get, aristo_hike, aristo_serialise, aristo_utils,
|
|
aristo_vid]
|
|
|
|
type
|
|
BackVidValRef = ref object
|
|
root: VertexID ## Root vertex
|
|
onBe: bool ## Table key vid refers to backend
|
|
toVid: VertexID ## Next/follow up vertex
|
|
|
|
BackVidTab =
|
|
Table[VertexID,BackVidValRef]
|
|
|
|
BackWVtxRef = ref object
|
|
w: BackVidValRef
|
|
vtx: VertexRef
|
|
|
|
BackWVtxTab =
|
|
Table[VertexID,BackWVtxRef]
|
|
|
|
const
|
|
SubTreeSearchDepthMax = 64
|
|
|
|
logScope:
|
|
topics = "aristo-hashify"
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Private helpers
|
|
# ------------------------------------------------------------------------------
|
|
|
|
template logTxt(info: static[string]): static[string] =
|
|
"Hashify " & info
|
|
|
|
|
|
func getOrVoid(tab: BackVidTab; vid: VertexID): BackVidValRef =
|
|
tab.getOrDefault(vid, BackVidValRef(nil))
|
|
|
|
func getOrVoid(tab: BackWVtxTab; vid: VertexID): BackWVtxRef =
|
|
tab.getOrDefault(vid, BackWVtxRef(nil))
|
|
|
|
func isValid(brv: BackVidValRef): bool =
|
|
brv != BackVidValRef(nil)
|
|
|
|
func isValid(brv: BackWVtxRef): bool =
|
|
brv != BackWVtxRef(nil)
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Private functions
|
|
# ------------------------------------------------------------------------------
|
|
|
|
proc updateHashKey(
|
|
db: AristoDbRef; # Database, top layer
|
|
root: VertexID; # Root ID
|
|
vid: VertexID; # Vertex ID to check for
|
|
expected: HashKey; # Hash key for vertex address by `vid`
|
|
backend: bool; # Set `true` id vertex is on backend
|
|
): Result[void,AristoError] =
|
|
## Update the argument hash key `expected` for the vertex addressed by `vid`.
|
|
##
|
|
# If the Merkle hash has been cached locally, already it must match.
|
|
block:
|
|
let key = db.top.kMap.getOrVoid(vid).key
|
|
if key.isValid:
|
|
if key != expected:
|
|
let error = HashifyExistingHashMismatch
|
|
debug logTxt "hash update failed", vid, key, expected, error
|
|
return err(error)
|
|
return ok()
|
|
|
|
# If the vertex had been cached locally, there would be no locally cached
|
|
# Merkle hash key. It will be created at the bottom end of the function.
|
|
#
|
|
# So there remains the case when vertex is available on the backend only.
|
|
# The Merkle hash not cached locally. It might be overloaded (and eventually
|
|
# overwitten.)
|
|
if backend:
|
|
# Ok, vertex is on the backend.
|
|
let rc = db.getKeyBE vid
|
|
if rc.isOk:
|
|
let key = rc.value
|
|
if key == expected:
|
|
return ok()
|
|
|
|
# Changes on the upper layers overload the lower layers. Some hash keys
|
|
# on the backend will have become obsolete which is corrected here.
|
|
#
|
|
# Proceed `vidAttach()`, below
|
|
|
|
elif rc.error != GetKeyNotFound:
|
|
debug logTxt "backend key fetch failed", vid, expected, error=rc.error
|
|
return err(rc.error)
|
|
|
|
else:
|
|
discard
|
|
# Proceed `vidAttach()`, below
|
|
|
|
# Othwise there is no Merkle hash, so create one with the `expected` key
|
|
# and write it to the top level `pAmk[]` and `kMap[]` tables.
|
|
db.vidAttach(HashLabel(root: root, key: expected), vid)
|
|
ok()
|
|
|
|
|
|
proc leafToRootHasher(
|
|
db: AristoDbRef; # Database, top layer
|
|
hike: Hike; # Hike for labelling leaf..root
|
|
): Result[int,(VertexID,AristoError)] =
|
|
## Returns the index of the first node that could not be hashed
|
|
for n in (hike.legs.len-1).countDown(0):
|
|
let
|
|
wp = hike.legs[n].wp
|
|
bg = hike.legs[n].backend
|
|
rc = wp.vtx.toNode db
|
|
if rc.isErr:
|
|
return ok n
|
|
|
|
# Vertices marked proof nodes need not be checked
|
|
if wp.vid in db.top.pPrf:
|
|
continue
|
|
|
|
# Check against existing key, or store new key
|
|
let key = rc.value.digestTo(HashKey)
|
|
db.updateHashKey(hike.root, wp.vid, key, bg).isOkOr:
|
|
return err((wp.vid,error))
|
|
|
|
ok -1 # all could be hashed
|
|
|
|
# ------------------
|
|
|
|
proc deletedLeafHasher(
|
|
db: AristoDbRef; # Database, top layer
|
|
hike: Hike; # Hike for labelling leaf..root
|
|
): Result[void,(VertexID,AristoError)] =
|
|
var
|
|
todo = hike.legs.reversed.mapIt(it.wp)
|
|
solved: HashSet[VertexID]
|
|
# Edge case for empty `hike`
|
|
if todo.len == 0:
|
|
let vtx = db.getVtx hike.root
|
|
if not vtx.isValid:
|
|
return err((hike.root,HashifyVtxMissing))
|
|
todo = @[VidVtxPair(vid: hike.root, vtx: vtx)]
|
|
while 0 < todo.len:
|
|
var
|
|
delayed: seq[VidVtxPair]
|
|
didHere: HashSet[VertexID] # avoid duplicates
|
|
for wp in todo:
|
|
let rc = wp.vtx.toNode(db, stopEarly=false)
|
|
if rc.isOk:
|
|
let
|
|
expected = rc.value.digestTo(HashKey)
|
|
key = db.getKey wp.vid
|
|
if key.isValid:
|
|
if key != expected:
|
|
return err((wp.vid,HashifyExistingHashMismatch))
|
|
else:
|
|
db.vidAttach(HashLabel(root: hike.root, key: expected), wp.vid)
|
|
solved.incl wp.vid
|
|
else:
|
|
# Resolve follow up vertices first
|
|
for vid in rc.error:
|
|
let vtx = db.getVtx vid
|
|
if not vtx.isValid:
|
|
return err((vid,HashifyVtxMissing))
|
|
if vid in solved:
|
|
discard wp.vtx.toNode(db, stopEarly=false)
|
|
return err((vid,HashifyVidCircularDependence))
|
|
if vid notin didHere:
|
|
didHere.incl vid
|
|
delayed.add VidVtxPair(vid: vid, vtx: vtx)
|
|
|
|
# Followed by this vertex which relies on the ones registered above.
|
|
if wp.vid notin didHere:
|
|
didHere.incl wp.vid
|
|
delayed.add wp
|
|
|
|
todo = delayed
|
|
|
|
ok()
|
|
|
|
# ------------------
|
|
|
|
proc resolveStateRoots(
|
|
db: AristoDbRef; # Database, top layer
|
|
uVids: BackVidTab; # Unresolved vertex IDs
|
|
): Result[void,(VertexID,AristoError)] =
|
|
## Resolve unresolved nodes. There might be a sub-tree on the backend which
|
|
## blocks resolving the current structure. So search the `uVids` argument
|
|
## list for missing vertices and resolve it.
|
|
#
|
|
# Update out-of-path hashes, i.e. fill gaps caused by branching out from
|
|
# `downMost` table vertices.
|
|
#
|
|
# Example
|
|
# ::
|
|
# $1 ^
|
|
# \ |
|
|
# $7 -- $6 -- leaf $8 | on top layer,
|
|
# \ `--- leaf $9 | $5..$9 were inserted,
|
|
# $5 | $1 was redefined
|
|
# \ v
|
|
# \
|
|
# \ ^
|
|
# $4 -- leaf $2 | from
|
|
# `--- leaf $3 | backend (BE)
|
|
# v
|
|
# backLink[] = {$7}
|
|
# downMost[] = {$7}
|
|
# top.kMap[] = {£1, £6, £8, £9}
|
|
# BE.kMap[] = {£1, £2, £3, £4}
|
|
#
|
|
# So `$5` (needed for `$7`) cannot be resolved because it is neither on
|
|
# the path `($1..$8)`, nor is it on `($1..$9)`.
|
|
#
|
|
var follow: BackWVtxTab
|
|
|
|
proc wVtxRef(db: AristoDbRef; root, vid, toVid: VertexID): BackWVtxRef =
|
|
let vtx = db.getVtx vid
|
|
if vtx.isValid:
|
|
return BackWVtxRef(
|
|
vtx: vtx,
|
|
w: BackVidValRef(
|
|
root: root,
|
|
onBe: not db.top.sTab.getOrVoid(vid).isValid,
|
|
toVid: toVid))
|
|
|
|
# Init `follow` table by unresolved `Branch` leafs from `vidTab`
|
|
for (uVid,uVal) in uVids.pairs:
|
|
let uVtx = db.getVtx uVid
|
|
if uVtx.isValid and uVtx.vType == Branch:
|
|
var didSomething = false
|
|
for vid in uVtx.bVid:
|
|
if vid.isValid and not db.getKey(vid).isValid:
|
|
let w = db.wVtxRef(root=uVal.root, vid=vid, toVid=uVid)
|
|
if not w.isNil:
|
|
follow[vid] = w
|
|
didSomething = true
|
|
# Add state root to be resolved, as well
|
|
if didSomething and not follow.hasKey uVal.root:
|
|
let w = db.wVtxRef(root=uVal.root, vid=uVal.root, toVid=uVal.root)
|
|
if not w.isNil:
|
|
follow[uVal.root] = w
|
|
|
|
# Update and re-collect into `follow` table
|
|
var level = 0
|
|
while 0 < follow.len:
|
|
var
|
|
changes = false
|
|
redo: BackWVtxTab
|
|
for (fVid,fVal) in follow.pairs:
|
|
# Resolve or keep for later
|
|
let rc = fVal.vtx.toNode db
|
|
if rc.isOk:
|
|
# Update Merkle hash
|
|
let key = rc.value.digestTo(HashKey)
|
|
db.updateHashKey(fVal.w.root, fVid, key, fVal.w.onBe).isOkOr:
|
|
return err((fVid, error))
|
|
changes = true
|
|
else:
|
|
# Cannot complete with this vertex, so dig deeper and do it later
|
|
redo[fVid] = fVal
|
|
|
|
case fVal.vtx.vType:
|
|
of Branch:
|
|
for vid in fVal.vtx.bVid:
|
|
if vid.isValid and not db.getKey(vid).isValid:
|
|
let w = db.wVtxRef(root=fVal.w.root, vid=vid, toVid=fVid)
|
|
if not w.isNil:
|
|
changes = true
|
|
redo[vid] = w
|
|
of Extension:
|
|
let vid = fVal.vtx.eVid
|
|
if vid.isValid and not db.getKey(vid).isValid:
|
|
let w = db.wVtxRef(root=fVal.w.root,vid=vid, toVid=fVid)
|
|
if not w.isNil:
|
|
changes = true
|
|
redo[vid] = w
|
|
of Leaf:
|
|
# Should habe been hashed earlier
|
|
return err((fVid,HashifyDownVtxLeafUnexpected))
|
|
|
|
# Beware of loops
|
|
if not changes or SubTreeSearchDepthMax < level:
|
|
return err((VertexID(0),HashifyDownVtxlevelExceeded))
|
|
|
|
# Restart with a new instance of `follow`
|
|
redo.swap follow
|
|
level.inc
|
|
|
|
ok()
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Public functions
|
|
# ------------------------------------------------------------------------------
|
|
|
|
proc hashifyClear*(
|
|
db: AristoDbRef; # Database, top layer
|
|
locksOnly = false; # If `true`, then clear only proof locks
|
|
) =
|
|
## Clear all `Merkle` hashes from the `db` argument database top layer.
|
|
if not locksOnly:
|
|
db.top.pAmk.clear
|
|
db.top.kMap.clear
|
|
db.top.pPrf.clear
|
|
|
|
|
|
proc hashify*(
|
|
db: AristoDbRef; # Database, top layer
|
|
): Result[HashSet[VertexID],(VertexID,AristoError)] =
|
|
## Add keys to the `Patricia Trie` so that it becomes a `Merkle Patricia
|
|
## Tree`. If successful, the function returns the keys (aka Merkle hash) of
|
|
## the root vertices.
|
|
var
|
|
roots: HashSet[VertexID]
|
|
completed: HashSet[VertexID]
|
|
|
|
# Width-first leaf-to-root traversal structure
|
|
backLink: BackVidTab
|
|
downMost: BackVidTab
|
|
|
|
# Unconditionally mark the top layer
|
|
db.top.dirty = true
|
|
|
|
for (lky,vid) in db.top.lTab.pairs:
|
|
let rc = lky.hikeUp db
|
|
|
|
# There might be deleted entries on the leaf table. If this is the case,
|
|
# the Merkle hashes for the vertices in the `hike` can all be compiled.
|
|
if not vid.isValid:
|
|
? db.deletedLeafHasher rc.to(Hike)
|
|
|
|
elif rc.isErr:
|
|
return err((vid,rc.error[1]))
|
|
|
|
else:
|
|
# Hash as much of the `hike` as possible
|
|
let
|
|
hike = rc.value
|
|
n = ? db.leafToRootHasher hike
|
|
roots.incl hike.root
|
|
|
|
if 0 < n:
|
|
# Backtrack and register remaining nodes. Note that in case *n == 0*,
|
|
# the root vertex has not been fully resolved yet.
|
|
#
|
|
# .. unresolved hash keys | all set here ..
|
|
# |
|
|
# |
|
|
# hike.legs: (leg[0], leg[1], ..leg[n-1], leg[n], ..)
|
|
# | | | |
|
|
# | <---- | <---- | <-------- |
|
|
# | | |
|
|
# | backLink[] | downMost[] |
|
|
#
|
|
if n+1 < hike.legs.len:
|
|
downMost.del hike.legs[n+1].wp.vid
|
|
downMost[hike.legs[n].wp.vid] = BackVidValRef(
|
|
root: hike.root,
|
|
onBe: hike.legs[n].backend,
|
|
toVid: hike.legs[n-1].wp.vid)
|
|
for u in (n-1).countDown(1):
|
|
backLink[hike.legs[u].wp.vid] = BackVidValRef(
|
|
root: hike.root,
|
|
onBe: hike.legs[u].backend,
|
|
toVid: hike.legs[u-1].wp.vid)
|
|
elif n < 0:
|
|
completed.incl hike.root
|
|
|
|
# At least one full path leaf..root should have succeeded with labelling
|
|
# for each root.
|
|
if completed.len < roots.len:
|
|
? db.resolveStateRoots backLink
|
|
|
|
# Update remaining hashes
|
|
while 0 < downMost.len:
|
|
var
|
|
redo: BackVidTab
|
|
done: HashSet[VertexID]
|
|
|
|
for (vid,val) in downMost.pairs:
|
|
# Try to convert vertex to a node. This is possible only if all link
|
|
# references have Merkle hashes.
|
|
#
|
|
# Also `db.getVtx(vid)` => not nil as it was fetched earlier, already
|
|
let rc = db.getVtx(vid).toNode db
|
|
if rc.isErr:
|
|
# Cannot complete with this vertex, so do it later
|
|
redo[vid] = val
|
|
|
|
else:
|
|
# Update Merkle hash
|
|
let key = rc.value.digestTo(HashKey)
|
|
db.updateHashKey(val.root, vid, key, val.onBe).isOkOr:
|
|
return err((vid,error))
|
|
|
|
done.incl vid
|
|
|
|
# Proceed with back link
|
|
let nextItem = backLink.getOrVoid val.toVid
|
|
if nextItem.isValid:
|
|
redo[val.toVid] = nextItem
|
|
|
|
# Make sure that the algorithm proceeds
|
|
if done.len == 0:
|
|
let error = HashifyCannotComplete
|
|
return err((VertexID(0),error))
|
|
|
|
# Clean up dups from `backLink` and restart `downMost`
|
|
for vid in done.items:
|
|
backLink.del vid
|
|
downMost = redo
|
|
|
|
db.top.dirty = false
|
|
ok completed
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# End
|
|
# ------------------------------------------------------------------------------
|