nimbus-eth1/nimbus/db/aristo/aristo_hashify.nim
Jordan Hrycaj cd1d370543
Aristo db api extensions for use as core db backend (#1754)
* Update docu

* Update Aristo/Kvt constructor prototype

why:
  Previous version used an `enum` value to indicate what backend is to
  be used. This was replaced by using the backend object type.

* Rewrite `hikeUp()` return code into `Result[Hike,(Hike,AristoError)]`

why:
  Better code maintenance. Previously, the `Hike` object was returned. It
  had an internal error field so partial success was also available on
  a failure. This error field has been removed.

* Use `openArray[byte]` rather than `Blob` in functions prototypes

* Provide synchronised multi instance transactions

why:
  The `CoreDB` object was geared towards the legacy DB which used a single
  transaction for the key-value backend DB. Different state roots are
  provided by the backend database, so all instances work directly on the
  same backend.

  Aristo db instances have different in-memory mappings (aka different
  state roots) and the transactions are on top of there mappings. So each
  instance might run different transactions.

  Multi instance transactions are a compromise to converge towards the
  legacy behaviour. The synchronised transactions span over all instances
  available at the time when base transaction was opened. Instances
  created later are unaffected.

* Provide key-value pair database iterator

why:
  Needed in `CoreDB` for `replicate()` emulation

also:
  Some update of internal code

* Extend API (i.e. prototype variants)

why:
  Needed for `CoreDB` geared towards the legacy backend which has a more
  basic API than Aristo.
2023-09-15 16:23:53 +01:00

472 lines
15 KiB
Nim

# nimbus-eth1
# Copyright (c) 2021 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.
## Aristo DB -- Patricia Trie Merkleisation
## ========================================
##
## For the current state of the `Patricia Trie`, keys (equivalent to hashes)
## are associated with the vertex IDs. Existing key associations are checked
## (i.e. recalculated and compared) unless the ID is locked. In the latter
## case, the key is assumed to be correct without checking.
##
## The association algorithm is an optimised version of:
##
## * For all leaf vertices, label them with parent vertex so that there are
## chains from the leafs to the root vertex.
##
## * Apply a width-first traversal starting with the set of leafs vertices
## compiling the keys to associate with by hashing the current vertex.
##
## Apperently, keys (aka hashes) can be compiled for leaf vertices. For the
## other vertices, the keys can be compiled if all the children keys are
## known which is assured by the nature of the width-first traversal method.
##
## For production, this algorithm is slightly optimised:
##
## * For each leaf vertex, calculate the chain from the leaf to the root vertex.
## + Starting at the leaf, calculate the key for each vertex towards the root
## vertex as long as possible.
## + Stash the rest of the partial chain to be completed later
##
## * While there is a partial chain left, use the ends towards the leaf
## vertices and calculate the remaining keys (which results in a width-first
## traversal, again.)
{.push raises: [].}
import
std/[algorithm, sequtils, sets, strutils, tables],
chronicles,
eth/common,
results,
stew/interval_set,
"."/[aristo_desc, aristo_get, aristo_hike, aristo_transcode, aristo_utils,
aristo_vid]
type
BackVidValRef = ref object
root: VertexID ## Root vertex
onBe: bool ## Table key vid refers to backend
toVid: VertexID ## Next/follow up vertex
BackVidTab =
Table[VertexID,BackVidValRef]
BackWVtxRef = ref object
w: BackVidValRef
vtx: VertexRef
BackWVtxTab =
Table[VertexID,BackWVtxRef]
const
SubTreeSearchDepthMax = 64
logScope:
topics = "aristo-hashify"
# ------------------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------------------
template logTxt(info: static[string]): static[string] =
"Hashify " & info
func getOrVoid(tab: BackVidTab; vid: VertexID): BackVidValRef =
tab.getOrDefault(vid, BackVidValRef(nil))
func getOrVoid(tab: BackWVtxTab; vid: VertexID): BackWVtxRef =
tab.getOrDefault(vid, BackWVtxRef(nil))
func isValid(brv: BackVidValRef): bool =
brv != BackVidValRef(nil)
func isValid(brv: BackWVtxRef): bool =
brv != BackWVtxRef(nil)
# ------------------------------------------------------------------------------
# Private functions
# ------------------------------------------------------------------------------
proc updateHashKey(
db: AristoDbRef; # Database, top layer
root: VertexID; # Root ID
vid: VertexID; # Vertex ID to check for
expected: HashKey; # Hash key for vertex address by `vid`
backend: bool; # Set `true` id vertex is on backend
): Result[void,AristoError] =
## Update the argument hash key `expected` for the vertex addressed by `vid`.
##
# If the Merkle hash has been cached locally, already it must match.
block:
let key = db.top.kMap.getOrVoid(vid).key
if key.isValid:
if key != expected:
let error = HashifyExistingHashMismatch
debug logTxt "hash update failed", vid, key, expected, error
return err(error)
return ok()
# If the vertex had been cached locally, there would be no locally cached
# Merkle hash key. It will be created at the bottom end of the function.
#
# So there remains tha case when vertex is available on the backend only.
# The Merkle hash not cached locally. It might be overloaded (and eventually
# overwitten.)
if backend:
# Ok, vertex is on the backend.
let rc = db.getKeyBE vid
if rc.isOk:
let key = rc.value
if key == expected:
return ok()
# This step is a error in the sense that something the on the backend
# is fishy. There should not be contradicting Merkle hashes. Throwing
# an error heres would lead to a deadlock so we correct it.
debug "correcting backend hash key mismatch", vid, key, expected
# Proceed `vidAttach()`, below
elif rc.error != GetKeyNotFound:
debug logTxt "backend key fetch failed", vid, expected, error=rc.error
return err(rc.error)
else:
discard
# Proceed `vidAttach()`, below
# Othwise there is no Merkle hash, so create one with the `expected` key
db.vidAttach(HashLabel(root: root, key: expected), vid)
ok()
proc leafToRootHasher(
db: AristoDbRef; # Database, top layer
hike: Hike; # Hike for labelling leaf..root
): Result[int,(VertexID,AristoError)] =
## Returns the index of the first node that could not be hashed
for n in (hike.legs.len-1).countDown(0):
let
wp = hike.legs[n].wp
bg = hike.legs[n].backend
rc = wp.vtx.toNode db
if rc.isErr:
return ok n
# Vertices marked proof nodes need not be checked
if wp.vid in db.top.pPrf:
continue
# Check against existing key, or store new key
let
key = rc.value.to(HashKey)
rx = db.updateHashKey(hike.root, wp.vid, key, bg)
if rx.isErr:
return err((wp.vid,rx.error))
ok -1 # all could be hashed
# ------------------
proc deletedLeafHasher(
db: AristoDbRef; # Database, top layer
hike: Hike; # Hike for labelling leaf..root
): Result[void,(VertexID,AristoError)] =
var
todo = hike.legs.reversed.mapIt(it.wp)
solved: HashSet[VertexID]
# Edge case for empty `hike`
if todo.len == 0:
let vtx = db.getVtx hike.root
if not vtx.isValid:
return err((hike.root,HashifyVtxMissing))
todo = @[VidVtxPair(vid: hike.root, vtx: vtx)]
while 0 < todo.len:
var
delayed: seq[VidVtxPair]
didHere: HashSet[VertexID] # avoid duplicates
for wp in todo:
let rc = wp.vtx.toNode(db, stopEarly=false)
if rc.isOk:
let
expected = rc.value.to(HashKey)
key = db.getKey wp.vid
if key.isValid:
if key != expected:
return err((wp.vid,HashifyExistingHashMismatch))
else:
db.vidAttach(HashLabel(root: hike.root, key: expected), wp.vid)
solved.incl wp.vid
else:
# Resolve follow up vertices first
for vid in rc.error:
let vtx = db.getVtx vid
if not vtx.isValid:
return err((vid,HashifyVtxMissing))
if vid in solved:
discard wp.vtx.toNode(db, stopEarly=false)
return err((vid,HashifyVidCircularDependence))
if vid notin didHere:
didHere.incl vid
delayed.add VidVtxPair(vid: vid, vtx: vtx)
# Followed by this vertex which relies on the ones registered above.
if wp.vid notin didHere:
didHere.incl wp.vid
delayed.add wp
todo = delayed
ok()
# ------------------
proc resolveStateRoots(
db: AristoDbRef; # Database, top layer
uVids: BackVidTab; # Unresolved vertex IDs
): Result[void,(VertexID,AristoError)] =
## Resolve unresolved nodes. There might be a sub-tree on the backend which
## blocks resolving the current structure. So search the `uVids` argument
## list for missing vertices and resolve it.
#
# Update out-of-path hashes, i.e. fill gaps caused by branching out from
# `downMost` table vertices.
#
# Example
# ::
# $1 ^
# \ |
# $7 -- $6 -- leaf $8 | on top layer,
# \ `--- leaf $9 | $5..$9 were inserted,
# $5 | $1 was redefined
# \ v
# \
# \ ^
# $4 -- leaf $2 | from
# `--- leaf $3 | backend (BE)
# v
# backLink[] = {$7}
# downMost[] = {$7}
# top.kMap[] = {£1, £6, £8, £9}
# BE.kMap[] = {£1, £2, £3, £4}
#
# So `$5` (needed for `$7`) cannot be resolved because it is neither on
# the path `($1..$8)`, nor is it on `($1..$9)`.
#
var follow: BackWVtxTab
proc wVtxRef(db: AristoDbRef; root, vid, toVid: VertexID): BackWVtxRef =
let vtx = db.getVtx vid
if vtx.isValid:
return BackWVtxRef(
vtx: vtx,
w: BackVidValRef(
root: root,
onBe: not db.top.sTab.getOrVoid(vid).isValid,
toVid: toVid))
# Init `follow` table by unresolved `Branch` leafs from `vidTab`
for (uVid,uVal) in uVids.pairs:
let uVtx = db.getVtx uVid
if uVtx.isValid and uVtx.vType == Branch:
var didSomething = false
for vid in uVtx.bVid:
if vid.isValid and not db.getKey(vid).isValid:
let w = db.wVtxRef(root=uVal.root, vid=vid, toVid=uVid)
if not w.isNil:
follow[vid] = w
didSomething = true
# Add state root to be resolved, as well
if didSomething and not follow.hasKey uVal.root:
let w = db.wVtxRef(root=uVal.root, vid=uVal.root, toVid=uVal.root)
if not w.isNil:
follow[uVal.root] = w
# Update and re-collect into `follow` table
var level = 0
while 0 < follow.len:
var
changes = false
redo: BackWVtxTab
for (fVid,fVal) in follow.pairs:
# Resolve or keep for later
let rc = fVal.vtx.toNode db
if rc.isOk:
# Update Merkle hash
let
key = rc.value.to(HashKey)
rx = db.updateHashKey(fVal.w.root, fVid, key, fVal.w.onBe)
if rx.isErr:
return err((fVid, rx.error))
changes = true
else:
# Cannot complete with this vertex, so dig deeper and do it later
redo[fVid] = fVal
case fVal.vtx.vType:
of Branch:
for vid in fVal.vtx.bVid:
if vid.isValid and not db.getKey(vid).isValid:
let w = db.wVtxRef(root=fVal.w.root, vid=vid, toVid=fVid)
if not w.isNil:
changes = true
redo[vid] = w
of Extension:
let vid = fVal.vtx.eVid
if vid.isValid and not db.getKey(vid).isValid:
let w = db.wVtxRef(root=fVal.w.root,vid=vid, toVid=fVid)
if not w.isNil:
changes = true
redo[vid] = w
of Leaf:
# Should habe been hashed earlier
return err((fVid,HashifyDownVtxLeafUnexpected))
# Beware of loops
if not changes or SubTreeSearchDepthMax < level:
return err((VertexID(0),HashifyDownVtxlevelExceeded))
# Restart with a new instance of `follow`
redo.swap follow
level.inc
ok()
# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------
proc hashifyClear*(
db: AristoDbRef; # Database, top layer
locksOnly = false; # If `true`, then clear only proof locks
) =
## Clear all `Merkle` hashes from the `db` argument database top layer.
if not locksOnly:
db.top.pAmk.clear
db.top.kMap.clear
db.top.pPrf.clear
proc hashify*(
db: AristoDbRef; # Database, top layer
): Result[HashSet[VertexID],(VertexID,AristoError)] =
## Add keys to the `Patricia Trie` so that it becomes a `Merkle Patricia
## Tree`. If successful, the function returns the keys (aka Merkle hash) of
## the root vertices.
var
roots: HashSet[VertexID]
completed: HashSet[VertexID]
# Width-first leaf-to-root traversal structure
backLink: BackVidTab
downMost: BackVidTab
# Unconditionally mark the top layer
db.top.dirty = true
for (lky,vid) in db.top.lTab.pairs:
let rc = lky.hikeUp db
# There might be deleted entries on the leaf table. If this is the case,
# the Merkle hashes for the vertices in the `hike` can all be compiled.
if not vid.isValid:
? db.deletedLeafHasher rc.to(Hike)
elif rc.isErr:
return err((vid,rc.error[1]))
else:
# Hash as much of the `hike` as possible
let
hike = rc.value
n = ? db.leafToRootHasher hike
roots.incl hike.root
if 0 < n:
# Backtrack and register remaining nodes. Note that in case *n == 0*,
# the root vertex has not been fully resolved yet.
#
# .. unresolved hash keys | all set here ..
# |
# |
# hike.legs: (leg[0], leg[1], ..leg[n-1], leg[n], ..)
# | | | |
# | <---- | <---- | <-------- |
# | | |
# | backLink[] | downMost[] |
#
if n+1 < hike.legs.len:
downMost.del hike.legs[n+1].wp.vid
downMost[hike.legs[n].wp.vid] = BackVidValRef(
root: hike.root,
onBe: hike.legs[n].backend,
toVid: hike.legs[n-1].wp.vid)
for u in (n-1).countDown(1):
backLink[hike.legs[u].wp.vid] = BackVidValRef(
root: hike.root,
onBe: hike.legs[u].backend,
toVid: hike.legs[u-1].wp.vid)
elif n < 0:
completed.incl hike.root
# At least one full path leaf..root should have succeeded with labelling
# for each root.
if completed.len < roots.len:
? db.resolveStateRoots backLink
# Update remaining hashes
while 0 < downMost.len:
var
redo: BackVidTab
done: HashSet[VertexID]
for (vid,val) in downMost.pairs:
# Try to convert vertex to a node. This is possible only if all link
# references have Merkle hashes.
#
# Also `db.getVtx(vid)` => not nil as it was fetched earlier, already
let rc = db.getVtx(vid).toNode db
if rc.isErr:
# Cannot complete with this vertex, so do it later
redo[vid] = val
else:
# Update Merkle hash
let
key = rc.value.to(HashKey)
rx = db.updateHashKey(val.root, vid, key, val.onBe)
if rx.isErr:
return err((vid,rx.error))
done.incl vid
# Proceed with back link
let nextItem = backLink.getOrVoid val.toVid
if nextItem.isValid:
redo[val.toVid] = nextItem
# Make sure that the algorithm proceeds
if done.len == 0:
let error = HashifyCannotComplete
return err((VertexID(0),error))
# Clean up dups from `backLink` and restart `downMost`
for vid in done.items:
backLink.del vid
downMost = redo
db.top.dirty = false
ok completed
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------