nimbus-eth1/nimbus/sync/snap/worker/pivot/heal_accounts.nim

373 lines
12 KiB
Nim

# Nimbus
# Copyright (c) 2021 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.
## Heal accounts DB
## ================
##
## This module is a variation of the `swap-in` module in the sense that it
## searches for missing nodes in the database (which means that nodes which
## link to missing ones must exist), and then fetches the nodes from the
## network.
##
## Algorithm
## ---------
##
## * Run `swapInAccounts()` so that inheritable sub-tries are imported from
## previous pivots.
##
## * Find dangling nodes in the current account trie via `findMissingNodes()`.
##
## * Install that nodes from the network.
##
## * Rinse and repeat
##
## Discussion
## ----------
##
## A worst case scenario of a portentally failing `findMissingNodes()` call
## must be solved by fetching and storing more accounts and running this
## healing algorithm again.
##
{.push raises: [].}
import
std/[math, sequtils, sets, tables],
chronicles,
chronos,
eth/[common, p2p, trie/nibbles, trie/trie_defs, rlp],
stew/[byteutils, interval_set, keyed_queue],
../../../../utils/prettify,
"../../.."/[sync_desc, protocol, types],
"../.."/[constants, range_desc, worker_desc],
../com/[com_error, get_trie_nodes],
../db/[hexary_desc, hexary_envelope, hexary_error, hexary_nearby,
hexary_paths, hexary_range, snapdb_accounts],
"."/[find_missing_nodes, storage_queue_helper, swap_in]
logScope:
topics = "snap-acc"
const
extraTraceMessages = false # or true
## Enabled additional logging noise
# ------------------------------------------------------------------------------
# Private logging helpers
# ------------------------------------------------------------------------------
template logTxt(info: static[string]): static[string] =
"Accounts heal " & info
proc `$`(node: NodeSpecs): string =
node.partialPath.toHex
proc `$`(rs: NodeTagRangeSet): string =
rs.fullPC3
proc `$`(iv: NodeTagRange): string =
iv.fullPC3
proc toPC(w: openArray[NodeSpecs]; n: static[int] = 3): string =
let sumUp = w.mapIt(it.hexaryEnvelope.len).foldl(a+b, 0.u256)
(sumUp.to(float) / (2.0^256)).toPC(n)
proc healingCtx(
buddy: SnapBuddyRef;
env: SnapPivotRef;
): string =
let ctx = buddy.ctx
"{" &
"piv=" & env.stateHeader.blockNumber.toStr & "," &
"ctl=" & $buddy.ctrl.state & "," &
"nAccounts=" & $env.nAccounts & "," &
("covered=" & $env.fetchAccounts.processed & "/" &
$ctx.pool.coveredAccounts ) & "}"
# ------------------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------------------
template discardRlpError(info: static[string]; code: untyped) =
try:
code
except RlpError:
discard
# ------------------------------------------------------------------------------
# Private functions
# ------------------------------------------------------------------------------
proc compileMissingNodesList(
buddy: SnapBuddyRef;
env: SnapPivotRef;
): Future[seq[NodeSpecs]]
{.async.} =
## Find some missing glue nodes in accounts database.
let
ctx = buddy.ctx
peer {.used.} = buddy.peer
rootKey = env.stateHeader.stateRoot.to(NodeKey)
getFn = ctx.pool.snapDb.getAccountFn
fa {.used.} = env.fetchAccounts
# Import from earlier run
if ctx.swapInAccounts(env) != 0:
discard ctx.swapInAccounts(env)
if not fa.processed.isFull:
let mlv = await fa.findMissingNodes(
rootKey, getFn,
healAccountsInspectionPlanBLevel,
healAccountsInspectionPlanBRetryMax,
healAccountsInspectionPlanBRetryNapMSecs)
# Clean up empty account ranges found while looking for nodes
if not mlv.emptyGaps.isNil:
for w in mlv.emptyGaps.increasing:
discard env.fetchAccounts.processed.merge w
env.fetchAccounts.unprocessed.reduce w
discard buddy.ctx.pool.coveredAccounts.merge w
when extraTraceMessages:
trace logTxt "missing nodes", peer,
ctx=buddy.healingCtx(env), nLevel=mlv.level, nVisited=mlv.visited,
nResult=mlv.missing.len, result=mlv.missing.toPC
return mlv.missing
proc getNodesFromNetwork(
buddy: SnapBuddyRef;
missingNodes: seq[NodeSpecs]; # Nodes to fetch from the network
ignore: HashSet[Blob]; # Except for these partial paths listed
env: SnapPivotRef;
): Future[seq[NodeSpecs]]
{.async.} =
## Extract from `nodes.missing` the next batch of nodes that need
## to be merged it into the database
let
peer {.used.} = buddy.peer
rootHash = env.stateHeader.stateRoot
pivot = env.stateHeader.blockNumber.toStr # for logging in `getTrieNodes()`
# Initalise for fetching nodes from the network via `getTrieNodes()`
var
nodeKey: Table[Blob,NodeKey] # Temporary `path -> key` mapping
pathList: seq[SnapTriePaths] # Function argument for `getTrieNodes()`
# There is no point in fetching too many nodes as it will be rejected. So
# rest of the `missingNodes` list is ignored to be picked up later.
for w in missingNodes:
if w.partialPath notin ignore and not nodeKey.hasKey(w.partialPath):
pathList.add SnapTriePaths(accPath: w.partialPath)
nodeKey[w.partialPath] = w.nodeKey
if fetchRequestTrieNodesMax <= pathList.len:
break
if 0 < pathList.len:
# Fetch nodes from the network.
let rc = await buddy.getTrieNodes(rootHash, pathList, pivot)
if rc.isOk:
# Reset error counts for detecting repeated timeouts, network errors, etc.
buddy.only.errors.resetComError()
# Forget about unfetched missing nodes, will be picked up later
return rc.value.nodes.mapIt(NodeSpecs(
partialPath: it.partialPath,
nodeKey: nodeKey[it.partialPath],
data: it.data))
# Process error ...
let
error = rc.error
ok = await buddy.ctrl.stopAfterSeriousComError(error, buddy.only.errors)
when extraTraceMessages:
trace logTxt "reply error", peer, ctx=buddy.healingCtx(env),
error, stop=ok
return @[]
proc kvAccountLeaf(
buddy: SnapBuddyRef;
node: NodeSpecs;
env: SnapPivotRef;
): (bool,NodeKey,Account) =
## Re-read leaf node from persistent database (if any)
var nNibbles = -1
discardRlpError("kvAccountLeaf"):
let
nodeRlp = rlpFromBytes node.data
prefix = (hexPrefixDecode node.partialPath)[1]
segment = (hexPrefixDecode nodeRlp.listElem(0).toBytes)[1]
nibbles = prefix & segment
nNibbles = nibbles.len
if nNibbles == 64:
let
data = nodeRlp.listElem(1).toBytes
nodeKey = nibbles.getBytes.convertTo(NodeKey)
accData = rlp.decode(data,Account)
return (true, nodeKey, accData)
when extraTraceMessages:
trace logTxt "non-leaf node path or corrupt data", peer=buddy.peer,
ctx=buddy.healingCtx(env), nNibbles
proc registerAccountLeaf(
buddy: SnapBuddyRef;
accKey: NodeKey;
acc: Account;
env: SnapPivotRef;
) =
## Process single account node as would be done with an interval by
## the `storeAccounts()` function
let
ctx = buddy.ctx
peer = buddy.peer
rootKey = env.stateHeader.stateRoot.to(NodeKey)
getFn = ctx.pool.snapDb.getAccountFn
pt = accKey.to(NodeTag)
# Extend interval [pt,pt] if possible
var iv: NodeTagRange
try:
iv = getFn.hexaryRangeInflate(rootKey, pt)
except CatchableError as e:
error logTxt "inflating interval oops", peer, ctx=buddy.healingCtx(env),
accKey, name=($e.name), msg=e.msg
iv = NodeTagRange.new(pt,pt)
# Register isolated leaf node
if 0 < env.fetchAccounts.processed.merge iv:
env.nAccounts.inc
env.fetchAccounts.unprocessed.reduce iv
discard buddy.ctx.pool.coveredAccounts.merge iv
# Update storage slots batch
if acc.storageRoot != EMPTY_ROOT_HASH:
env.storageQueueAppendFull(acc.storageRoot, accKey)
# Update contract codes batch
if acc.codeHash != EMPTY_CODE_HASH:
env.fetchContracts[acc.codeHash] = accKey
#when extraTraceMessages:
# trace logTxt "registered single account", peer, ctx=buddy.healingCtx(env),
# leftSlack=(iv.minPt < pt), rightSlack=(pt < iv.maxPt)
# ------------------------------------------------------------------------------
# Private functions: do the healing for one round
# ------------------------------------------------------------------------------
proc accountsHealingImpl(
buddy: SnapBuddyRef;
ignore: HashSet[Blob];
env: SnapPivotRef;
): Future[(int,HashSet[Blob])]
{.async.} =
## Fetching and merging missing account trie database nodes. It returns the
## number of nodes fetched from the network, and -1 upon error.
let
ctx = buddy.ctx
db = ctx.pool.snapDb
peer = buddy.peer
# Import from earlier runs (if any)
while ctx.swapInAccounts(env) != 0:
discard
# Update for changes since last visit
let missingNodes = await buddy.compileMissingNodesList(env)
if missingNodes.len == 0:
# Nothing to do
trace logTxt "nothing to do", peer, ctx=buddy.healingCtx(env)
return (0,EmptyBlobSet) # nothing to do
# Get next batch of nodes that need to be merged it into the database
let fetchedNodes = await buddy.getNodesFromNetwork(missingNodes, ignore, env)
if fetchedNodes.len == 0:
return (0,EmptyBlobSet)
# Store nodes onto disk
let
nFetchedNodes = fetchedNodes.len
report = db.importRawAccountsNodes(peer, fetchedNodes)
if 0 < report.len and report[^1].slot.isNone:
# Storage error, just run the next lap (not much else that can be done)
error logTxt "databse error", peer, ctx=buddy.healingCtx(env),
nFetchedNodes, error=report[^1].error
return (-1,EmptyBlobSet)
# Filter out error and leaf nodes
var
nLeafNodes = 0 # for logging
rejected: HashSet[Blob]
for w in report:
if w.slot.isSome: # non-indexed entries appear typically at the end, though
let inx = w.slot.unsafeGet
# Node error, will need to pick up later and download again. Node that
# there need not be an expicit node specs (so `kind` is opted out.)
if w.kind.isNone or w.error != HexaryError(0):
rejected.incl fetchedNodes[inx].partialPath
elif w.kind.unsafeGet == Leaf:
# Leaf node has been stored, double check
let (isLeaf, key, acc) = buddy.kvAccountLeaf(fetchedNodes[inx], env)
if isLeaf:
# Update `unprocessed` registry, collect storage roots (if any)
buddy.registerAccountLeaf(key, acc, env)
nLeafNodes.inc
when extraTraceMessages:
trace logTxt "merged into database", peer, ctx=buddy.healingCtx(env),
nFetchedNodes, nLeafNodes, nRejected=rejected.len
return (nFetchedNodes - rejected.len, rejected)
# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------
proc healAccounts*(
buddy: SnapBuddyRef;
env: SnapPivotRef;
) {.async.} =
## Fetching and merging missing account trie database nodes.
trace logTxt "started", peer=buddy.peer, ctx=buddy.healingCtx(env)
let
fa = env.fetchAccounts
var
nNodesFetched = 0
nFetchLoop = 0
ignore: HashSet[Blob]
while not fa.processed.isFull() and
buddy.ctrl.running and
not env.archived:
let (nNodes, rejected) = await buddy.accountsHealingImpl(ignore, env)
if nNodes <= 0:
break
ignore = ignore + rejected
nNodesFetched.inc(nNodes)
nFetchLoop.inc
trace logTxt "done", peer=buddy.peer, ctx=buddy.healingCtx(env),
nNodesFetched, nFetchLoop, nIgnore=ignore.len
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------