nimbus-eth1/nimbus/sync/snap/worker/db/hexary_inspect.nim
Jordan Hrycaj c01045c246
Update snap client account healing (#1521)
* Update nearby/neighbour leaf nodes finder

details:
  Update return error codes so that in the case that there is no more
  leaf node beyond the search direction, the particular error code
  `NearbyBeyondRange` is returned.

* Compile largest interval range containing only this leaf point

why:
  Will be needed in snap sync for adding single leaf nodes to the range
  of already allocated nodes.

* Reorg `hexary_inspect.nim`

why:
 Merged the nodes collecting algorithm for persistent and in-memory
 into a single generic function `hexary_inspect.inspectTrieImpl()`

* Update fetching accounts range failure handling in `rangeFetchAccounts()`

why:
  Rejected response leads now to fetching for another account range. Only
  repeated failures (or all done) terminate the algorithm.

* Update accounts healing

why:
+ Fixed looping over a bogus node response that could not inserted into
  the database. As a solution, these nodes are locally registered and not
  asked for in this download cycle.
+ Sub-optimal handling of interval range for a healed account leaf node.
  Now the maximal range interval containing this node is registered as
  processed which leafs to de-fragementation of the processed (and
  unprocessed) range list(s). So *gap* ranges which are known not to
  cover any account leaf node are not asked for on the network, anymore.
+ Sporadically remove empty interval ranges (if any)

* Update logging, better variable names
2023-03-25 10:44:48 +00:00

318 lines
12 KiB
Nim

# nimbus-eth1
# Copyright (c) 2021 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.
{.push raises: [].}
import
std/[sequtils, strutils, tables],
chronicles,
eth/[common, trie/nibbles],
stew/results,
"../.."/[constants, range_desc],
"."/[hexary_desc, hexary_nodes_helper, hexary_paths]
logScope:
topics = "snap-db"
type
TrieNodeStatCtxRef* = ref object
## Context to resume searching for dangling links
case persistent*: bool
of true:
hddCtx*: seq[(NodeKey,NibblesSeq)]
else:
memCtx*: seq[(RepairKey,NibblesSeq)]
TrieNodeStat* = object
## Trie inspection report
dangling*: seq[NodeSpecs] ## Referes to nodes with incomplete refs
count*: uint64 ## Number of nodes visited
level*: uint8 ## Maximum nesting depth of dangling nodes
stopped*: bool ## Potential loop detected if `true`
resumeCtx*: TrieNodeStatCtxRef ## Context for resuming inspection
const
extraTraceMessages = false # or true
when extraTraceMessages:
import stew/byteutils
# ------------------------------------------------------------------------------
# Private helpers, debugging
# ------------------------------------------------------------------------------
proc ppDangling(a: seq[NodeSpecs]; maxItems = 30): string =
proc ppBlob(w: Blob): string =
w.mapIt(it.toHex(2)).join.toLowerAscii
let
q = a.mapIt(it.partialPath.ppBlob)[0 ..< min(maxItems,a.len)]
andMore = if maxItems < a.len: ", ..[#" & $a.len & "].." else: ""
"{" & q.join(",") & andMore & "}"
# ------------------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------------------
proc convertTo(key: RepairKey; T: type NodeKey): T =
## Might be lossy, check before use
discard result.init(key.ByteArray33[1 .. 32])
proc convertTo(key: NodeKey; T: type NodeKey): T =
## For simplifying generic functions
key
proc convertTo(key: RepairKey; T: type RepairKey): T =
## For simplifying generic functions
key
proc isNodeKey(key: Blob): bool =
## For simplifying generic functions
key.len == 32 or key.len == 0
proc to(key: NodeKey; T: type NodeKey): T =
## For simplifying generic functions
key
# ------------------------------------------------------------------------------
# Private functions
# ------------------------------------------------------------------------------
proc processLink[Q](
db: HexaryTreeDbRef|HexaryGetFn; # Database abstraction
stats: var TrieNodeStat; # Collecting results
inspect: var Q; # Intermediate todo list
trail: NibblesSeq; # Todo list argument
child: RepairKey|Blob; # Todo list argument
) {.gcsafe, raises: [CatchableError]} =
## Helper for `inspectTrieImpl()`
if not child.isZeroLink:
if not child.isNodeKey:
# Oops -- caught in the middle of a repair process? Just register
# this node
stats.dangling.add NodeSpecs(
partialPath: trail.hexPrefixEncode(isLeaf = false))
elif child.getNode(db).isOk:
inspect.add (child.convertTo(typeof(inspect[0][0])), trail)
else:
stats.dangling.add NodeSpecs(
partialPath: trail.hexPrefixEncode(isLeaf = false),
nodeKey: child.convertTo(NodeKey))
proc inspectTrieImpl(
db: HexaryTreeDbRef|HexaryGetFn; # Database abstraction
rootKey: NodeKey|RepairKey; # State root
partialPaths: seq[Blob]; # Starting paths for search
resumeCtx: TrieNodeStatCtxRef; # Context for resuming inspection
suspendAfter: uint64; # To be resumed
stopAtLevel: uint8; # Width-first depth level
maxDangling: int; # Maximal number of dangling results
): TrieNodeStat
{.gcsafe, raises: [CatchableError]} =
## ...
when extraTraceMessages:
let nPaths = partialPaths.len
if rootKey.getNode(db).isErr:
when extraTraceMessages:
trace "Hexary inspect: missing root", nPaths, maxDangling,
rootKey=rootKey.convertTo(NodeKey)
return TrieNodeStat()
var
reVisit: seq[(typeof(rootKey),NibblesSeq)]
again: seq[(typeof(rootKey),NibblesSeq)]
resumeOk = false
# Initialise lists from previous session
if not resumeCtx.isNil:
when typeof(db) is HexaryTreeDbRef:
if not resumeCtx.persistent and 0 < resumeCtx.memCtx.len:
resumeOk = true
reVisit = resumeCtx.memCtx
else:
if resumeCtx.persistent and 0 < resumeCtx.hddCtx.len:
resumeOk = true
reVisit = resumeCtx.hddCtx
if partialPaths.len == 0 and not resumeOk:
reVisit.add (rootKey,EmptyNibbleSeq)
else:
# Add argument paths
for w in partialPaths:
let (isLeaf,nibbles) = hexPrefixDecode w
if not isLeaf:
let rc = nibbles.hexaryPathNodeKey(rootKey, db, missingOk=false)
if rc.isOk:
reVisit.add (rc.value.to(typeof(rootKey)), nibbles)
# Stopping on `suspendAfter` has precedence over `stopAtLevel`
while 0 < reVisit.len and result.count <= suspendAfter:
when extraTraceMessages:
trace "Hexary inspect processing", nPaths, maxDangling,
level=result.level, nReVisit=reVisit.len, nDangling=result.dangling.len
if stopAtLevel < result.level:
result.stopped = true
break
for n in 0 ..< reVisit.len:
if suspendAfter < result.count or
maxDangling <= result.dangling.len:
# Swallow rest
again &= reVisit[n ..< reVisit.len]
break
let
(rKey, parentTrail) = reVisit[n]
rc = rKey.getNode(db)
if rc.isErr:
continue # ignore this node
let node = rc.value
case node.kind:
of Extension:
let
trail = parentTrail & node.ePfx
child = node.eLink
db.processLink(stats=result, inspect=again, trail, child)
of Branch:
for n in 0 ..< 16:
let
trail = parentTrail & @[n.byte].initNibbleRange.slice(1)
child = node.bLink[n]
db.processLink(stats=result, inspect=again, trail, child)
of Leaf:
# Ooops, forget node and key
discard
result.count.inc
# End `for`
result.level.inc
swap(reVisit, again)
again.setLen(0)
# End while
# Collect left overs for resuming search
if 0 < reVisit.len:
when typeof(db) is HexaryTreeDbRef:
result.resumeCtx = TrieNodeStatCtxRef(
persistent: false,
memCtx: reVisit)
else:
result.resumeCtx = TrieNodeStatCtxRef(
persistent: true,
hddCtx: reVisit)
when extraTraceMessages:
trace "Hexary inspect finished", nPaths, maxDangling,
level=result.level, nResumeCtx=reVisit.len, nDangling=result.dangling.len,
maxLevel=stopAtLevel, stopped=result.stopped
# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------
proc to*(resumeCtx: TrieNodeStatCtxRef; T: type seq[NodeSpecs]): T =
## Convert resumption context to nodes that can be used otherwise. This
## function might be useful for error recovery.
##
## Note: In a non-persistant case, temporary `RepairKey` type node specs
## that cannot be converted to `NodeKey` type nodes are silently dropped.
## This should be no problem as a hexary trie with `RepairKey` type node
## refs must be repaired or discarded anyway.
if resumeCtx.persistent:
for (key,trail) in resumeCtx.hddCtx:
result.add NodeSpecs(
partialPath: trail.hexPrefixEncode(isLeaf = false),
nodeKey: key)
else:
for (key,trail) in resumeCtx.memCtx:
if key.isNodeKey:
result.add NodeSpecs(
partialPath: trail.hexPrefixEncode(isLeaf = false),
nodeKey: key.convertTo(NodeKey))
proc hexaryInspectTrie*(
db: HexaryTreeDbRef; # Database abstraction
rootKey: NodeKey; # State root
partialPaths = EmptyBlobSeq; # Starting paths for search
resumeCtx = TrieNodeStatCtxRef(nil); # Context for resuming inspection
suspendAfter = high(uint64); # To be resumed
stopAtLevel = 64u8; # Width-first depth level
maxDangling = high(int); # Maximal number of dangling results
): TrieNodeStat
{.gcsafe, raises: [CatchableError]} =
## Starting with the argument list `paths`, find all the non-leaf nodes in
## the hexary trie which have at least one node key reference missing in
## the trie database. The references for these nodes are collected and
## returned.
##
## * Argument `partialPaths` list entries that do not refer to an existing
## and allocated hexary trie node are silently ignored. So are enytries
## that not refer to either a valid extension or a branch type node.
##
## * This function traverses the hexary trie in *width-first* mode
## simultaneously for any entry of the argument `partialPaths` list. Abart
## from completing the search there are three conditions when the search
## pauses to return the current state (via `resumeCtx`, see next bullet
## point):
## + The depth level of the running algorithm exceeds `stopAtLevel`.
## + The number of visited nodes exceeds `suspendAfter`.
## + Te number of cunnently collected dangling nodes exceeds `maxDangling`.
## If the function pauses because the current depth exceeds `stopAtLevel`
## then the `stopped` flag of the result object will be set, as well.
##
## * When paused for some of the reasons listed above, the `resumeCtx` field
## of the result object contains the current state so that the function
## can resume searching from where is paused. An application using this
## feature could look like:
## ::
## var ctx = TrieNodeStatCtxRef()
## while not ctx.isNil:
## let state = hexaryInspectTrie(db, root, paths, resumeCtx=ctx, 1024)
## ...
## ctx = state.resumeCtx
## paths = EmptyBlobSeq
##
db.inspectTrieImpl(rootKey.to(RepairKey),
partialPaths, resumeCtx, suspendAfter, stopAtLevel, maxDangling)
proc hexaryInspectTrie*(
getFn: HexaryGetFn; # Database abstraction
rootKey: NodeKey; # State root
partialPaths = EmptyBlobSeq; # Starting paths for search
resumeCtx: TrieNodeStatCtxRef = nil; # Context for resuming inspection
suspendAfter = high(uint64); # To be resumed
stopAtLevel = 64u8; # Width-first depth level
maxDangling = high(int); # Maximal number of dangling results
): TrieNodeStat
{.gcsafe, raises: [CatchableError]} =
## Variant of `hexaryInspectTrie()` for persistent database.
getFn.inspectTrieImpl(
rootKey, partialPaths, resumeCtx, suspendAfter, stopAtLevel, maxDangling)
# ------------------------------------------------------------------------------
# Public functions, debugging
# ------------------------------------------------------------------------------
proc pp*(a: TrieNodeStat; db: HexaryTreeDbRef; maxItems = 30): string =
result = "(" & $a.level
if a.stopped:
result &= "stopped,"
result &= $a.dangling.len & "," &
a.dangling.ppDangling(maxItems) & ")"
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------