2022-09-16 08:24:12 +01:00
|
|
|
# nimbus-eth1
|
|
|
|
# Copyright (c) 2021 Status Research & Development GmbH
|
|
|
|
# Licensed under either of
|
|
|
|
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0)
|
|
|
|
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
|
|
|
|
# http://opensource.org/licenses/MIT)
|
|
|
|
# at your option. This file may not be copied, modified, or distributed
|
|
|
|
# except according to those terms.
|
|
|
|
|
2023-03-22 20:11:49 +00:00
|
|
|
{.push raises: [].}
|
|
|
|
|
2022-09-16 08:24:12 +01:00
|
|
|
import
|
2022-12-24 09:54:18 +00:00
|
|
|
std/[sequtils, strutils, tables],
|
2022-09-30 09:22:14 +01:00
|
|
|
chronicles,
|
2022-10-20 17:59:54 +01:00
|
|
|
eth/[common, trie/nibbles],
|
2022-09-16 08:24:12 +01:00
|
|
|
stew/results,
|
2023-03-22 20:11:49 +00:00
|
|
|
"../.."/[constants, range_desc],
|
2023-03-25 10:44:48 +00:00
|
|
|
"."/[hexary_desc, hexary_nodes_helper, hexary_paths]
|
2022-09-16 08:24:12 +01:00
|
|
|
|
2022-09-30 09:22:14 +01:00
|
|
|
logScope:
|
|
|
|
topics = "snap-db"
|
|
|
|
|
2022-12-24 09:54:18 +00:00
|
|
|
type
  TrieNodeStatCtxRef* = ref object
    ## Resumption context for a paused search for dangling links. It holds
    ## the list of `(key, path)` pairs that were still queued for a visit
    ## when the inspection returned.
    case persistent*: bool
    of true:
      ## Variant filled in when inspecting via a `HexaryGetFn` getter
      hddCtx*: seq[(NodeKey,NibblesSeq)]
    of false:
      ## Variant filled in when inspecting a `HexaryTreeDbRef` database
      memCtx*: seq[(RepairKey,NibblesSeq)]

  TrieNodeStat* = object
    ## Report returned by a trie inspection run
    dangling*: seq[NodeSpecs]      ## Refers to nodes with incomplete refs
    count*: uint64                 ## Number of nodes visited
    level*: uint8                  ## Maximum nesting depth of dangling nodes
    stopped*: bool                 ## Potential loop detected if `true`, set
                                   ## when the depth limit was exceeded
    resumeCtx*: TrieNodeStatCtxRef ## Context for resuming inspection, `nil`
                                   ## if nothing is left to visit
|
|
|
|
|
2022-09-30 09:22:14 +01:00
|
|
|
const
  extraTraceMessages = false
    ## Compile-time switch, set to `true` for compiling in the additional
    ## `trace` log statements of the inspection functions below.

when extraTraceMessages:
  # Only needed by the optional trace statements
  import stew/byteutils
|
|
|
|
|
2022-12-24 09:54:18 +00:00
|
|
|
# ------------------------------------------------------------------------------
|
|
|
|
# Private helpers, debugging
|
|
|
|
# ------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
proc ppDangling(a: seq[NodeSpecs]; maxItems = 30): string =
  ## Pretty print the partial paths of the argument list `a` as a
  ## comma-separated list of lower case hex strings enclosed in braces.
  ##
  ## At most `maxItems` entries are printed. If the list is longer, a
  ## marker `, ..[#<total>]..` showing the full list length is appended.
  ## A negative `maxItems` is treated like zero.
  let nShown = max(0, min(maxItems, a.len))

  # Encode only the entries that will be shown rather than the whole list
  var shown = newSeqOfCap[string](nShown)
  for n in 0 ..< nShown:
    shown.add a[n].partialPath.mapIt(it.toHex(2)).join.toLowerAscii

  let andMore = if maxItems < a.len: ", ..[#" & $a.len & "].." else: ""
  "{" & shown.join(",") & andMore & "}"
|
|
|
|
|
2022-09-16 08:24:12 +01:00
|
|
|
# ------------------------------------------------------------------------------
|
|
|
|
# Private helpers
|
|
|
|
# ------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
proc convertTo(key: RepairKey; T: type NodeKey): T =
  ## Build a `NodeKey` from the bytes 1..32 of the 33 byte `RepairKey`
  ## representation, i.e. the byte at position 0 is dropped. So this
  ## conversion might be lossy, check before use.
  let tail = key.ByteArray33[1 .. 32]
  # The status returned by `init()` is deliberately ignored
  discard result.init(tail)
|
|
|
|
|
2023-03-25 10:44:48 +00:00
|
|
|
proc convertTo(key: NodeKey; T: type NodeKey): T =
  ## Identity conversion, provided so that generic functions can call
  ## `convertTo()` regardless of the key type at hand.
  result = key
|
|
|
|
|
|
|
|
proc convertTo(key: RepairKey; T: type RepairKey): T =
  ## Identity conversion, provided so that generic functions can call
  ## `convertTo()` regardless of the key type at hand.
  result = key
|
|
|
|
|
|
|
|
proc isNodeKey(key: Blob): bool =
  ## `Blob` variant of `isNodeKey()` for simplifying generic functions.
  ## A blob is accepted if it is either empty or exactly 32 bytes long.
  key.len in [0, 32]
|
|
|
|
|
|
|
|
proc to(key: NodeKey; T: type NodeKey): T =
  ## Identity conversion, provided so that generic functions can call
  ## `to()` regardless of the key type at hand.
  result = key
|
2022-09-16 08:24:12 +01:00
|
|
|
|
|
|
|
# ------------------------------------------------------------------------------
|
|
|
|
# Private functions
|
|
|
|
# ------------------------------------------------------------------------------
|
|
|
|
|
2023-03-25 10:44:48 +00:00
|
|
|
proc processLink[Q](
    db: HexaryTreeDbRef|HexaryGetFn;   # Database abstraction
    stats: var TrieNodeStat;           # Collecting results
    inspect: var Q;                    # Intermediate todo list
    trail: NibblesSeq;                 # Todo list argument
    child: RepairKey|Blob;             # Todo list argument
      ) {.gcsafe, raises: [CatchableError]} =
  ## Helper for `inspectTrieImpl()`. It classifies the link `child` which
  ## is reached via the nibbles path `trail`:
  ##
  ## * A zero link is ignored.
  ## * A link that is not a node key is registered in `stats.dangling` by
  ##   its path only.
  ## * A node key link that resolves on the database `db` is appended to
  ##   the todo list `inspect` for visiting later.
  ## * A node key link that does not resolve is registered in
  ##   `stats.dangling` by path and node key.
  if child.isZeroLink:
    return

  if child.isNodeKey:
    if child.getNode(db).isOk:
      # Node exists => to be visited on the next level
      inspect.add (child.convertTo(typeof(inspect[0][0])), trail)
    else:
      # Missing node => dangling link
      stats.dangling.add NodeSpecs(
        partialPath: trail.hexPrefixEncode(isLeaf = false),
        nodeKey:     child.convertTo(NodeKey))
  else:
    # Oops -- caught in the middle of a repair process? Just register
    # this node
    stats.dangling.add NodeSpecs(
      partialPath: trail.hexPrefixEncode(isLeaf = false))
|
2022-09-16 08:24:12 +01:00
|
|
|
|
2023-03-25 10:44:48 +00:00
|
|
|
proc inspectTrieImpl(
    db: HexaryTreeDbRef|HexaryGetFn;   # Database abstraction
    rootKey: NodeKey|RepairKey;        # State root
    partialPaths: seq[Blob];           # Starting paths for search
    resumeCtx: TrieNodeStatCtxRef;     # Context for resuming inspection
    suspendAfter: uint64;              # To be resumed
    stopAtLevel: uint8;                # Width-first depth level
    maxDangling: int;                  # Maximal number of dangling results
      ): TrieNodeStat
      {.gcsafe, raises: [CatchableError]} =
  ## Generic width-first search for dangling links, shared by the public
  ## `hexaryInspectTrie()` variants.
  ##
  ## The list of nodes to visit is seeded from the matching variant of
  ## `resumeCtx` (if any) and from the non-leaf entries of `partialPaths`
  ## that resolve to a node. If there is neither, the search starts at
  ## `rootKey`. If the root node itself is missing, an empty report is
  ## returned.
  ##
  ## The search pauses when
  ## * the number of visited nodes exceeds `suspendAfter`, or
  ## * at least `maxDangling` dangling nodes have been collected, or
  ## * the level counter exceeds `stopAtLevel`, in which case the `stopped`
  ##   flag of the result is set.
  ##
  ## Nodes still queued when pausing are returned via the `resumeCtx` field
  ## of the result.
  when extraTraceMessages:
    let nPaths = partialPaths.len

  if rootKey.getNode(db).isErr:
    when extraTraceMessages:
      trace "Hexary inspect: missing root", nPaths, maxDangling,
        rootKey=rootKey.convertTo(NodeKey)
    return TrieNodeStat()

  var
    reVisit: seq[(typeof(rootKey),NibblesSeq)] # nodes of the current level
    again: seq[(typeof(rootKey),NibblesSeq)]   # nodes for the next level
    resumeOk = false

  # Initialise lists from previous session
  if not resumeCtx.isNil:
    when typeof(db) is HexaryTreeDbRef:
      if not resumeCtx.persistent and 0 < resumeCtx.memCtx.len:
        resumeOk = true
        reVisit = resumeCtx.memCtx
    else:
      if resumeCtx.persistent and 0 < resumeCtx.hddCtx.len:
        resumeOk = true
        reVisit = resumeCtx.hddCtx

  if partialPaths.len == 0 and not resumeOk:
    reVisit.add (rootKey,EmptyNibbleSeq)
  else:
    # Add argument paths, silently ignoring leaf paths and paths that do
    # not resolve to a node
    for w in partialPaths:
      let (isLeaf,nibbles) = hexPrefixDecode w
      if not isLeaf:
        let rc = nibbles.hexaryPathNodeKey(rootKey, db, missingOk=false)
        if rc.isOk:
          reVisit.add (rc.value.to(typeof(rootKey)), nibbles)

  # Stopping on `suspendAfter` or `maxDangling` has precedence over
  # `stopAtLevel`. The `maxDangling` limit must be part of this loop
  # condition: otherwise, once the limit is reached, the inner loop below
  # would just shuffle the todo list and bump the level counter on every
  # round until `stopAtLevel` is exceeded, falsely reporting `stopped` (or
  # never terminating if `stopAtLevel` is `high(uint8)`.)
  while 0 < reVisit.len and
        result.count <= suspendAfter and
        result.dangling.len < maxDangling:
    when extraTraceMessages:
      trace "Hexary inspect processing", nPaths, maxDangling,
        level=result.level, nReVisit=reVisit.len, nDangling=result.dangling.len

    if stopAtLevel < result.level:
      result.stopped = true
      break

    for n in 0 ..< reVisit.len:
      if suspendAfter < result.count or
         maxDangling <= result.dangling.len:
        # Swallow rest, to be returned via the resumption context
        again &= reVisit[n ..< reVisit.len]
        break

      let
        (rKey, parentTrail) = reVisit[n]
        rc = rKey.getNode(db)
      if rc.isErr:
        continue # ignore this node
      let node = rc.value

      case node.kind:
      of Extension:
        let
          trail = parentTrail & node.ePfx
          child = node.eLink
        db.processLink(stats=result, inspect=again, trail, child)
      of Branch:
        for nibble in 0 ..< 16:
          let
            # Append the single nibble selecting this branch link
            trail = parentTrail & @[nibble.byte].initNibbleRange.slice(1)
            child = node.bLink[nibble]
          db.processLink(stats=result, inspect=again, trail, child)
      of Leaf:
        # Ooops, forget node and key
        discard

      result.count.inc
      # End `for`

    result.level.inc
    swap(reVisit, again)
    again.setLen(0)
    # End while

  # Collect left overs for resuming search
  if 0 < reVisit.len:
    when typeof(db) is HexaryTreeDbRef:
      result.resumeCtx = TrieNodeStatCtxRef(
        persistent: false,
        memCtx:     reVisit)
    else:
      result.resumeCtx = TrieNodeStatCtxRef(
        persistent: true,
        hddCtx:     reVisit)

  when extraTraceMessages:
    trace "Hexary inspect finished", nPaths, maxDangling,
      level=result.level, nResumeCtx=reVisit.len, nDangling=result.dangling.len,
      maxLevel=stopAtLevel, stopped=result.stopped
|
2022-09-16 08:24:12 +01:00
|
|
|
|
|
|
|
# ------------------------------------------------------------------------------
|
|
|
|
# Public functions
|
|
|
|
# ------------------------------------------------------------------------------
|
|
|
|
|
2022-11-28 09:03:23 +00:00
|
|
|
proc to*(resumeCtx: TrieNodeStatCtxRef; T: type seq[NodeSpecs]): T =
  ## Convert resumption context to nodes that can be used otherwise. This
  ## function might be useful for error recovery.
  ##
  ## A `nil` argument `resumeCtx` (as found in the result of an inspection
  ## with nothing left to visit) is converted to an empty list.
  ##
  ## Note: In a non-persistent case, temporary `RepairKey` type node specs
  ## that cannot be converted to `NodeKey` type nodes are silently dropped.
  ## This should be no problem as a hexary trie with `RepairKey` type node
  ## refs must be repaired or discarded anyway.
  if resumeCtx.isNil:
    # Nothing to convert, avoid dereferencing a `nil` reference
    return

  if resumeCtx.persistent:
    for (key,trail) in resumeCtx.hddCtx:
      result.add NodeSpecs(
        partialPath: trail.hexPrefixEncode(isLeaf = false),
        nodeKey:     key)
  else:
    for (key,trail) in resumeCtx.memCtx:
      if key.isNodeKey:
        result.add NodeSpecs(
          partialPath: trail.hexPrefixEncode(isLeaf = false),
          nodeKey:     key.convertTo(NodeKey))
|
|
|
|
|
|
|
|
|
2022-09-16 08:24:12 +01:00
|
|
|
proc hexaryInspectTrie*(
    db: HexaryTreeDbRef;                 # Database abstraction
    rootKey: NodeKey;                    # State root
    partialPaths = EmptyBlobSeq;         # Starting paths for search
    resumeCtx = TrieNodeStatCtxRef(nil); # Context for resuming inspection
    suspendAfter = high(uint64);         # To be resumed
    stopAtLevel = 64u8;                  # Width-first depth level
    maxDangling = high(int);             # Maximal number of dangling results
      ): TrieNodeStat
      {.gcsafe, raises: [CatchableError]} =
  ## Starting with the argument list `partialPaths`, find all the non-leaf
  ## nodes in the hexary trie which have at least one node key reference
  ## missing in the trie database. The references for these nodes are
  ## collected and returned.
  ##
  ## * Argument `partialPaths` list entries that do not refer to an existing
  ##   and allocated hexary trie node are silently ignored. So are entries
  ##   that encode a leaf path.
  ##
  ## * This function traverses the hexary trie in *width-first* mode
  ##   simultaneously for any entry of the argument `partialPaths` list.
  ##   Apart from completing the search there are three conditions when the
  ##   search pauses to return the current state (via `resumeCtx`, see next
  ##   bullet point):
  ##   + The depth level of the running algorithm exceeds `stopAtLevel`.
  ##   + The number of visited nodes exceeds `suspendAfter`.
  ##   + The number of currently collected dangling nodes has reached
  ##     `maxDangling`.
  ##   If the function pauses because the current depth exceeds `stopAtLevel`
  ##   then the `stopped` flag of the result object will be set, as well.
  ##
  ## * When paused for some of the reasons listed above, the `resumeCtx` field
  ##   of the result object contains the current state so that the function
  ##   can resume searching from where it paused. An application using this
  ##   feature could look like:
  ##   ::
  ##      var ctx = TrieNodeStatCtxRef()
  ##      while not ctx.isNil:
  ##        let state = hexaryInspectTrie(
  ##          db, root, paths, resumeCtx=ctx, suspendAfter=1024)
  ##        ...
  ##        ctx = state.resumeCtx
  ##        paths = EmptyBlobSeq
  ##
  # The in-memory database is searched using `RepairKey` type node keys
  let root = rootKey.to(RepairKey)
  inspectTrieImpl(
    db, root, partialPaths, resumeCtx, suspendAfter, stopAtLevel, maxDangling)
|
2022-11-16 23:51:06 +00:00
|
|
|
|
2022-09-16 08:24:12 +01:00
|
|
|
|
|
|
|
proc hexaryInspectTrie*(
    getFn: HexaryGetFn;                  # Database abstraction
    rootKey: NodeKey;                    # State root
    partialPaths = EmptyBlobSeq;         # Starting paths for search
    resumeCtx: TrieNodeStatCtxRef = nil; # Context for resuming inspection
    suspendAfter = high(uint64);         # To be resumed
    stopAtLevel = 64u8;                  # Width-first depth level
    maxDangling = high(int);             # Maximal number of dangling results
      ): TrieNodeStat
      {.gcsafe, raises: [CatchableError]} =
  ## Variant of `hexaryInspectTrie()` for persistent database. Nodes are
  ## looked up via the getter function `getFn`, and a resumption context
  ## returned by this variant has the `persistent` flag set.
  inspectTrieImpl(
    getFn, rootKey,
    partialPaths, resumeCtx, suspendAfter, stopAtLevel, maxDangling)
|
2022-09-16 08:24:12 +01:00
|
|
|
|
2022-12-24 09:54:18 +00:00
|
|
|
# ------------------------------------------------------------------------------
|
|
|
|
# Public functions, debugging
|
|
|
|
# ------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
proc pp*(a: TrieNodeStat; db: HexaryTreeDbRef; maxItems = 30): string =
  ## Pretty print the inspection report `a` for debugging. The result has
  ## the form `(<level>,[stopped,]<nDangling>,{<paths>})` where at most
  ## `maxItems` partial paths of dangling nodes are listed.
  ##
  ## The argument `db` is not used by this function.
  # Separate the level from what follows, otherwise level `3` and `12`
  # dangling nodes would be rendered as the ambiguous `(312,..`
  result = "(" & $a.level & ","
  if a.stopped:
    result &= "stopped,"
  result &= $a.dangling.len & "," &
    a.dangling.ppDangling(maxItems) & ")"
|
|
|
|
|
2022-09-16 08:24:12 +01:00
|
|
|
# ------------------------------------------------------------------------------
|
|
|
|
# End
|
|
|
|
# ------------------------------------------------------------------------------
|