Fix trie inspector for healing (#1284)

* Update logging

* Fix node hash associated with partial path for missing nodes

why:
  Healing uses the partial paths for fetching nodes from the network. The
  node hash (or key) is used to verify the node data retrieved.

  The trie inspector function returned the parent hash instead of the node hash
  with the partial path when a missing node was detected. So all nodes
  for healing were rejected.

* Must not modify sequence while looping over it
This commit is contained in:
Jordan Hrycaj 2022-10-28 08:26:17 +01:00 committed by GitHub
parent 1b4572ed3b
commit a8df4c1165
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 66 additions and 49 deletions

View File

@ -118,6 +118,10 @@ proc to*(n: SomeUnsignedInt|UInt256; T: type NodeTag): T =
n.u256.T
proc digestTo*(data: Blob; T: type NodeKey): T =
keccakHash(data).data.T
proc hash*(a: NodeKey): Hash =
## Table/KeyedQueue mixin
a.ByteArray32.hash
@ -293,6 +297,26 @@ proc `$`*(a, b: NodeTag): string =
proc `$`*(iv: NodeTagRange): string =
leafRangePp(iv.minPt, iv.maxPt)
proc `$`*(n: NodeSpecs): string =
## Prints `(path,key,node-hash)`
let nHash = if n.data.len == 0: NodeKey.default
else: n.data.digestTo(NodeKey)
result = "("
if n.partialPath.len != 0:
result &= n.partialPath.toHex
result &= ","
if n.nodeKey != NodeKey.default:
result &= $n.nodeKey
if n.nodeKey != nHash:
result &= "(!)"
result &= ","
if nHash != NodeKey.default:
if n.nodeKey != nHash:
result &= $nHash
else:
result &= "ditto"
result &= ")"
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------

View File

@ -378,9 +378,6 @@ proc isZero*[T: NodeTag|NodeKey|RepairKey](a: T): bool =
proc isNodeKey*(a: RepairKey): bool =
a.ByteArray33[0] == 0
proc digestTo*(data: Blob; T: type NodeKey): T =
keccakHash(data).data.T
proc convertTo*(data: Blob; T: type NodeKey): T =
## Probably lossy conversion, use `init()` for safe conversion
discard result.init(data)

View File

@ -116,7 +116,6 @@ proc processLink(
db: HexaryTreeDbRef;
stats: var TrieNodeStat;
inspect: TableRef[RepairKey,NibblesSeq];
parent: NodeKey;
trail: NibblesSeq;
child: RepairKey;
) {.gcsafe, raises: [Defect,KeyError]} =
@ -126,35 +125,29 @@ proc processLink(
# Oops -- caught in the middle of a repair process? Just register
# this node
stats.dangling.add NodeSpecs(
partialPath: trail.hexPrefixEncode(isLeaf = false),
nodeKey: parent)
partialPath: trail.hexPrefixEncode(isLeaf = false))
elif db.tab.hasKey(child):
inspect[child] = trail
else:
stats.dangling.add NodeSpecs(
partialPath: trail.hexPrefixEncode(isLeaf = false),
nodeKey: parent)
nodeKey: child.convertTo(NodeKey))
proc processLink(
getFn: HexaryGetFn;
stats: var TrieNodeStat;
inspect: TableRef[NodeKey,NibblesSeq];
parent: NodeKey;
trail: NibblesSeq;
child: Rlp;
) {.gcsafe, raises: [Defect,RlpError,KeyError]} =
## Ditto
if not child.isEmpty:
let
#parentKey = parent.convertTo(NodeKey)
childBlob = child.toBytes
let childBlob = child.toBytes
if childBlob.len != 32:
# Oops -- that is wrong, although the only sensible action is to
# register the node and otherwise ignore it
stats.dangling.add NodeSpecs(
partialPath: trail.hexPrefixEncode(isLeaf = false),
nodeKey: parent)
partialPath: trail.hexPrefixEncode(isLeaf = false))
else:
let childKey = childBlob.convertTo(NodeKey)
if 0 < child.toBytes.getFn().len:
@ -162,7 +155,7 @@ proc processLink(
else:
stats.dangling.add NodeSpecs(
partialPath: trail.hexPrefixEncode(isLeaf = false),
nodeKey: parent)
nodeKey: childKey)
# ------------------------------------------------------------------------------
# Public functions
@ -261,13 +254,13 @@ proc hexaryInspectTrie*(
let
trail = parentTrail & node.ePfx
child = node.eLink
db.processLink(stats=result, inspect=again, parent, trail, child)
db.processLink(stats=result, inspect=again, trail, child)
of Branch:
for n in 0 ..< 16:
let
trail = parentTrail & @[n.byte].initNibbleRange.slice(1)
child = node.bLink[n]
db.processLink(stats=result, inspect=again, parent, trail, child)
db.processLink(stats=result, inspect=again, trail, child)
of Leaf:
# Ooops, forget node and key
discard
@ -335,13 +328,13 @@ proc hexaryInspectTrie*(
let
trail = parentTrail & xPfx
child = nodeRlp.listElem(1)
getFn.processLink(stats=result, inspect=again, parent, trail, child)
getFn.processLink(stats=result, inspect=again, trail, child)
of 17:
for n in 0 ..< 16:
let
trail = parentTrail & @[n.byte].initNibbleRange.slice(1)
child = nodeRlp.listElem(n)
getFn.processLink(stats=result, inspect=again, parent, trail, child)
getFn.processLink(stats=result, inspect=again, trail, child)
else:
# Ooops, forget node and key
discard

View File

@ -213,7 +213,7 @@ proc importAccounts*(
except KeyError as e:
raiseAssert "Not possible @ importAccounts: " & e.msg
except OSError as e:
trace "Import Accounts exception", peer=ps.peer, name=($e.name), msg=e.msg
error "Import Accounts exception", peer=ps.peer, name=($e.name), msg=e.msg
return err(OSErrorException)
when extraTraceMessages:
@ -262,10 +262,10 @@ proc importRawAccountsNodes*(
slot: Option[int]
try:
# Import nodes
for n,rec in nodes:
if 0 < rec.data.len: # otherwise ignore empty placeholder
for n,node in nodes:
if 0 < node.data.len: # otherwise ignore empty placeholder
slot = some(n)
var rep = db.hexaryImport(rec)
var rep = db.hexaryImport(node)
if rep.error != NothingSerious:
rep.slot = slot
result.add rep
@ -293,7 +293,7 @@ proc importRawAccountsNodes*(
except OSError as e:
result.add HexaryNodeReport(slot: slot, error: OSErrorException)
nErrors.inc
trace "Import account nodes exception", peer, slot, nItems,
error "Import account nodes exception", peer, slot, nItems,
name=($e.name), msg=e.msg, nErrors
when extraTraceMessages:

View File

@ -248,7 +248,7 @@ proc importStorageSlots*(
raiseAssert "Not possible @ importStorages: " & e.msg
except OSError as e:
result.add HexaryNodeReport(slot: itemInx, error: OSErrorException)
trace "Import storage slots exception", peer, itemInx, nItems,
error "Import storage slots exception", peer, itemInx, nItems,
name=($e.name), msg=e.msg, nErrors=result.len
when extraTraceMessages:
@ -295,10 +295,10 @@ proc importRawStorageSlotsNodes*(
slot: Option[int]
try:
# Import nodes
for n,rec in nodes:
if 0 < rec.data.len: # otherwise ignore empty placeholder
for n,node in nodes:
if 0 < node.data.len: # otherwise ignore empty placeholder
slot = some(n)
var rep = db.hexaryImport(rec)
var rep = db.hexaryImport(node)
if rep.error != NothingSerious:
rep.slot = slot
result.add rep
@ -326,7 +326,7 @@ proc importRawStorageSlotsNodes*(
except OSError as e:
result.add HexaryNodeReport(slot: slot, error: OSErrorException)
nErrors.inc
trace "Import storage slots nodes exception", peer, slot, nItems,
error "Import storage slots nodes exception", peer, slot, nItems,
name=($e.name), msg=e.msg, nErrors
when extraTraceMessages:

View File

@ -158,6 +158,7 @@ proc updateMissingNodesList(buddy: SnapBuddyRef) =
env = buddy.data.pivotEnv
stateRoot = env.stateHeader.stateRoot
var delayed: seq[NodeSpecs]
for w in env.fetchAccounts.missingNodes:
let rc = db.getAccountsNodeKey(peer, stateRoot, w.partialPath)
if rc.isOk:
@ -165,7 +166,10 @@ proc updateMissingNodesList(buddy: SnapBuddyRef) =
env.fetchAccounts.checkNodes.add w.partialPath
else:
# Node is still missing
env.fetchAccounts.missingNodes.add w
delayed.add w
# Must not modify sequence while looping over it
env.fetchAccounts.missingNodes = env.fetchAccounts.missingNodes & delayed
proc appendMoreDanglingNodesToMissingNodesList(buddy: SnapBuddyRef): bool =
@ -247,12 +251,12 @@ proc getMissingNodesFromNetwork(
if await buddy.ctrl.stopAfterSeriousComError(error, buddy.data.errors):
discard
when extraTraceMessages:
trace logTxt "error fetching nodes => stop", peer,
trace logTxt "fetch nodes error => stop", peer,
ctx=buddy.healingCtx(), error
else:
discard
when extraTraceMessages:
trace logTxt "error fetching nodes", peer,
trace logTxt "fetch nodes error", peer,
ctx=buddy.healingCtx(), error
return @[]
@ -311,10 +315,6 @@ proc registerAccountLeaf(
acckey: accKey,
storageRoot: acc.storageRoot)
when extraTraceMessages:
trace logTxt "single node", peer,
ctx=buddy.healingCtx(), accKey=pt
# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------
@ -379,6 +379,7 @@ proc healAccounts*(buddy: SnapBuddyRef) {.async.} =
ctx=buddy.healingCtx(), nNodes=nodeSpecs.len
# Filter out error and leaf nodes
var nLeafNodes = 0 # for logging
for w in report:
if w.slot.isSome: # non-indexed entries appear typically at the end, though
let
@ -399,11 +400,12 @@ proc healAccounts*(buddy: SnapBuddyRef) {.async.} =
if isLeaf:
# Update `uprocessed` registry, collect storage roots (if any)
buddy.registerAccountLeaf(key, acc)
nLeafNodes.inc
else:
env.fetchAccounts.checkNodes.add nodePath
when extraTraceMessages:
trace logTxt "job done", peer, ctx=buddy.healingCtx()
trace logTxt "job done", peer, ctx=buddy.healingCtx(), nLeafNodes
# ------------------------------------------------------------------------------
# End

View File

@ -97,6 +97,7 @@ proc updateMissingNodesList(
storageRoot = kvp.key
slots = kvp.data.slots
var delayed: seq[NodeSpecs]
for w in slots.missingNodes:
let rc = db.getStorageSlotsNodeKey(peer, accKey, storageRoot, w.partialPath)
if rc.isOk:
@ -104,7 +105,10 @@ proc updateMissingNodesList(
slots.checkNodes.add w.partialPath
else:
# Node is still missing
slots.missingNodes.add w
delayed.add w
# Must not modify sequence while looping over it
slots.missingNodes = slots.missingNodes & delayed
proc appendMoreDanglingNodesToMissingNodesList(
@ -197,13 +201,13 @@ proc getMissingNodesFromNetwork(
if await buddy.ctrl.stopAfterSeriousComError(error, buddy.data.errors):
discard
when extraTraceMessages:
trace logTxt "error fetching nodes => stop", peer,
trace logTxt "fetch nodes error => stop", peer,
itCtx=buddy.healingCtx(kvp), nSlotLists=env.nSlotLists,
nStorageQueue=env.fetchStorage.len, error
else:
discard
when extraTraceMessages:
trace logTxt "error fetching nodes", peer,
trace logTxt "fetch nodes error", peer,
itCtx=buddy.healingCtx(kvp), nSlotLists=env.nSlotLists,
nStorageQueue=env.fetchStorage.len, error
@ -228,11 +232,6 @@ proc kvStorageSlotsLeaf(
if nibbles.len == 64:
return (true, nibbles.getBytes.convertTo(NodeKey))
when extraTraceMessages:
trace logTxt "non-leaf node path", peer,
itCtx=buddy.healingCtx(kvp), nSlotLists=env.nSlotLists,
nStorageQueue=env.fetchStorage.len, nNibbles=nibbles.len
proc registerStorageSlotsLeaf(
buddy: SnapBuddyRef;
@ -321,6 +320,7 @@ proc storageSlotsHealing(
nStorageQueue=env.fetchStorage.len, nNodes=nodeSpecs.len
# Filter out error and leaf nodes
var nLeafNodes = 0 # for logging
for w in report:
if w.slot.isSome: # non-indexed entries appear typically at the end, though
let
@ -342,13 +342,14 @@ proc storageSlotsHealing(
if isLeaf:
# Update `uprocessed` registry, collect storage roots (if any)
buddy.registerStorageSlotsLeaf(kvp, slotKey)
nLeafNodes.inc
else:
slots.checkNodes.add nodePath
when extraTraceMessages:
trace logTxt "job done", peer,
itCtx=buddy.healingCtx(kvp), nSlotLists=env.nSlotLists,
nStorageQueue=env.fetchStorage.len
nStorageQueue=env.fetchStorage.len, nLeafNodes
proc healingIsComplete(

View File

@ -130,7 +130,7 @@ proc rangeFetchAccounts*(buddy: SnapBuddyRef) {.async.} =
let error = rc.error
if await buddy.ctrl.stopAfterSeriousComError(error, buddy.data.errors):
when extraTraceMessages:
trace logTxt "fetching error => stop", peer,
trace logTxt "fetch error => stop", peer,
stateRoot, req=iv.len, error
return
# Reset error counts for detecting repeated timeouts

View File

@ -115,7 +115,7 @@ proc getNextSlotItems(
env.fetchStorage.del(reqKey)
when extraTraceMessages:
trace logTxt "prepare fetching partial", peer,
trace logTxt "prepare fetch partial", peer,
nSlotLists=env.nSlotLists, nStorageQueue=env.fetchStorage.len,
nToProcess=1, subRange=rc.value, account=reqData.accKey
@ -189,7 +189,7 @@ proc storeStoragesSingleBatch(
let error = rc.error
if await buddy.ctrl.stopAfterSeriousComError(error, buddy.data.errors):
discard
trace logTxt "fetching error => stop", peer,
trace logTxt "fetch error => stop", peer,
nSlotLists=env.nSlotLists, nReq=req.len,
nStorageQueue=env.fetchStorage.len, error
return