Prep for full sync after snap make 5 (#1286)
* Update docs and logging

* Extracted and updated constants from `worker_desc` into a separate file

* Update and re-calibrate communication error handling

* Allow simplified pivot negotiation

  why:
    This feature allows turning off the pivot negotiation where peers agree
    on a common pivot header. For snap sync with fast changing pivots, this
    negotiation only throttles the sync process. The finally downloaded DB
    snapshot is typically a merged version of different pivot states,
    augmented by a healing process.

* Re-model worker queues for accounts download & healing

  why:
    Currently there is only one data fetch per download or healing task.
    This task is then repeated by the scheduler after a short time. In many
    cases, this short time seems enough for some peers to decide to
    terminate the connection.

* Update main task batch `runMulti()`

  details:
    The function `runMulti()` is activated in quasi-parallel mode by the
    scheduler. This function calls the download, healing and fast-sync
    functions. While in debug mode, after each set of jobs run by this
    function the database is analysed (by the `snapdb_check` module) and
    the result printed. A simplified sketch of this batch structure follows
    below.
parent ec59819953
commit a689e9185a
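
The re-modelled `runMulti()` described above can be pictured as the batch skeleton below. This is an illustrative sketch only: the `runAsync` guard, the `accountsState` flags and the `snapdb_check` calls are taken from the diff further down, and all other glue is elided.

  # Sketch of the new runMulti() batch structure (simplified, see worker.nim)
  template runAsync(code: untyped) =
    await code
    if buddy.ctrl.stopped:
      return                                   # peer is to be disconnected
    if env != ctx.data.pivotTable.lastValue.value:
      return                                   # pivot changed, start over

  if env.accountsState != HealerDone:
    runAsync buddy.rangeFetchAccounts()        # accounts download
  runAsync buddy.rangeFetchStorageSlots()      # storage slots download

  if env.accountsState == HealerIdle:          # only one healer instance at a time
    env.accountsState = HealerRunning
    runAsync buddy.healAccounts()
    env.accountsState = HealerIdle
  runAsync buddy.healStorageSlots()

  # Debug mode: analyse the pivot database via the snapdb_check module
  discard buddy.checkAccountsListOk(env)
  discard buddy.checkStorageSlotsTrieIsComplete(env)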
|
@ -30,7 +30,7 @@ const
|
|||
extraTraceMessages = false or true
|
||||
## Additional trace commands
|
||||
|
||||
minPeersToStartSync = 2
|
||||
pivotMinPeersToStartSync* = 2
|
||||
## Wait for consensus of at least this number of peers before syncing.
|
||||
|
||||
type
|
||||
|
@ -39,6 +39,7 @@ type
|
|||
rng: ref HmacDrbgContext ## Random generator
|
||||
untrusted: seq[Peer] ## Clean up list
|
||||
trusted: HashSet[Peer] ## Peers ready for delivery
|
||||
relaxedMode: bool ## Not using strictly `trusted` set
|
||||
|
||||
BestPivotWorkerRef* = ref object of RootRef
|
||||
## Data for this peer only
|
||||
|
@ -242,13 +243,30 @@ proc clear*(bp: BestPivotWorkerRef) =
|
|||
# Public functions
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
proc pivotRelaxedMode*(ctx: BestPivotCtxRef; enable = false) =
|
||||
## Controls relaxed mode. In relaxed mode, the *best header* is fetched
|
||||
## from the network and used as pivot if its block number is large enough.
|
||||
## Otherwise, the default is to find at least `pivotMinPeersToStartSync`
|
||||
## peers (this one included) that agree on a minimum pivot.
|
||||
ctx.relaxedMode = enable
|
||||
|
||||
proc pivotHeader*(bp: BestPivotWorkerRef): Result[BlockHeader,void] =
|
||||
## Returns cached block header if available and the buddy `peer` is trusted.
|
||||
## In relaxed mode (see `pivotRelaxedMode()`), also lesser trusted pivots
|
||||
## are returned.
|
||||
if bp.header.isSome and
|
||||
bp.peer notin bp.global.untrusted and
|
||||
minPeersToStartSync <= bp.global.trusted.len and
|
||||
bp.peer in bp.global.trusted:
|
||||
return ok(bp.header.unsafeGet)
|
||||
bp.peer notin bp.global.untrusted:
|
||||
|
||||
if pivotMinPeersToStartSync <= bp.global.trusted.len and
|
||||
bp.peer in bp.global.trusted:
|
||||
return ok(bp.header.unsafeGet)
|
||||
|
||||
if bp.global.relaxedMode:
|
||||
when extraTraceMessages:
|
||||
trace "Returning not fully trusted pivot", peer=bp.peer,
|
||||
trusted=bp.global.trusted.len, untrusted=bp.global.untrusted.len
|
||||
return ok(bp.header.unsafeGet)
|
||||
|
||||
err()
|
||||
|
||||
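For illustration, the two procs above combine into the following usage pattern. This is a sketch only: `ctx`, `buddy` and `appendPivotEnv` are the snap worker handles used further down in this diff, and the argument list of `pivotNegotiate()` is assumed from its call sites.

  # Sketch: relaxed pivot negotiation as used by the snap worker
  ctx.pivot = BestPivotCtxRef.init(ctx.data.rng)
  ctx.pivot.pivotRelaxedMode(enable = true)     # accept a *best header* pivot

  # ... later, per peer (buddy), negotiate and poll for a usable pivot header
  if await buddy.pivot.pivotNegotiate(none(BlockNumber)):
    let rc = buddy.pivot.pivotHeader()
    if rc.isOk:
      buddy.appendPivotEnv(rc.value)            # install the new pivot header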
proc pivotNegotiate*(
|
||||
|
@ -261,6 +279,10 @@ proc pivotNegotiate*(
|
|||
## the current `buddy` can be used for syncing and the function
|
||||
## `bestPivotHeader()` will succeed returning a `BlockHeader`.
|
||||
##
|
||||
## In relaxed mode (see `pivotRelaxedMode()`), negotiation stops when there
|
||||
## is a *best header*. It caches the best header and returns `true` if the
|
||||
## block number is large enough.
|
||||
##
|
||||
## Ackn: nim-eth/eth/p2p/blockchain_sync.nim: `startSyncWithPeer()`
|
||||
##
|
||||
let peer = bp.peer
|
||||
|
@ -291,9 +313,14 @@ proc pivotNegotiate*(
|
|||
trace "Useless peer, best number too low", peer,
|
||||
trusted=bp.global.trusted.len, runState=bp.ctrl.state,
|
||||
minNumber, bestNumber
|
||||
return false
|
||||
bp.header = some(rc.value)
|
||||
|
||||
if minPeersToStartSync <= bp.global.trusted.len:
|
||||
# No further negotiation if in relaxed mode
|
||||
if bp.global.relaxedMode:
|
||||
return true
|
||||
|
||||
if pivotMinPeersToStartSync <= bp.global.trusted.len:
|
||||
# We have enough trusted peers. Validate new peer against trusted
|
||||
let rc = bp.getRandomTrustedPeer()
|
||||
if rc.isOK:
|
||||
|
@ -311,10 +338,11 @@ proc pivotNegotiate*(
|
|||
if not rx.error:
|
||||
# Other peer is dead
|
||||
bp.global.trusted.excl rc.value
|
||||
return false
|
||||
|
||||
# If there are no trusted peers yet, assume this very peer is trusted,
|
||||
# but do not finish initialisation until there are more peers.
|
||||
elif bp.global.trusted.len == 0:
|
||||
if bp.global.trusted.len == 0:
|
||||
bp.global.trusted.incl peer
|
||||
when extraTraceMessages:
|
||||
let bestHeader =
|
||||
|
@ -322,75 +350,75 @@ proc pivotNegotiate*(
|
|||
else: "nil"
|
||||
trace "Assume initial trusted peer", peer,
|
||||
trusted=bp.global.trusted.len, runState=bp.ctrl.state, bestHeader
|
||||
return false
|
||||
|
||||
elif bp.global.trusted.len == 1 and bp.peer in bp.global.trusted:
|
||||
if bp.global.trusted.len == 1 and bp.peer in bp.global.trusted:
|
||||
# Ignore degenerate case, note that `trusted.len < minPeersToStartSync`
|
||||
return false
|
||||
|
||||
# At this point we have some "trusted" candidates, but they are not
|
||||
# "trusted" enough. We evaluate `peer` against all other candidates. If
|
||||
# one of the candidates disagrees, we swap it for `peer`. If all candidates
|
||||
# agree, we add `peer` to trusted set. The peers in the set will become
|
||||
# "fully trusted" (and sync will start) when the set is big enough
|
||||
var
|
||||
agreeScore = 0
|
||||
otherPeer: Peer
|
||||
deadPeers: HashSet[Peer]
|
||||
when extraTraceMessages:
|
||||
trace "Trust scoring peer", peer,
|
||||
trusted=bp.global.trusted.len, runState=bp.ctrl.state
|
||||
for p in bp.global.trusted:
|
||||
if peer == p:
|
||||
inc agreeScore
|
||||
else:
|
||||
let rc = await bp.agreesOnChain(p)
|
||||
if rc.isOk:
|
||||
inc agreeScore
|
||||
elif bp.ctrl.stopped:
|
||||
# Beware of terminated session
|
||||
return false
|
||||
elif rc.error:
|
||||
otherPeer = p
|
||||
else:
|
||||
# `Other` peer is dead
|
||||
deadPeers.incl p
|
||||
|
||||
# Normalise
|
||||
if 0 < deadPeers.len:
|
||||
bp.global.trusted = bp.global.trusted - deadPeers
|
||||
if bp.global.trusted.len == 0 or
|
||||
bp.global.trusted.len == 1 and bp.peer in bp.global.trusted:
|
||||
return false
|
||||
|
||||
# Check for the number of peers that disagree
|
||||
case bp.global.trusted.len - agreeScore:
|
||||
of 0:
|
||||
bp.global.trusted.incl peer # best possible outcome
|
||||
when extraTraceMessages:
|
||||
trace "Agreeable trust score for peer", peer,
|
||||
trusted=bp.global.trusted.len, runState=bp.ctrl.state
|
||||
of 1:
|
||||
bp.global.trusted.excl otherPeer
|
||||
bp.global.trusted.incl peer
|
||||
when extraTraceMessages:
|
||||
trace "Other peer no longer trusted", peer,
|
||||
otherPeer, trusted=bp.global.trusted.len, runState=bp.ctrl.state
|
||||
else:
|
||||
when extraTraceMessages:
|
||||
trace "Peer not trusted", peer,
|
||||
trusted=bp.global.trusted.len, runState=bp.ctrl.state
|
||||
discard
|
||||
|
||||
else:
|
||||
# At this point we have some "trusted" candidates, but they are not
|
||||
# "trusted" enough. We evaluate `peer` against all other candidates. If
|
||||
# one of the candidates disagrees, we swap it for `peer`. If all candidates
|
||||
# agree, we add `peer` to trusted set. The peers in the set will become
|
||||
# "fully trusted" (and sync will start) when the set is big enough
|
||||
var
|
||||
agreeScore = 0
|
||||
otherPeer: Peer
|
||||
deadPeers: HashSet[Peer]
|
||||
# Evaluate status, finally
|
||||
if pivotMinPeersToStartSync <= bp.global.trusted.len:
|
||||
when extraTraceMessages:
|
||||
trace "Trust scoring peer", peer,
|
||||
trusted=bp.global.trusted.len, runState=bp.ctrl.state
|
||||
for p in bp.global.trusted:
|
||||
if peer == p:
|
||||
inc agreeScore
|
||||
else:
|
||||
let rc = await bp.agreesOnChain(p)
|
||||
if rc.isOk:
|
||||
inc agreeScore
|
||||
elif bp.ctrl.stopped:
|
||||
# Beware of terminated session
|
||||
return false
|
||||
elif rc.error:
|
||||
otherPeer = p
|
||||
else:
|
||||
# `Other` peer is dead
|
||||
deadPeers.incl p
|
||||
|
||||
# Normalise
|
||||
if 0 < deadPeers.len:
|
||||
bp.global.trusted = bp.global.trusted - deadPeers
|
||||
if bp.global.trusted.len == 0 or
|
||||
bp.global.trusted.len == 1 and bp.peer in bp.global.trusted:
|
||||
return false
|
||||
|
||||
# Check for the number of peers that disagree
|
||||
case bp.global.trusted.len - agreeScore:
|
||||
of 0:
|
||||
bp.global.trusted.incl peer # best possible outcome
|
||||
when extraTraceMessages:
|
||||
trace "Agreeable trust score for peer", peer,
|
||||
trusted=bp.global.trusted.len, runState=bp.ctrl.state
|
||||
of 1:
|
||||
bp.global.trusted.excl otherPeer
|
||||
bp.global.trusted.incl peer
|
||||
when extraTraceMessages:
|
||||
trace "Other peer no longer trusted", peer,
|
||||
otherPeer, trusted=bp.global.trusted.len, runState=bp.ctrl.state
|
||||
else:
|
||||
when extraTraceMessages:
|
||||
trace "Peer not trusted", peer,
|
||||
trusted=bp.global.trusted.len, runState=bp.ctrl.state
|
||||
discard
|
||||
|
||||
# Evaluate status, finally
|
||||
if minPeersToStartSync <= bp.global.trusted.len:
|
||||
when extraTraceMessages:
|
||||
let bestHeader =
|
||||
if bp.header.isSome: "#" & $bp.header.get.blockNumber
|
||||
else: "nil"
|
||||
trace "Peer trusted now", peer,
|
||||
trusted=bp.global.trusted.len, runState=bp.ctrl.state, bestHeader
|
||||
return true
|
||||
let bestHeader =
|
||||
if bp.header.isSome: "#" & $bp.header.get.blockNumber
|
||||
else: "nil"
|
||||
trace "Peer trusted now", peer,
|
||||
trusted=bp.global.trusted.len, runState=bp.ctrl.state, bestHeader
|
||||
return true
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# End
|
||||
|
|
|
@@ -0,0 +1,109 @@
# Nimbus
# Copyright (c) 2021 Status Research & Development GmbH
# Licensed under either of
#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
#    http://www.apache.org/licenses/LICENSE-2.0)
#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
#    http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.

{.push raises: [Defect].}

const
  pivotBlockDistanceMin* = 128
    ## The minimal depth of two block headers needed to activate a new state
    ## root pivot.
    ##
    ## Effects on assembling the state via `snap/1` protocol:
    ##
    ## * A small value of this constant increases the propensity to update the
    ##   pivot header more often. This is so because each new peer negotiates
    ##   a pivot block number at least as large as the current one.
    ##
    ## * A large value keeps the current pivot more stable but some experiments
    ##   suggest that the `snap/1` protocol is answered only for later block
    ##   numbers (aka pivot blocks.) So a large value tends to keep the pivot
    ##   farther away from the chain head.
    ##
    ## Note that 128 is the magic distance for snapshots used by *Geth*.

  pivotEnvStopChangingIfComplete* = true
    ## If set `true`, new peers will not change the pivot even if the
    ## negotiated pivot would be newer. This should be the default.

  # --------------

  snapRequestBytesLimit* = 2 * 1024 * 1024
    ## Soft bytes limit to request in `snap` protocol calls.

  snapStoragesSlotsFetchMax* = 2 * 1024
    ## Maximal number of storage tries to fetch with a single message.

  snapTrieNodeFetchMax* = 1024
    ## Informal maximal number of trie nodes to fetch at once. This is not
    ## an official limit but found in several implementations (e.g. Geth.)
    ##
    ## Restricting the fetch list length early allows better parallelisation
    ## of healing.

  snapAccountsHealBatchFetchMax* = 5 * snapTrieNodeFetchMax
    ## Keep going in the healing task until this many nodes have been
    ## fetched from the network or some error condition terminates the task.

  # --------------

  healAccountsTrigger* = 0.95
    ## Apply accounts healing if the global snap download coverage factor
    ## exceeds this setting. The global coverage factor is derived by merging
    ## all account ranges retrieved for all pivot state roots (see
    ## `coveredAccounts` in `CtxData`.)
    ##
    ## A small value of this constant leads to early healing. This produces
    ## stray leaf account records, fragmenting larger intervals of missing
    ## account ranges. This in turn leads to smaller but more range requests
    ## over the network. More requests might be a disadvantage if peers only
    ## serve a maximum number of requests (rather than data.)

  healSlorageSlotsTrigger* = 0.70
    ## Consider per-account storage slots healing if a per-account hexary
    ## sub-trie has reached this factor of completeness.

  healStoragesSlotsBatchMax* = 32
    ## Maximal number of storage tries to heal in a single batch run. Only
    ## this many items will be removed from the batch queue. These items will
    ## then be processed one by one.

  # --------------

  comErrorsTimeoutMax* = 4
    ## Maximal number of non-responses accepted in a row. If there are more
    ## than `comErrorsTimeoutMax` consecutive errors, the worker will be
    ## degraded to a zombie.

  comErrorsTimeoutSleepMSecs* = 5000
    ## Wait/suspend for this many milliseconds after a timeout error if there
    ## are not more than `comErrorsTimeoutMax` errors in a row (maybe some
    ## other network or no-data errors mixed in.) Set 0 to disable.


  comErrorsNetworkMax* = 5
    ## Similar to `comErrorsTimeoutMax` but for network errors.

  comErrorsNetworkSleepMSecs* = 5000
    ## Similar to `comErrorsTimeoutSleepMSecs` but for network errors.
    ## Set 0 to disable.

  comErrorsNoDataMax* = 3
    ## Similar to `comErrorsTimeoutMax` but for missing data errors.

  comErrorsNoDataSleepMSecs* = 0
    ## Similar to `comErrorsTimeoutSleepMSecs` but for missing data errors.
    ## Set 0 to disable.

static:
  doAssert healAccountsTrigger < 1.0 # larger values make no sense

# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------
|
|
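As a worked illustration of the `healAccountsTrigger` comment above, the gate amounts to a check of the shape below (sketch only; `coveredAccounts` and `fullFactor` are the interval-set helpers referenced in that comment and used in `heal_accounts.nim` later in this diff):

  # Sketch: postpone healing until the global coverage factor is large enough
  if env.nAccounts == 0 or
      ctx.data.coveredAccounts.fullFactor < healAccountsTrigger:
    return          # too early -- keep fetching account ranges instead
  # ... otherwise run healing rounds, up to snapAccountsHealBatchFetchMax nodes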
@ -12,20 +12,20 @@ import
|
|||
std/[hashes, math, options, sets, strutils],
|
||||
chronicles,
|
||||
chronos,
|
||||
eth/[common/eth_types, p2p],
|
||||
eth/[common, p2p],
|
||||
stew/[interval_set, keyed_queue],
|
||||
../../db/select_backend,
|
||||
".."/[handlers, misc/best_pivot, protocol, sync_desc],
|
||||
./worker/[heal_accounts, heal_storage_slots,
|
||||
range_fetch_accounts, range_fetch_storage_slots, ticker],
|
||||
./worker/com/com_error,
|
||||
./worker/db/snapdb_desc,
|
||||
"."/[range_desc, worker_desc]
|
||||
./worker/db/[snapdb_check, snapdb_desc],
|
||||
"."/[constants, range_desc, worker_desc]
|
||||
|
||||
{.push raises: [Defect].}
|
||||
|
||||
logScope:
|
||||
topics = "snap-sync"
|
||||
topics = "snap-buddy"
|
||||
|
||||
const
|
||||
extraTraceMessages = false or true
|
||||
|
@ -121,7 +121,7 @@ proc appendPivotEnv(buddy: SnapBuddyRef; header: BlockHeader) =
|
|||
ctx = buddy.ctx
|
||||
minNumber = block:
|
||||
let rc = ctx.data.pivotTable.lastValue
|
||||
if rc.isOk: rc.value.stateHeader.blockNumber + minPivotBlockDistance
|
||||
if rc.isOk: rc.value.stateHeader.blockNumber + pivotBlockDistanceMin
|
||||
else: 1.toBlockNumber
|
||||
|
||||
# Check whether the new header follows minimum depth requirement. This is
|
||||
|
@ -151,19 +151,20 @@ proc updateSinglePivot(buddy: SnapBuddyRef): Future[bool] {.async.} =
|
|||
var header = buddy.pivot.pivotHeader.value
|
||||
|
||||
# Check whether there is no environment change needed
|
||||
when noPivotEnvChangeIfComplete:
|
||||
when pivotEnvStopChangingIfComplete:
|
||||
let rc = ctx.data.pivotTable.lastValue
|
||||
if rc.isOk and rc.value.serialSync:
|
||||
if rc.isOk and rc.value.storageDone:
|
||||
# No need to change
|
||||
if extraTraceMessages:
|
||||
trace "No need to change snap pivot", peer,
|
||||
pivot=("#" & $rc.value.stateHeader.blockNumber),
|
||||
stateRoot=rc.value.stateHeader.stateRoot,
|
||||
multiOk=buddy.ctrl.multiOk, runState=buddy.ctrl.state
|
||||
return true
|
||||
|
||||
buddy.appendPivotEnv(header)
|
||||
|
||||
trace "Snap pivot initialised", peer, pivot=("#" & $header.blockNumber),
|
||||
info "Snap pivot initialised", peer, pivot=("#" & $header.blockNumber),
|
||||
multiOk=buddy.ctrl.multiOk, runState=buddy.ctrl.state
|
||||
|
||||
return true
|
||||
|
@ -222,6 +223,7 @@ proc setup*(ctx: SnapCtxRef; tickerOK: bool): bool =
|
|||
if ctx.data.dbBackend.isNil: SnapDbRef.init(ctx.chain.db.db)
|
||||
else: SnapDbRef.init(ctx.data.dbBackend)
|
||||
ctx.pivot = BestPivotCtxRef.init(ctx.data.rng)
|
||||
ctx.pivot.pivotRelaxedMode(enable = true)
|
||||
if tickerOK:
|
||||
ctx.data.ticker = TickerRef.init(ctx.tickerUpdate)
|
||||
else:
|
||||
|
@ -309,23 +311,31 @@ proc runPool*(buddy: SnapBuddyRef, last: bool) =
|
|||
|
||||
let rc = ctx.data.pivotTable.lastValue
|
||||
if rc.isOk:
|
||||
# Check whether accounts and storage might be complete.
|
||||
let env = rc.value
|
||||
if not env.serialSync:
|
||||
|
||||
# Check whether last pivot accounts and storage are complete.
|
||||
let
|
||||
env = rc.value
|
||||
peer = buddy.peer
|
||||
pivot = "#" & $env.stateHeader.blockNumber # for logging
|
||||
|
||||
if not env.storageDone:
|
||||
|
||||
# Check whether accounts download is complete
|
||||
block checkAccountsComplete:
|
||||
for ivSet in env.fetchAccounts.unprocessed:
|
||||
if ivSet.chunks != 0:
|
||||
break checkAccountsComplete
|
||||
env.accountsDone = true
|
||||
# Check whether storage slots are complete
|
||||
if env.fetchStorage.len == 0:
|
||||
env.serialSync = true
|
||||
if env.fetchAccounts.unprocessed.isEmpty():
|
||||
|
||||
# FIXME: This check might not be needed. It will visit *every* node
|
||||
# in the hexary trie for checking the account leaves.
|
||||
if buddy.checkAccountsTrieIsComplete(env):
|
||||
env.accountsState = HealerDone
|
||||
|
||||
# Check whether storage slots are complete
|
||||
if env.fetchStorage.len == 0:
|
||||
env.storageDone = true
|
||||
|
||||
if extraTraceMessages:
|
||||
trace "Checked for pivot DB completeness",
|
||||
nAccounts=env.nAccounts, accountsDone=env.accountsDone,
|
||||
nSlotLists=env.nSlotLists, storageDone=env.serialSync
|
||||
trace "Checked for pivot DB completeness", peer, pivot,
|
||||
nAccounts=env.nAccounts, accountsState=env.accountsState,
|
||||
nSlotLists=env.nSlotLists, storageDone=env.storageDone
|
||||
|
||||
|
||||
proc runMulti*(buddy: SnapBuddyRef) {.async.} =
|
||||
|
@ -338,44 +348,85 @@ proc runMulti*(buddy: SnapBuddyRef) {.async.} =
|
|||
peer = buddy.peer
|
||||
|
||||
# Set up current state root environment for accounts snapshot
|
||||
let env = block:
|
||||
let rc = ctx.data.pivotTable.lastValue
|
||||
if rc.isErr:
|
||||
return # nothing to do
|
||||
rc.value
|
||||
let
|
||||
env = block:
|
||||
let rc = ctx.data.pivotTable.lastValue
|
||||
if rc.isErr:
|
||||
return # nothing to do
|
||||
rc.value
|
||||
pivot = "#" & $env.stateHeader.blockNumber # for logging
|
||||
|
||||
buddy.data.pivotEnv = env
|
||||
|
||||
if env.serialSync:
|
||||
trace "Snap serial sync -- not implemented yet", peer
|
||||
# Full sync processing based on current snapshot
|
||||
# -----------------------------------------------
|
||||
if env.storageDone:
|
||||
if not buddy.checkAccountsTrieIsComplete(env):
|
||||
error "Ooops, all accounts fetched but DvnB still incomplete", peer, pivot
|
||||
|
||||
if not buddy.checkStorageSlotsTrieIsComplete(env):
|
||||
error "Ooops, all storages fetched but DB still incomplete", peer, pivot
|
||||
|
||||
trace "Snap full sync -- not implemented yet", peer, pivot
|
||||
await sleepAsync(5.seconds)
|
||||
return
|
||||
|
||||
else:
|
||||
# Snapshot sync processing. Note that *serialSync => accountsDone*.
|
||||
await buddy.rangeFetchAccounts()
|
||||
if buddy.ctrl.stopped: return
|
||||
# Snapshot sync processing
|
||||
# ------------------------
|
||||
|
||||
await buddy.rangeFetchStorageSlots()
|
||||
if buddy.ctrl.stopped: return
|
||||
template runAsync(code: untyped) =
|
||||
await code
|
||||
if buddy.ctrl.stopped:
|
||||
# To be disconnected from peer.
|
||||
return
|
||||
if env != ctx.data.pivotTable.lastValue.value:
|
||||
# Pivot has changed, so restart with the latest one
|
||||
return
|
||||
|
||||
# Pivot might have changed, so restart with the latest one
|
||||
if env != ctx.data.pivotTable.lastValue.value: return
|
||||
# If this is a new pivot, the previous one can be partially cleaned up.
|
||||
# There is no point in keeping some older space consuming state data any
|
||||
# longer.
|
||||
block:
|
||||
let rc = ctx.data.pivotTable.beforeLastValue
|
||||
if rc.isOk:
|
||||
let nFetchStorage = rc.value.fetchStorage.len
|
||||
if 0 < nFetchStorage:
|
||||
trace "Cleaning up previous pivot", peer, pivot, nFetchStorage
|
||||
rc.value.fetchStorage.clear()
|
||||
rc.value.fetchAccounts.checkNodes.setLen(0)
|
||||
rc.value.fetchAccounts.missingNodes.setLen(0)
|
||||
|
||||
# If the current database is not complete yet
|
||||
if 0 < env.fetchAccounts.unprocessed[0].chunks or
|
||||
0 < env.fetchAccounts.unprocessed[1].chunks:
|
||||
if env.accountsState != HealerDone:
|
||||
runAsync buddy.rangeFetchAccounts()
|
||||
runAsync buddy.rangeFetchStorageSlots()
|
||||
|
||||
await buddy.healAccounts()
|
||||
if buddy.ctrl.stopped: return
|
||||
# Can only run a single accounts healer instance at a time. This instance
|
||||
# will clear the batch queue so there is nothing to do for another process.
|
||||
if env.accountsState == HealerIdle:
|
||||
env.accountsState = HealerRunning
|
||||
runAsync buddy.healAccounts()
|
||||
env.accountsState = HealerIdle
|
||||
|
||||
await buddy.healStorageSlots()
|
||||
if buddy.ctrl.stopped: return
|
||||
# Some additional storage slots might have been popped up
|
||||
runAsync buddy.rangeFetchStorageSlots()
|
||||
|
||||
# Check whether accounts might be complete.
|
||||
if env.fetchStorage.len == 0:
|
||||
# Possibly done but some buddies might wait for an account range to be
|
||||
# received from the network. So we need to sync.
|
||||
buddy.ctx.poolMode = true
|
||||
runAsync buddy.healStorageSlots()
|
||||
|
||||
# Debugging log: analyse pivot against database
|
||||
discard buddy.checkAccountsListOk(env)
|
||||
discard buddy.checkStorageSlotsTrieIsComplete(env)
|
||||
|
||||
# Check whether there are more accounts to fetch.
|
||||
#
|
||||
# Note that some other process might have temporarily borrowed from the
|
||||
# `fetchAccounts.unprocessed` list. Whether we are done can only be decided
|
||||
# if only a single buddy is active. So be it.
|
||||
if env.fetchAccounts.unprocessed.isEmpty():
|
||||
|
||||
# Check whether pivot download is complete.
|
||||
if env.fetchStorage.len == 0:
|
||||
trace "Running pool mode for verifying completeness", peer, pivot
|
||||
buddy.ctx.poolMode = true
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# End
|
||||
|
|
|
@ -10,19 +10,15 @@
|
|||
|
||||
import
|
||||
chronos,
|
||||
../../../sync_desc
|
||||
|
||||
const
|
||||
comErrorsTimeoutMax* = 2
|
||||
## Maximal number of non-resonses accepted in a row. If there are more than
|
||||
## `comErrorsTimeoutMax` consecutive errors, the worker will be degraded
|
||||
## as zombie.
|
||||
../../../sync_desc,
|
||||
../../constants
|
||||
|
||||
type
|
||||
ComErrorStatsRef* = ref object
|
||||
## Particular error counters, so that connections will not be cut immediately
|
||||
## after a particular error.
|
||||
nTimeouts*: uint
|
||||
nNoData*: uint
|
||||
nNetwork*: uint
|
||||
|
||||
ComError* = enum
|
||||
|
@ -37,7 +33,7 @@ type
|
|||
ComNoAccountsForStateRoot
|
||||
ComNoByteCodesAvailable
|
||||
ComNoDataForProof
|
||||
ComNoHeaderAvailable
|
||||
#ComNoHeaderAvailable -- unused, see get_block_header.nim
|
||||
ComNoStorageForAccounts
|
||||
ComNoTrieNodesAvailable
|
||||
ComResponseTimeout
|
||||
|
@ -46,10 +42,10 @@ type
|
|||
ComTooManyStorageSlots
|
||||
ComTooManyTrieNodes
|
||||
|
||||
# Other errors not directly related to communication
|
||||
ComInspectDbFailed
|
||||
ComImportAccountsFailed
|
||||
|
||||
proc resetComError*(stats: ComErrorStatsRef) =
|
||||
## Reset error counts after successful network operation
|
||||
stats[].reset
|
||||
|
||||
proc stopAfterSeriousComError*(
|
||||
ctrl: BuddyCtrlRef;
|
||||
|
@ -57,42 +53,47 @@ proc stopAfterSeriousComError*(
|
|||
stats: ComErrorStatsRef;
|
||||
): Future[bool]
|
||||
{.async.} =
|
||||
## Error handling after data protocol failed.
|
||||
## Error handling after data protocol failed. Returns `true` if the current
|
||||
## worker should be terminated as *zombie*.
|
||||
case error:
|
||||
of ComResponseTimeout:
|
||||
stats.nTimeouts.inc
|
||||
if comErrorsTimeoutMax < stats.nTimeouts:
|
||||
# Mark this peer dead, i.e. avoid fetching from this peer for a while
|
||||
ctrl.zombie = true
|
||||
else:
|
||||
# Otherwise try again some time later. Nevertheless, stop the
|
||||
# current action.
|
||||
await sleepAsync(5.seconds)
|
||||
return true
|
||||
return true
|
||||
|
||||
of ComNetworkProblem,
|
||||
ComMissingProof,
|
||||
ComAccountsMinTooSmall,
|
||||
ComAccountsMaxTooLarge:
|
||||
when 0 < comErrorsTimeoutSleepMSecs:
|
||||
# Otherwise try again some time later.
|
||||
await sleepAsync(comErrorsTimeoutSleepMSecs.milliseconds)
|
||||
|
||||
of ComNetworkProblem:
|
||||
stats.nNetwork.inc
|
||||
# Mark this peer dead, i.e. avoid fetching from this peer for a while
|
||||
ctrl.zombie = true
|
||||
return true
|
||||
if comErrorsNetworkMax < stats.nNetwork:
|
||||
ctrl.zombie = true
|
||||
return true
|
||||
|
||||
of ComEmptyAccountsArguments,
|
||||
ComEmptyRequestArguments,
|
||||
ComEmptyPartialRange,
|
||||
ComInspectDbFailed,
|
||||
ComImportAccountsFailed,
|
||||
ComNoDataForProof,
|
||||
ComNothingSerious:
|
||||
discard
|
||||
when 0 < comErrorsNetworkSleepMSecs:
|
||||
# Otherwise try again some time later.
|
||||
await sleepAsync(comErrorsNetworkSleepMSecs.milliseconds)
|
||||
|
||||
of ComNoAccountsForStateRoot,
|
||||
ComNoStorageForAccounts,
|
||||
ComNoByteCodesAvailable,
|
||||
ComNoHeaderAvailable,
|
||||
ComNoTrieNodesAvailable,
|
||||
ComNoStorageForAccounts,
|
||||
#ComNoHeaderAvailable,
|
||||
ComNoTrieNodesAvailable:
|
||||
stats.nNoData.inc
|
||||
if comErrorsNoDataMax < stats.nNoData:
|
||||
ctrl.zombie = true
|
||||
return true
|
||||
|
||||
when 0 < comErrorsNoDataSleepMSecs:
|
||||
# Otherwise try again some time later.
|
||||
await sleepAsync(comErrorsNoDataSleepMSecs.milliseconds)
|
||||
|
||||
of ComMissingProof,
|
||||
ComAccountsMinTooSmall,
|
||||
ComAccountsMaxTooLarge,
|
||||
ComTooManyByteCodes,
|
||||
ComTooManyHeaders,
|
||||
ComTooManyStorageSlots,
|
||||
|
@ -101,4 +102,11 @@ proc stopAfterSeriousComError*(
|
|||
ctrl.zombie = true
|
||||
return true
|
||||
|
||||
of ComEmptyAccountsArguments,
|
||||
ComEmptyRequestArguments,
|
||||
ComEmptyPartialRange,
|
||||
ComNoDataForProof,
|
||||
ComNothingSerious:
|
||||
discard
|
||||
|
||||
# End
|
||||
|
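A typical caller of `stopAfterSeriousComError()` looks roughly like the sketch below (illustration only; `getAccountRange` and the per-buddy `errors` counters are taken from neighbouring modules in this diff):

  # Sketch: classify a failed snap/1 fetch against the new error thresholds
  let rc = await buddy.getAccountRange(stateRoot, iv, pivot)
  if rc.isErr:
    if await buddy.ctrl.stopAfterSeriousComError(rc.error, buddy.data.errors):
      return                    # error budget exhausted, peer becomes a zombie
    # otherwise the helper has already slept; give up this round and retry later
  else:
    buddy.data.errors.resetComError()  # a successful reply resets the counters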
|
|
@ -18,7 +18,7 @@ import
|
|||
eth/[common, p2p, trie/trie_defs],
|
||||
stew/interval_set,
|
||||
"../../.."/[protocol, protocol/trace_config],
|
||||
"../.."/[range_desc, worker_desc],
|
||||
"../.."/[constants, range_desc, worker_desc],
|
||||
./com_error
|
||||
|
||||
{.push raises: [Defect].}
|
||||
|
@ -40,6 +40,7 @@ proc getAccountRangeReq(
|
|||
buddy: SnapBuddyRef;
|
||||
root: Hash256;
|
||||
iv: NodeTagRange;
|
||||
pivot: string;
|
||||
): Future[Result[Option[SnapAccountRange],void]] {.async.} =
|
||||
let
|
||||
peer = buddy.peer
|
||||
|
@ -48,7 +49,7 @@ proc getAccountRangeReq(
|
|||
root, iv.minPt.to(Hash256), iv.maxPt.to(Hash256), snapRequestBytesLimit)
|
||||
return ok(reply)
|
||||
except CatchableError as e:
|
||||
trace trSnapRecvError & "waiting for GetAccountRange reply", peer,
|
||||
trace trSnapRecvError & "waiting for GetAccountRange reply", peer, pivot,
|
||||
error=e.msg
|
||||
return err()
|
||||
|
||||
|
@ -58,22 +59,23 @@ proc getAccountRangeReq(
|
|||
|
||||
proc getAccountRange*(
|
||||
buddy: SnapBuddyRef;
|
||||
stateRoot: Hash256;
|
||||
iv: NodeTagRange;
|
||||
stateRoot: Hash256; ## Current DB base (see `pivot` for logging)
|
||||
iv: NodeTagRange; ## Range to be fetched
|
||||
pivot: string; ## For logging, instead of `stateRoot`
|
||||
): Future[Result[GetAccountRange,ComError]] {.async.} =
|
||||
## Fetch data using the `snap#` protocol, returns the range covered.
|
||||
let
|
||||
peer = buddy.peer
|
||||
if trSnapTracePacketsOk:
|
||||
trace trSnapSendSending & "GetAccountRange", peer,
|
||||
accRange=iv, stateRoot, bytesLimit=snapRequestBytesLimit
|
||||
trace trSnapSendSending & "GetAccountRange", peer, pivot,
|
||||
accRange=iv, bytesLimit=snapRequestBytesLimit
|
||||
|
||||
var dd = block:
|
||||
let rc = await buddy.getAccountRangeReq(stateRoot, iv)
|
||||
let rc = await buddy.getAccountRangeReq(stateRoot, iv, pivot)
|
||||
if rc.isErr:
|
||||
return err(ComNetworkProblem)
|
||||
if rc.value.isNone:
|
||||
trace trSnapRecvTimeoutWaiting & "for reply to GetAccountRange", peer
|
||||
trace trSnapRecvTimeoutWaiting & "for AccountRange", peer, pivot
|
||||
return err(ComResponseTimeout)
|
||||
let snAccRange = rc.value.get
|
||||
GetAccountRange(
|
||||
|
@ -108,15 +110,15 @@ proc getAccountRange*(
|
|||
# any) account after limitHash must be provided.
|
||||
if nProof == 0:
|
||||
# Maybe try another peer
|
||||
trace trSnapRecvReceived & "empty AccountRange", peer,
|
||||
nAccounts, nProof, accRange="n/a", reqRange=iv, stateRoot
|
||||
trace trSnapRecvReceived & "empty AccountRange", peer, pivot,
|
||||
nAccounts, nProof, accRange="n/a", reqRange=iv
|
||||
return err(ComNoAccountsForStateRoot)
|
||||
|
||||
# So there is no data, otherwise an account beyond the interval end
|
||||
# `iv.maxPt` would have been returned.
|
||||
dd.consumed = NodeTagRange.new(iv.minPt, high(NodeTag))
|
||||
trace trSnapRecvReceived & "terminal AccountRange", peer,
|
||||
nAccounts, nProof, accRange=dd.consumed, reqRange=iv, stateRoot
|
||||
trace trSnapRecvReceived & "terminal AccountRange", peer, pivot,
|
||||
nAccounts, nProof, accRange=dd.consumed, reqRange=iv
|
||||
return ok(dd)
|
||||
|
||||
let (accMinPt, accMaxPt) = (
|
||||
|
@ -133,15 +135,15 @@ proc getAccountRange*(
|
|||
# across both.
|
||||
if 0.to(NodeTag) < iv.minPt:
|
||||
trace trSnapRecvProtocolViolation & "proof-less AccountRange", peer,
|
||||
nAccounts, nProof, accRange=NodeTagRange.new(iv.minPt, accMaxPt),
|
||||
reqRange=iv, stateRoot
|
||||
pivot, nAccounts, nProof, accRange=NodeTagRange.new(iv.minPt, accMaxPt),
|
||||
reqRange=iv
|
||||
return err(ComMissingProof)
|
||||
|
||||
if accMinPt < iv.minPt:
|
||||
# Not allowed
|
||||
trace trSnapRecvProtocolViolation & "min too small in AccountRange", peer,
|
||||
nAccounts, nProof, accRange=NodeTagRange.new(accMinPt, accMaxPt),
|
||||
reqRange=iv, stateRoot
|
||||
pivot, nAccounts, nProof, accRange=NodeTagRange.new(accMinPt, accMaxPt),
|
||||
reqRange=iv
|
||||
return err(ComAccountsMinTooSmall)
|
||||
|
||||
if iv.maxPt < accMaxPt:
|
||||
|
@ -157,13 +159,13 @@ proc getAccountRange*(
|
|||
if iv.maxPt < dd.data.accounts[^2].accKey.to(NodeTag):
|
||||
# The second largest should not exceed the top one requested.
|
||||
trace trSnapRecvProtocolViolation & "AccountRange top exceeded", peer,
|
||||
nAccounts, nProof, accRange=NodeTagRange.new(iv.minPt, accMaxPt),
|
||||
reqRange=iv, stateRoot
|
||||
pivot, nAccounts, nProof,
|
||||
accRange=NodeTagRange.new(iv.minPt, accMaxPt), reqRange=iv
|
||||
return err(ComAccountsMaxTooLarge)
|
||||
|
||||
dd.consumed = NodeTagRange.new(iv.minPt, max(iv.maxPt,accMaxPt))
|
||||
trace trSnapRecvReceived & "AccountRange", peer,
|
||||
nAccounts, nProof, accRange=dd.consumed, reqRange=iv, stateRoot
|
||||
trace trSnapRecvReceived & "AccountRange", peer, pivot,
|
||||
nAccounts, nProof, accRange=dd.consumed, reqRange=iv
|
||||
|
||||
return ok(dd)
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ import
|
|||
eth/[common, p2p],
|
||||
stew/interval_set,
|
||||
"../../.."/[protocol, protocol/trace_config],
|
||||
"../.."/[range_desc, worker_desc],
|
||||
"../.."/[constants, range_desc, worker_desc],
|
||||
./com_error
|
||||
|
||||
{.push raises: [Defect].}
|
||||
|
@ -46,8 +46,9 @@ const
|
|||
proc getStorageRangesReq(
|
||||
buddy: SnapBuddyRef;
|
||||
root: Hash256;
|
||||
accounts: seq[Hash256],
|
||||
iv: Option[NodeTagRange]
|
||||
accounts: seq[Hash256];
|
||||
iv: Option[NodeTagRange];
|
||||
pivot: string;
|
||||
): Future[Result[Option[SnapStorageRanges],void]]
|
||||
{.async.} =
|
||||
let
|
||||
|
@ -70,7 +71,7 @@ proc getStorageRangesReq(
|
|||
return ok(reply)
|
||||
|
||||
except CatchableError as e:
|
||||
trace trSnapRecvError & "waiting for GetStorageRanges reply", peer,
|
||||
trace trSnapRecvError & "waiting for GetStorageRanges reply", peer, pivot,
|
||||
error=e.msg
|
||||
return err()
|
||||
|
||||
|
@ -80,8 +81,9 @@ proc getStorageRangesReq(
|
|||
|
||||
proc getStorageRanges*(
|
||||
buddy: SnapBuddyRef;
|
||||
stateRoot: Hash256;
|
||||
accounts: seq[AccountSlotsHeader],
|
||||
stateRoot: Hash256; ## Current DB base (`pivot` for logging)
|
||||
accounts: seq[AccountSlotsHeader]; ## List of per-account storage slots
|
||||
pivot: string; ## For logging, instead of `stateRoot`
|
||||
): Future[Result[GetStorageRanges,ComError]]
|
||||
{.async.} =
|
||||
## Fetch data using the `snap#` protocol, returns the range covered.
|
||||
|
@ -99,16 +101,16 @@ proc getStorageRanges*(
|
|||
return err(ComEmptyAccountsArguments)
|
||||
|
||||
if trSnapTracePacketsOk:
|
||||
trace trSnapSendSending & "GetStorageRanges", peer,
|
||||
nAccounts, stateRoot, bytesLimit=snapRequestBytesLimit
|
||||
trace trSnapSendSending & "GetStorageRanges", peer, pivot,
|
||||
nAccounts, bytesLimit=snapRequestBytesLimit
|
||||
|
||||
let snStoRanges = block:
|
||||
let rc = await buddy.getStorageRangesReq(
|
||||
stateRoot, accounts.mapIt(it.accKey.to(Hash256)), accounts[0].subRange)
|
||||
let rc = await buddy.getStorageRangesReq(stateRoot,
|
||||
accounts.mapIt(it.accKey.to(Hash256)), accounts[0].subRange, pivot)
|
||||
if rc.isErr:
|
||||
return err(ComNetworkProblem)
|
||||
if rc.value.isNone:
|
||||
trace trSnapRecvTimeoutWaiting & "for reply to GetStorageRanges", peer,
|
||||
trace trSnapRecvTimeoutWaiting & "for StorageRanges", peer, pivot,
|
||||
nAccounts
|
||||
return err(ComResponseTimeout)
|
||||
if nAccounts < rc.value.get.slotLists.len:
|
||||
|
@ -129,8 +131,8 @@ proc getStorageRanges*(
|
|||
# for any requested account hash, it must return an empty reply. It is
|
||||
# the responsibility of the caller to query a state not older than 128
|
||||
# blocks; and the caller is expected to only ever query existing accounts.
|
||||
trace trSnapRecvReceived & "empty StorageRanges", peer,
|
||||
nAccounts, nSlotLists, nProof, stateRoot, firstAccount=accounts[0].accKey
|
||||
trace trSnapRecvReceived & "empty StorageRanges", peer, pivot,
|
||||
nAccounts, nSlotLists, nProof, firstAccount=accounts[0].accKey
|
||||
return err(ComNoStorageForAccounts)
|
||||
|
||||
# Assemble return structure for given peer response
|
||||
|
@ -171,8 +173,8 @@ proc getStorageRanges*(
|
|||
# assigning an empty slice is ok
|
||||
dd.leftOver = dd.leftOver & accounts[nSlotLists ..< nAccounts]
|
||||
|
||||
trace trSnapRecvReceived & "StorageRanges", peer, nAccounts, nSlotLists,
|
||||
nProof, nLeftOver=dd.leftOver.len, stateRoot
|
||||
trace trSnapRecvReceived & "StorageRanges", peer, pivot, nAccounts,
|
||||
nSlotLists, nProof, nLeftOver=dd.leftOver.len
|
||||
|
||||
return ok(dd)
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ import
|
|||
chronos,
|
||||
eth/[common, p2p],
|
||||
"../../.."/[protocol, protocol/trace_config],
|
||||
"../.."/[range_desc, worker_desc],
|
||||
"../.."/[constants, range_desc, worker_desc],
|
||||
./com_error
|
||||
|
||||
{.push raises: [Defect].}
|
||||
|
@ -37,6 +37,7 @@ proc getTrieNodesReq(
|
|||
buddy: SnapBuddyRef;
|
||||
stateRoot: Hash256;
|
||||
paths: seq[seq[Blob]];
|
||||
pivot: string;
|
||||
): Future[Result[Option[SnapTrieNodes],void]]
|
||||
{.async.} =
|
||||
let
|
||||
|
@ -46,7 +47,7 @@ proc getTrieNodesReq(
|
|||
return ok(reply)
|
||||
|
||||
except CatchableError as e:
|
||||
trace trSnapRecvError & "waiting for GetByteCodes reply", peer,
|
||||
trace trSnapRecvError & "waiting for GetByteCodes reply", peer, pivot,
|
||||
error=e.msg
|
||||
return err()
|
||||
|
||||
|
@ -56,8 +57,9 @@ proc getTrieNodesReq(
|
|||
|
||||
proc getTrieNodes*(
|
||||
buddy: SnapBuddyRef;
|
||||
stateRoot: Hash256;
|
||||
paths: seq[seq[Blob]],
|
||||
stateRoot: Hash256; ## Current DB base (see `pivot` for logging)
|
||||
paths: seq[seq[Blob]]; ## Nodes to fetch
|
||||
pivot: string; ## For logging, instead of `stateRoot`
|
||||
): Future[Result[GetTrieNodes,ComError]]
|
||||
{.async.} =
|
||||
## Fetch data using the `snap#` protocol, returns the trie nodes requested
|
||||
|
@ -72,15 +74,15 @@ proc getTrieNodes*(
|
|||
let nTotal = paths.mapIt(it.len).foldl(a+b, 0)
|
||||
|
||||
if trSnapTracePacketsOk:
|
||||
trace trSnapSendSending & "GetTrieNodes", peer,
|
||||
trace trSnapSendSending & "GetTrieNodes", peer, pivot,
|
||||
nPaths, nTotal, bytesLimit=snapRequestBytesLimit
|
||||
|
||||
let trieNodes = block:
|
||||
let rc = await buddy.getTrieNodesReq(stateRoot, paths)
|
||||
let rc = await buddy.getTrieNodesReq(stateRoot, paths, pivot)
|
||||
if rc.isErr:
|
||||
return err(ComNetworkProblem)
|
||||
if rc.value.isNone:
|
||||
trace trSnapRecvTimeoutWaiting & "for reply to GetTrieNodes", peer, nPaths
|
||||
trace trSnapRecvTimeoutWaiting & "for TrieNodes", peer, pivot, nPaths
|
||||
return err(ComResponseTimeout)
|
||||
let blobs = rc.value.get.nodes
|
||||
if nTotal < blobs.len:
|
||||
|
@ -104,7 +106,7 @@ proc getTrieNodes*(
|
|||
# nodes.
|
||||
# * The responding node is allowed to return less data than requested
|
||||
# (serving QoS limits), but the node must return at least one trie node.
|
||||
trace trSnapRecvReceived & "empty TrieNodes", peer, nPaths, nNodes
|
||||
trace trSnapRecvReceived & "empty TrieNodes", peer, pivot, nPaths, nNodes
|
||||
return err(ComNoByteCodesAvailable)
|
||||
|
||||
# Assemble return value
|
||||
|
@ -153,7 +155,7 @@ proc getTrieNodes*(
|
|||
if 0 < pushBack.len:
|
||||
dd.leftOver.add paths[n][0] & pushBack
|
||||
|
||||
trace trSnapRecvReceived & "TrieNodes", peer,
|
||||
trace trSnapRecvReceived & "TrieNodes", peer, pivot,
|
||||
nPaths, nNodes, nLeftOver=dd.leftOver.len
|
||||
|
||||
return ok(dd)
|
||||
|
|
|
@ -216,10 +216,10 @@ proc importAccounts*(
|
|||
error "Import Accounts exception", peer=ps.peer, name=($e.name), msg=e.msg
|
||||
return err(OSErrorException)
|
||||
|
||||
when extraTraceMessages:
|
||||
trace "Accounts and proofs ok", peer=ps.peer,
|
||||
root=ps.root.ByteArray32.toHex,
|
||||
proof=data.proof.len, base, accounts=data.accounts.len
|
||||
#when extraTraceMessages:
|
||||
# trace "Accounts imported", peer=ps.peer,
|
||||
# root=ps.root.ByteArray32.toHex,
|
||||
# proof=data.proof.len, base, accounts=data.accounts.len
|
||||
ok()
|
||||
|
||||
proc importAccounts*(
|
||||
|
|
|
@ -0,0 +1,266 @@
|
|||
# Nimbus
|
||||
# Copyright (c) 2021 Status Research & Development GmbH
|
||||
# Licensed under either of
|
||||
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
|
||||
# http://www.apache.org/licenses/LICENSE-2.0)
|
||||
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
|
||||
# http://opensource.org/licenses/MIT)
|
||||
# at your option. This file may not be copied, modified, or distributed
|
||||
# except according to those terms.
|
||||
|
||||
## Check/analyse DB completeness
|
||||
## =============================
|
||||
|
||||
import
|
||||
chronicles,
|
||||
eth/[common, p2p, trie/trie_defs],
|
||||
stew/keyed_queue,
|
||||
../../../../utils/prettify,
|
||||
../../../sync_desc,
|
||||
"../.."/[range_desc, worker_desc],
|
||||
"."/[hexary_desc, hexary_error, snapdb_accounts, snapdb_storage_slots]
|
||||
|
||||
{.push raises: [Defect].}
|
||||
|
||||
logScope:
|
||||
topics = "snap-db"
|
||||
|
||||
const
|
||||
extraTraceMessages = false or true
|
||||
## Enabled additional logging noise
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Private logging helpers
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
template logTxt(info: static[string]): static[string] =
|
||||
"Check DB " & info
|
||||
|
||||
proc accountsCtx(
|
||||
buddy: SnapBuddyRef;
|
||||
env: SnapPivotRef;
|
||||
): string =
|
||||
let
|
||||
ctx = buddy.ctx
|
||||
"{" &
|
||||
"pivot=" & "#" & $env.stateHeader.blockNumber & "," &
|
||||
"nAccounts=" & $env.nAccounts & "," &
|
||||
("covered=" & env.fetchAccounts.unprocessed.emptyFactor.toPC(0) & "/" &
|
||||
ctx.data.coveredAccounts.fullFactor.toPC(0)) & "," &
|
||||
"nCheckNodes=" & $env.fetchAccounts.checkNodes.len & "," &
|
||||
"nMissingNodes=" & $env.fetchAccounts.missingNodes.len & "}"
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Private helpers
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
proc storageSlotsCtx(
|
||||
buddy: SnapBuddyRef;
|
||||
storageRoot: Hash256;
|
||||
env: SnapPivotRef;
|
||||
): string =
|
||||
let
|
||||
ctx = buddy.ctx
|
||||
rc = env.fetchStorage.eq(storageRoot)
|
||||
if rc.isErr:
|
||||
return "n/a"
|
||||
let
|
||||
data = rc.value
|
||||
slots = data.slots
|
||||
result = "{" &
|
||||
"inherit=" & (if data.inherit: "t" else: "f") & ","
|
||||
if not slots.isNil:
|
||||
result &= "" &
|
||||
"covered=" & slots.unprocessed.emptyFactor.toPC(0) &
|
||||
"nCheckNodes=" & $slots.checkNodes.len & "," &
|
||||
"nMissingNodes=" & $slots.missingNodes.len
|
||||
result &= "}"
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Private functions
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
proc checkStorageSlotsTrie(
|
||||
buddy: SnapBuddyRef;
|
||||
accKey: NodeKey;
|
||||
storageRoot: Hash256;
|
||||
env: SnapPivotRef;
|
||||
): Result[bool,HexaryDbError] =
|
||||
## Check whether a storage slots hexary trie is complete.
|
||||
let
|
||||
ctx = buddy.ctx
|
||||
db = ctx.data.snapDb
|
||||
peer = buddy.peer
|
||||
|
||||
rc = db.inspectStorageSlotsTrie(peer, accKey, storageRoot)
|
||||
|
||||
if rc.isErr:
|
||||
return err(rc.error)
|
||||
|
||||
ok(rc.value.dangling.len == 0)
|
||||
|
||||
|
||||
iterator accountsWalk(
|
||||
buddy: SnapBuddyRef;
|
||||
env: SnapPivotRef;
|
||||
): (NodeKey,Account,HexaryDbError) =
|
||||
let
|
||||
ctx = buddy.ctx
|
||||
db = ctx.data.snapDb
|
||||
peer = buddy.peer
|
||||
stateRoot = env.stateHeader.stateRoot
|
||||
walk = SnapDbAccountsRef.init(db, stateRoot, peer)
|
||||
|
||||
var
|
||||
accKey = NodeKey.default
|
||||
count = 0
|
||||
runOk = true
|
||||
|
||||
while runOk:
|
||||
count.inc
|
||||
|
||||
let nextKey = block:
|
||||
let rc = walk.nextAccountsChainDbKey(accKey)
|
||||
if rc.isErr:
|
||||
if rc.error != AccountNotFound:
|
||||
error logTxt "accounts walk stopped", peer,
|
||||
account=accKey.to(NodeTag),
|
||||
ctx=buddy.accountsCtx(env), count, reason=rc.error
|
||||
runOk = false
|
||||
continue
|
||||
rc.value
|
||||
|
||||
accKey = nextKey
|
||||
|
||||
let accData = block:
|
||||
let rc = walk.getAccountsData(accKey, persistent = true)
|
||||
if rc.isErr:
|
||||
error logTxt "accounts walk error", peer, account=accKey,
|
||||
ctx=buddy.accountsCtx(env), count, error=rc.error
|
||||
runOk = false
|
||||
continue
|
||||
rc.value
|
||||
|
||||
yield (accKey, accData, NothingSerious)
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Public functions
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
proc checkAccountsTrieIsComplete*(
|
||||
buddy: SnapBuddyRef;
|
||||
env: SnapPivotRef;
|
||||
): bool =
|
||||
## Check whether accounts hexary trie is complete
|
||||
let
|
||||
ctx = buddy.ctx
|
||||
db = ctx.data.snapDb
|
||||
peer = buddy.peer
|
||||
stateRoot = env.stateHeader.stateRoot
|
||||
|
||||
rc = db.inspectAccountsTrie(peer, stateRoot)
|
||||
|
||||
if rc.isErr:
|
||||
error logTxt "accounts health check failed", peer,
|
||||
ctx=buddy.accountsCtx(env), error=rc.error
|
||||
return false
|
||||
|
||||
rc.value.dangling.len == 0
|
||||
|
||||
|
||||
proc checkAccountsListOk*(
|
||||
buddy: SnapBuddyRef;
|
||||
env: SnapPivotRef;
|
||||
noisy = false;
|
||||
): bool =
|
||||
## Loop over accounts, returns `false` for some error.
|
||||
let
|
||||
ctx = buddy.ctx
|
||||
peer = buddy.peer
|
||||
var
|
||||
accounts = 0
|
||||
storage = 0
|
||||
nextMsgThresh = 1
|
||||
|
||||
for (key,accData,error) in buddy.accountsWalk(env):
|
||||
|
||||
if error != NothingSerious:
|
||||
error logTxt "accounts loop stopped", peer, ctx=buddy.accountsCtx(env),
|
||||
accounts, storage, error
|
||||
return false
|
||||
|
||||
accounts.inc
|
||||
if accData.storageRoot != emptyRlpHash:
|
||||
storage.inc
|
||||
|
||||
when extraTraceMessages:
|
||||
if noisy and nextMsgThresh <= accounts:
|
||||
debug logTxt "accounts loop check point", peer,
|
||||
ctx=buddy.accountsCtx(env), accounts, storage
|
||||
nextMsgThresh *= 2
|
||||
|
||||
when extraTraceMessages:
|
||||
let isComplete = buddy.checkAccountsTrieIsComplete(env)
|
||||
debug logTxt "accounts list report", peer, ctx=buddy.accountsCtx(env),
|
||||
accounts, storage, isComplete
|
||||
|
||||
true
|
||||
|
||||
|
||||
proc checkStorageSlotsTrieIsComplete*(
|
||||
buddy: SnapBuddyRef;
|
||||
accKey: NodeKey;
|
||||
storageRoot: Hash256;
|
||||
env: SnapPivotRef;
|
||||
): bool =
|
||||
## Check whether a storage slots hexary trie is complete.
|
||||
let
|
||||
peer = buddy.peer
|
||||
rc = buddy.checkStorageSlotsTrie(accKey, storageRoot, env)
|
||||
if rc.isOk:
|
||||
return rc.value
|
||||
|
||||
when extraTraceMessages:
|
||||
debug logTxt "atorage slots health check failed", peer,
|
||||
nStoQueue=env.fetchStorage.len,
|
||||
ctx=buddy.storageSlotsCtx(storageRoot, env), error=rc.error
|
||||
|
||||
proc checkStorageSlotsTrieIsComplete*(
|
||||
buddy: SnapBuddyRef;
|
||||
env: SnapPivotRef;
|
||||
): bool =
|
||||
## Check for all accounts whether their storage slot hexary tries are complete.
|
||||
let
|
||||
ctx = buddy.ctx
|
||||
peer = buddy.peer
|
||||
var
|
||||
accounts = 0
|
||||
incomplete = 0
|
||||
complete = 0
|
||||
|
||||
for (accKey,accData,error) in buddy.accountsWalk(env):
|
||||
if error != NothingSerious:
|
||||
error logTxt "atorage slots accounts loop stopped", peer,
|
||||
nStoQueue=env.fetchStorage.len, accounts, incomplete, complete, error
|
||||
return false
|
||||
|
||||
accounts.inc
|
||||
let storageRoot = accData.storageRoot
|
||||
if storageRoot == emptyRlpHash:
|
||||
continue
|
||||
|
||||
let rc = buddy.checkStorageSlotsTrie(accKey, storageRoot, env)
|
||||
if rc.isOk and rc.value:
|
||||
complete.inc
|
||||
else:
|
||||
incomplete.inc
|
||||
|
||||
when extraTraceMessages:
|
||||
debug logTxt "storage slots report", peer, ctx=buddy.accountsCtx(env),
|
||||
nStoQueue=env.fetchStorage.len, accounts, incomplete, complete
|
||||
|
||||
0 < accounts and incomplete == 0
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# End
|
||||
# ------------------------------------------------------------------------------
|
|
@ -251,10 +251,10 @@ proc importStorageSlots*(
|
|||
error "Import storage slots exception", peer, itemInx, nItems,
|
||||
name=($e.name), msg=e.msg, nErrors=result.len
|
||||
|
||||
when extraTraceMessages:
|
||||
if result.len == 0:
|
||||
trace "Storage slots imported", peer, nItems,
|
||||
nSlotLists=data.storages.len, proofs=data.proof.len
|
||||
#when extraTraceMessages:
|
||||
# if result.len == 0:
|
||||
# trace "Storage slots imported", peer, nItems,
|
||||
# nSlotLists=data.storages.len, proofs=data.proof.len
|
||||
|
||||
proc importStorageSlots*(
|
||||
pv: SnapDbRef; ## Base descriptor on `BaseChainDB`
|
||||
|
|
|
@ -112,7 +112,7 @@ import
|
|||
stew/[interval_set, keyed_queue],
|
||||
../../../utils/prettify,
|
||||
../../sync_desc,
|
||||
".."/[range_desc, worker_desc],
|
||||
".."/[constants, range_desc, worker_desc],
|
||||
./com/[com_error, get_trie_nodes],
|
||||
./db/[hexary_desc, hexary_error, snapdb_accounts]
|
||||
|
||||
|
@ -137,6 +137,7 @@ proc healingCtx(buddy: SnapBuddyRef): string =
|
|||
ctx = buddy.ctx
|
||||
env = buddy.data.pivotEnv
|
||||
"{" &
|
||||
"pivot=" & "#" & $env.stateHeader.blockNumber & "," &
|
||||
"nAccounts=" & $env.nAccounts & "," &
|
||||
("covered=" & env.fetchAccounts.unprocessed.emptyFactor.toPC(0) & "/" &
|
||||
ctx.data.coveredAccounts.fullFactor.toPC(0)) & "," &
|
||||
|
@ -212,9 +213,10 @@ proc getMissingNodesFromNetwork(
|
|||
peer = buddy.peer
|
||||
env = buddy.data.pivotEnv
|
||||
stateRoot = env.stateHeader.stateRoot
|
||||
pivot = "#" & $env.stateHeader.blockNumber # for logging
|
||||
|
||||
nMissingNodes = env.fetchAccounts.missingNodes.len
|
||||
inxLeft = max(0, nMissingNodes - maxTrieNodeFetch)
|
||||
inxLeft = max(0, nMissingNodes - snapTrieNodeFetchMax)
|
||||
|
||||
# There is no point in processing too many nodes at the same time. So leave
|
||||
# the rest on the `missingNodes` queue to be handled later.
|
||||
|
@ -231,8 +233,11 @@ proc getMissingNodesFromNetwork(
|
|||
|
||||
# Fetch nodes from the network. Note that the remainder of the `missingNodes`
|
||||
# list might be used by another process that runs semi-parallel.
|
||||
let rc = await buddy.getTrieNodes(stateRoot, pathList)
|
||||
let rc = await buddy.getTrieNodes(stateRoot, pathList, pivot)
|
||||
if rc.isOk:
|
||||
# Reset error counts for detecting repeated timeouts, network errors, etc.
|
||||
buddy.data.errors.resetComError()
|
||||
|
||||
# Register unfetched missing nodes for the next pass
|
||||
for w in rc.value.leftOver:
|
||||
env.fetchAccounts.missingNodes.add NodeSpecs(
|
||||
|
@ -316,35 +321,18 @@ proc registerAccountLeaf(
|
|||
storageRoot: acc.storageRoot)
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Public functions
|
||||
# Private functions: do the healing for one round
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
proc healAccounts*(buddy: SnapBuddyRef) {.async.} =
|
||||
## Fetching and merging missing account trie database nodes.
|
||||
proc accountsHealingImpl(buddy: SnapBuddyRef): Future[int] {.async.} =
|
||||
## Fetching and merging missing account trie database nodes. It returns the
|
||||
## number of nodes fetched from the network, and -1 upon error.
|
||||
let
|
||||
ctx = buddy.ctx
|
||||
db = ctx.data.snapDb
|
||||
peer = buddy.peer
|
||||
env = buddy.data.pivotEnv
|
||||
|
||||
# Only start healing if there is some completion level, already.
|
||||
#
|
||||
# We check against the global coverage factor, i.e. a measure for how
|
||||
# much of the total of all accounts have been processed. Even if the trie
|
||||
# database for the current pivot state root is sparsely filled, there
|
||||
# is a good chance that it can inherit some unchanged sub-trie from an
|
||||
# earlier pivot state root download. The healing process then works like
|
||||
# sort of glue.
|
||||
#
|
||||
if env.nAccounts == 0 or
|
||||
ctx.data.coveredAccounts.fullFactor < healAccountsTrigger:
|
||||
#when extraTraceMessages:
|
||||
# trace logTxt "postponed", peer, ctx=buddy.healingCtx()
|
||||
return
|
||||
|
||||
when extraTraceMessages:
|
||||
trace logTxt "started", peer, ctx=buddy.healingCtx()
|
||||
|
||||
# Update for changes since last visit
|
||||
buddy.updateMissingNodesList()
|
||||
|
||||
|
@ -353,17 +341,17 @@ proc healAccounts*(buddy: SnapBuddyRef) {.async.} =
|
|||
if env.fetchAccounts.checkNodes.len != 0 or
|
||||
env.fetchAccounts.missingNodes.len == 0:
|
||||
if not buddy.appendMoreDanglingNodesToMissingNodesList():
|
||||
return
|
||||
return 0
|
||||
|
||||
# Check whether the trie is complete.
|
||||
if env.fetchAccounts.missingNodes.len == 0:
|
||||
trace logTxt "complete", peer, ctx=buddy.healingCtx()
|
||||
return # nothing to do
|
||||
return 0 # nothing to do
|
||||
|
||||
# Get next batch of nodes that need to be merged it into the database
|
||||
let nodeSpecs = await buddy.getMissingNodesFromNetwork()
|
||||
if nodeSpecs.len == 0:
|
||||
return
|
||||
return 0
|
||||
|
||||
# Store nodes onto disk
|
||||
let report = db.importRawAccountsNodes(peer, nodeSpecs)
|
||||
|
@ -372,11 +360,7 @@ proc healAccounts*(buddy: SnapBuddyRef) {.async.} =
|
|||
error logTxt "error updating persistent database", peer,
|
||||
ctx=buddy.healingCtx(), nNodes=nodeSpecs.len, error=report[^1].error
|
||||
env.fetchAccounts.missingNodes = env.fetchAccounts.missingNodes & nodeSpecs
|
||||
return
|
||||
|
||||
when extraTraceMessages:
|
||||
trace logTxt "merged into database", peer,
|
||||
ctx=buddy.healingCtx(), nNodes=nodeSpecs.len
|
||||
return -1
|
||||
|
||||
# Filter out error and leaf nodes
|
||||
var nLeafNodes = 0 # for logging
|
||||
|
@ -405,7 +389,54 @@ proc healAccounts*(buddy: SnapBuddyRef) {.async.} =
|
|||
env.fetchAccounts.checkNodes.add nodePath
|
||||
|
||||
when extraTraceMessages:
|
||||
trace logTxt "job done", peer, ctx=buddy.healingCtx(), nLeafNodes
|
||||
trace logTxt "merged into database", peer,
|
||||
ctx=buddy.healingCtx(), nNodes=nodeSpecs.len, nLeafNodes
|
||||
|
||||
return nodeSpecs.len
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Public functions
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
proc healAccounts*(buddy: SnapBuddyRef) {.async.} =
|
||||
## Fetching and merging missing account trie database nodes.
|
||||
let
|
||||
ctx = buddy.ctx
|
||||
peer = buddy.peer
|
||||
env = buddy.data.pivotEnv
|
||||
|
||||
# Only start healing if there is some completion level, already.
|
||||
#
|
||||
# We check against the global coverage factor, i.e. a measure for how
|
||||
# much of the total of all accounts have been processed. Even if the trie
|
||||
# database for the current pivot state root is sparsely filled, there
|
||||
# is a good chance that it can inherit some unchanged sub-trie from an
|
||||
# earlier pivot state root download. The healing process then works like
|
||||
# sort of glue.
|
||||
#
|
||||
if env.nAccounts == 0 or
|
||||
ctx.data.coveredAccounts.fullFactor < healAccountsTrigger:
|
||||
#when extraTraceMessages:
|
||||
# trace logTxt "postponed", peer, ctx=buddy.healingCtx()
|
||||
return
|
||||
|
||||
when extraTraceMessages:
|
||||
trace logTxt "started", peer, ctx=buddy.healingCtx()
|
||||
|
||||
var
|
||||
nNodesFetched = 0
|
||||
nFetchLoop = 0
|
||||
# Stop after `snapAccountsHealBatchFetchMax` nodes have been fetched
|
||||
while nNodesFetched < snapAccountsHealBatchFetchMax:
|
||||
var nNodes = await buddy.accountsHealingImpl()
|
||||
if nNodes <= 0:
|
||||
break
|
||||
nNodesFetched.inc(nNodes)
|
||||
nFetchLoop.inc
|
||||
|
||||
when extraTraceMessages:
|
||||
trace logTxt "job done", peer, ctx=buddy.healingCtx(),
|
||||
nNodesFetched, nFetchLoop
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# End
|
||||
|
|
|
@ -11,8 +11,18 @@
|
|||
## Heal storage DB:
|
||||
## ================
|
||||
##
|
||||
## This module works similar to `heal_accounts` applied to each
|
||||
## per-account storage slots hexary trie.
|
||||
## This module works similar to `heal_accounts` applied to each per-account
|
||||
## storage slots hexary trie. These per-account trie work items are stored in
|
||||
## the list `env.fetchStorage`.
|
||||
##
|
||||
## There is one additional shortcut for speeding up processing. If a
|
||||
## per-account storage slots hexary trie is marked inheritable, it will be
|
||||
## checked whether it is complete and can be used wholesale.
|
||||
##
|
||||
## Inheritable tries appear after a pivot state root change. Typically, not all
|
||||
## account data have changed and so the same per-account storage slots are
|
||||
## valid.
|
||||
##
|
||||
|
||||
import
|
||||
std/sequtils,
|
||||
|
@ -22,7 +32,7 @@ import
|
|||
stew/[interval_set, keyed_queue],
|
||||
../../../utils/prettify,
|
||||
../../sync_desc,
|
||||
".."/[range_desc, worker_desc],
|
||||
".."/[constants, range_desc, worker_desc],
|
||||
./com/[com_error, get_trie_nodes],
|
||||
./db/[hexary_desc, hexary_error, snapdb_storage_slots]
|
||||
|
||||
|
@ -47,8 +57,10 @@ proc healingCtx(
|
|||
kvp: SnapSlotsQueuePair;
|
||||
): string =
|
||||
let
|
||||
env = buddy.data.pivotEnv
|
||||
slots = kvp.data.slots
|
||||
"{" &
|
||||
"pivot=" & "#" & $env.stateHeader.blockNumber & "," &
|
||||
"covered=" & slots.unprocessed.emptyFactor.toPC(0) & "," &
|
||||
"nCheckNodes=" & $slots.checkNodes.len & "," &
|
||||
"nMissingNodes=" & $slots.missingNodes.len & "}"
|
||||
|
@ -158,10 +170,11 @@ proc getMissingNodesFromNetwork(
|
|||
env = buddy.data.pivotEnv
|
||||
accKey = kvp.data.accKey
|
||||
storageRoot = kvp.key
|
||||
pivot = "#" & $env.stateHeader.blockNumber # for logging
|
||||
slots = kvp.data.slots
|
||||
|
||||
nMissingNodes = slots.missingNodes.len
|
||||
inxLeft = max(0, nMissingNodes - maxTrieNodeFetch)
|
||||
inxLeft = max(0, nMissingNodes - snapTrieNodeFetchMax)
|
||||
|
||||
# There is no point in processing too many nodes at the same time. So leave
|
||||
# the rest on the `missingNodes` queue to be handled later.
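The cap works by splitting the queue: take a bounded batch off the tail, leave the remainder queued. A minimal sketch under that assumption (`fetchMax` and `splitFetchBatch` are illustrative names, and the real code works on node specs rather than plain integers):

const fetchMax = 1024                   # stand-in for snapTrieNodeFetchMax

proc splitFetchBatch[T](queue: var seq[T]): seq[T] =
  ## Take at most `fetchMax` items off the tail; the rest stays queued.
  let inxLeft = max(0, queue.len - fetchMax)
  result = queue[inxLeft .. ^1]
  queue.setLen(inxLeft)

var q = newSeq[int](1500)
let batch = splitFetchBatch(q)
doAssert batch.len == 1024 and q.len == 476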
|
||||
|
@ -180,8 +193,11 @@ proc getMissingNodesFromNetwork(
|
|||
# list might be used by another process that runs semi-parallel.
|
||||
let
|
||||
req = @[accKey.to(Blob)] & fetchNodes.mapIt(it.partialPath)
|
||||
rc = await buddy.getTrieNodes(storageRoot, @[req])
|
||||
rc = await buddy.getTrieNodes(storageRoot, @[req], pivot)
|
||||
if rc.isOk:
|
||||
# Reset error counts for detecting repeated timeouts, network errors, etc.
|
||||
buddy.data.errors.resetComError()
|
||||
|
||||
# Register unfetched missing nodes for the next pass
|
||||
for w in rc.value.leftOver:
|
||||
for n in 1 ..< w.len:
|
||||
|
@ -418,7 +434,7 @@ proc healStorageSlots*(buddy: SnapBuddyRef) {.async.} =
|
|||
for kvp in env.fetchStorage.nextPairs:
|
||||
|
||||
# Marked items indicate that a partial sub-trie exists which might have
|
||||
# been inherited from an earlier state root.
|
||||
# been inherited from an earlier storage root.
|
||||
if not kvp.data.inherit:
|
||||
let slots = kvp.data.slots
|
||||
|
||||
|
@ -442,7 +458,7 @@ proc healStorageSlots*(buddy: SnapBuddyRef) {.async.} =
|
|||
|
||||
# Add to local batch to be processed, below
|
||||
toBeHealed.add kvp
|
||||
if maxStoragesHeal <= toBeHealed.len:
|
||||
if healStoragesSlotsBatchMax <= toBeHealed.len:
|
||||
break
|
||||
|
||||
# Run against local batch
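A rough sketch of the batch selection above, with `WorkItem`, `batchMax` and `selectBatch` as simplified stand-ins for the real queue-pair handling (the inheritable-trie completeness check is omitted here):

const batchMax = 32                     # stand-in for healStoragesSlotsBatchMax

type WorkItem = object
  key: string                           # storage root, simplified
  inherit: bool                         # a partial trie may be inherited wholesale

proc selectBatch(queue: seq[WorkItem]): seq[WorkItem] =
  ## Collect at most `batchMax` items that still need healing here; items
  ## marked inheritable are checked for wholesale reuse elsewhere.
  for w in queue:
    if not w.inherit:
      result.add(w)
      if batchMax <= result.len:
        break

let workQueue = @[WorkItem(key: "a"), WorkItem(key: "b", inherit: true)]
doAssert selectBatch(workQueue).len == 1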
|
||||
|
|
|
@ -8,24 +8,26 @@
|
|||
# at your option. This file may not be copied, modified, or distributed
|
||||
# except according to those terms.
|
||||
|
||||
## Fetch accounts stapshot
|
||||
## =======================
|
||||
## Fetch account ranges
|
||||
## ====================
|
||||
##
|
||||
## Worker items state diagram:
|
||||
## ::
|
||||
## unprocessed | peer workers + |
|
||||
## account ranges | account database update | unprocessed storage slots
|
||||
## ========================================================================
|
||||
## Account ranges not in the database yet are organised in the set
|
||||
## `env.fetchAccounts.unprocessed` of intervals (of account hashes.)
|
||||
##
|
||||
## +---------------------------------------+
|
||||
## | |
|
||||
## v |
|
||||
## <unprocessed> -----+------> <worker-0> ------+-----> OUTPUT
|
||||
## | |
|
||||
## +------> <worker-1> ------+
|
||||
## | |
|
||||
## +------> <worker-2> ------+
|
||||
## : :
|
||||
## When processing, the following happens.
|
||||
##
|
||||
## * Some interval `iv` is removed from the `env.fetchAccounts.unprocessed`
|
||||
## set. This interval set might then be safely accessed and manipulated by
|
||||
## other worker instances.
|
||||
##
|
||||
## * The data points in the interval `iv` (aka account hashes) are fetched from
|
||||
## another peer over the network.
|
||||
##
|
||||
## * The received data points of the interval `iv` are verified and merged
|
||||
## into the persistent database.
|
||||
##
|
||||
## * Data points in `iv` that were invalid or not received from the network
|
||||
## are merged back into the set `env.fetchAccounts.unprocessed`.
|
||||
##
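The checkout / merge-back cycle described above can be sketched with a plain `HashSet` standing in for the `NodeTag` interval sets; `checkout` and `mergeBack` are illustrative names, not the module's API:

import std/[sets, options]

type Unprocessed = object
  chunks: HashSet[int]                  # stand-in for the interval sets

proc checkout(u: var Unprocessed): Option[int] =
  ## Remove one work item so that other workers will not pick it up.
  for c in u.chunks:
    result = some(c)
    break
  if result.isSome:
    u.chunks.excl(result.get)

proc mergeBack(u: var Unprocessed; c: int) =
  ## Return unfinished or failed work to the shared pool.
  u.chunks.incl(c)

var u = Unprocessed(chunks: [1, 2, 3].toHashSet)
let job = u.checkout()
doAssert job.isSome and u.chunks.len == 2
u.mergeBack(job.get)                    # e.g. after a network error
doAssert u.chunks.len == 3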
|
||||
|
||||
import
|
||||
|
@ -48,6 +50,12 @@ const
|
|||
extraTraceMessages = false or true
|
||||
## Enabled additional logging noise
|
||||
|
||||
numChunksMax = 2000
|
||||
## Bound for `numChunks()` (some fancy number)
|
||||
|
||||
addToFetchLoopMax = 4
|
||||
## Add some extra when calculating number of fetch/store rounds
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Private logging helpers
|
||||
# ------------------------------------------------------------------------------
|
||||
|
@ -59,6 +67,12 @@ template logTxt(info: static[string]): static[string] =
|
|||
# Private helpers
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
proc numChunks(buddy: SnapBuddyRef): int =
|
||||
var total = 0u64
|
||||
for ivSet in buddy.data.pivotEnv.fetchAccounts.unprocessed:
|
||||
total += ivSet.chunks.uint64
|
||||
min(numChunksMax.uint64, total).int
|
||||
|
||||
proc withMaxLen(
|
||||
buddy: SnapBuddyRef;
|
||||
iv: NodeTagRange;
|
||||
|
@ -102,48 +116,50 @@ proc markGloballyProcessed(buddy: SnapBuddyRef; iv: NodeTagRange) =
|
|||
discard buddy.ctx.data.coveredAccounts.merge(iv)
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Public functions
|
||||
# Private functions: do the account fetching for one round
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
proc rangeFetchAccounts*(buddy: SnapBuddyRef) {.async.} =
|
||||
## Fetch accounts and store them in the database.
|
||||
proc accountsRagefetchImpl(buddy: SnapBuddyRef): Future[bool] {.async.} =
|
||||
## Fetch accounts and store them in the database. Returns true while more
|
||||
## data can probably be fetched.
|
||||
let
|
||||
ctx = buddy.ctx
|
||||
peer = buddy.peer
|
||||
env = buddy.data.pivotEnv
|
||||
stateRoot = env.stateHeader.stateRoot
|
||||
pivot = "#" & $env.stateHeader.blockNumber # for logging
|
||||
|
||||
# Get a range of accounts to fetch from
|
||||
let iv = block:
|
||||
let rc = buddy.getUnprocessed()
|
||||
if rc.isErr:
|
||||
when extraTraceMessages:
|
||||
trace logTxt "currently all processed", peer, stateRoot
|
||||
trace logTxt "currently all processed", peer, pivot
|
||||
return
|
||||
rc.value
|
||||
|
||||
# Process received accounts and stash storage slots to fetch later
|
||||
let dd = block:
|
||||
let rc = await buddy.getAccountRange(stateRoot, iv)
|
||||
let rc = await buddy.getAccountRange(stateRoot, iv, pivot)
|
||||
if rc.isErr:
|
||||
buddy.putUnprocessed(iv) # fail => interval back to pool
|
||||
let error = rc.error
|
||||
if await buddy.ctrl.stopAfterSeriousComError(error, buddy.data.errors):
|
||||
when extraTraceMessages:
|
||||
trace logTxt "fetch error => stop", peer,
|
||||
stateRoot, req=iv.len, error
|
||||
trace logTxt "fetch error => stop", peer, pivot, reqLen=iv.len, error
|
||||
return
|
||||
# Reset error counts for detecting repeated timeouts
|
||||
buddy.data.errors.nTimeouts = 0
|
||||
rc.value
|
||||
|
||||
# Reset error counts for detecting repeated timeouts, network errors, etc.
|
||||
buddy.data.errors.resetComError()
|
||||
|
||||
let
|
||||
gotAccounts = dd.data.accounts.len
|
||||
gotStorage = dd.withStorage.len
|
||||
|
||||
when extraTraceMessages:
|
||||
trace logTxt "fetched", peer, gotAccounts, gotStorage,
|
||||
stateRoot, req=iv.len, got=dd.consumed
|
||||
#when extraTraceMessages:
|
||||
# trace logTxt "fetched", peer, gotAccounts, gotStorage,
|
||||
# pivot, reqLen=iv.len, gotLen=dd.consumed.len
|
||||
|
||||
block:
|
||||
let rc = ctx.data.snapDb.importAccounts(peer, stateRoot, iv.minPt, dd.data)
|
||||
|
@ -153,7 +169,7 @@ proc rangeFetchAccounts*(buddy: SnapBuddyRef) {.async.} =
|
|||
buddy.ctrl.zombie = true
|
||||
when extraTraceMessages:
|
||||
trace logTxt "import failed => stop", peer, gotAccounts, gotStorage,
|
||||
stateRoot, req=iv.len, got=dd.consumed, error=rc.error
|
||||
pivot, reqLen=iv.len, gotLen=dd.consumed.len, error=rc.error
|
||||
return
|
||||
|
||||
# Statistics
|
||||
|
@ -184,9 +200,35 @@ proc rangeFetchAccounts*(buddy: SnapBuddyRef) {.async.} =
|
|||
# Store accounts on the storage TODO list.
|
||||
env.fetchStorage.merge dd.withStorage
|
||||
|
||||
when extraTraceMessages:
|
||||
trace logTxt "done", peer, gotAccounts, gotStorage,
|
||||
stateRoot, req=iv.len, got=dd.consumed
|
||||
return true
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Public functions
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
proc rangeFetchAccounts*(buddy: SnapBuddyRef) {.async.} =
|
||||
## Fetch accounts and store them in the database.
|
||||
let numChunks = buddy.numChunks()
|
||||
if 0 < numChunks:
|
||||
let
|
||||
ctx = buddy.ctx
|
||||
peer = buddy.peer
|
||||
env = buddy.data.pivotEnv
|
||||
pivot = "#" & $env.stateHeader.blockNumber # for logging
|
||||
|
||||
nFetchLoopMax = max(ctx.buddiesMax + 1, numChunks) + addToFetchLoopMax
|
||||
|
||||
when extraTraceMessages:
|
||||
trace logTxt "start", peer, pivot, nFetchLoopMax
|
||||
|
||||
var nFetchAccounts = 0
|
||||
while nFetchAccounts < nFetchLoopMax:
|
||||
if not await buddy.accountsRagefetchImpl():
|
||||
break
|
||||
nFetchAccounts.inc
|
||||
|
||||
when extraTraceMessages:
|
||||
trace logTxt "done", peer, pivot, nFetchAccounts, nFetchLoopMax
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# End
|
||||
|
|
|
@ -8,8 +8,8 @@
|
|||
# at your option. This file may not be copied, modified, or distributed
|
||||
# except according to those terms.
|
||||
|
||||
## Fetch accounts stapshot
|
||||
## =======================
|
||||
## Fetch storage slots
|
||||
## ===================
|
||||
##
|
||||
## Flow chart for storage slots download
|
||||
## -------------------------------------
|
||||
|
@ -39,6 +39,17 @@
|
|||
## * `{completed}`: list is optimised out
|
||||
## * `{partial}`: list is optimised out
|
||||
##
|
||||
## Discussion
|
||||
## ----------
|
||||
## Handling storage slots can be seen as a generalisation of handling account
|
||||
## ranges (see `range_fetch_accounts` module.) Contrary to the situation with
|
||||
## accounts, storage slots are typically downloaded as a complete list
|
||||
## that can be expanded to a full hexary trie for the given storage root.
|
||||
##
|
||||
## Only in rare cases is a storage slots list incomplete, i.e. a partial hexary
|
||||
## trie. In that case, the list of storage slots is processed as described
|
||||
## for accounts (see `range_fetch_accounts` module.)
|
||||
##
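A hedged sketch of the dispatch just described: a reply proven complete is stored wholesale, a partial one is re-queued for range-style processing. `SlotsReply` and `process` are simplified stand-ins, not the actual types:

type SlotsReply = object
  complete: bool                        # the proof shows the full trie was sent
  slots: seq[(string, string)]          # (slotHash, value) pairs, simplified

proc process(reply: SlotsReply; requeue: var seq[SlotsReply]): int =
  ## Store a complete reply wholesale; re-queue a partial one so it is later
  ## handled like an account range.
  if reply.complete:
    return reply.slots.len
  requeue.add(reply)
  0

var backlog: seq[SlotsReply]
doAssert process(SlotsReply(complete: true, slots: @[("k", "v")]), backlog) == 1
doAssert process(SlotsReply(), backlog) == 0 and backlog.len == 1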
|
||||
|
||||
import
|
||||
chronicles,
|
||||
|
@ -47,7 +58,7 @@ import
|
|||
stew/[interval_set, keyed_queue],
|
||||
stint,
|
||||
../../sync_desc,
|
||||
".."/[range_desc, worker_desc],
|
||||
".."/[constants, range_desc, worker_desc],
|
||||
./com/[com_error, get_storage_ranges],
|
||||
./db/snapdb_storage_slots
|
||||
|
||||
|
@ -81,10 +92,10 @@ proc getNextSlotItems(
|
|||
## item. An explicit list of slots is only calculated if there was a queue
|
||||
## item with a partially completed slots download.
|
||||
##
|
||||
## * Otherwise, a list of at most `maxStoragesFetch` work items is returned.
|
||||
## These work items were checked for that there was no trace of a previously
|
||||
## installed (probably partial) storage trie on the database (e.g. inherited
|
||||
## from an earlier state root pivot.)
|
||||
## * Otherwise, a list of at most `snapStoragesSlotsFetchMax` work items is
|
||||
## returned. These work items were checked for that there was no trace of a
|
||||
## previously installed (probably partial) storage trie on the database
|
||||
## (e.g. inherited from an earlier state root pivot.)
|
||||
##
|
||||
## If there is an indication that the storage trie may have some data
|
||||
## already it is ignored here and marked `inherit` so that it will be
|
||||
|
@ -152,7 +163,7 @@ proc getNextSlotItems(
|
|||
env.fetchStorage.del(kvp.key) # ok to delete this item from batch queue
|
||||
|
||||
# Maximal number of items to fetch
|
||||
if maxStoragesFetch <= result.len:
|
||||
if snapStoragesSlotsFetchMax <= result.len:
|
||||
break
|
||||
|
||||
when extraTraceMessages:
|
||||
|
@ -170,6 +181,7 @@ proc storeStoragesSingleBatch(
|
|||
peer = buddy.peer
|
||||
env = buddy.data.pivotEnv
|
||||
stateRoot = env.stateHeader.stateRoot
|
||||
pivot = "#" & $env.stateHeader.blockNumber # for logging
|
||||
|
||||
# Fetch storage data and save it on disk. Storage requests are managed by
|
||||
# a request queue for handling partial replies and re-fetch issues. For
|
||||
|
@ -182,26 +194,26 @@ proc storeStoragesSingleBatch(
|
|||
|
||||
# Get storages slots data from the network
|
||||
var stoRange = block:
|
||||
let rc = await buddy.getStorageRanges(stateRoot, req)
|
||||
let rc = await buddy.getStorageRanges(stateRoot, req, pivot)
|
||||
if rc.isErr:
|
||||
env.fetchStorage.merge req
|
||||
|
||||
let error = rc.error
|
||||
if await buddy.ctrl.stopAfterSeriousComError(error, buddy.data.errors):
|
||||
discard
|
||||
trace logTxt "fetch error => stop", peer,
|
||||
trace logTxt "fetch error => stop", peer, pivot,
|
||||
nSlotLists=env.nSlotLists, nReq=req.len,
|
||||
nStorageQueue=env.fetchStorage.len, error
|
||||
return
|
||||
rc.value
|
||||
|
||||
# Reset error counts for detecting repeated timeouts
|
||||
buddy.data.errors.nTimeouts = 0
|
||||
# Reset error counts for detecting repeated timeouts, network errors, etc.
|
||||
buddy.data.errors.resetComError()
|
||||
|
||||
var gotSlotLists = stoRange.data.storages.len
|
||||
|
||||
#when extraTraceMessages:
|
||||
# trace logTxt "fetched", peer,
|
||||
# trace logTxt "fetched", peer, pivot,
|
||||
# nSlotLists=env.nSlotLists, nSlotLists=gotSlotLists, nReq=req.len,
|
||||
# nStorageQueue=env.fetchStorage.len, nLeftOvers=stoRange.leftOver.len
|
||||
|
||||
|
@ -217,7 +229,7 @@ proc storeStoragesSingleBatch(
|
|||
env.fetchStorage.merge req
|
||||
gotSlotLists.dec(report.len - 1) # for logging only
|
||||
|
||||
error logTxt "import failed", peer,
|
||||
error logTxt "import failed", peer, pivot,
|
||||
nSlotLists=env.nSlotLists, nSlotLists=gotSlotLists, nReq=req.len,
|
||||
nStorageQueue=env.fetchStorage.len, error=report[^1].error
|
||||
return
|
||||
|
@ -246,7 +258,7 @@ proc storeStoragesSingleBatch(
|
|||
# Update local statistics counter for `nSlotLists` counter update
|
||||
gotSlotLists.dec
|
||||
|
||||
trace logTxt "processing error", peer, nSlotLists=env.nSlotLists,
|
||||
trace logTxt "processing error", peer, pivot, nSlotLists=env.nSlotLists,
|
||||
nSlotLists=gotSlotLists, nReqInx=inx, nReq=req.len,
|
||||
nStorageQueue=env.fetchStorage.len, error=report[inx].error
|
||||
|
||||
|
@ -279,7 +291,8 @@ proc rangeFetchStorageSlots*(buddy: SnapBuddyRef) {.async.} =
|
|||
if 0 < env.fetchStorage.len:
|
||||
# Run at most the minimum number of times to get the batch queue cleaned up.
|
||||
var
|
||||
fullRangeLoopCount = 1 + (env.fetchStorage.len - 1) div maxStoragesFetch
|
||||
fullRangeLoopCount =
|
||||
1 + (env.fetchStorage.len - 1) div snapStoragesSlotsFetchMax
|
||||
subRangeLoopCount = 0
|
||||
|
||||
# Add additional counts for partial slot range items
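The `1 + (len - 1) div max` expression above is ceiling division for a non-empty queue, i.e. the minimum number of full-range rounds needed to drain it. A small worked check (standalone, not from the patch):

proc ceilDiv(n, m: int): int =
  ## `1 + (n - 1) div m` equals ceil(n / m) for positive `n` and `m`.
  doAssert 0 < n and 0 < m
  1 + (n - 1) div m

doAssert ceilDiv(1, 5 * 1024) == 1
doAssert ceilDiv(5 * 1024, 5 * 1024) == 1
doAssert ceilDiv(5 * 1024 + 1, 5 * 1024) == 2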
|
||||
|
|
|
@ -39,15 +39,15 @@ type
|
|||
## Account fetching state that is shared among all peers.
|
||||
nBuddies: int
|
||||
lastStats: TickerStats
|
||||
lastTick: uint64
|
||||
statsCb: TickerStatsUpdater
|
||||
logTicker: TimerCallback
|
||||
tick: uint64 # more than 5*10^11y before wrap when ticking every sec
|
||||
started: Time
|
||||
visited: Time
|
||||
|
||||
const
|
||||
tickerStartDelay = chronos.milliseconds(100)
|
||||
tickerLogInterval = chronos.seconds(1)
|
||||
tickerLogSuppressMax = 100
|
||||
tickerLogSuppressMax = initDuration(seconds = 100)
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Private functions: pretty printing
|
||||
|
@ -105,11 +105,13 @@ template noFmtError(info: static[string]; code: untyped) =
|
|||
proc setLogTicker(t: TickerRef; at: Moment) {.gcsafe.}
|
||||
|
||||
proc runLogTicker(t: TickerRef) {.gcsafe.} =
|
||||
let data = t.statsCb()
|
||||
let
|
||||
data = t.statsCb()
|
||||
now = getTime().utc.toTime
|
||||
|
||||
if data != t.lastStats or t.lastTick + tickerLogSuppressMax < t.tick:
|
||||
if data != t.lastStats or tickerLogSuppressMax < (now - t.visited):
|
||||
t.lastStats = data
|
||||
t.lastTick = t.tick
|
||||
t.visited = now
|
||||
var
|
||||
nAcc, nSto, bulk: string
|
||||
pivot = "n/a"
|
||||
|
@ -119,7 +121,9 @@ proc runLogTicker(t: TickerRef) {.gcsafe.} =
|
|||
"(" & data.accountsFill[1].toPC(0) & ")" &
|
||||
"/" & data.accountsFill[2].toPC(0)
|
||||
buddies = t.nBuddies
|
||||
tick = t.tick.toSI
|
||||
|
||||
# With `int64`, the seconds range covers more than 29*10^10 years
|
||||
up = (now - t.started).inSeconds.uint64.toSI
|
||||
mem = getTotalMem().uint.toSI
|
||||
|
||||
noFmtError("runLogTicker"):
|
||||
|
@ -134,9 +138,8 @@ proc runLogTicker(t: TickerRef) {.gcsafe.} =
|
|||
nStoQue = $data.nStorageQueue.unsafeGet
|
||||
|
||||
info "Snap sync statistics",
|
||||
tick, buddies, pivot, nAcc, accCov, nSto, nStoQue, mem
|
||||
up, buddies, pivot, nAcc, accCov, nSto, nStoQue, mem
|
||||
|
||||
t.tick.inc
|
||||
t.setLogTicker(Moment.fromNow(tickerLogInterval))
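The suppression logic above boils down to: emit the stats line when the data changed or when `tickerLogSuppressMax` has elapsed since the last emission. A self-contained sketch using only `std/times` (`Suppressor` and `shouldLog` are illustrative names):

import std/times

const suppressMax = initDuration(seconds = 100)   # mirrors tickerLogSuppressMax

type Suppressor = object
  lastStats: string
  visited: Time

proc shouldLog(s: var Suppressor; stats: string; now: Time): bool =
  ## Log when the stats changed or the suppression window has expired.
  if stats != s.lastStats or suppressMax < (now - s.visited):
    s.lastStats = stats
    s.visited = now
    result = true

var s = Suppressor()
let t0 = getTime()
doAssert s.shouldLog("a", t0)           # changed stats are always logged
doAssert not s.shouldLog("a", t0)       # unchanged and inside the window
doAssert s.shouldLog("a", t0 + initDuration(seconds = 101))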
|
||||
|
||||
|
||||
|
@ -156,6 +159,8 @@ proc start*(t: TickerRef) =
|
|||
## Re/start ticker unconditionally
|
||||
#debug "Started ticker"
|
||||
t.logTicker = safeSetTimer(Moment.fromNow(tickerStartDelay), runLogTicker, t)
|
||||
if t.started == Time.default:
|
||||
t.started = getTime().utc.toTime
|
||||
|
||||
proc stop*(t: TickerRef) =
|
||||
## Stop ticker unconditionally
|
||||
|
|
|
@ -19,60 +19,6 @@ import
|
|||
|
||||
{.push raises: [Defect].}
|
||||
|
||||
const
|
||||
snapRequestBytesLimit* = 2 * 1024 * 1024
|
||||
## Soft bytes limit to request in `snap` protocol calls.
|
||||
|
||||
minPivotBlockDistance* = 128
|
||||
## The minimal depth of two block headers needed to activate a new state
|
||||
## root pivot.
|
||||
##
|
||||
## Effects on assembling the state via `snap/1` protocol:
|
||||
##
|
||||
## * A small value of this constant increases the propensity to update the
|
||||
## pivot header more often. This is so because each new peer negoiates a
|
||||
## pivot block number at least the current one.
|
||||
##
|
||||
## * A large value keeps the current pivot more stable but some experiments
|
||||
## suggest that the `snap/1` protocol is answered only for later block
|
||||
## numbers (aka pivot blocks.) So a large value tends to keep the pivot
|
||||
## farther away from the chain head.
|
||||
##
|
||||
## Note that 128 is the magic distance for snapshots used by *Geth*.
|
||||
|
||||
healAccountsTrigger* = 0.95
|
||||
## Apply accounts healing if the global snap download coverage factor
|
||||
## exceeds this setting. The global coverage factor is derived by merging
|
||||
## all account ranges retrieved for all pivot state roots (see
|
||||
## `coveredAccounts` in `CtxData`.)
|
||||
|
||||
healSlorageSlotsTrigger* = 0.70
|
||||
## Consider per account storage slost healing if this particular sub-trie
|
||||
## has reached this factor of completeness
|
||||
|
||||
maxStoragesFetch* = 5 * 1024
|
||||
## Maximal number of storage tries to fetch with a single message.
|
||||
|
||||
maxStoragesHeal* = 32
|
||||
## Maximal number of storage tries to to heal in a single batch run.
|
||||
|
||||
maxTrieNodeFetch* = 1024
|
||||
## Informal maximal number of trie nodes to fetch at once. This is nor
|
||||
## an official limit but found on several implementations (e.g. geth.)
|
||||
##
|
||||
## Resticting the fetch list length early allows to better paralellise
|
||||
## healing.
|
||||
|
||||
maxHealingLeafPaths* = 1024
|
||||
## Retrieve this many leave nodes with proper 32 bytes path when inspecting
|
||||
## for dangling nodes. This allows to run healing paralell to accounts or
|
||||
## storage download without requestinng an account/storage slot found by
|
||||
## healing again with the download.
|
||||
|
||||
noPivotEnvChangeIfComplete* = true
|
||||
## If set `true`, new peers will not change the pivot even if the
|
||||
## negotiated pivot would be newer. This should be the default.
|
||||
|
||||
type
|
||||
SnapSlotsQueue* = KeyedQueue[Hash256,SnapSlotsQueueItemRef]
|
||||
## Handles list of storage slots data for fetch indexed by storage root.
|
||||
|
@ -110,6 +56,13 @@ type
|
|||
## Referenced object, so it can be made optional for the storage
|
||||
## batch list
|
||||
|
||||
SnapHealingState* = enum
|
||||
## State of healing process. The `HealerRunning` state indicates that
|
||||
## dangling and/or missing nodes have been temporarily removed from the
|
||||
## batch queue while processing.
|
||||
HealerIdle
|
||||
HealerRunning
|
||||
HealerDone
|
||||
|
||||
SnapPivotRef* = ref object
|
||||
## Per-state root cache for particular snap data environment
|
||||
|
@ -117,11 +70,11 @@ type
|
|||
|
||||
# Accounts download
|
||||
fetchAccounts*: SnapTrieRangeBatch ## Set of accounts ranges to fetch
|
||||
accountsDone*: bool ## All accounts have been processed
|
||||
accountsState*: SnapHealingState ## Accounts download and healing state
|
||||
|
||||
# Storage slots download
|
||||
fetchStorage*: SnapSlotsQueue ## Fetch storage for these accounts
|
||||
serialSync*: bool ## Done with storage, block sync next
|
||||
storageDone*: bool ## Done with storage, block sync next
|
||||
|
||||
# Info
|
||||
nAccounts*: uint64 ## Imported # of accounts
|
||||
|
@ -155,9 +108,6 @@ type
|
|||
SnapCtxRef* = CtxRef[CtxData]
|
||||
## Extended global descriptor
|
||||
|
||||
static:
|
||||
doAssert healAccountsTrigger < 1.0 # larger values make no sense
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Public functions
|
||||
# ------------------------------------------------------------------------------
|
||||
|
|
|
@ -161,7 +161,7 @@ proc workerLoop[S,W](buddy: RunnerBuddyRef[S,W]) {.async.} =
|
|||
discard dsc.buddies.lruFetch(peer.hash)
|
||||
|
||||
# Allow task switch
|
||||
await sleepAsync(50.milliseconds)
|
||||
await sleepAsync(1.milliseconds)
|
||||
if worker.ctrl.stopped:
|
||||
break
|
||||
|
||||
|
|