Mirror of https://github.com/status-im/nimbus-eth1.git
synced 2025-01-18 00:01:07 +00:00

Commit bd42ebb193
* Relocated mothballing (i.e. swap-in preparation) logic

  details:
    Mothballing was previously tested & started after downloading account
    ranges in `range_fetch_accounts`. Whenever the current download or healing
    stops because of a pivot change, swap-in preparation is needed (otherwise
    some storage slots may get lost when swap-in takes place.) Also,
    `execSnapSyncAction()` has been moved back to `pivot_helper`.

* Reorganised source file directories

  details:
    Grouped pivot focused modules into the `pivot` directory.

* Renamed `checkNodes`, `sickSubTries` as `nodes.check`, `nodes.missing`

  why:
    Both lists are typically used together as a pair. Renaming `sickSubTries`
    reflects moving away from a healing centric view towards a swap-in
    attitude.

* Multi times coverage recording (see the first sketch after this list)

  details:
    Per pivot, account ranges are accumulated into a coverage range set. This
    set will eventually contain a single range of account hashes
    [0..2^256-1], which amounts to 100% capacity. A counter has been added
    that is incremented whenever max capacity is reached; the accumulated
    range is then reset to empty. The effect of this setting is that the
    coverage can be evenly duplicated, so 200% would not accumulate on a
    particular region.

* Updated range length comparisons (mod 2^256; see the second sketch after
  this list)

  why:
    A range interval can have size 1..2^256 as it cannot be empty by
    definition, whereas a set of range intervals can hold 0..2^256 points. As
    the scalar range is a residue class modulo 2^256, the residue class 0
    means length 2^256 for a single range interval, but can mean either 0 or
    2^256 for the number of points in a range interval set.

* Generalised `hexaryEnvelopeDecompose()` (see the third sketch after this
  list)

  details:
    Compile the complement of the union of some (processed) intervals and
    express this complement as a list of envelopes of sub-tries. This
    facility is directly applicable to swap-in book-keeping.

* Re-factored `swapIn()`

  why:
    Good idea but baloney implementation. The main algorithm is now based on
    the generalised version of `hexaryEnvelopeDecompose()` which was derived
    from this implementation.

* Refactored `healAccounts()` using `hexaryEnvelopeDecompose()` as main driver

  why:
    Previously, the hexary trie was searched recursively for dangling nodes,
    which has poor worst case performance once the trie is reasonably
    populated. The function `hexaryEnvelopeDecompose()` is an order of
    magnitude faster because it does not peruse existing sub-tries in order
    to find missing nodes, although the result is not fully compatible with
    the previous function. So recursive search is only used in a limited
    mode, when the decomposer will not deliver a useful result.

* Logging & maintenance fixes

  details:
    Preparation for abandoning the buddy-global healing variables `node`,
    `resumeCtx`, and `lockTriePerusal`. These variables are trie-perusal
    centric, which will be run on the back burner in favour of
    `hexaryEnvelopeDecompose()`, which is already used for accounts healing.
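The "multi times coverage recording" bookkeeping can be pictured in a few
lines of Nim. This is a minimal sketch only: `CoverageMeter` and
`registerCovered` are illustrative names, not identifiers from this
repository, and the real code accumulates `NodeTag` ranges in an interval set
rather than plain fractions.

```nim
import std/strformat

type
  CoverageMeter = object
    covered: float  # fraction of the account hash space in the current lap
    fullLaps: int   # bumped whenever 100% capacity is reached

proc registerCovered(cm: var CoverageMeter; fraction: float) =
  ## Accumulate a newly processed account range (expressed here as a
  ## fraction of the full hash space.)
  cm.covered += fraction
  while 1.0 <= cm.covered:
    # Max capacity reached: count a full lap and reset to empty, so that
    # 200% coverage means two even passes over the hash space rather than
    # piling up on one particular region.
    inc cm.fullLaps
    cm.covered -= 1.0

var cm: CoverageMeter
for frac in [0.6, 0.5, 0.9, 0.2]:
  cm.registerCovered frac
echo &"laps={cm.fullLaps} partial={cm.covered:.2f}"  # laps=2 partial=0.20
```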
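The mod 2^256 length convention is easiest to see in code. The sketch below
scales the same arithmetic down to mod 2^64 so that plain `uint64` operations
(which wrap in Nim) can stand in for the 256-bit scalars; `intervalLen` and
`IntervalSetSize` are made-up illustrative names.

```nim
proc intervalLen(a, b: uint64): uint64 =
  ## Length of the non-empty closed interval [a,b]; wraps modulo 2^64,
  ## so the residue 0 stands for the full length 2^64.
  b - a + 1

assert intervalLen(5'u64, 14'u64) == 10
assert intervalLen(0'u64, high(uint64)) == 0  # full range: 0 encodes 2^64

# For an interval *set* the same residue is ambiguous: 0 points and 2^64
# points share the residue 0, so an extra flag is needed to tell the
# empty set apart from the full one.
type IntervalSetSize = object
  isEmpty: bool
  points: uint64  # residue mod 2^64, interpreted together with `isEmpty`

let emptySet = IntervalSetSize(isEmpty: true, points: 0)
let fullSet  = IntervalSetSize(isEmpty: false, points: 0)  # 0 here means 2^64
assert emptySet.points == fullSet.points  # same residue, opposite meaning
```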
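The generalised `hexaryEnvelopeDecompose()` rests on a complement-of-union
step. The sketch below shows only that step, over flat `uint64` intervals and
with a made-up helper name `complement`; the real function goes further and
expresses each gap as envelopes of sub-tries of the account trie.

```nim
import std/algorithm

type Iv = tuple[a, b: uint64]  # closed interval, a <= b

proc complement(processed: openArray[Iv]; lo, hi: uint64): seq[Iv] =
  ## Gaps of [lo,hi] that the union of `processed` leaves uncovered
  ## (assumes hi < high(uint64) so `iv.b + 1` cannot wrap.)
  let ivs = processed.sorted(proc(x, y: Iv): int = cmp(x.a, y.a))
  var cursor = lo
  for iv in ivs:
    if cursor < iv.a:
      result.add((cursor, iv.a - 1))  # gap before this interval
    if cursor <= iv.b:
      cursor = iv.b + 1               # skip past the covered part
  if cursor <= hi:
    result.add((cursor, hi))          # trailing gap

let done: seq[Iv] = @[(10'u64, 19'u64), (40'u64, 49'u64)]
let gaps: seq[Iv] = @[(0'u64, 9'u64), (20'u64, 39'u64), (50'u64, 99'u64)]
assert complement(done, 0, 99) == gaps
```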
333 lines
10 KiB
Nim
# Nimbus
# Copyright (c) 2021 Status Research & Development GmbH
# Licensed under either of
#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
#    http://www.apache.org/licenses/LICENSE-2.0)
#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
#    http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.

import
  std/[hashes, options, sets, strutils],
  chronicles,
  chronos,
  eth/[common, p2p],
  stew/[interval_set, keyed_queue],
  ../../db/select_backend,
  ../../utils/prettify,
  ../misc/best_pivot,
  ".."/[protocol, sync_desc],
  ./worker/[pivot, ticker],
  ./worker/com/com_error,
  ./worker/db/[hexary_desc, snapdb_desc, snapdb_pivot],
  "."/[constants, range_desc, worker_desc]

{.push raises: [Defect].}

logScope:
  topics = "snap-buddy"

const
  extraTraceMessages = false or true
    ## Enabled additional logging noise

# ------------------------------------------------------------------------------
# Private helpers: integration of pivot finder
# ------------------------------------------------------------------------------

proc pivot(ctx: SnapCtxRef): BestPivotCtxRef =
  # Getter
  ctx.data.pivotFinderCtx.BestPivotCtxRef

proc `pivot=`(ctx: SnapCtxRef; val: BestPivotCtxRef) =
  # Setter
  ctx.data.pivotFinderCtx = val

proc pivot(buddy: SnapBuddyRef): BestPivotWorkerRef =
  # Getter
  buddy.data.pivotFinder.BestPivotWorkerRef

proc `pivot=`(buddy: SnapBuddyRef; val: BestPivotWorkerRef) =
  # Setter
  buddy.data.pivotFinder = val

# ------------------------------------------------------------------------------
# Private functions
# ------------------------------------------------------------------------------

proc recoveryStepContinue(ctx: SnapCtxRef): Future[bool] {.async.} =
  let recov = ctx.data.recovery
  if recov.isNil:
    return false

  let
    checkpoint =
      "#" & $recov.state.header.blockNumber & "(" & $recov.level & ")"
    topLevel = recov.level == 0
    env = block:
      let rc = ctx.data.pivotTable.eq recov.state.header.stateRoot
      if rc.isErr:
        error "Recovery pivot context gone", checkpoint, topLevel
        return false
      rc.value

  # Cosmetics: allow other processes (e.g. ticker) to log the current recovery
  # state. There is no other intended purpose of this wait state.
  await sleepAsync 1100.milliseconds

  #when extraTraceMessages:
  #  trace "Recovery continued ...", checkpoint, topLevel,
  #    nAccounts=recov.state.nAccounts, nDangling=recov.state.dangling.len

  # Update pivot data from recovery checkpoint
  env.recoverPivotFromCheckpoint(ctx, topLevel)

  # Fetch next recovery record if there is any
  if recov.state.predecessor.isZero:
    #when extraTraceMessages:
    #  trace "Recovery done", checkpoint, topLevel
    return false
  let rc = ctx.data.snapDb.recoverPivot(recov.state.predecessor)
  if rc.isErr:
    when extraTraceMessages:
      trace "Recovery stopped at pivot stale checkpoint", checkpoint, topLevel
    return false

  # Set up next level pivot checkpoint
  ctx.data.recovery = SnapRecoveryRef(
    state: rc.value,
    level: recov.level + 1)

  # Push onto pivot table and continue recovery (i.e. do not stop it yet)
  ctx.data.pivotTable.update(
    ctx.data.recovery.state.header, ctx, reverse=true)

  return true # continue recovery


proc updateSinglePivot(buddy: SnapBuddyRef): Future[bool] {.async.} =
  ## Helper, negotiate pivot unless present
  if buddy.pivot.pivotHeader.isOk:
    return true

  let
    ctx = buddy.ctx
    peer = buddy.peer
    env = ctx.data.pivotTable.lastValue.get(otherwise = nil)
    nMin = if env.isNil: none(BlockNumber)
           else: some(env.stateHeader.blockNumber)

  if await buddy.pivot.pivotNegotiate(nMin):
    var header = buddy.pivot.pivotHeader.value

    # Check whether there is no environment change needed
    when pivotEnvStopChangingIfComplete:
      let rc = ctx.data.pivotTable.lastValue
      if rc.isOk and rc.value.storageDone:
        # No need to change
        when extraTraceMessages:
          trace "No need to change snap pivot", peer,
            pivot=("#" & $rc.value.stateHeader.blockNumber),
            stateRoot=rc.value.stateHeader.stateRoot,
            multiOk=buddy.ctrl.multiOk, runState=buddy.ctrl.state
        return true

    buddy.ctx.data.pivotTable.update(header, buddy.ctx)

    info "Snap pivot initialised", peer, pivot=("#" & $header.blockNumber),
      multiOk=buddy.ctrl.multiOk, runState=buddy.ctrl.state

    return true

# ------------------------------------------------------------------------------
# Public start/stop and admin functions
# ------------------------------------------------------------------------------

proc setup*(ctx: SnapCtxRef; tickerOK: bool): bool =
  ## Global set up
  ctx.data.coveredAccounts = NodeTagRangeSet.init()
  ctx.data.snapDb =
    if ctx.data.dbBackend.isNil: SnapDbRef.init(ctx.chain.db.db)
    else: SnapDbRef.init(ctx.data.dbBackend)
  ctx.pivot = BestPivotCtxRef.init(ctx.data.rng)
  ctx.pivot.pivotRelaxedMode(enable = true)

  if tickerOK:
    ctx.data.ticker = TickerRef.init(ctx.data.pivotTable.tickerStats(ctx))
  else:
    trace "Ticker is disabled"

  # Check for recovery mode
  if not ctx.data.noRecovery:
    let rc = ctx.data.snapDb.recoverPivot()
    if rc.isOk:
      ctx.data.recovery = SnapRecoveryRef(state: rc.value)
      ctx.daemon = true

      # Set up early initial pivot
      ctx.data.pivotTable.update(ctx.data.recovery.state.header, ctx)
      trace "Recovery started",
        checkpoint=("#" & $ctx.data.pivotTable.topNumber() & "(0)")
      if not ctx.data.ticker.isNil:
        ctx.data.ticker.startRecovery()
  true

proc release*(ctx: SnapCtxRef) =
  ## Global clean up
  ctx.pivot = nil
  if not ctx.data.ticker.isNil:
    ctx.data.ticker.stop()
    ctx.data.ticker = nil

proc start*(buddy: SnapBuddyRef): bool =
  ## Initialise worker peer
  let
    ctx = buddy.ctx
    peer = buddy.peer
  if peer.supports(protocol.snap) and
     peer.supports(protocol.eth) and
     peer.state(protocol.eth).initialized:
    buddy.pivot = BestPivotWorkerRef.init(
      buddy.ctx.pivot, buddy.ctrl, buddy.peer)
    buddy.data.errors = ComErrorStatsRef()
    if not ctx.data.ticker.isNil:
      ctx.data.ticker.startBuddy()
    return true

proc stop*(buddy: SnapBuddyRef) =
  ## Clean up this peer
  let
    ctx = buddy.ctx
    peer = buddy.peer
  buddy.ctrl.stopped = true
  buddy.pivot.clear()
  if not ctx.data.ticker.isNil:
    ctx.data.ticker.stopBuddy()

# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------

proc runDaemon*(ctx: SnapCtxRef) {.async.} =
  ## Enabled while `ctx.daemon` is `true`
  ##
  if not ctx.data.recovery.isNil:
    if not await ctx.recoveryStepContinue():
      # Done, stop recovery
      ctx.data.recovery = nil
      ctx.daemon = false

      # Update logging
      if not ctx.data.ticker.isNil:
        ctx.data.ticker.stopRecovery()
    return


proc runSingle*(buddy: SnapBuddyRef) {.async.} =
  ## Enabled while
  ## * `buddy.ctrl.multiOk` is `false`
  ## * `buddy.ctrl.poolMode` is `false`
  ##
  let peer = buddy.peer

  # Find pivot, probably relaxed mode enabled in `setup()`
  if not await buddy.updateSinglePivot():
    # Wait if needed, then return => repeat
    if not buddy.ctrl.stopped:
      await sleepAsync(2.seconds)
    return

  buddy.ctrl.multiOk = true


proc runPool*(buddy: SnapBuddyRef, last: bool): bool =
  ## Enabled when `buddy.ctrl.poolMode` is `true`
  ##
  let ctx = buddy.ctx
  ctx.poolMode = false
  result = true


proc runMulti*(buddy: SnapBuddyRef) {.async.} =
  ## Enabled while
  ## * `buddy.ctx.multiOk` is `true`
  ## * `buddy.ctx.poolMode` is `false`
  ##
  let
    ctx = buddy.ctx
    peer = buddy.peer

  # Set up current state root environment for accounts snapshot
  let
    env = block:
      let rc = ctx.data.pivotTable.lastValue
      if rc.isErr:
        return # nothing to do
      rc.value
    pivot = "#" & $env.stateHeader.blockNumber # for logging

  buddy.data.pivotEnv = env

  # Full sync processing based on current snapshot
  # ----------------------------------------------
  if env.storageDone:
    trace "Snap full sync -- not implemented yet", peer, pivot
    await sleepAsync(5.seconds)
    return

  # Snapshot sync processing
  # ------------------------

  # If this is a new pivot, the previous one can be cleaned up. There is no
  # point in keeping some older space consuming state data any longer.
  ctx.data.pivotTable.beforeTopMostlyClean()

  when extraTraceMessages:
    block:
      let
        nAccounts = env.nAccounts
        nSlotLists = env.nSlotLists
        processed = env.fetchAccounts.processed.fullFactor.toPC(2)
        nStoQu = env.fetchStorageFull.len + env.fetchStoragePart.len
        accHealThresh = env.healThresh.toPC(2)
      trace "Multi sync runner", peer, pivot, nAccounts, nSlotLists, processed,
        nStoQu, accHealThresh

  # This one is the syncing work horse which downloads the database
  await env.execSnapSyncAction(buddy)

  if env.archived:
    let
      peer = buddy.peer
      nAccounts = env.nAccounts
      nSlotLists = env.nSlotLists
    when extraTraceMessages:
      trace "Mothballing", peer, pivot=("#" & $env.stateHeader.blockNumber),
        nAccounts, nSlotLists
    env.pivotMothball()
    return # pivot has changed

  block:
    # Save state so sync can be partially resumed at next start up
    let
      nAccounts = env.nAccounts
      nSlotLists = env.nSlotLists
      processed = env.fetchAccounts.processed.fullFactor.toPC(2)
      nStoQu = env.fetchStorageFull.len + env.fetchStoragePart.len
      accHealThresh = env.healThresh.toPC(2)
      rc = env.saveCheckpoint(ctx)
    if rc.isErr:
      error "Failed to save recovery checkpoint", peer, pivot, nAccounts,
        nSlotLists, processed, nStoQu, error=rc.error
    else:
      when extraTraceMessages:
        trace "Saved recovery checkpoint", peer, pivot, nAccounts, nSlotLists,
          processed, nStoQu, blobSize=rc.value, accHealThresh

  if buddy.ctrl.stopped:
    return # peer worker has gone

# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------