nimbus-eth1/nimbus/sync/snap/worker/pivot.nim

630 lines
23 KiB
Nim
Raw Normal View History

# Nimbus
# Copyright (c) 2021 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.
{.push raises: [].}
import
std/[math, sets, sequtils],
chronicles,
chronos,
eth/p2p, # trie/trie_defs],
stew/[interval_set, keyed_queue, sorted_set],
"../.."/[sync_desc, types],
".."/[constants, range_desc, worker_desc],
./db/[hexary_error, snapdb_accounts, snapdb_contracts, snapdb_pivot],
./pivot/[heal_accounts, heal_storage_slots, range_fetch_accounts,
range_fetch_contracts, range_fetch_storage_slots,
Snap sync refactor healing (#1397) * Simplify accounts healing threshold management why: Was over-engineered. details: Previously, healing was based on recursive hexary trie perusal. Due to "cheap" envelope decomposition of a range complement for the hexary trie, the cost of running extra laps have become time-affordable again and a simple trigger mechanism for healing will do. * Control number of dangling result nodes in `hexaryInspectTrie()` also: + Returns number of visited nodes available for logging so the maximum number of nodes can be tuned accordingly. + Some code and docu update * Update names of constants why: Declutter, more systematic naming * Re-implemented `worker_desc.merge()` for storage slots why: Provided as proper queue management in `storage_queue_helper`. details: + Several append modes (replaces `merge()`) + Added third queue to record entries currently fetched by a worker. So another parallel running worker can safe the complete set of storage slots in as checkpoint. This was previously lost. * Refactor healing why: Simplify and remove deep hexary trie perusal for finding completeness. Due to "cheap" envelope decomposition of a range complement for the hexary trie, the cost of running extra laps have become time-affordable again and a simple trigger mechanism for healing will do. * Docu update * Run a storage job only once in download loop why: Download failure or rejection (i.e. missing data) lead to repeated fetch requests until peer disconnects, otherwise.
2022-12-24 09:54:18 +00:00
storage_queue_helper],
./ticker
logScope:
topics = "snap-pivot"
const
extraTraceMessages = false or true
## Enabled additional logging noise
proc pivotMothball*(env: SnapPivotRef) {.gcsafe.}
# ------------------------------------------------------------------------------
# Private helpers, logging
# ------------------------------------------------------------------------------
template logTxt(info: static[string]): static[string] =
"Pivot " & info
template ignExceptionOops(info: static[string]; code: untyped) =
try:
code
except CatchableError as e:
trace logTxt "Ooops", `info`=info, name=($e.name), msg=(e.msg)
# ------------------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------------------
Snap sync refactor healing (#1397) * Simplify accounts healing threshold management why: Was over-engineered. details: Previously, healing was based on recursive hexary trie perusal. Due to "cheap" envelope decomposition of a range complement for the hexary trie, the cost of running extra laps have become time-affordable again and a simple trigger mechanism for healing will do. * Control number of dangling result nodes in `hexaryInspectTrie()` also: + Returns number of visited nodes available for logging so the maximum number of nodes can be tuned accordingly. + Some code and docu update * Update names of constants why: Declutter, more systematic naming * Re-implemented `worker_desc.merge()` for storage slots why: Provided as proper queue management in `storage_queue_helper`. details: + Several append modes (replaces `merge()`) + Added third queue to record entries currently fetched by a worker. So another parallel running worker can safe the complete set of storage slots in as checkpoint. This was previously lost. * Refactor healing why: Simplify and remove deep hexary trie perusal for finding completeness. Due to "cheap" envelope decomposition of a range complement for the hexary trie, the cost of running extra laps have become time-affordable again and a simple trigger mechanism for healing will do. * Docu update * Run a storage job only once in download loop why: Download failure or rejection (i.e. missing data) lead to repeated fetch requests until peer disconnects, otherwise.
2022-12-24 09:54:18 +00:00
proc accountsHealingOk(
env: SnapPivotRef; # Current pivot environment
ctx: SnapCtxRef; # Some global context
): bool =
## Returns `true` if accounts healing is enabled for this pivot.
not env.fetchAccounts.processed.isEmpty and
healAccountsCoverageTrigger <= ctx.pivotAccountsCoverage()
proc init(
T: type SnapRangeBatchRef; # Collection of sets of account ranges
ctx: SnapCtxRef; # Some global context
): T =
## Account ranges constructor
new result
result.unprocessed.init() # full range on the first set of the pair
result.processed = NodeTagRangeSet.init()
# Update coverage level roll over
ctx.pivotAccountsCoverage100PcRollOver()
Snap sync refactor accounts healing (#1392) * Relocated mothballing (i.e. swap-in preparation) logic details: Mothballing was previously tested & started after downloading account ranges in `range_fetch_accounts`. Whenever current download or healing stops because of a pivot change, swap-in preparation is needed (otherwise some storage slots may get lost when swap-in takes place.) Also, `execSnapSyncAction()` has been moved back to `pivot_helper`. * Reorganised source file directories details: Grouped pivot focused modules into `pivot` directory * Renamed `checkNodes`, `sickSubTries` as `nodes.check`, `nodes.missing` why: Both lists are typically used together as pair. Renaming `sickSubTries` reflects moving away from a healing centric view towards a swap-in attitude. * Multi times coverage recording details: Per pivot account ranges are accumulated into coverage range set. This set fill eventually contain a singe range of account hashes [0..2^256] which amounts to 100% capacity. A counter has been added that is incremented whenever max capacity is reached. The accumulated range is then reset to empty. The effect of this setting is that the coverage can be evenly duplicated. So 200% would not accumulate on a particular region. * Update range length comparisons (mod 2^256) why: A range interval can have sizes 1..2^256 as it cannot be empty by definition. The number of points in a range intervals set can have 0..2^256 points. As the scalar range is a residue class modulo 2^256, the residue class 0 means length 2^256 for a range interval, but can be 0 or 2^256 for the number of points in a range intervals set. * Generalised `hexaryEnvelopeDecompose()` details: Compile the complement of the union of some (processed) intervals and express this complement as a list of envelopes of sub-tries. This facility is directly applicable to swap-in book-keeping. * Re-factor `swapIn()` why: Good idea but baloney implementation. The main algorithm is based on the generalised version of `hexaryEnvelopeDecompose()` which has been derived from this implementation. * Refactor `healAccounts()` using `hexaryEnvelopeDecompose()` as main driver why: Previously, the hexary trie was searched recursively for dangling nodes which has a poor worst case performance already when the trie is reasonably populated. The function `hexaryEnvelopeDecompose()` is a magnitude faster because it does not peruse existing sub-tries in order to find missing nodes although result is not fully compatible with the previous function. So recursive search is used in a limited mode only when the decomposer will not deliver a useful result. * Logging & maintenance fixes details: Preparation for abandoning buddy-global healing variables `node`, `resumeCtx`, and `lockTriePerusal`. These variable are trie-perusal centric which will be run on the back burner in favour of `hexaryEnvelopeDecompose()` which is used for accounts healing already.
2022-12-19 21:22:09 +00:00
# Initialise accounts range fetch batch, the pair of `fetchAccounts[]` range
# sets. Deprioritise already processed ranges by moving it to the second set.
for iv in ctx.pool.coveredAccounts.increasing:
discard result.unprocessed[0].reduce iv
discard result.unprocessed[1].merge iv
proc init(
T: type SnapPivotRef; # Privot descriptor type
ctx: SnapCtxRef; # Some global context
header: BlockHeader; # Header to generate new pivot from
): T =
## Pivot constructor.
result = T(
stateHeader: header,
fetchAccounts: SnapRangeBatchRef.init(ctx))
result.storageAccounts.init()
# ------------------------------------------------------------------------------
# Public functions: pivot table related
# ------------------------------------------------------------------------------
proc beforeTopMostlyClean*(pivotTable: var SnapPivotTable) =
## Clean up pivot queues of the entry before the top one. The queues are
## the pivot data that need most of the memory. This cleaned pivot is not
## usable any more after cleaning but might be useful as historic record.
let rc = pivotTable.beforeLastValue
if rc.isOk:
rc.value.pivotMothball
proc topNumber*(pivotTable: var SnapPivotTable): BlockNumber =
## Return the block number of the top pivot entry, or zero if there is none.
let rc = pivotTable.lastValue
if rc.isOk:
return rc.value.stateHeader.blockNumber
proc reverseUpdate*(
pivotTable: var SnapPivotTable; # Pivot table
header: BlockHeader; # Header to generate new pivot from
ctx: SnapCtxRef; # Some global context
) =
## Activate environment for earlier state root implied by `header` argument.
##
## Note that the pivot table is assumed to be sorted by the block numbers of
## the pivot header.
##
# Append per-state root environment to LRU queue
discard pivotTable.prepend(
header.stateRoot, SnapPivotRef.init(ctx, header))
# Make sure that the LRU table does not grow too big.
if max(3, ctx.buddiesMax) < pivotTable.len:
# Delete second entry rather than the first which might currently
# be needed.
let rc = pivotTable.secondKey
if rc.isOk:
pivotTable.del rc.value
proc tickerStats*(
pivotTable: var SnapPivotTable; # Pivot table
ctx: SnapCtxRef; # Some global context
): TickerSnapStatsUpdater =
## This function returns a function of type `TickerStatsUpdater` that prints
## out pivot table statitics. The returned fuction is supposed to drive
## ticker` module.
proc meanStdDev(sum, sqSum: float; length: int): (float,float) =
if 0 < length:
result[0] = sum / length.float
let
sqSumAv = sqSum / length.float
rSq = result[0] * result[0]
if rSq < sqSumAv:
result[1] = sqrt(sqSum / length.float - result[0] * result[0])
result = proc: TickerSnapStats =
var
aSum, aSqSum, uSum, uSqSum, sSum, sSqSum, cSum, cSqSum: float
count = 0
for kvp in ctx.pool.pivotTable.nextPairs:
# Accounts mean & variance
let aLen = kvp.data.nAccounts.float
if 0 < aLen:
count.inc
aSum += aLen
aSqSum += aLen * aLen
# Fill utilisation mean & variance
let fill = kvp.data.fetchAccounts.processed.fullFactor
uSum += fill
uSqSum += fill * fill
let sLen = kvp.data.nSlotLists.float
sSum += sLen
sSqSum += sLen * sLen
# Lists of missing contracts
let cLen = kvp.data.nContracts.float
cSum += cLen
cSqSum += cLen * cLen
let
env = ctx.pool.pivotTable.lastValue.get(otherwise = nil)
accCoverage = (ctx.pool.coveredAccounts.fullFactor +
ctx.pool.covAccTimesFull.float)
accFill = meanStdDev(uSum, uSqSum, count)
var
beaconBlock = none(BlockNumber)
pivotBlock = none(BlockNumber)
stoQuLen = none(int)
ctraQuLen = none(int)
procChunks = 0
if not env.isNil:
pivotBlock = some(env.stateHeader.blockNumber)
procChunks = env.fetchAccounts.processed.chunks
Snap sync refactor healing (#1397) * Simplify accounts healing threshold management why: Was over-engineered. details: Previously, healing was based on recursive hexary trie perusal. Due to "cheap" envelope decomposition of a range complement for the hexary trie, the cost of running extra laps have become time-affordable again and a simple trigger mechanism for healing will do. * Control number of dangling result nodes in `hexaryInspectTrie()` also: + Returns number of visited nodes available for logging so the maximum number of nodes can be tuned accordingly. + Some code and docu update * Update names of constants why: Declutter, more systematic naming * Re-implemented `worker_desc.merge()` for storage slots why: Provided as proper queue management in `storage_queue_helper`. details: + Several append modes (replaces `merge()`) + Added third queue to record entries currently fetched by a worker. So another parallel running worker can safe the complete set of storage slots in as checkpoint. This was previously lost. * Refactor healing why: Simplify and remove deep hexary trie perusal for finding completeness. Due to "cheap" envelope decomposition of a range complement for the hexary trie, the cost of running extra laps have become time-affordable again and a simple trigger mechanism for healing will do. * Docu update * Run a storage job only once in download loop why: Download failure or rejection (i.e. missing data) lead to repeated fetch requests until peer disconnects, otherwise.
2022-12-24 09:54:18 +00:00
stoQuLen = some(env.storageQueueTotal())
ctraQuLen = some(env.fetchContracts.len)
if 0 < ctx.pool.beaconHeader.blockNumber:
beaconBlock = some(ctx.pool.beaconHeader.blockNumber)
TickerSnapStats(
beaconBlock: beaconBlock,
pivotBlock: pivotBlock,
nQueues: ctx.pool.pivotTable.len,
nAccounts: meanStdDev(aSum, aSqSum, count),
nSlotLists: meanStdDev(sSum, sSqSum, count),
nContracts: meanStdDev(cSum, cSqSum, count),
accountsFill: (accFill[0], accFill[1], accCoverage),
nAccountStats: procChunks,
nStorageQueue: stoQuLen,
nContractQueue: ctraQuLen)
# ------------------------------------------------------------------------------
# Public functions: particular pivot
# ------------------------------------------------------------------------------
proc pivotCompleteOk*(env: SnapPivotRef): bool =
## Returns `true` iff the pivot covers a complete set of accounts ans
## storage slots.
env.fetchAccounts.processed.isFull and
env.storageQueueTotal() == 0 and
env.fetchContracts.len == 0
proc pivotMothball*(env: SnapPivotRef) =
## Clean up most of this argument `env` pivot record and mark it `archived`.
## Note that archived pivots will be checked for swapping in already known
## accounts and storage slots.
env.fetchAccounts.unprocessed.init()
# Simplify storage slots queues by resolving partial slots into full list
for kvp in env.fetchStoragePart.nextPairs:
discard env.fetchStorageFull.append(
kvp.key, SnapSlotsQueueItemRef(acckey: kvp.data.accKey))
env.fetchStoragePart.clear()
# Provide index into `fetchStorageFull`
env.storageAccounts.clear()
for kvp in env.fetchStorageFull.nextPairs:
let rc = env.storageAccounts.insert(kvp.data.accKey.to(NodeTag))
# Note that `rc.isErr` should not exist as accKey => storageRoot
if rc.isOk:
rc.value.data = kvp.key
# Finally, mark that node `archived`
env.archived = true
Snap sync refactor accounts healing (#1392) * Relocated mothballing (i.e. swap-in preparation) logic details: Mothballing was previously tested & started after downloading account ranges in `range_fetch_accounts`. Whenever current download or healing stops because of a pivot change, swap-in preparation is needed (otherwise some storage slots may get lost when swap-in takes place.) Also, `execSnapSyncAction()` has been moved back to `pivot_helper`. * Reorganised source file directories details: Grouped pivot focused modules into `pivot` directory * Renamed `checkNodes`, `sickSubTries` as `nodes.check`, `nodes.missing` why: Both lists are typically used together as pair. Renaming `sickSubTries` reflects moving away from a healing centric view towards a swap-in attitude. * Multi times coverage recording details: Per pivot account ranges are accumulated into coverage range set. This set fill eventually contain a singe range of account hashes [0..2^256] which amounts to 100% capacity. A counter has been added that is incremented whenever max capacity is reached. The accumulated range is then reset to empty. The effect of this setting is that the coverage can be evenly duplicated. So 200% would not accumulate on a particular region. * Update range length comparisons (mod 2^256) why: A range interval can have sizes 1..2^256 as it cannot be empty by definition. The number of points in a range intervals set can have 0..2^256 points. As the scalar range is a residue class modulo 2^256, the residue class 0 means length 2^256 for a range interval, but can be 0 or 2^256 for the number of points in a range intervals set. * Generalised `hexaryEnvelopeDecompose()` details: Compile the complement of the union of some (processed) intervals and express this complement as a list of envelopes of sub-tries. This facility is directly applicable to swap-in book-keeping. * Re-factor `swapIn()` why: Good idea but baloney implementation. The main algorithm is based on the generalised version of `hexaryEnvelopeDecompose()` which has been derived from this implementation. * Refactor `healAccounts()` using `hexaryEnvelopeDecompose()` as main driver why: Previously, the hexary trie was searched recursively for dangling nodes which has a poor worst case performance already when the trie is reasonably populated. The function `hexaryEnvelopeDecompose()` is a magnitude faster because it does not peruse existing sub-tries in order to find missing nodes although result is not fully compatible with the previous function. So recursive search is used in a limited mode only when the decomposer will not deliver a useful result. * Logging & maintenance fixes details: Preparation for abandoning buddy-global healing variables `node`, `resumeCtx`, and `lockTriePerusal`. These variable are trie-perusal centric which will be run on the back burner in favour of `hexaryEnvelopeDecompose()` which is used for accounts healing already.
2022-12-19 21:22:09 +00:00
proc execSnapSyncAction*(
env: SnapPivotRef; # Current pivot environment
buddy: SnapBuddyRef; # Worker peer
) {.async.} =
## Execute a synchronisation run.
let
ctx = buddy.ctx
if env.savedFullPivotOk:
return # no need to do anything
Snap sync refactor accounts healing (#1392) * Relocated mothballing (i.e. swap-in preparation) logic details: Mothballing was previously tested & started after downloading account ranges in `range_fetch_accounts`. Whenever current download or healing stops because of a pivot change, swap-in preparation is needed (otherwise some storage slots may get lost when swap-in takes place.) Also, `execSnapSyncAction()` has been moved back to `pivot_helper`. * Reorganised source file directories details: Grouped pivot focused modules into `pivot` directory * Renamed `checkNodes`, `sickSubTries` as `nodes.check`, `nodes.missing` why: Both lists are typically used together as pair. Renaming `sickSubTries` reflects moving away from a healing centric view towards a swap-in attitude. * Multi times coverage recording details: Per pivot account ranges are accumulated into coverage range set. This set fill eventually contain a singe range of account hashes [0..2^256] which amounts to 100% capacity. A counter has been added that is incremented whenever max capacity is reached. The accumulated range is then reset to empty. The effect of this setting is that the coverage can be evenly duplicated. So 200% would not accumulate on a particular region. * Update range length comparisons (mod 2^256) why: A range interval can have sizes 1..2^256 as it cannot be empty by definition. The number of points in a range intervals set can have 0..2^256 points. As the scalar range is a residue class modulo 2^256, the residue class 0 means length 2^256 for a range interval, but can be 0 or 2^256 for the number of points in a range intervals set. * Generalised `hexaryEnvelopeDecompose()` details: Compile the complement of the union of some (processed) intervals and express this complement as a list of envelopes of sub-tries. This facility is directly applicable to swap-in book-keeping. * Re-factor `swapIn()` why: Good idea but baloney implementation. The main algorithm is based on the generalised version of `hexaryEnvelopeDecompose()` which has been derived from this implementation. * Refactor `healAccounts()` using `hexaryEnvelopeDecompose()` as main driver why: Previously, the hexary trie was searched recursively for dangling nodes which has a poor worst case performance already when the trie is reasonably populated. The function `hexaryEnvelopeDecompose()` is a magnitude faster because it does not peruse existing sub-tries in order to find missing nodes although result is not fully compatible with the previous function. So recursive search is used in a limited mode only when the decomposer will not deliver a useful result. * Logging & maintenance fixes details: Preparation for abandoning buddy-global healing variables `node`, `resumeCtx`, and `lockTriePerusal`. These variable are trie-perusal centric which will be run on the back burner in favour of `hexaryEnvelopeDecompose()` which is used for accounts healing already.
2022-12-19 21:22:09 +00:00
block:
# Clean up storage slots queue and contracts first it becomes too large
if storageSlotsQuPrioThresh < env.storageQueueAvail():
Snap sync refactor accounts healing (#1392) * Relocated mothballing (i.e. swap-in preparation) logic details: Mothballing was previously tested & started after downloading account ranges in `range_fetch_accounts`. Whenever current download or healing stops because of a pivot change, swap-in preparation is needed (otherwise some storage slots may get lost when swap-in takes place.) Also, `execSnapSyncAction()` has been moved back to `pivot_helper`. * Reorganised source file directories details: Grouped pivot focused modules into `pivot` directory * Renamed `checkNodes`, `sickSubTries` as `nodes.check`, `nodes.missing` why: Both lists are typically used together as pair. Renaming `sickSubTries` reflects moving away from a healing centric view towards a swap-in attitude. * Multi times coverage recording details: Per pivot account ranges are accumulated into coverage range set. This set fill eventually contain a singe range of account hashes [0..2^256] which amounts to 100% capacity. A counter has been added that is incremented whenever max capacity is reached. The accumulated range is then reset to empty. The effect of this setting is that the coverage can be evenly duplicated. So 200% would not accumulate on a particular region. * Update range length comparisons (mod 2^256) why: A range interval can have sizes 1..2^256 as it cannot be empty by definition. The number of points in a range intervals set can have 0..2^256 points. As the scalar range is a residue class modulo 2^256, the residue class 0 means length 2^256 for a range interval, but can be 0 or 2^256 for the number of points in a range intervals set. * Generalised `hexaryEnvelopeDecompose()` details: Compile the complement of the union of some (processed) intervals and express this complement as a list of envelopes of sub-tries. This facility is directly applicable to swap-in book-keeping. * Re-factor `swapIn()` why: Good idea but baloney implementation. The main algorithm is based on the generalised version of `hexaryEnvelopeDecompose()` which has been derived from this implementation. * Refactor `healAccounts()` using `hexaryEnvelopeDecompose()` as main driver why: Previously, the hexary trie was searched recursively for dangling nodes which has a poor worst case performance already when the trie is reasonably populated. The function `hexaryEnvelopeDecompose()` is a magnitude faster because it does not peruse existing sub-tries in order to find missing nodes although result is not fully compatible with the previous function. So recursive search is used in a limited mode only when the decomposer will not deliver a useful result. * Logging & maintenance fixes details: Preparation for abandoning buddy-global healing variables `node`, `resumeCtx`, and `lockTriePerusal`. These variable are trie-perusal centric which will be run on the back burner in favour of `hexaryEnvelopeDecompose()` which is used for accounts healing already.
2022-12-19 21:22:09 +00:00
await buddy.rangeFetchStorageSlots(env)
if buddy.ctrl.stopped or env.archived:
return
if contractsQuPrioThresh < env.fetchContracts.len:
await buddy.rangeFetchContracts(env)
if buddy.ctrl.stopped or env.archived:
return
Snap sync refactor accounts healing (#1392) * Relocated mothballing (i.e. swap-in preparation) logic details: Mothballing was previously tested & started after downloading account ranges in `range_fetch_accounts`. Whenever current download or healing stops because of a pivot change, swap-in preparation is needed (otherwise some storage slots may get lost when swap-in takes place.) Also, `execSnapSyncAction()` has been moved back to `pivot_helper`. * Reorganised source file directories details: Grouped pivot focused modules into `pivot` directory * Renamed `checkNodes`, `sickSubTries` as `nodes.check`, `nodes.missing` why: Both lists are typically used together as pair. Renaming `sickSubTries` reflects moving away from a healing centric view towards a swap-in attitude. * Multi times coverage recording details: Per pivot account ranges are accumulated into coverage range set. This set fill eventually contain a singe range of account hashes [0..2^256] which amounts to 100% capacity. A counter has been added that is incremented whenever max capacity is reached. The accumulated range is then reset to empty. The effect of this setting is that the coverage can be evenly duplicated. So 200% would not accumulate on a particular region. * Update range length comparisons (mod 2^256) why: A range interval can have sizes 1..2^256 as it cannot be empty by definition. The number of points in a range intervals set can have 0..2^256 points. As the scalar range is a residue class modulo 2^256, the residue class 0 means length 2^256 for a range interval, but can be 0 or 2^256 for the number of points in a range intervals set. * Generalised `hexaryEnvelopeDecompose()` details: Compile the complement of the union of some (processed) intervals and express this complement as a list of envelopes of sub-tries. This facility is directly applicable to swap-in book-keeping. * Re-factor `swapIn()` why: Good idea but baloney implementation. The main algorithm is based on the generalised version of `hexaryEnvelopeDecompose()` which has been derived from this implementation. * Refactor `healAccounts()` using `hexaryEnvelopeDecompose()` as main driver why: Previously, the hexary trie was searched recursively for dangling nodes which has a poor worst case performance already when the trie is reasonably populated. The function `hexaryEnvelopeDecompose()` is a magnitude faster because it does not peruse existing sub-tries in order to find missing nodes although result is not fully compatible with the previous function. So recursive search is used in a limited mode only when the decomposer will not deliver a useful result. * Logging & maintenance fixes details: Preparation for abandoning buddy-global healing variables `node`, `resumeCtx`, and `lockTriePerusal`. These variable are trie-perusal centric which will be run on the back burner in favour of `hexaryEnvelopeDecompose()` which is used for accounts healing already.
2022-12-19 21:22:09 +00:00
var rangeFetchOk = true
Snap sync refactor accounts healing (#1392) * Relocated mothballing (i.e. swap-in preparation) logic details: Mothballing was previously tested & started after downloading account ranges in `range_fetch_accounts`. Whenever current download or healing stops because of a pivot change, swap-in preparation is needed (otherwise some storage slots may get lost when swap-in takes place.) Also, `execSnapSyncAction()` has been moved back to `pivot_helper`. * Reorganised source file directories details: Grouped pivot focused modules into `pivot` directory * Renamed `checkNodes`, `sickSubTries` as `nodes.check`, `nodes.missing` why: Both lists are typically used together as pair. Renaming `sickSubTries` reflects moving away from a healing centric view towards a swap-in attitude. * Multi times coverage recording details: Per pivot account ranges are accumulated into coverage range set. This set fill eventually contain a singe range of account hashes [0..2^256] which amounts to 100% capacity. A counter has been added that is incremented whenever max capacity is reached. The accumulated range is then reset to empty. The effect of this setting is that the coverage can be evenly duplicated. So 200% would not accumulate on a particular region. * Update range length comparisons (mod 2^256) why: A range interval can have sizes 1..2^256 as it cannot be empty by definition. The number of points in a range intervals set can have 0..2^256 points. As the scalar range is a residue class modulo 2^256, the residue class 0 means length 2^256 for a range interval, but can be 0 or 2^256 for the number of points in a range intervals set. * Generalised `hexaryEnvelopeDecompose()` details: Compile the complement of the union of some (processed) intervals and express this complement as a list of envelopes of sub-tries. This facility is directly applicable to swap-in book-keeping. * Re-factor `swapIn()` why: Good idea but baloney implementation. The main algorithm is based on the generalised version of `hexaryEnvelopeDecompose()` which has been derived from this implementation. * Refactor `healAccounts()` using `hexaryEnvelopeDecompose()` as main driver why: Previously, the hexary trie was searched recursively for dangling nodes which has a poor worst case performance already when the trie is reasonably populated. The function `hexaryEnvelopeDecompose()` is a magnitude faster because it does not peruse existing sub-tries in order to find missing nodes although result is not fully compatible with the previous function. So recursive search is used in a limited mode only when the decomposer will not deliver a useful result. * Logging & maintenance fixes details: Preparation for abandoning buddy-global healing variables `node`, `resumeCtx`, and `lockTriePerusal`. These variable are trie-perusal centric which will be run on the back burner in favour of `hexaryEnvelopeDecompose()` which is used for accounts healing already.
2022-12-19 21:22:09 +00:00
if not env.fetchAccounts.processed.isFull:
await buddy.rangeFetchAccounts(env)
Snap sync refactor healing (#1397) * Simplify accounts healing threshold management why: Was over-engineered. details: Previously, healing was based on recursive hexary trie perusal. Due to "cheap" envelope decomposition of a range complement for the hexary trie, the cost of running extra laps have become time-affordable again and a simple trigger mechanism for healing will do. * Control number of dangling result nodes in `hexaryInspectTrie()` also: + Returns number of visited nodes available for logging so the maximum number of nodes can be tuned accordingly. + Some code and docu update * Update names of constants why: Declutter, more systematic naming * Re-implemented `worker_desc.merge()` for storage slots why: Provided as proper queue management in `storage_queue_helper`. details: + Several append modes (replaces `merge()`) + Added third queue to record entries currently fetched by a worker. So another parallel running worker can safe the complete set of storage slots in as checkpoint. This was previously lost. * Refactor healing why: Simplify and remove deep hexary trie perusal for finding completeness. Due to "cheap" envelope decomposition of a range complement for the hexary trie, the cost of running extra laps have become time-affordable again and a simple trigger mechanism for healing will do. * Docu update * Run a storage job only once in download loop why: Download failure or rejection (i.e. missing data) lead to repeated fetch requests until peer disconnects, otherwise.
2022-12-24 09:54:18 +00:00
# Update 100% accounting
ctx.pivotAccountsCoverage100PcRollOver()
Snap sync refactor healing (#1397) * Simplify accounts healing threshold management why: Was over-engineered. details: Previously, healing was based on recursive hexary trie perusal. Due to "cheap" envelope decomposition of a range complement for the hexary trie, the cost of running extra laps have become time-affordable again and a simple trigger mechanism for healing will do. * Control number of dangling result nodes in `hexaryInspectTrie()` also: + Returns number of visited nodes available for logging so the maximum number of nodes can be tuned accordingly. + Some code and docu update * Update names of constants why: Declutter, more systematic naming * Re-implemented `worker_desc.merge()` for storage slots why: Provided as proper queue management in `storage_queue_helper`. details: + Several append modes (replaces `merge()`) + Added third queue to record entries currently fetched by a worker. So another parallel running worker can safe the complete set of storage slots in as checkpoint. This was previously lost. * Refactor healing why: Simplify and remove deep hexary trie perusal for finding completeness. Due to "cheap" envelope decomposition of a range complement for the hexary trie, the cost of running extra laps have become time-affordable again and a simple trigger mechanism for healing will do. * Docu update * Run a storage job only once in download loop why: Download failure or rejection (i.e. missing data) lead to repeated fetch requests until peer disconnects, otherwise.
2022-12-24 09:54:18 +00:00
# Run at least one round fetching storage slosts and contracts even if
# the `archived` flag is set in order to keep the batch queue small.
if buddy.ctrl.running:
Snap sync refactor accounts healing (#1392) * Relocated mothballing (i.e. swap-in preparation) logic details: Mothballing was previously tested & started after downloading account ranges in `range_fetch_accounts`. Whenever current download or healing stops because of a pivot change, swap-in preparation is needed (otherwise some storage slots may get lost when swap-in takes place.) Also, `execSnapSyncAction()` has been moved back to `pivot_helper`. * Reorganised source file directories details: Grouped pivot focused modules into `pivot` directory * Renamed `checkNodes`, `sickSubTries` as `nodes.check`, `nodes.missing` why: Both lists are typically used together as pair. Renaming `sickSubTries` reflects moving away from a healing centric view towards a swap-in attitude. * Multi times coverage recording details: Per pivot account ranges are accumulated into coverage range set. This set fill eventually contain a singe range of account hashes [0..2^256] which amounts to 100% capacity. A counter has been added that is incremented whenever max capacity is reached. The accumulated range is then reset to empty. The effect of this setting is that the coverage can be evenly duplicated. So 200% would not accumulate on a particular region. * Update range length comparisons (mod 2^256) why: A range interval can have sizes 1..2^256 as it cannot be empty by definition. The number of points in a range intervals set can have 0..2^256 points. As the scalar range is a residue class modulo 2^256, the residue class 0 means length 2^256 for a range interval, but can be 0 or 2^256 for the number of points in a range intervals set. * Generalised `hexaryEnvelopeDecompose()` details: Compile the complement of the union of some (processed) intervals and express this complement as a list of envelopes of sub-tries. This facility is directly applicable to swap-in book-keeping. * Re-factor `swapIn()` why: Good idea but baloney implementation. The main algorithm is based on the generalised version of `hexaryEnvelopeDecompose()` which has been derived from this implementation. * Refactor `healAccounts()` using `hexaryEnvelopeDecompose()` as main driver why: Previously, the hexary trie was searched recursively for dangling nodes which has a poor worst case performance already when the trie is reasonably populated. The function `hexaryEnvelopeDecompose()` is a magnitude faster because it does not peruse existing sub-tries in order to find missing nodes although result is not fully compatible with the previous function. So recursive search is used in a limited mode only when the decomposer will not deliver a useful result. * Logging & maintenance fixes details: Preparation for abandoning buddy-global healing variables `node`, `resumeCtx`, and `lockTriePerusal`. These variable are trie-perusal centric which will be run on the back burner in favour of `hexaryEnvelopeDecompose()` which is used for accounts healing already.
2022-12-19 21:22:09 +00:00
await buddy.rangeFetchStorageSlots(env)
await buddy.rangeFetchContracts(env)
else:
rangeFetchOk = false
if env.archived or (buddy.ctrl.zombie and buddy.only.errors.peerDegraded):
Snap sync refactor accounts healing (#1392) * Relocated mothballing (i.e. swap-in preparation) logic details: Mothballing was previously tested & started after downloading account ranges in `range_fetch_accounts`. Whenever current download or healing stops because of a pivot change, swap-in preparation is needed (otherwise some storage slots may get lost when swap-in takes place.) Also, `execSnapSyncAction()` has been moved back to `pivot_helper`. * Reorganised source file directories details: Grouped pivot focused modules into `pivot` directory * Renamed `checkNodes`, `sickSubTries` as `nodes.check`, `nodes.missing` why: Both lists are typically used together as pair. Renaming `sickSubTries` reflects moving away from a healing centric view towards a swap-in attitude. * Multi times coverage recording details: Per pivot account ranges are accumulated into coverage range set. This set fill eventually contain a singe range of account hashes [0..2^256] which amounts to 100% capacity. A counter has been added that is incremented whenever max capacity is reached. The accumulated range is then reset to empty. The effect of this setting is that the coverage can be evenly duplicated. So 200% would not accumulate on a particular region. * Update range length comparisons (mod 2^256) why: A range interval can have sizes 1..2^256 as it cannot be empty by definition. The number of points in a range intervals set can have 0..2^256 points. As the scalar range is a residue class modulo 2^256, the residue class 0 means length 2^256 for a range interval, but can be 0 or 2^256 for the number of points in a range intervals set. * Generalised `hexaryEnvelopeDecompose()` details: Compile the complement of the union of some (processed) intervals and express this complement as a list of envelopes of sub-tries. This facility is directly applicable to swap-in book-keeping. * Re-factor `swapIn()` why: Good idea but baloney implementation. The main algorithm is based on the generalised version of `hexaryEnvelopeDecompose()` which has been derived from this implementation. * Refactor `healAccounts()` using `hexaryEnvelopeDecompose()` as main driver why: Previously, the hexary trie was searched recursively for dangling nodes which has a poor worst case performance already when the trie is reasonably populated. The function `hexaryEnvelopeDecompose()` is a magnitude faster because it does not peruse existing sub-tries in order to find missing nodes although result is not fully compatible with the previous function. So recursive search is used in a limited mode only when the decomposer will not deliver a useful result. * Logging & maintenance fixes details: Preparation for abandoning buddy-global healing variables `node`, `resumeCtx`, and `lockTriePerusal`. These variable are trie-perusal centric which will be run on the back burner in favour of `hexaryEnvelopeDecompose()` which is used for accounts healing already.
2022-12-19 21:22:09 +00:00
return
# Uncconditonally try healing if enabled.
Snap sync refactor healing (#1397) * Simplify accounts healing threshold management why: Was over-engineered. details: Previously, healing was based on recursive hexary trie perusal. Due to "cheap" envelope decomposition of a range complement for the hexary trie, the cost of running extra laps have become time-affordable again and a simple trigger mechanism for healing will do. * Control number of dangling result nodes in `hexaryInspectTrie()` also: + Returns number of visited nodes available for logging so the maximum number of nodes can be tuned accordingly. + Some code and docu update * Update names of constants why: Declutter, more systematic naming * Re-implemented `worker_desc.merge()` for storage slots why: Provided as proper queue management in `storage_queue_helper`. details: + Several append modes (replaces `merge()`) + Added third queue to record entries currently fetched by a worker. So another parallel running worker can safe the complete set of storage slots in as checkpoint. This was previously lost. * Refactor healing why: Simplify and remove deep hexary trie perusal for finding completeness. Due to "cheap" envelope decomposition of a range complement for the hexary trie, the cost of running extra laps have become time-affordable again and a simple trigger mechanism for healing will do. * Docu update * Run a storage job only once in download loop why: Download failure or rejection (i.e. missing data) lead to repeated fetch requests until peer disconnects, otherwise.
2022-12-24 09:54:18 +00:00
if env.accountsHealingOk(ctx):
# Let this procedure decide whether to ditch this peer (if any.) The idea
# is that the healing process might address different peer ressources
# than the fetch procedure. So that peer might still be useful unless
# physically disconnected.
buddy.ctrl.forceRun = true
Snap sync refactor accounts healing (#1392) * Relocated mothballing (i.e. swap-in preparation) logic details: Mothballing was previously tested & started after downloading account ranges in `range_fetch_accounts`. Whenever current download or healing stops because of a pivot change, swap-in preparation is needed (otherwise some storage slots may get lost when swap-in takes place.) Also, `execSnapSyncAction()` has been moved back to `pivot_helper`. * Reorganised source file directories details: Grouped pivot focused modules into `pivot` directory * Renamed `checkNodes`, `sickSubTries` as `nodes.check`, `nodes.missing` why: Both lists are typically used together as pair. Renaming `sickSubTries` reflects moving away from a healing centric view towards a swap-in attitude. * Multi times coverage recording details: Per pivot account ranges are accumulated into coverage range set. This set fill eventually contain a singe range of account hashes [0..2^256] which amounts to 100% capacity. A counter has been added that is incremented whenever max capacity is reached. The accumulated range is then reset to empty. The effect of this setting is that the coverage can be evenly duplicated. So 200% would not accumulate on a particular region. * Update range length comparisons (mod 2^256) why: A range interval can have sizes 1..2^256 as it cannot be empty by definition. The number of points in a range intervals set can have 0..2^256 points. As the scalar range is a residue class modulo 2^256, the residue class 0 means length 2^256 for a range interval, but can be 0 or 2^256 for the number of points in a range intervals set. * Generalised `hexaryEnvelopeDecompose()` details: Compile the complement of the union of some (processed) intervals and express this complement as a list of envelopes of sub-tries. This facility is directly applicable to swap-in book-keeping. * Re-factor `swapIn()` why: Good idea but baloney implementation. The main algorithm is based on the generalised version of `hexaryEnvelopeDecompose()` which has been derived from this implementation. * Refactor `healAccounts()` using `hexaryEnvelopeDecompose()` as main driver why: Previously, the hexary trie was searched recursively for dangling nodes which has a poor worst case performance already when the trie is reasonably populated. The function `hexaryEnvelopeDecompose()` is a magnitude faster because it does not peruse existing sub-tries in order to find missing nodes although result is not fully compatible with the previous function. So recursive search is used in a limited mode only when the decomposer will not deliver a useful result. * Logging & maintenance fixes details: Preparation for abandoning buddy-global healing variables `node`, `resumeCtx`, and `lockTriePerusal`. These variable are trie-perusal centric which will be run on the back burner in favour of `hexaryEnvelopeDecompose()` which is used for accounts healing already.
2022-12-19 21:22:09 +00:00
await buddy.healAccounts(env)
if env.archived or (buddy.ctrl.zombie and buddy.only.errors.peerDegraded):
Snap sync refactor accounts healing (#1392) * Relocated mothballing (i.e. swap-in preparation) logic details: Mothballing was previously tested & started after downloading account ranges in `range_fetch_accounts`. Whenever current download or healing stops because of a pivot change, swap-in preparation is needed (otherwise some storage slots may get lost when swap-in takes place.) Also, `execSnapSyncAction()` has been moved back to `pivot_helper`. * Reorganised source file directories details: Grouped pivot focused modules into `pivot` directory * Renamed `checkNodes`, `sickSubTries` as `nodes.check`, `nodes.missing` why: Both lists are typically used together as pair. Renaming `sickSubTries` reflects moving away from a healing centric view towards a swap-in attitude. * Multi times coverage recording details: Per pivot account ranges are accumulated into coverage range set. This set fill eventually contain a singe range of account hashes [0..2^256] which amounts to 100% capacity. A counter has been added that is incremented whenever max capacity is reached. The accumulated range is then reset to empty. The effect of this setting is that the coverage can be evenly duplicated. So 200% would not accumulate on a particular region. * Update range length comparisons (mod 2^256) why: A range interval can have sizes 1..2^256 as it cannot be empty by definition. The number of points in a range intervals set can have 0..2^256 points. As the scalar range is a residue class modulo 2^256, the residue class 0 means length 2^256 for a range interval, but can be 0 or 2^256 for the number of points in a range intervals set. * Generalised `hexaryEnvelopeDecompose()` details: Compile the complement of the union of some (processed) intervals and express this complement as a list of envelopes of sub-tries. This facility is directly applicable to swap-in book-keeping. * Re-factor `swapIn()` why: Good idea but baloney implementation. The main algorithm is based on the generalised version of `hexaryEnvelopeDecompose()` which has been derived from this implementation. * Refactor `healAccounts()` using `hexaryEnvelopeDecompose()` as main driver why: Previously, the hexary trie was searched recursively for dangling nodes which has a poor worst case performance already when the trie is reasonably populated. The function `hexaryEnvelopeDecompose()` is a magnitude faster because it does not peruse existing sub-tries in order to find missing nodes although result is not fully compatible with the previous function. So recursive search is used in a limited mode only when the decomposer will not deliver a useful result. * Logging & maintenance fixes details: Preparation for abandoning buddy-global healing variables `node`, `resumeCtx`, and `lockTriePerusal`. These variable are trie-perusal centric which will be run on the back burner in favour of `hexaryEnvelopeDecompose()` which is used for accounts healing already.
2022-12-19 21:22:09 +00:00
return
# Some additional storage slots and contracts might have been popped up
if rangeFetchOk:
await buddy.rangeFetchStorageSlots(env)
await buddy.rangeFetchContracts(env)
if env.archived:
return
Snap sync refactor accounts healing (#1392) * Relocated mothballing (i.e. swap-in preparation) logic details: Mothballing was previously tested & started after downloading account ranges in `range_fetch_accounts`. Whenever current download or healing stops because of a pivot change, swap-in preparation is needed (otherwise some storage slots may get lost when swap-in takes place.) Also, `execSnapSyncAction()` has been moved back to `pivot_helper`. * Reorganised source file directories details: Grouped pivot focused modules into `pivot` directory * Renamed `checkNodes`, `sickSubTries` as `nodes.check`, `nodes.missing` why: Both lists are typically used together as pair. Renaming `sickSubTries` reflects moving away from a healing centric view towards a swap-in attitude. * Multi times coverage recording details: Per pivot account ranges are accumulated into coverage range set. This set fill eventually contain a singe range of account hashes [0..2^256] which amounts to 100% capacity. A counter has been added that is incremented whenever max capacity is reached. The accumulated range is then reset to empty. The effect of this setting is that the coverage can be evenly duplicated. So 200% would not accumulate on a particular region. * Update range length comparisons (mod 2^256) why: A range interval can have sizes 1..2^256 as it cannot be empty by definition. The number of points in a range intervals set can have 0..2^256 points. As the scalar range is a residue class modulo 2^256, the residue class 0 means length 2^256 for a range interval, but can be 0 or 2^256 for the number of points in a range intervals set. * Generalised `hexaryEnvelopeDecompose()` details: Compile the complement of the union of some (processed) intervals and express this complement as a list of envelopes of sub-tries. This facility is directly applicable to swap-in book-keeping. * Re-factor `swapIn()` why: Good idea but baloney implementation. The main algorithm is based on the generalised version of `hexaryEnvelopeDecompose()` which has been derived from this implementation. * Refactor `healAccounts()` using `hexaryEnvelopeDecompose()` as main driver why: Previously, the hexary trie was searched recursively for dangling nodes which has a poor worst case performance already when the trie is reasonably populated. The function `hexaryEnvelopeDecompose()` is a magnitude faster because it does not peruse existing sub-tries in order to find missing nodes although result is not fully compatible with the previous function. So recursive search is used in a limited mode only when the decomposer will not deliver a useful result. * Logging & maintenance fixes details: Preparation for abandoning buddy-global healing variables `node`, `resumeCtx`, and `lockTriePerusal`. These variable are trie-perusal centric which will be run on the back burner in favour of `hexaryEnvelopeDecompose()` which is used for accounts healing already.
2022-12-19 21:22:09 +00:00
# Don't bother with storage slots healing before accounts healing takes
# place. This saves communication bandwidth. The pivot might change soon,
# anyway.
Snap sync refactor healing (#1397) * Simplify accounts healing threshold management why: Was over-engineered. details: Previously, healing was based on recursive hexary trie perusal. Due to "cheap" envelope decomposition of a range complement for the hexary trie, the cost of running extra laps have become time-affordable again and a simple trigger mechanism for healing will do. * Control number of dangling result nodes in `hexaryInspectTrie()` also: + Returns number of visited nodes available for logging so the maximum number of nodes can be tuned accordingly. + Some code and docu update * Update names of constants why: Declutter, more systematic naming * Re-implemented `worker_desc.merge()` for storage slots why: Provided as proper queue management in `storage_queue_helper`. details: + Several append modes (replaces `merge()`) + Added third queue to record entries currently fetched by a worker. So another parallel running worker can safe the complete set of storage slots in as checkpoint. This was previously lost. * Refactor healing why: Simplify and remove deep hexary trie perusal for finding completeness. Due to "cheap" envelope decomposition of a range complement for the hexary trie, the cost of running extra laps have become time-affordable again and a simple trigger mechanism for healing will do. * Docu update * Run a storage job only once in download loop why: Download failure or rejection (i.e. missing data) lead to repeated fetch requests until peer disconnects, otherwise.
2022-12-24 09:54:18 +00:00
if env.accountsHealingOk(ctx):
buddy.ctrl.forceRun = true
Snap sync refactor accounts healing (#1392) * Relocated mothballing (i.e. swap-in preparation) logic details: Mothballing was previously tested & started after downloading account ranges in `range_fetch_accounts`. Whenever current download or healing stops because of a pivot change, swap-in preparation is needed (otherwise some storage slots may get lost when swap-in takes place.) Also, `execSnapSyncAction()` has been moved back to `pivot_helper`. * Reorganised source file directories details: Grouped pivot focused modules into `pivot` directory * Renamed `checkNodes`, `sickSubTries` as `nodes.check`, `nodes.missing` why: Both lists are typically used together as pair. Renaming `sickSubTries` reflects moving away from a healing centric view towards a swap-in attitude. * Multi times coverage recording details: Per pivot account ranges are accumulated into coverage range set. This set fill eventually contain a singe range of account hashes [0..2^256] which amounts to 100% capacity. A counter has been added that is incremented whenever max capacity is reached. The accumulated range is then reset to empty. The effect of this setting is that the coverage can be evenly duplicated. So 200% would not accumulate on a particular region. * Update range length comparisons (mod 2^256) why: A range interval can have sizes 1..2^256 as it cannot be empty by definition. The number of points in a range intervals set can have 0..2^256 points. As the scalar range is a residue class modulo 2^256, the residue class 0 means length 2^256 for a range interval, but can be 0 or 2^256 for the number of points in a range intervals set. * Generalised `hexaryEnvelopeDecompose()` details: Compile the complement of the union of some (processed) intervals and express this complement as a list of envelopes of sub-tries. This facility is directly applicable to swap-in book-keeping. * Re-factor `swapIn()` why: Good idea but baloney implementation. The main algorithm is based on the generalised version of `hexaryEnvelopeDecompose()` which has been derived from this implementation. * Refactor `healAccounts()` using `hexaryEnvelopeDecompose()` as main driver why: Previously, the hexary trie was searched recursively for dangling nodes which has a poor worst case performance already when the trie is reasonably populated. The function `hexaryEnvelopeDecompose()` is a magnitude faster because it does not peruse existing sub-tries in order to find missing nodes although result is not fully compatible with the previous function. So recursive search is used in a limited mode only when the decomposer will not deliver a useful result. * Logging & maintenance fixes details: Preparation for abandoning buddy-global healing variables `node`, `resumeCtx`, and `lockTriePerusal`. These variable are trie-perusal centric which will be run on the back burner in favour of `hexaryEnvelopeDecompose()` which is used for accounts healing already.
2022-12-19 21:22:09 +00:00
await buddy.healStorageSlots(env)
proc saveCheckpoint*(
env: SnapPivotRef; # Current pivot environment
ctx: SnapCtxRef; # Some global context
): Result[int,HexaryError] =
## Save current sync admin data. On success, the size of the data record
## saved is returned (e.g. for logging.)
##
if env.savedFullPivotOk:
return ok(0) # no need to do anything
let fa = env.fetchAccounts
if fa.processed.isEmpty:
return err(NoAccountsYet)
if saveAccountsProcessedChunksMax < fa.processed.chunks:
return err(TooManyChunksInAccountsQueue)
if saveStorageSlotsMax < env.storageQueueTotal():
return err(TooManyQueuedStorageSlots)
if saveContactsMax < env.fetchContracts.len:
return err(TooManyQueuedContracts)
result = ctx.pool.snapDb.pivotSaveDB SnapDbPivotRegistry(
header: env.stateHeader,
nAccounts: env.nAccounts,
nSlotLists: env.nSlotLists,
processed: toSeq(env.fetchAccounts.processed.increasing)
.mapIt((it.minPt,it.maxPt)),
slotAccounts: (toSeq(env.fetchStorageFull.nextKeys) &
Snap sync refactor healing (#1397) * Simplify accounts healing threshold management why: Was over-engineered. details: Previously, healing was based on recursive hexary trie perusal. Due to "cheap" envelope decomposition of a range complement for the hexary trie, the cost of running extra laps have become time-affordable again and a simple trigger mechanism for healing will do. * Control number of dangling result nodes in `hexaryInspectTrie()` also: + Returns number of visited nodes available for logging so the maximum number of nodes can be tuned accordingly. + Some code and docu update * Update names of constants why: Declutter, more systematic naming * Re-implemented `worker_desc.merge()` for storage slots why: Provided as proper queue management in `storage_queue_helper`. details: + Several append modes (replaces `merge()`) + Added third queue to record entries currently fetched by a worker. So another parallel running worker can safe the complete set of storage slots in as checkpoint. This was previously lost. * Refactor healing why: Simplify and remove deep hexary trie perusal for finding completeness. Due to "cheap" envelope decomposition of a range complement for the hexary trie, the cost of running extra laps have become time-affordable again and a simple trigger mechanism for healing will do. * Docu update * Run a storage job only once in download loop why: Download failure or rejection (i.e. missing data) lead to repeated fetch requests until peer disconnects, otherwise.
2022-12-24 09:54:18 +00:00
toSeq(env.fetchStoragePart.nextKeys)).mapIt(it.to(NodeKey)) &
toSeq(env.parkedStorage.items),
ctraAccounts: (toSeq(env.fetchContracts.nextValues)))
if result.isOk and env.pivotCompleteOk():
env.savedFullPivotOk = true
proc pivotRecoverFromCheckpoint*(
env: SnapPivotRef; # Current pivot environment
ctx: SnapCtxRef; # Global context (containing save state)
topLevel: bool; # Full data set on top level only
) =
## Recover some pivot variables and global list `coveredAccounts` from
## checkpoint data. If the argument `toplevel` is set `true`, also the
## `processed`, `unprocessed`, and the `fetchStorageFull` lists are
## initialised.
##
let recov = ctx.pool.recovery
if recov.isNil:
return
env.nAccounts = recov.state.nAccounts
env.nSlotLists = recov.state.nSlotLists
# Import processed interval
for (minPt,maxPt) in recov.state.processed:
if topLevel:
env.fetchAccounts.unprocessed.reduce NodeTagRange.new(minPt, maxPt)
discard env.fetchAccounts.processed.merge(minPt, maxPt)
discard ctx.pool.coveredAccounts.merge(minPt, maxPt)
ctx.pivotAccountsCoverage100PcRollOver() # update coverage level roll over
# Handle storage slots
let stateRoot = recov.state.header.stateRoot
for w in recov.state.slotAccounts:
Snap sync refactor accounts healing (#1392) * Relocated mothballing (i.e. swap-in preparation) logic details: Mothballing was previously tested & started after downloading account ranges in `range_fetch_accounts`. Whenever current download or healing stops because of a pivot change, swap-in preparation is needed (otherwise some storage slots may get lost when swap-in takes place.) Also, `execSnapSyncAction()` has been moved back to `pivot_helper`. * Reorganised source file directories details: Grouped pivot focused modules into `pivot` directory * Renamed `checkNodes`, `sickSubTries` as `nodes.check`, `nodes.missing` why: Both lists are typically used together as pair. Renaming `sickSubTries` reflects moving away from a healing centric view towards a swap-in attitude. * Multi times coverage recording details: Per pivot account ranges are accumulated into coverage range set. This set fill eventually contain a singe range of account hashes [0..2^256] which amounts to 100% capacity. A counter has been added that is incremented whenever max capacity is reached. The accumulated range is then reset to empty. The effect of this setting is that the coverage can be evenly duplicated. So 200% would not accumulate on a particular region. * Update range length comparisons (mod 2^256) why: A range interval can have sizes 1..2^256 as it cannot be empty by definition. The number of points in a range intervals set can have 0..2^256 points. As the scalar range is a residue class modulo 2^256, the residue class 0 means length 2^256 for a range interval, but can be 0 or 2^256 for the number of points in a range intervals set. * Generalised `hexaryEnvelopeDecompose()` details: Compile the complement of the union of some (processed) intervals and express this complement as a list of envelopes of sub-tries. This facility is directly applicable to swap-in book-keeping. * Re-factor `swapIn()` why: Good idea but baloney implementation. The main algorithm is based on the generalised version of `hexaryEnvelopeDecompose()` which has been derived from this implementation. * Refactor `healAccounts()` using `hexaryEnvelopeDecompose()` as main driver why: Previously, the hexary trie was searched recursively for dangling nodes which has a poor worst case performance already when the trie is reasonably populated. The function `hexaryEnvelopeDecompose()` is a magnitude faster because it does not peruse existing sub-tries in order to find missing nodes although result is not fully compatible with the previous function. So recursive search is used in a limited mode only when the decomposer will not deliver a useful result. * Logging & maintenance fixes details: Preparation for abandoning buddy-global healing variables `node`, `resumeCtx`, and `lockTriePerusal`. These variable are trie-perusal centric which will be run on the back burner in favour of `hexaryEnvelopeDecompose()` which is used for accounts healing already.
2022-12-19 21:22:09 +00:00
let pt = NodeTagRange.new(w.to(NodeTag),w.to(NodeTag)) # => `pt.len == 1`
if 0 < env.fetchAccounts.processed.covered(pt):
# Ignoring slots that have accounts to be downloaded, anyway
let rc = ctx.pool.snapDb.getAccountsData(stateRoot, w)
if rc.isErr:
# Oops, how did that account get lost?
discard env.fetchAccounts.processed.reduce pt
env.fetchAccounts.unprocessed.merge pt
elif rc.value.storageRoot != EMPTY_ROOT_HASH:
Snap sync refactor healing (#1397) * Simplify accounts healing threshold management why: Was over-engineered. details: Previously, healing was based on recursive hexary trie perusal. Due to "cheap" envelope decomposition of a range complement for the hexary trie, the cost of running extra laps have become time-affordable again and a simple trigger mechanism for healing will do. * Control number of dangling result nodes in `hexaryInspectTrie()` also: + Returns number of visited nodes available for logging so the maximum number of nodes can be tuned accordingly. + Some code and docu update * Update names of constants why: Declutter, more systematic naming * Re-implemented `worker_desc.merge()` for storage slots why: Provided as proper queue management in `storage_queue_helper`. details: + Several append modes (replaces `merge()`) + Added third queue to record entries currently fetched by a worker. So another parallel running worker can safe the complete set of storage slots in as checkpoint. This was previously lost. * Refactor healing why: Simplify and remove deep hexary trie perusal for finding completeness. Due to "cheap" envelope decomposition of a range complement for the hexary trie, the cost of running extra laps have become time-affordable again and a simple trigger mechanism for healing will do. * Docu update * Run a storage job only once in download loop why: Download failure or rejection (i.e. missing data) lead to repeated fetch requests until peer disconnects, otherwise.
2022-12-24 09:54:18 +00:00
env.storageQueueAppendFull(rc.value.storageRoot, w)
# Handle contracts
for w in recov.state.ctraAccounts:
let pt = NodeTagRange.new(w.to(NodeTag),w.to(NodeTag)) # => `pt.len == 1`
if 0 < env.fetchAccounts.processed.covered(pt):
# Ignoring contracts that have accounts to be downloaded, anyway
let rc = ctx.pool.snapDb.getAccountsData(stateRoot, w)
if rc.isErr:
# Oops, how did that account get lost?
discard env.fetchAccounts.processed.reduce pt
env.fetchAccounts.unprocessed.merge pt
elif rc.value.codeHash != EMPTY_CODE_HASH:
env.fetchContracts[rc.value.codeHash] = w
# Handle mothballed pivots for swapping in (see `pivotMothball()`)
if topLevel:
env.savedFullPivotOk = env.pivotCompleteOk()
when extraTraceMessages:
trace logTxt "recovered top level record",
pivot=env.stateHeader.blockNumber.toStr,
savedFullPivotOk=env.savedFullPivotOk,
processed=env.fetchAccounts.processed.fullPC3,
nStoQ=env.storageQueueTotal()
else:
for kvp in env.fetchStorageFull.nextPairs:
let rc = env.storageAccounts.insert(kvp.data.accKey.to(NodeTag))
if rc.isOk:
rc.value.data = kvp.key
env.archived = true
# ------------------------------------------------------------------------------
# Public function, manage new peer and pivot update
# ------------------------------------------------------------------------------
proc pivotApprovePeer*(buddy: SnapBuddyRef) {.async.} =
## Approve peer and update pivot. On failure, the `buddy` will be stopped so
## it will not proceed to the next scheduler task.
let
ctx = buddy.ctx
beaconHeader = ctx.pool.beaconHeader
var
pivotHeader: BlockHeader
block:
let rc = ctx.pool.pivotTable.lastValue
if rc.isOk:
pivotHeader = rc.value.stateHeader
# Check whether the pivot needs to be updated
if pivotHeader.blockNumber+pivotBlockDistanceMin <= beaconHeader.blockNumber:
# If the entry before the previous entry is unused, then run a pool mode
# based session (which should enable a pivot table purge).
block:
let rc = ctx.pool.pivotTable.beforeLast
if rc.isOk and rc.value.data.fetchAccounts.processed.isEmpty:
ctx.poolMode = true
when extraTraceMessages:
trace logTxt "new pivot from beacon chain", peer=buddy.peer,
pivot=pivotHeader.blockNumber.toStr,
beacon=beaconHeader.blockNumber.toStr, poolMode=ctx.poolMode
discard ctx.pool.pivotTable.lruAppend(
beaconHeader.stateRoot, SnapPivotRef.init(ctx, beaconHeader),
pivotTableLruEntriesMax)
pivotHeader = beaconHeader
# Not ready yet?
if pivotHeader.blockNumber == 0:
buddy.ctrl.stopped = true
proc pivotUpdateBeaconHeaderCB*(ctx: SnapCtxRef): SyncReqNewHeadCB =
## Update beacon header. This function is intended as a call back function
## for the RPC module.
result = proc(h: BlockHeader) {.gcsafe.} =
if ctx.pool.beaconHeader.blockNumber < h.blockNumber:
# when extraTraceMessages:
# trace logTxt "external beacon info update", header=h.blockNumber.toStr
ctx.pool.beaconHeader = h
# ------------------------------------------------------------------------------
# Public function, debugging
# ------------------------------------------------------------------------------
import
db/[hexary_desc, hexary_inspect, hexary_nearby, hexary_paths,
snapdb_storage_slots]
const
pivotVerifyExtraBlurb = false # or true
inspectSuspendAfter = 10_000
inspectExtraNap = 100.milliseconds
proc pivotVerifyComplete*(
env: SnapPivotRef; # Current pivot environment
ctx: SnapCtxRef; # Some global context
inspectAccountsTrie = false; # Check for dangling links
walkAccountsDB = true; # Walk accounts db
inspectSlotsTries = true; # Check dangling links (if `walkAccountsDB`)
verifyContracts = true; # Verify that code hashes are in database
): Future[bool]
{.async,discardable.} =
## Check the database whether the pivot is complete -- not advidsed on a
## production system as the process takes a lot of ressources.
let
rootKey = env.stateHeader.stateRoot.to(NodeKey)
accFn = ctx.pool.snapDb.getAccountFn
ctraFn = ctx.pool.snapDb.getContractsFn
# Verify consistency of accounts trie database. This should not be needed
# if `walkAccountsDB` is set. In case that there is a dangling link that would
# have been detected by `hexaryInspectTrie()`, the `hexaryNearbyRight()`
# function should fail at that point as well.
if inspectAccountsTrie:
var
stats = accFn.hexaryInspectTrie(rootKey,
suspendAfter=inspectSuspendAfter,
maxDangling=1)
nVisited = stats.count
nRetryCount = 0
while stats.dangling.len == 0 and not stats.resumeCtx.isNil:
when pivotVerifyExtraBlurb:
trace logTxt "accounts db inspect ..", nVisited, nRetryCount
await sleepAsync inspectExtraNap
nRetryCount.inc
stats = accFn.hexaryInspectTrie(rootKey,
resumeCtx=stats.resumeCtx,
suspendAfter=inspectSuspendAfter,
maxDangling=1)
nVisited += stats.count
# End while
if stats.dangling.len != 0:
error logTxt "accounts trie has danglig links", nVisited, nRetryCount
return false
trace logTxt "accounts trie ok", nVisited, nRetryCount
# End `if inspectAccountsTrie`
# Visit accounts and make sense of storage slots
if walkAccountsDB:
var
nAccounts = 0
nStorages = 0
nContracts = 0
nRetryTotal = 0
nodeTag = low(NodeTag)
while true:
if (nAccounts mod inspectSuspendAfter) == 0 and 0 < nAccounts:
when pivotVerifyExtraBlurb:
trace logTxt "accounts db walk ..",
nAccounts, nStorages, nContracts, nRetryTotal,
inspectSlotsTries, verifyContracts
await sleepAsync inspectExtraNap
# Find next account key => `nodeTag`
let rc = nodeTag.hexaryPath(rootKey,accFn).hexaryNearbyRight(accFn)
if rc.isErr:
if rc.error == NearbyBeyondRange:
break # No more accounts
error logTxt "accounts db problem", nodeTag,
nAccounts, nStorages, nContracts, nRetryTotal,
inspectSlotsTries, verifyContracts, error=rc.error
return false
nodeTag = rc.value.getPartialPath.convertTo(NodeKey).to(NodeTag)
nAccounts.inc
# Decode accounts data
var accData: Account
try:
accData = rc.value.leafData.decode(Account)
except RlpError as e:
error logTxt "account data problem", nodeTag,
nAccounts, nStorages, nContracts, nRetryTotal,
inspectSlotsTries, verifyContracts, name=($e.name), msg=(e.msg)
return false
# Check for storage slots for this account
if accData.storageRoot != EMPTY_ROOT_HASH:
nStorages.inc
if inspectSlotsTries:
let
slotFn = ctx.pool.snapDb.getStorageSlotsFn(nodeTag.to(NodeKey))
stoKey = accData.storageRoot.to(NodeKey)
var
stats = slotFn.hexaryInspectTrie(stoKey,
suspendAfter=inspectSuspendAfter,
maxDangling=1)
nVisited = stats.count
nRetryCount = 0
while stats.dangling.len == 0 and not stats.resumeCtx.isNil:
when pivotVerifyExtraBlurb:
trace logTxt "storage slots inspect ..", nodeTag,
nAccounts, nStorages, nContracts, nRetryTotal,
inspectSlotsTries, verifyContracts, nVisited, nRetryCount
await sleepAsync inspectExtraNap
nRetryCount.inc
nRetryTotal.inc
stats = accFn.hexaryInspectTrie(stoKey,
resumeCtx=stats.resumeCtx,
suspendAfter=inspectSuspendAfter,
maxDangling=1)
nVisited += stats.count
if stats.dangling.len != 0:
error logTxt "storage slots trie has dangling link", nodeTag,
nAccounts, nStorages, nContracts, nRetryTotal,
inspectSlotsTries, nVisited, nRetryCount
return false
if nVisited == 0:
error logTxt "storage slots trie is empty", nodeTag,
nAccounts, nStorages, nContracts, nRetryTotal,
inspectSlotsTries, verifyContracts, nVisited, nRetryCount
return false
# Check for contract codes for this account
if accData.codeHash != EMPTY_CODE_HASH:
nContracts.inc
if verifyContracts:
let codeKey = accData.codeHash.to(NodeKey)
if codeKey.to(Blob).ctraFn.len == 0:
error logTxt "Contract code missing", nodeTag,
codeKey=codeKey.to(NodeTag),
nAccounts, nStorages, nContracts, nRetryTotal,
inspectSlotsTries, verifyContracts
return false
# Set up next node key for looping
if nodeTag == high(NodeTag):
break
nodeTag = nodeTag + 1.u256
# End while
trace logTxt "accounts db walk ok",
nAccounts, nStorages, nContracts, nRetryTotal, inspectSlotsTries
# End `if walkAccountsDB`
return true
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------