nimbus-eth1/nimbus/sync/snap/worker/db/snapdb_accounts.nim

621 lines
22 KiB
Nim
Raw Normal View History

# nimbus-eth1
# Copyright (c) 2021 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.
import
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
std/[algorithm, sequtils, tables],
chronicles,
eth/[common, p2p, rlp, trie/nibbles],
stew/byteutils,
../../range_desc,
"."/[hexary_desc, hexary_error, hexary_import, hexary_interpolate,
hexary_inspect, hexary_paths, snapdb_desc, snapdb_persistent]
{.push raises: [Defect].}
logScope:
topics = "snap-db"
type
SnapDbAccountsRef* = ref object of SnapDbBaseRef
peer: Peer ## For log messages
getClsFn: AccountsGetFn ## Persistent database `get()` closure
SnapAccountsGaps* = object
innerGaps*: seq[NodeSpecs]
dangling*: seq[NodeSpecs]
const
extraTraceMessages = false or true
# ------------------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------------------
proc to(h: Hash256; T: type NodeKey): T =
h.data.T
proc convertTo(data: openArray[byte]; T: type Hash256): T =
discard result.data.NodeKey.init(data) # size error => zero
proc getFn(ps: SnapDbAccountsRef): HexaryGetFn =
## Derive from `GetClsFn` closure => `HexaryGetFn`. There reason for that
## seemingly redundant mapping is that here is space for additional localised
## and locked parameters as done with the `StorageSlotsGetFn`.
return proc(key: openArray[byte]): Blob = ps.getClsFn(key)
template noKeyError(info: static[string]; code: untyped) =
try:
code
except KeyError as e:
raiseAssert "Not possible (" & info & "): " & e.msg
template noRlpExceptionOops(info: static[string]; code: untyped) =
try:
code
except RlpError:
return err(RlpEncoding)
except KeyError as e:
raiseAssert "Not possible (" & info & "): " & e.msg
except Defect as e:
raise e
except Exception as e:
raiseAssert "Ooops " & info & ": name=" & $e.name & " msg=" & e.msg
# ------------------------------------------------------------------------------
# Private functions
# ------------------------------------------------------------------------------
proc persistentAccounts(
db: HexaryTreeDbRef; ## Current table
ps: SnapDbAccountsRef; ## For persistent database
): Result[void,HexaryDbError]
{.gcsafe, raises: [Defect,OSError,KeyError].} =
## Store accounts trie table on databse
if ps.rockDb.isNil:
let rc = db.persistentAccountsPut(ps.kvDb)
if rc.isErr: return rc
else:
let rc = db.persistentAccountsPut(ps.rockDb)
if rc.isErr: return rc
ok()
proc collectAccounts(
peer: Peer, ## for log messages
base: NodeTag;
acc: seq[PackedAccount];
): Result[seq[RLeafSpecs],HexaryDbError]
{.gcsafe, raises: [Defect, RlpError].} =
## Repack account records into a `seq[RLeafSpecs]` queue. The argument data
## `acc` are as received with the snap message `AccountRange`).
##
## The returned list contains leaf node information for populating a repair
## table. The accounts, together with some hexary trie records for proofs
## can be used for validating the argument account data.
var rcAcc: seq[RLeafSpecs]
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
if 0 < acc.len:
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
let pathTag0 = acc[0].accKey.to(NodeTag)
# Verify lower bound
if pathTag0 < base:
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
let error = LowerBoundAfterFirstEntry
trace "collectAccounts()", peer, base, accounts=acc.len, error
return err(error)
# Add first account
rcAcc.add RLeafSpecs(pathTag: pathTag0, payload: acc[0].accBlob)
# Veify & add other accounts
for n in 1 ..< acc.len:
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
let nodeTag = acc[n].accKey.to(NodeTag)
if nodeTag <= rcAcc[^1].pathTag:
let error = AccountsNotSrictlyIncreasing
trace "collectAccounts()", peer, item=n, base, accounts=acc.len, error
return err(error)
rcAcc.add RLeafSpecs(pathTag: nodeTag, payload: acc[n].accBlob)
ok(rcAcc)
# ------------------------------------------------------------------------------
# Public constructor
# ------------------------------------------------------------------------------
proc init*(
T: type SnapDbAccountsRef;
pv: SnapDbRef;
root: Hash256;
peer: Peer = nil
): T =
## Constructor, starts a new accounts session.
let db = pv.kvDb
new result
result.init(pv, root.to(NodeKey))
result.peer = peer
result.getClsFn = db.persistentAccountsGetFn()
proc dup*(
ps: SnapDbAccountsRef;
root: Hash256;
peer: Peer;
): SnapDbAccountsRef =
## Resume an accounts session with different `root` key and `peer`.
new result
result[].shallowCopy(ps[])
result.root = root.to(NodeKey)
result.peer = peer
proc dup*(
ps: SnapDbAccountsRef;
root: Hash256;
): SnapDbAccountsRef =
## Variant of `dup()` without the `peer` argument.
new result
result[].shallowCopy(ps[])
result.root = root.to(NodeKey)
# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------
proc nodeExists*(
ps: SnapDbAccountsRef; ## Re-usable session descriptor
node: NodeSpecs; ## Node specs, e.g. returned by `importAccounts()`
persistent = false; ## Whether to check data on disk
): bool =
## ..
if not persistent:
return ps.hexaDb.tab.hasKey(node.nodeKey.to(RepairKey))
try:
return 0 < ps.getFn()(node.nodeKey.ByteArray32).len
except Exception as e:
raiseAssert "Not possible @ importAccounts(" & $e.name & "):" & e.msg
proc nodeExists*(
pv: SnapDbRef; ## Base descriptor on `BaseChainDB`
peer: Peer; ## For log messages
root: Hash256; ## State root
node: NodeSpecs; ## Node specs, e.g. returned by `importAccounts()`
): bool =
## Variant of `nodeExists()` for presistent storage, only.
SnapDbAccountsRef.init(pv, root, peer).nodeExists(node, persistent=true)
proc importAccounts*(
ps: SnapDbAccountsRef; ## Re-usable session descriptor
base: NodeTag; ## Before or at first account entry in `data`
data: PackedAccountRange; ## Re-packed `snap/1 ` reply data
persistent = false; ## Store data on disk
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
noBaseBoundCheck = false; ## Ignore left boundary proof check if `true`
): Result[SnapAccountsGaps,HexaryDbError] =
## Validate and import accounts (using proofs as received with the snap
## message `AccountRange`). This function accumulates data in a memory table
## which can be written to disk with the argument `persistent` set `true`.
## The memory table is held in the descriptor argument`ps`.
##
## On success, the function returns a list `innerGaps` of dangling node
## links from the argument `proof` list of nodes after the populating with
## accounts. The following example may illustrate the case:
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
##
## Assume an accounts hexary trie
## ::
## | 0 1 2 3 4 5 6 7 8 9 a b c d e f -- nibble positions
## | root -> (a, .. b, .. c, .. d, .. ,) -- root branch node
## | | | | |
## | ... v v v
## | (x,X) (y,Y) (z,Z)
##
## with `a`,`b`,`c`,`d` node hashes, `x`,`y`,`z` partial paths and account
## hashes `3&x`,`7&y`,`b&z` for account values `X`,`Y`,`Z`. All other
## links in the *root branch node* are assumed nil.
##
## The passing to this function
## * base: `3&x`
## * data.proof: *root branch node*
## * data.accounts: `(3&x,X)`, `(7&y,Y)`, `(b&z,Z)`
## a partial tree can be fully constructed and boundary proofs succeed.
## The return value will be an empty list.
##
## Leaving out `(7&y,Y)` the boundary proofs still succeed but the
## return value will be @[`(7&y,c)`].
##
## The left boundary proof might be omitted by passing `true` for the
## `noBaseBoundCheck` argument. In this case, the boundary check must be
## performed on the return code as
## * if `data.accounts` is empty, the return value must be an empty list
## * otherwise, all type `NodeSpecs` items `w` of the return code must
## satisfy
## ::
## let leastAccountPath = data.accounts[0].accKey.to(NodeTag)
## leastAccountPath <= w.partialPath.max(NodeKey).to(NodeTag)
##
## Besides the inner gaps, the function also returns the dangling nodes left
## from the `proof` list.
##
## Note that the `peer` argument is for log messages, only.
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
var
accounts: seq[RLeafSpecs]
outside: seq[NodeSpecs]
gaps: SnapAccountsGaps
try:
if 0 < data.proof.len:
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
let rc = ps.mergeProofs(ps.peer, data.proof)
if rc.isErr:
return err(rc.error)
block:
let rc = ps.peer.collectAccounts(base, data.accounts)
if rc.isErr:
return err(rc.error)
accounts = rc.value
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
if 0 < accounts.len:
var innerSubTrie: seq[NodeSpecs]
if 0 < data.proof.len:
# Inspect trie for dangling nodes. This is not a big deal here as the
# proof data is typically small.
let
proofStats = ps.hexaDb.hexaryInspectTrie(ps.root, @[])
topTag = accounts[^1].pathTag
for w in proofStats.dangling:
if base <= w.partialPath.max(NodeKey).to(NodeTag) and
w.partialPath.min(NodeKey).to(NodeTag) <= topTag:
# Extract dangling links which are inside the accounts range
innerSubTrie.add w
else:
# Otherwise register outside links
outside.add w
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
# Build partial hexary trie
let rc = ps.hexaDb.hexaryInterpolate(
ps.root, accounts, bootstrap = (data.proof.len == 0))
if rc.isErr:
return err(rc.error)
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
# Collect missing inner sub-trees in the reconstructed partial hexary
# trie (if any).
let bottomTag = accounts[0].pathTag
for w in innerSubTrie:
if ps.hexaDb.tab.hasKey(w.nodeKey.to(RepairKey)):
continue
# Verify that `base` is to the left of the first account and there is
# nothing in between. Without proof, there can only be a complete
# set/list of accounts. There must be a proof for an empty list.
if not noBaseBoundCheck and
w.partialPath.max(NodeKey).to(NodeTag) < bottomTag:
return err(LowerBoundProofError)
# Otherwise register left over entry
gaps.innerGaps.add w
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
if persistent:
let rc = ps.hexaDb.persistentAccounts(ps)
if rc.isErr:
return err(rc.error)
# Verify outer links against database
let getFn = ps.getFn
for w in outside:
if w.nodeKey.ByteArray32.getFn().len == 0:
gaps.dangling.add w
else:
for w in outside:
if not ps.hexaDb.tab.hasKey(w.nodeKey.to(RepairKey)):
gaps.dangling.add w
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
elif data.proof.len == 0:
# There must be a proof for an empty argument list.
return err(LowerBoundProofError)
except RlpError:
return err(RlpEncoding)
except KeyError as e:
raiseAssert "Not possible @ importAccounts(KeyError): " & e.msg
except OSError as e:
error "Import Accounts exception", peer=ps.peer, name=($e.name), msg=e.msg
return err(OSErrorException)
except Exception as e:
raiseAssert "Not possible @ importAccounts(" & $e.name & "):" & e.msg
#when extraTraceMessages:
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
# trace "Accounts imported", peer=ps.peer, root=ps.root.ByteArray32.toHex,
# proof=data.proof.len, base, accounts=data.accounts.len,
# top=accounts[^1].pathTag, innerGapsLen=gaps.innerGaps.len,
# danglingLen=gaps.dangling.len
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
ok(gaps)
proc importAccounts*(
pv: SnapDbRef; ## Base descriptor on `BaseChainDB`
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
peer: Peer; ## For log messages
root: Hash256; ## State root
base: NodeTag; ## Before or at first account entry in `data`
data: PackedAccountRange; ## Re-packed `snap/1 ` reply data
noBaseBoundCheck = false; ## Ignore left bound proof check if `true`
): Result[SnapAccountsGaps,HexaryDbError] =
## Variant of `importAccounts()` for presistent storage, only.
SnapDbAccountsRef.init(
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
pv, root, peer).importAccounts(
base, data, persistent=true, noBaseBoundCheck)
proc importRawAccountsNodes*(
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
ps: SnapDbAccountsRef; ## Re-usable session descriptor
nodes: openArray[NodeSpecs]; ## List of `(key,data)` records
reportNodes = {Leaf}; ## Additional node types to report
persistent = false; ## store data on disk
): seq[HexaryNodeReport] =
## Store data nodes given as argument `nodes` on the persistent database.
##
## If there were an error when processing a particular argument `notes` item,
## it will be reported with the return value providing argument slot/index,
## node type, end error code.
##
## If there was an error soring persistent data, the last report item will
## have an error code, only.
##
## Additional node items might be reported if the node type is in the
## argument set `reportNodes`. These reported items will have no error
## code set (i.e. `NothingSerious`.)
##
let
peer = ps.peer
db = HexaryTreeDbRef.init(ps)
nItems = nodes.len
var
nErrors = 0
slot: Option[int]
try:
# Import nodes
for n,node in nodes:
if 0 < node.data.len: # otherwise ignore empty placeholder
slot = some(n)
var rep = db.hexaryImport(node)
if rep.error != NothingSerious:
rep.slot = slot
result.add rep
nErrors.inc
trace "Error importing account nodes", peer, inx=n, nItems,
error=rep.error, nErrors
elif rep.kind.isSome and rep.kind.unsafeGet in reportNodes:
rep.slot = slot
result.add rep
# Store to disk
if persistent and 0 < db.tab.len:
slot = none(int)
let rc = db.persistentAccounts(ps)
if rc.isErr:
result.add HexaryNodeReport(slot: slot, error: rc.error)
except RlpError:
result.add HexaryNodeReport(slot: slot, error: RlpEncoding)
nErrors.inc
trace "Error importing account nodes", peer, slot, nItems,
error=RlpEncoding, nErrors
except KeyError as e:
raiseAssert "Not possible @ importRawAccountNodes: " & e.msg
except OSError as e:
result.add HexaryNodeReport(slot: slot, error: OSErrorException)
nErrors.inc
error "Import account nodes exception", peer, slot, nItems,
name=($e.name), msg=e.msg, nErrors
when extraTraceMessages:
if nErrors == 0:
trace "Raw account nodes imported", peer, slot, nItems, report=result.len
proc importRawAccountsNodes*(
pv: SnapDbRef; ## Base descriptor on `BaseChainDB`
peer: Peer, ## For log messages, only
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
nodes: openArray[NodeSpecs]; ## List of `(key,data)` records
reportNodes = {Leaf}; ## Additional node types to report
): seq[HexaryNodeReport] =
## Variant of `importRawNodes()` for persistent storage.
SnapDbAccountsRef.init(
pv, Hash256(), peer).importRawAccountsNodes(
nodes, reportNodes, persistent=true)
proc inspectAccountsTrie*(
ps: SnapDbAccountsRef; ## Re-usable session descriptor
pathList = seq[Blob].default; ## Starting nodes for search
resumeCtx: TrieNodeStatCtxRef = nil; ## Context for resuming inspection
suspendAfter = high(uint64); ## To be resumed
persistent = false; ## Read data from disk
ignoreError = false; ## Always return partial results if any
): Result[TrieNodeStat, HexaryDbError] =
## Starting with the argument list `pathSet`, find all the non-leaf nodes in
## the hexary trie which have at least one node key reference missing in
## the trie database. Argument `pathSet` list entries that do not refer to a
## valid node are silently ignored.
##
## Trie inspection can be automatically suspended after having visited
## `suspendAfter` nodes to be resumed at the last state. An application of
## this feature would look like
## ::
## var ctx = TrieNodeStatCtxRef()
## while not ctx.isNil:
## let rc = inspectAccountsTrie(.., resumeCtx=ctx, suspendAfter=1024)
## ...
## ctx = rc.value.resumeCtx
##
let peer = ps.peer
var stats: TrieNodeStat
noRlpExceptionOops("inspectAccountsTrie()"):
if persistent:
stats = ps.getFn.hexaryInspectTrie(
ps.root, pathList, resumeCtx, suspendAfter=suspendAfter)
else:
stats = ps.hexaDb.hexaryInspectTrie(
ps.root, pathList, resumeCtx, suspendAfter=suspendAfter)
block checkForError:
var error = TrieIsEmpty
if stats.stopped:
error = TrieLoopAlert
trace "Inspect account trie failed", peer, nPathList=pathList.len,
nDangling=stats.dangling.len, stoppedAt=stats.level, error
elif 0 < stats.level:
break checkForError
if ignoreError:
return ok(stats)
return err(error)
#when extraTraceMessages:
# trace "Inspect account trie ok", peer, nPathList=pathList.len,
# nDangling=stats.dangling.len, level=stats.level
return ok(stats)
proc inspectAccountsTrie*(
pv: SnapDbRef; ## Base descriptor on `BaseChainDB`
peer: Peer; ## For log messages, only
root: Hash256; ## state root
pathList = seq[Blob].default; ## Starting paths for search
resumeCtx: TrieNodeStatCtxRef = nil; ## Context for resuming inspection
suspendAfter = high(uint64); ## To be resumed
ignoreError = false; ## Always return partial results if any
): Result[TrieNodeStat, HexaryDbError] =
## Variant of `inspectAccountsTrie()` for persistent storage.
SnapDbAccountsRef.init(
pv, root, peer).inspectAccountsTrie(
pathList, resumeCtx, suspendAfter, persistent=true, ignoreError)
proc getAccountsNodeKey*(
ps: SnapDbAccountsRef; ## Re-usable session descriptor
path: Blob; ## Partial node path
persistent = false; ## Read data from disk
): Result[NodeKey,HexaryDbError] =
## For a partial node path argument `path`, return the raw node key.
var rc: Result[NodeKey,void]
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
noRlpExceptionOops("getAccountsNodeKey()"):
if persistent:
rc = ps.getFn.hexaryInspectPath(ps.root, path)
else:
rc = ps.hexaDb.hexaryInspectPath(ps.root, path)
if rc.isOk:
return ok(rc.value)
err(NodeNotFound)
proc getAccountsNodeKey*(
pv: SnapDbRef; ## Base descriptor on `BaseChainDB`
peer: Peer; ## For log messages, only
root: Hash256; ## state root
path: Blob; ## Partial node path
): Result[NodeKey,HexaryDbError] =
## Variant of `getAccountsNodeKey()` for persistent storage.
SnapDbAccountsRef.init(
pv, root, peer).getAccountsNodeKey(path, persistent=true)
proc getAccountsData*(
ps: SnapDbAccountsRef; ## Re-usable session descriptor
path: NodeKey; ## Account to visit
persistent = false; ## Read data from disk
): Result[Account,HexaryDbError] =
## Fetch account data.
##
## Caveat: There is no unit test yet for the non-persistent version
let peer = ps.peer
var acc: Account
noRlpExceptionOops("getAccountData()"):
var leaf: Blob
if persistent:
leaf = path.hexaryPath(ps.root, ps.getFn).leafData
else:
leaf = path.hexaryPath(ps.root.to(RepairKey),ps.hexaDb).leafData
if leaf.len == 0:
return err(AccountNotFound)
acc = rlp.decode(leaf,Account)
return ok(acc)
proc getAccountsData*(
pv: SnapDbRef; ## Base descriptor on `BaseChainDB`
peer: Peer; ## For log messages, only
root: Hash256; ## State root
path: NodeKey; ## Account to visit
): Result[Account,HexaryDbError] =
## Variant of `getAccountsData()` for persistent storage.
SnapDbAccountsRef.init(
pv, root, peer).getAccountsData(path, persistent=true)
# ------------------------------------------------------------------------------
# Public functions: additional helpers
# ------------------------------------------------------------------------------
proc sortMerge*(base: openArray[NodeTag]): NodeTag =
## Helper for merging several `(NodeTag,seq[PackedAccount])` data sets
## so that there are no overlap which would be rejected by `merge()`.
##
## This function selects a `NodeTag` from a list.
result = high(NodeTag)
for w in base:
if w < result:
result = w
proc sortMerge*(acc: openArray[seq[PackedAccount]]): seq[PackedAccount] =
## Helper for merging several `(NodeTag,seq[PackedAccount])` data sets
## so that there are no overlap which would be rejected by `merge()`.
##
## This function flattens and sorts the argument account lists.
noKeyError("sortMergeAccounts"):
var accounts: Table[NodeTag,PackedAccount]
for accList in acc:
for item in accList:
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
accounts[item.accKey.to(NodeTag)] = item
result = toSeq(accounts.keys).sorted(cmp).mapIt(accounts[it])
proc getAccountsChainDb*(
ps: SnapDbAccountsRef;
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
accKey: NodeKey;
): Result[Account,HexaryDbError] =
## Fetch account via `BaseChainDB`
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
ps.getAccountsData(accKey, persistent = true)
proc nextAccountsChainDbKey*(
ps: SnapDbAccountsRef;
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
accKey: NodeKey;
): Result[NodeKey,HexaryDbError] =
## Fetch the account path on the `BaseChainDB`, the one next to the
## argument account key.
noRlpExceptionOops("getChainDbAccount()"):
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
let path = accKey
.hexaryPath(ps.root, ps.getFn)
.next(ps.getFn)
.getNibbles
if 64 == path.len:
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
return ok(path.getBytes.convertTo(Hash256).to(NodeKey))
err(AccountNotFound)
proc prevAccountsChainDbKey*(
ps: SnapDbAccountsRef;
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
accKey: NodeKey;
): Result[NodeKey,HexaryDbError] =
## Fetch the account path on the `BaseChainDB`, the one before to the
## argument account.
noRlpExceptionOops("getChainDbAccount()"):
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
let path = accKey
.hexaryPath(ps.root, ps.getFn)
.prev(ps.getFn)
.getNibbles
if 64 == path.len:
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
return ok(path.getBytes.convertTo(Hash256).to(NodeKey))
err(AccountNotFound)
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------