nimbus-eth1/tests/test_sync_snap.nim

1210 lines
44 KiB
Nim
Raw Normal View History

# Nimbus - Types, data structures and shared utilities used in network sync
#
# Copyright (c) 2018-2021 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or
# distributed except according to those terms.
## Snap sync components tester and TDD environment
import
std/[algorithm, distros, hashes, math, os, sets,
sequtils, strformat, strutils, tables, times],
chronicles,
eth/[common, p2p, rlp, trie/db],
rocksdb,
stint,
stew/[byteutils, results],
unittest2,
../nimbus/[chain_config, config, genesis],
../nimbus/db/[db_chain, select_backend, storage_types],
../nimbus/p2p/chain,
../nimbus/sync/types,
../nimbus/sync/snap/range_desc,
../nimbus/sync/snap/worker/db/[
hexary_desc, hexary_error, hexary_inspect, rocky_bulk_load,
snapdb_accounts, snapdb_desc, snapdb_storage_slots],
../nimbus/utils/prettify,
./replay/[pp, undump_blocks, undump_accounts, undump_storages],
./test_sync_snap/[bulk_test_xx, snap_test_xx, test_types]
const
  # Directories searched (in this order) as the outer path component when
  # locating a test data file, see `findFilePath()`.
  baseDir = [".", "..", ".."/"..", $DirSep]
  # Sub-directories tried below each `baseDir` entry.
  repoDir = [".", "tests"/"replay", "tests"/"test_sync_snap",
             "nimbus-eth1-blobs"/"replay"]

  # Reference file for finding the database directory
  sampleDirRefFile = "sample0.txt.gz"

  # Standard test samples
  bChainCapture = bulkTest0
  accSample = snapTest0
  storSample = snapTest4

  # Number of database slots (needed for timing tests)
  nTestDbInstances = 9
type
  TestDbs = object
    ## Provide enough spare empty databases
    persistent: bool # set by `testDbs()` when on-disk databases are used
    dbDir: string    # database base folder, or a placeholder when unused
    cdb: array[nTestDbInstances,ChainDb] # database slots filled by `testDbs()`
# `isUbuntu32bit` is a run-time value on Linux and a compile-time `false`
# on all other platforms.
when defined(linux):
  # The `detectOs(Ubuntu)` directive is not Windows compatible, causes an
  # error when running the system command `lsb_release -d` in the background.
  let isUbuntu32bit = detectOs(Ubuntu) and int.sizeof == 4
else:
  const isUbuntu32bit = false
let
  # Pre-built success values. Comparing a result against one of these
  # forces `check()` to print the error (as opposed when using `isOk()`)
  OkHexDb = Result[void,HexaryDbError].ok()
  OkStoDb = Result[void,seq[(int,HexaryDbError)]].ok()
  OkImport = Result[seq[NodeSpecs],HexaryDbError].ok(@[])

  # There was a problem with the Github/CI which results in spurious crashes
  # when leaving the `runner()` if the persistent BaseChainDB initialisation
  # was present, see `test_custom_network` for more details.
  disablePersistentDB = isUbuntu32bit
# Module-level mutable state (the code using it is not part of this view).
var
  xTmpDir: string
  xDbs: TestDbs                   # for repeated storage/overwrite tests
  xTab32: Table[ByteArray32,Blob] # extracted data
  xTab33: Table[ByteArray33,Blob]
  xVal32Sum, xVal32SqSum: float   # statistics
  xVal33Sum, xVal33SqSum: float
# ------------------------------------------------------------------------------
# Helpers
# ------------------------------------------------------------------------------
proc isOk(rc: ValidationResult): bool =
  ## Predicate, `true` if and only if `rc` equals `ValidationResult.OK`.
  result = (ValidationResult.OK == rc)
proc toStoDbRc(r: seq[HexaryNodeReport]): Result[void,seq[(int,HexaryDbError)]]=
  ## Kludge: map error report to (older version) return code.
  ##
  ## An empty report list is mapped to `ok()`. Otherwise each report entry
  ## becomes a `(slot,error)` pair where a missing slot is represented
  ## by `-1`.
  if r.len == 0:
    return ok()
  var pairs = newSeqOfCap[(int,HexaryDbError)](r.len)
  for report in r:
    pairs.add((report.slot.get(otherwise = -1), report.error))
  err(pairs)
proc findFilePath(file: string;
                  baseDir, repoDir: openArray[string]): Result[string,void] =
  ## Look for `file` at `dir / repo / file` for every combination of `dir`
  ## from `baseDir` (outer loop) and `repo` from `repoDir` (inner loop).
  ## The first existing path is returned. If there is none, a notice is
  ## printed and an error is returned.
  for base in baseDir:
    for sub in repoDir:
      let candidate = base / sub / file
      if not candidate.fileExists:
        continue
      return ok(candidate)
  echo "*** File not found \"", file, "\"."
  err()
proc getTmpDir(sampleDir = sampleDirRefFile): string =
  ## Directory part of the path where the reference file `sampleDir` was
  ## found by `findFilePath()`. The lookup result is unwrapped unchecked
  ## via `.value`, so the reference file is expected to exist.
  let refPath = sampleDir.findFilePath(baseDir,repoDir).value
  refPath.splitFile.dir
proc pp(d: Duration): string =
  ## Pretty print a duration. The formatter is selected by magnitude:
  ## `ppMins` above 40 seconds, `ppSecs` above 200 milliseconds, `ppMs`
  ## above 200 microseconds, and `ppUs` otherwise.
  if d.inSeconds > 40:
    return d.ppMins
  if d.inMilliseconds > 200:
    return d.ppSecs
  if d.inMicroseconds > 200:
    return d.ppMs
  d.ppUs
proc pp(rc: Result[Account,HexaryDbError]): string =
  ## Pretty print the account value, or the stringified error code.
  if rc.isOk:
    rc.value.pp
  else:
    $rc.error
proc pp(rc: Result[Hash256,HexaryDbError]): string =
  ## Print the hash value converted to a `NodeTag`, or the stringified
  ## error code.
  if rc.isOk:
    $rc.value.to(NodeTag)
  else:
    $rc.error
proc pp(rc: Result[TrieNodeStat,HexaryDbError]; db: SnapDbBaseRef): string =
  ## Pretty print the trie node statistics relative to `db.hexaDb`, or the
  ## stringified error code.
  if rc.isOk:
    rc.value.pp(db.hexaDb)
  else:
    $rc.error
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
proc pp(a: NodeKey; collapse = true): string =
  ## Pretty print a node key by way of its `Hash256` representation. The
  ## `collapse` flag is passed on to the `Hash256` pretty printer.
  let hash = a.to(Hash256)
  hash.pp(collapse)
proc ppKvPc(w: openArray[(string,int)]): string =
  ## Render a list of `(name,value)` pairs as `name=value%` items joined
  ## by `", "`. An empty list gives the empty string.
  var items = newSeqOfCap[string](w.len)
  for kv in w:
    items.add &"{kv[0]}={kv[1]}%"
  items.join(", ")
proc say*(noisy = false; pfx = "***"; args: varargs[string, `$`]) =
  ## Conditional console message, prints nothing unless `noisy` is set.
  ##
  ## Without `args`, the prefix `pfx` itself is printed after a `"*** "`
  ## marker. Otherwise the concatenated `args` are printed after `pfx`,
  ## separated by a space unless `pfx` is empty or already ends with one.
  if not noisy:
    return
  if args.len == 0:
    echo "*** ", pfx
    return
  let text = args.toSeq.join
  if pfx.len > 0 and pfx[^1] != ' ':
    echo pfx, " ", text
  else:
    echo pfx, text
proc setTraceLevel =
  ## Set the log level to `TRACE`. This is a no-op unless compiled with
  ## `chronicles_runtime_filtering` defined and logging enabled.
  when defined(chronicles_runtime_filtering) and loggingEnabled:
    setLogLevel(LogLevel.TRACE)
  else:
    discard
proc setErrorLevel =
  ## Set the log level to `ERROR`. This is a no-op unless compiled with
  ## `chronicles_runtime_filtering` defined and logging enabled.
  when defined(chronicles_runtime_filtering) and loggingEnabled:
    setLogLevel(LogLevel.ERROR)
  else:
    discard
# ------------------------------------------------------------------------------
# Private functions
# ------------------------------------------------------------------------------
proc to(sample: AccountsSample; T: type seq[UndumpAccounts]): T =
  ## Convert test data into usable in-memory format.
  ##
  ## Records are read from the sample file and selected by the index
  ## `seenAccounts - 1`, which must be in `firstItem..lastItem`. Collecting
  ## stops at the first record whose `root` differs from the `root` of the
  ## record at index `firstItem`.
  let path = sample.file.findFilePath(baseDir,repoDir).value
  var refRoot: Hash256
  for item in path.undumpNextAccount:
    let index = item.seenAccounts - 1
    if index < sample.firstItem:
      continue                      # window not reached, yet
    if index > sample.lastItem:
      break                         # beyond the window
    if index == sample.firstItem:
      refRoot = item.root           # the first record defines the root
    elif item.root != refRoot:
      break                         # root changed
    result.add item
proc to(sample: AccountsSample; T: type seq[UndumpStorages]): T =
  ## Convert test data into usable in-memory format.
  ##
  ## Records are read from the sample file and selected by the index
  ## `seenAccounts - 1` (storages selector based on accounts), which must be
  ## in `firstItem..lastItem`. Collecting stops at the first record whose
  ## `root` differs from the `root` of the record at index `firstItem`.
  let path = sample.file.findFilePath(baseDir,repoDir).value
  var refRoot: Hash256
  for item in path.undumpNextStorages:
    let index = item.seenAccounts - 1
    if index < sample.firstItem:
      continue                      # window not reached, yet
    if index > sample.lastItem:
      break                         # beyond the window
    if index == sample.firstItem:
      refRoot = item.root           # the first record defines the root
    elif item.root != refRoot:
      break                         # root changed
    result.add item
proc to(b: openArray[byte]; T: type ByteArray32): T =
  ## Copy the 32 bytes of `b` into the result. Any other argument length
  ## fails the `doAssert` check.
  doAssert b.len == 32
  copyMem(addr result[0], unsafeAddr b[0], 32)
proc to(b: openArray[byte]; T: type ByteArray33): T =
  ## Copy the 33 bytes of `b` into the result. Any other argument length
  ## fails the `doAssert` check.
  doAssert b.len == 33
  copyMem(addr result[0], unsafeAddr b[0], 33)
proc to(b: ByteArray32|ByteArray33; T: type Blob): T =
  ## Copy the items of the argument `b` into a `Blob`.
  result = toSeq(b)
proc to(b: openArray[byte]; T: type NodeTag): T =
  ## Convert from serialised equivalent, reading `b` as a big-endian
  ## `UInt256`.
  T(UInt256.fromBytesBE(b))
proc to(w: (byte, NodeTag); T: type Blob): T =
  ## Serialise the pair `w` as the byte `w[0]` followed by the big-endian
  ## `UInt256` bytes of the tag `w[1]`.
  result = @[w[0]]
  result.add toSeq(w[1].UInt256.toBytesBE)
proc to(t: NodeTag; T: type Blob): T =
  ## Serialise the tag `t` as the big-endian bytes of its `UInt256` value.
  result = t.UInt256.toBytesBE.toSeq
proc flushDbDir(s: string; subDir = "") =
  ## Best-effort clean up of the test database folders below `s / "tmp"`.
  ## Nothing is done if the work directory `s` is the empty string.
  ##
  ## For each of the `nTestDbInstances` slots, the instance folder
  ## `<tmp>/<n>` (or `<tmp>/<subDir>/<n>` if `subDir` is set) is removed
  ## provided it contains a `nimbus/data` sub-folder. After that, the
  ## `subDir` folder is removed. Finally, the `tmp` folder itself is
  ## removed, but only when there is nothing left in it.
  ##
  ## File system errors are ignored on purpose. Only `CatchableError` is
  ## swallowed so that programming errors (aka `Defect`) still surface.
  if s == "":
    return
  let tmpRoot = s / "tmp"
  for n in 0 ..< nTestDbInstances:
    let instDir = if subDir == "": tmpRoot / $n else: tmpRoot / subDir / $n
    if (instDir / "nimbus" / "data").dirExists:
      # Typically under Windows: there might be stale file locks.
      try: instDir.removeDir except CatchableError: discard
  # Without `subDir`, the path `tmpRoot / subDir` refers to `tmpRoot` itself.
  # Removing it here would recursively wipe the folder and bypass the
  # emptiness check, below.
  if subDir != "":
    try: (tmpRoot / subDir).removeDir except CatchableError: discard
  block dontClearUnlessEmpty:
    for w in tmpRoot.walkDir:
      break dontClearUnlessEmpty
    try: tmpRoot.removeDir except CatchableError: discard
proc testDbs(workDir = ""; subDir = ""; instances = nTestDbInstances): TestDbs =
  ## Set up a `TestDbs` descriptor.
  ##
  ## If persistent databases are disabled (see `disablePersistentDB`) or
  ## there is no `workDir`, the descriptor is marked non-persistent, `dbDir`
  ## is set to a placeholder, and no database slot is initialised.
  ##
  ## Otherwise the database folder is `workDir/tmp` (or `workDir/tmp/subDir`
  ## if `subDir` is set.) Left-overs of previous runs are flushed and the
  ## first `min(nTestDbInstances,instances)` slots are initialised with
  ## databases at `<dbDir>/<n>`.
  if disablePersistentDB or workDir == "":
    result.persistent = false
    result.dbDir = "*notused*"
  else:
    result.persistent = true
    result.dbDir =
      if subDir != "": workDir / "tmp" / subDir
      else: workDir / "tmp"
  if result.persistent:
    # `flushDbDir()` appends the "tmp" folder (and `subDir`) by itself, so
    # it needs the work directory rather than `dbDir`. Passing `dbDir`
    # would let it look at `<dbDir>/tmp/..` and leave stale databases in
    # place.
    workDir.flushDbDir(subDir)
    for n in 0 ..< min(result.cdb.len, instances):
      result.cdb[n] = (result.dbDir / $n).newChainDB
proc lastTwo(a: openArray[string]): seq[string] =
  ## The last two items of `a` in their original order, or a copy of all
  ## of `a` if there are fewer than two items.
  if a.len < 2:
    return @a
  @[a[^2], a[^1]]
proc flatten(list: openArray[seq[Blob]]): seq[Blob] =
  ## Concatenate all the sub-lists of `list` into a single list while
  ## preserving the order of the items.
  var total = 0
  for chunk in list:
    total += chunk.len
  result = newSeqOfCap[Blob](total)
  for chunk in list:
    for blob in chunk:
      result.add blob
proc thisRecord(r: rocksdb_iterator_t): (Blob,Blob) =
  ## Fetch the key and the value which `rocksdb_iter_key()` and
  ## `rocksdb_iter_value()` report for the iterator `r` and return them as
  ## a pair of byte sequences. If either of the data pointers is `nil`, the
  ## default (i.e. empty) pair is returned.
  var kLen, vLen: csize_t
  let
    kData = r.rocksdb_iter_key(addr kLen)
    vData = r.rocksdb_iter_value(addr vLen)
  if not kData.isNil and not vData.isNil:
    let
      # The data lengths are taken from `kLen`/`vLen` as set by the calls
      # above, so the buffers are not treated as zero terminated strings.
      key = string.fromBytes(toOpenArrayByte(kData,0,int(kLen)-1))
      value = string.fromBytes(toOpenArrayByte(vData,0,int(vLen)-1))
    return (key.mapIt(it.byte),value.mapIt(it.byte))
proc meanStdDev(sum, sqSum: float; length: int): (float,float) =
  ## Mean and standard deviation of `length` samples, given the sum `sum`
  ## of the samples and the sum `sqSum` of their squares. The standard
  ## deviation is calculated as `sqrt(sqSum/length - mean*mean)`.
  ##
  ## For a non-positive `length`, the result is `(0.0,0.0)`.
  if 0 < length:
    let
      mean = sum / length.float
      variance = sqSum / length.float - mean * mean
    # Due to rounding errors, the variance of (almost) equal samples may
    # come out slightly negative which would make `sqrt()` return `NaN`.
    result = (mean, sqrt(if variance < 0.0: 0.0 else: variance))
# ------------------------------------------------------------------------------
# Test Runners: accounts and accounts storages
# ------------------------------------------------------------------------------
proc accountsRunner(noisy = true; persistent = true; sample = accSample) =
let
peer = Peer.new
accountsList = sample.to(seq[UndumpAccounts])
root = accountsList[0].root
tmpDir = getTmpDir()
db = if persistent: tmpDir.testDbs(sample.name, instances=2) else: testDbs()
dbDir = db.dbDir.split($DirSep).lastTwo.join($DirSep)
info = if db.persistent: &"persistent db on \"{dbDir}\""
else: "in-memory db"
fileInfo = sample.file.splitPath.tail.replace(".txt.gz","")
defer:
if db.persistent:
if not db.cdb[0].rocksStoreRef.isNil:
db.cdb[0].rocksStoreRef.store.db.rocksdb_close
db.cdb[1].rocksStoreRef.store.db.rocksdb_close
tmpDir.flushDbDir(sample.name)
suite &"SyncSnap: {fileInfo} accounts and proofs for {info}":
var
desc: SnapDbAccountsRef
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
accKeys: seq[NodeKey]
test &"Snap-proofing {accountsList.len} items for state root ..{root.pp}":
let
Prep for full sync after snap (#1253) * Split fetch accounts into sub-modules details: There will be separated modules for accounts snapshot, storage snapshot, and healing for either. * Allow to rebase pivot before negotiated header why: Peers seem to have not too many snapshots available. By setting back the pivot block header slightly, the chances might be higher to find more peers to serve this pivot. Experiment on mainnet showed that setting back too much (tested with 1024), the chances to find matching snapshot peers seem to decrease. * Add accounts healing * Update variable/field naming in `worker_desc` for readability * Handle leaf nodes in accounts healing why: There is no need to fetch accounts when they had been added by the healing process. On the flip side, these accounts must be checked for storage data and the batch queue updated, accordingly. * Reorganising accounts hash ranges batch queue why: The aim is to formally cover as many accounts as possible for different pivot state root environments. Formerly, this was tried by starting the accounts batch queue at a random value for each pivot (and wrapping around.) Now, each pivot environment starts with an interval set mutually disjunct from any interval set retrieved with other pivot state roots. also: Stop fishing for more pivots in `worker` if 100% download is reached * Reorganise/update accounts healing why: Error handling was wrong and the (math. complexity of) whole process could be better managed. details: Much of the algorithm is now documented at the top of the file `heal_accounts.nim`
2022-10-08 17:20:50 +00:00
dbBase = if persistent: SnapDbRef.init(db.cdb[0])
else: SnapDbRef.init(newMemoryDB())
dbDesc = SnapDbAccountsRef.init(dbBase, root, peer)
for n,w in accountsList:
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
check dbDesc.importAccounts(w.base, w.data, persistent) == OkImport
test &"Merging {accountsList.len} proofs for state root ..{root.pp}":
Prep for full sync after snap (#1253) * Split fetch accounts into sub-modules details: There will be separated modules for accounts snapshot, storage snapshot, and healing for either. * Allow to rebase pivot before negotiated header why: Peers seem to have not too many snapshots available. By setting back the pivot block header slightly, the chances might be higher to find more peers to serve this pivot. Experiment on mainnet showed that setting back too much (tested with 1024), the chances to find matching snapshot peers seem to decrease. * Add accounts healing * Update variable/field naming in `worker_desc` for readability * Handle leaf nodes in accounts healing why: There is no need to fetch accounts when they had been added by the healing process. On the flip side, these accounts must be checked for storage data and the batch queue updated, accordingly. * Reorganising accounts hash ranges batch queue why: The aim is to formally cover as many accounts as possible for different pivot state root environments. Formerly, this was tried by starting the accounts batch queue at a random value for each pivot (and wrapping around.) Now, each pivot environment starts with an interval set mutually disjunct from any interval set retrieved with other pivot state roots. also: Stop fishing for more pivots in `worker` if 100% download is reached * Reorganise/update accounts healing why: Error handling was wrong and the (math. complexity of) whole process could be better managed. details: Much of the algorithm is now documented at the top of the file `heal_accounts.nim`
2022-10-08 17:20:50 +00:00
let dbBase = if persistent: SnapDbRef.init(db.cdb[1])
else: SnapDbRef.init(newMemoryDB())
desc = SnapDbAccountsRef.init(dbBase, root, peer)
# Load/accumulate data from several samples (needs some particular sort)
let
lowerBound = accountsList.mapIt(it.base).sortMerge
packed = PackedAccountRange(
accounts: accountsList.mapIt(it.data.accounts).sortMerge,
proof: accountsList.mapIt(it.data.proof).flatten)
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
# Merging intervals will produce gaps, so the result is expected OK but
# different from `OkImport`
check desc.importAccounts(lowerBound, packed, true).isOk
# check desc.merge(lowerBound, accounts) == OkHexDb
desc.assignPrettyKeys() # for debugging, make sure that state root ~ "$0"
# Update list of accounts. There might be additional accounts in the set
# of proof nodes, typically before the `lowerBound` of each block. As
# there is a list of account ranges (that were merged for testing), one
# need to check for additional records only on either end of a range.
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
var keySet = packed.accounts.mapIt(it.accKey).toHashSet
for w in accountsList:
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
var key = desc.prevAccountsChainDbKey(w.data.accounts[0].accKey)
while key.isOk and key.value notin keySet:
keySet.incl key.value
let newKey = desc.prevAccountsChainDbKey(key.value)
check newKey != key
key = newKey
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
key = desc.nextAccountsChainDbKey(w.data.accounts[^1].accKey)
while key.isOk and key.value notin keySet:
keySet.incl key.value
let newKey = desc.nextAccountsChainDbKey(key.value)
check newKey != key
key = newKey
accKeys = toSeq(keySet).mapIt(it.to(NodeTag)).sorted(cmp)
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
.mapIt(it.to(NodeKey))
check packed.accounts.len <= accKeys.len
test &"Revisiting {accKeys.len} items stored items on BaseChainDb":
var
nextAccount = accKeys[0]
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
prevAccount: NodeKey
count = 0
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
for accKey in accKeys:
count.inc
let
pfx = $count & "#"
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
byChainDB = desc.getAccountsChainDb(accKey)
byNextKey = desc.nextAccountsChainDbKey(accKey)
byPrevKey = desc.prevAccountsChainDbKey(accKey)
noisy.say "*** find",
"<", count, "> byChainDb=", byChainDB.pp
check byChainDB.isOk
# Check `next` traversal functionality. If `byNextKey.isOk` fails, the
# `nextAccount` value is still the old one and will be different from
# the account in the next for-loop cycle (if any.)
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
check pfx & accKey.pp(false) == pfx & nextAccount.pp(false)
if byNextKey.isOk:
nextAccount = byNextKey.value
else:
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
nextAccount = NodeKey.default
# Check `prev` traversal functionality
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
if prevAccount != NodeKey.default:
check byPrevKey.isOk
if byPrevKey.isOk:
check pfx & byPrevKey.value.pp(false) == pfx & prevAccount.pp(false)
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
prevAccount = accKey
# Hexary trie memory database dump. These are key value pairs for
# ::
# Branch: ($1,b(<$2,$3,..,$17>,))
# Extension: ($18,e(832b5e..06e697,$19))
# Leaf: ($20,l(cc9b5d..1c3b4,f84401..f9e5129d[#70]))
#
# where keys are typically represented as `$<id>` or `¶<id>` or `ø`
# depending on whether a key is final (`$<id>`), temporary (`¶<id>`)
# or unset/missing (`ø`).
#
# The node types are indicated by a letter after the first key before
# the round brackets
# ::
# Branch: 'b', 'þ', or 'B'
# Extension: 'e', '€', or 'E'
# Leaf: 'l', 'ł', or 'L'
#
# Here a small letter indicates a `Static` node which was from the
# original `proofs` list, a capital letter indicates a `Mutable` node
# added on the fly which might need some change, and the decorated
# letters stand for `Locked` nodes which are like `Static` ones but
# added later (typically these nodes are updated `Mutable` nodes.)
#
# Beware: dumping a large database is not recommended
#noisy.say "***", "database dump\n ", desc.dumpAccDB()
proc storagesRunner(
    noisy = true;
    persistent = true;
    sample = storSample;
    knownFailures: seq[(string,seq[(int,HexaryDbError)])] = @[]) =
  ## Test suite: import the accounts and the storage slots lists of `sample`
  ## into a snap database, then inspect the imported storage sub-tries.
  ##
  ## Arguments:
  ## * `noisy` -- not referenced by this runner.
  ## * `persistent` -- if set, database slot `db.cdb[0]` is used, otherwise
  ##   a fresh in-memory database. The flag is also passed on to the
  ##   import and inspection calls.
  ## * `sample` -- provides the accounts and storages dumps as well as the
  ##   name used for the database sub-directory.
  ## * `knownFailures` -- maps a test ID `<fileInfo>#<n>` (where `n` is the
  ##   index into the storages list) to the error list which importing
  ##   that list entry is expected to return. The first item of the first
  ##   error pair is used as the index of the storage entry whose trie
  ##   inspection is expected to fail with `TrieIsEmpty`.
  let
    peer = Peer.new
    accountsList = sample.to(seq[UndumpAccounts])
    storagesList = sample.to(seq[UndumpStorages])
    root = accountsList[0].root
    tmpDir = getTmpDir()
    db = if persistent: tmpDir.testDbs(sample.name, instances=1) else: testDbs()
    dbDir = db.dbDir.split($DirSep).lastTwo.join($DirSep)
    info = if db.persistent: &"persistent db on \"{dbDir}\""
           else: "in-memory db"
    fileInfo = sample.file.splitPath.tail.replace(".txt.gz","")

  # Clean up on exit: close the database handle (if any) and flush the
  # database directory.
  defer:
    if db.persistent:
      if not db.cdb[0].rocksStoreRef.isNil:
        db.cdb[0].rocksStoreRef.store.db.rocksdb_close
      tmpDir.flushDbDir(sample.name)

  suite &"SyncSnap: {fileInfo} accounts storage for {info}":
    let
      # NOTE(review): the backend is selected by the `persistent` argument
      # while the suite title and the clean up code rely on `db.persistent`.
      # Presumably both always agree -- verify against `testDbs()`.
      dbBase = if persistent: SnapDbRef.init(db.cdb[0])
               else: SnapDbRef.init(newMemoryDB())

      # Expected failures indexed by test ID, shared by the tests below
      ignore = knownFailures.toTable

    test &"Merging {accountsList.len} accounts for state root ..{root.pp}":
      for w in accountsList:
        let desc = SnapDbAccountsRef.init(dbBase, root, peer)
        check desc.importAccounts(w.base, w.data, persistent) == OkImport

    test &"Merging {storagesList.len} storages lists":
      let dbDesc = SnapDbStorageSlotsRef.init(
        dbBase, Hash256().to(NodeKey), Hash256(), peer)
      for n,w in storagesList:
        let
          testId = fileInfo & "#" & $n
          expRc = if testId in ignore:
                    Result[void,seq[(int,HexaryDbError)]].err(ignore[testId])
                  else:
                    OkStoDb
        check dbDesc.importStorageSlots(w.data, persistent).toStoDbRc == expRc

    test &"Inspecting {storagesList.len} imported storages lists sub-tries":
      for n,w in storagesList:
        let
          testId = fileInfo & "#" & $n
          # Single table lookup; an unknown test ID gives an empty list
          failures = ignore.getOrDefault(testId)
          # Index of the storage entry expected to be empty. An empty
          # error list is handled like "no failure expected" rather than
          # crashing on `failures[0]`.
          errInx = if 0 < failures.len: failures[0][0]
                   else: high(int)
        for m in 0 ..< w.data.storages.len:
          let
            accKey = w.data.storages[m].account.accKey
            stoRoot = w.data.storages[m].account.storageRoot
            dbDesc = SnapDbStorageSlotsRef.init(dbBase, accKey, stoRoot, peer)
            rc = dbDesc.inspectStorageSlotsTrie(persistent=persistent)
          if m == errInx:
            check rc == Result[TrieNodeStat,HexaryDbError].err(TrieIsEmpty)
          else:
            check rc.isOk # ok => level > 0 and not stopped
proc inspectionRunner(
noisy = true;
persistent = true;
cascaded = true;
sample: openArray[AccountsSample] = snapTestList) =
let
peer = Peer.new
inspectList = sample.mapIt(it.to(seq[UndumpAccounts]))
tmpDir = getTmpDir()
db = if persistent: tmpDir.testDbs(sample[0].name) else: testDbs()
dbDir = db.dbDir.split($DirSep).lastTwo.join($DirSep)
info = if db.persistent: &"persistent db on \"{dbDir}\""
else: "in-memory db"
fileInfo = "[" & sample[0].file.splitPath.tail.replace(".txt.gz","") & "..]"
defer:
if db.persistent:
for n in 0 ..< nTestDbInstances:
if db.cdb[n].rocksStoreRef.isNil:
break
db.cdb[n].rocksStoreRef.store.db.rocksdb_close
tmpDir.flushDbDir(sample[0].name)
suite &"SyncSnap: inspect {fileInfo} lists for {info} for healing":
let
Prep for full sync after snap (#1253) * Split fetch accounts into sub-modules details: There will be separated modules for accounts snapshot, storage snapshot, and healing for either. * Allow to rebase pivot before negotiated header why: Peers seem to have not too many snapshots available. By setting back the pivot block header slightly, the chances might be higher to find more peers to serve this pivot. Experiment on mainnet showed that setting back too much (tested with 1024), the chances to find matching snapshot peers seem to decrease. * Add accounts healing * Update variable/field naming in `worker_desc` for readability * Handle leaf nodes in accounts healing why: There is no need to fetch accounts when they had been added by the healing process. On the flip side, these accounts must be checked for storage data and the batch queue updated, accordingly. * Reorganising accounts hash ranges batch queue why: The aim is to formally cover as many accounts as possible for different pivot state root environments. Formerly, this was tried by starting the accounts batch queue at a random value for each pivot (and wrapping around.) Now, each pivot environment starts with an interval set mutually disjunct from any interval set retrieved with other pivot state roots. also: Stop fishing for more pivots in `worker` if 100% download is reached * Reorganise/update accounts healing why: Error handling was wrong and the (math. complexity of) whole process could be better managed. details: Much of the algorithm is now documented at the top of the file `heal_accounts.nim`
2022-10-08 17:20:50 +00:00
memBase = SnapDbRef.init(newMemoryDB())
memDesc = SnapDbAccountsRef.init(memBase, Hash256(), peer)
var
singleStats: seq[(int,TrieNodeStat)]
accuStats: seq[(int,TrieNodeStat)]
Prep for full sync after snap (#1253) * Split fetch accounts into sub-modules details: There will be separated modules for accounts snapshot, storage snapshot, and healing for either. * Allow to rebase pivot before negotiated header why: Peers seem to have not too many snapshots available. By setting back the pivot block header slightly, the chances might be higher to find more peers to serve this pivot. Experiment on mainnet showed that setting back too much (tested with 1024), the chances to find matching snapshot peers seem to decrease. * Add accounts healing * Update variable/field naming in `worker_desc` for readability * Handle leaf nodes in accounts healing why: There is no need to fetch accounts when they had been added by the healing process. On the flip side, these accounts must be checked for storage data and the batch queue updated, accordingly. * Reorganising accounts hash ranges batch queue why: The aim is to formally cover as many accounts as possible for different pivot state root environments. Formerly, this was tried by starting the accounts batch queue at a random value for each pivot (and wrapping around.) Now, each pivot environment starts with an interval set mutually disjunct from any interval set retrieved with other pivot state roots. also: Stop fishing for more pivots in `worker` if 100% download is reached * Reorganise/update accounts healing why: Error handling was wrong and the (math. complexity of) whole process could be better managed. details: Much of the algorithm is now documented at the top of the file `heal_accounts.nim`
2022-10-08 17:20:50 +00:00
perBase,altBase: SnapDbRef
perDesc,altDesc: SnapDbAccountsRef
if persistent:
Prep for full sync after snap (#1253) * Split fetch accounts into sub-modules details: There will be separated modules for accounts snapshot, storage snapshot, and healing for either. * Allow to rebase pivot before negotiated header why: Peers seem to have not too many snapshots available. By setting back the pivot block header slightly, the chances might be higher to find more peers to serve this pivot. Experiment on mainnet showed that setting back too much (tested with 1024), the chances to find matching snapshot peers seem to decrease. * Add accounts healing * Update variable/field naming in `worker_desc` for readability * Handle leaf nodes in accounts healing why: There is no need to fetch accounts when they had been added by the healing process. On the flip side, these accounts must be checked for storage data and the batch queue updated, accordingly. * Reorganising accounts hash ranges batch queue why: The aim is to formally cover as many accounts as possible for different pivot state root environments. Formerly, this was tried by starting the accounts batch queue at a random value for each pivot (and wrapping around.) Now, each pivot environment starts with an interval set mutually disjunct from any interval set retrieved with other pivot state roots. also: Stop fishing for more pivots in `worker` if 100% download is reached * Reorganise/update accounts healing why: Error handling was wrong and the (math. complexity of) whole process could be better managed. details: Much of the algorithm is now documented at the top of the file `heal_accounts.nim`
2022-10-08 17:20:50 +00:00
perBase = SnapDbRef.init(db.cdb[0])
perDesc = SnapDbAccountsRef.init(perBase, Hash256(), peer)
Prep for full sync after snap (#1253) * Split fetch accounts into sub-modules details: There will be separated modules for accounts snapshot, storage snapshot, and healing for either. * Allow to rebase pivot before negotiated header why: Peers seem to have not too many snapshots available. By setting back the pivot block header slightly, the chances might be higher to find more peers to serve this pivot. Experiment on mainnet showed that setting back too much (tested with 1024), the chances to find matching snapshot peers seem to decrease. * Add accounts healing * Update variable/field naming in `worker_desc` for readability * Handle leaf nodes in accounts healing why: There is no need to fetch accounts when they had been added by the healing process. On the flip side, these accounts must be checked for storage data and the batch queue updated, accordingly. * Reorganising accounts hash ranges batch queue why: The aim is to formally cover as many accounts as possible for different pivot state root environments. Formerly, this was tried by starting the accounts batch queue at a random value for each pivot (and wrapping around.) Now, each pivot environment starts with an interval set mutually disjunct from any interval set retrieved with other pivot state roots. also: Stop fishing for more pivots in `worker` if 100% download is reached * Reorganise/update accounts healing why: Error handling was wrong and the (math. complexity of) whole process could be better managed. details: Much of the algorithm is now documented at the top of the file `heal_accounts.nim`
2022-10-08 17:20:50 +00:00
altBase = SnapDbRef.init(db.cdb[1])
altDesc = SnapDbAccountsRef.init(altBase, Hash256(), peer)
test &"Fingerprinting {inspectList.len} single accounts lists " &
"for in-memory-db":
for n,accList in inspectList:
# Separate storage
let
root = accList[0].root
rootKey = root.to(NodeKey)
desc = SnapDbAccountsRef.init(memBase, root, peer)
for w in accList:
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
check desc.importAccounts(w.base, w.data, persistent=false)==OkImport
let rc = desc.inspectAccountsTrie(persistent=false)
check rc.isOk
let
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
dangling = rc.value.dangling.mapIt(it.partialPath)
keys = desc.hexaDb.hexaryInspectToKeys(
rootKey, dangling.toHashSet.toSeq)
check dangling.len == keys.len
singleStats.add (desc.hexaDb.tab.len,rc.value)
test &"Fingerprinting {inspectList.len} single accounts lists " &
"for persistent db":
if not persistent:
skip()
else:
for n,accList in inspectList:
if nTestDbInstances <= 2+n or db.cdb[2+n].rocksStoreRef.isNil:
continue
# Separate storage on persistent DB (db slots 0 and 1 are reserved)
let
root = accList[0].root
rootKey = root.to(NodeKey)
Prep for full sync after snap (#1253) * Split fetch accounts into sub-modules details: There will be separated modules for accounts snapshot, storage snapshot, and healing for either. * Allow to rebase pivot before negotiated header why: Peers seem to have not too many snapshots available. By setting back the pivot block header slightly, the chances might be higher to find more peers to serve this pivot. Experiment on mainnet showed that setting back too much (tested with 1024), the chances to find matching snapshot peers seem to decrease. * Add accounts healing * Update variable/field naming in `worker_desc` for readability * Handle leaf nodes in accounts healing why: There is no need to fetch accounts when they had been added by the healing process. On the flip side, these accounts must be checked for storage data and the batch queue updated, accordingly. * Reorganising accounts hash ranges batch queue why: The aim is to formally cover as many accounts as possible for different pivot state root environments. Formerly, this was tried by starting the accounts batch queue at a random value for each pivot (and wrapping around.) Now, each pivot environment starts with an interval set mutually disjunct from any interval set retrieved with other pivot state roots. also: Stop fishing for more pivots in `worker` if 100% download is reached * Reorganise/update accounts healing why: Error handling was wrong and the (math. complexity of) whole process could be better managed. details: Much of the algorithm is now documented at the top of the file `heal_accounts.nim`
2022-10-08 17:20:50 +00:00
dbBase = SnapDbRef.init(db.cdb[2+n])
desc = SnapDbAccountsRef.init(dbBase, root, peer)
for w in accList:
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
check desc.importAccounts(w.base, w.data, persistent) == OkImport
let rc = desc.inspectAccountsTrie(persistent=false)
check rc.isOk
let
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
dangling = rc.value.dangling.mapIt(it.partialPath)
keys = desc.hexaDb.hexaryInspectToKeys(
rootKey, dangling.toHashSet.toSeq)
check dangling.len == keys.len
# Must be the same as the in-memory fingerprint
check singleStats[n][1] == rc.value
test &"Fingerprinting {inspectList.len} accumulated accounts lists " &
"for in-memory-db":
for n,accList in inspectList:
# Accumulated storage
let
root = accList[0].root
rootKey = root.to(NodeKey)
desc = memDesc.dup(root,Peer())
for w in accList:
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
check desc.importAccounts(w.base, w.data, persistent=false)==OkImport
let rc = desc.inspectAccountsTrie(persistent=false)
check rc.isOk
let
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
dangling = rc.value.dangling.mapIt(it.partialPath)
keys = desc.hexaDb.hexaryInspectToKeys(
rootKey, dangling.toHashSet.toSeq)
check dangling.len == keys.len
accuStats.add (desc.hexaDb.tab.len,rc.value)
test &"Fingerprinting {inspectList.len} accumulated accounts lists " &
"for persistent db":
if not persistent:
skip()
else:
for n,accList in inspectList:
# Accumulated storage on persistent DB (using first db slot)
let
root = accList[0].root
rootKey = root.to(NodeKey)
rootSet = [rootKey].toHashSet
desc = perDesc.dup(root,Peer())
for w in accList:
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
check desc.importAccounts(w.base, w.data, persistent) == OkImport
let rc = desc.inspectAccountsTrie(persistent=false)
check rc.isOk
let
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
dangling = rc.value.dangling.mapIt(it.partialPath)
keys = desc.hexaDb.hexaryInspectToKeys(
rootKey, dangling.toHashSet.toSeq)
check dangling.len == keys.len
check accuStats[n][1] == rc.value
test &"Cascaded fingerprinting {inspectList.len} accumulated accounts " &
"lists for in-memory-db":
if not cascaded:
skip()
else:
let
Prep for full sync after snap (#1253) * Split fetch accounts into sub-modules details: There will be separated modules for accounts snapshot, storage snapshot, and healing for either. * Allow to rebase pivot before negotiated header why: Peers seem to have not too many snapshots available. By setting back the pivot block header slightly, the chances might be higher to find more peers to serve this pivot. Experiment on mainnet showed that setting back too much (tested with 1024), the chances to find matching snapshot peers seem to decrease. * Add accounts healing * Update variable/field naming in `worker_desc` for readability * Handle leaf nodes in accounts healing why: There is no need to fetch accounts when they had been added by the healing process. On the flip side, these accounts must be checked for storage data and the batch queue updated, accordingly. * Reorganising accounts hash ranges batch queue why: The aim is to formally cover as many accounts as possible for different pivot state root environments. Formerly, this was tried by starting the accounts batch queue at a random value for each pivot (and wrapping around.) Now, each pivot environment starts with an interval set mutually disjunct from any interval set retrieved with other pivot state roots. also: Stop fishing for more pivots in `worker` if 100% download is reached * Reorganise/update accounts healing why: Error handling was wrong and the (math. complexity of) whole process could be better managed. details: Much of the algorithm is now documented at the top of the file `heal_accounts.nim`
2022-10-08 17:20:50 +00:00
cscBase = SnapDbRef.init(newMemoryDB())
cscDesc = SnapDbAccountsRef.init(cscBase, Hash256(), peer)
var
cscStep: Table[NodeKey,(int,seq[Blob])]
for n,accList in inspectList:
# Accumulated storage
let
root = accList[0].root
rootKey = root.to(NodeKey)
desc = cscDesc.dup(root,Peer())
for w in accList:
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
check desc.importAccounts(w.base,w.data,persistent=false)==OkImport
if cscStep.hasKeyOrPut(rootKey,(1,seq[Blob].default)):
cscStep[rootKey][0].inc
let
r0 = desc.inspectAccountsTrie(persistent=false)
Prep for full sync after snap (#1253) * Split fetch accounts into sub-modules details: There will be separated modules for accounts snapshot, storage snapshot, and healing for either. * Allow to rebase pivot before negotiated header why: Peers seem to have not too many snapshots available. By setting back the pivot block header slightly, the chances might be higher to find more peers to serve this pivot. Experiment on mainnet showed that setting back too much (tested with 1024), the chances to find matching snapshot peers seem to decrease. * Add accounts healing * Update variable/field naming in `worker_desc` for readability * Handle leaf nodes in accounts healing why: There is no need to fetch accounts when they had been added by the healing process. On the flip side, these accounts must be checked for storage data and the batch queue updated, accordingly. * Reorganising accounts hash ranges batch queue why: The aim is to formally cover as many accounts as possible for different pivot state root environments. Formerly, this was tried by starting the accounts batch queue at a random value for each pivot (and wrapping around.) Now, each pivot environment starts with an interval set mutually disjunct from any interval set retrieved with other pivot state roots. also: Stop fishing for more pivots in `worker` if 100% download is reached * Reorganise/update accounts healing why: Error handling was wrong and the (math. complexity of) whole process could be better managed. details: Much of the algorithm is now documented at the top of the file `heal_accounts.nim`
2022-10-08 17:20:50 +00:00
rc = desc.inspectAccountsTrie(cscStep[rootKey][1],persistent=false)
check rc.isOk
let
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
accumulated = r0.value.dangling.mapIt(it.partialPath).toHashSet
cascaded = rc.value.dangling.mapIt(it.partialPath).toHashSet
check accumulated == cascaded
# Make sure that there are no trivial cases
let trivialCases = toSeq(cscStep.values).filterIt(it[0] <= 1).len
check trivialCases == 0
test &"Cascaded fingerprinting {inspectList.len} accumulated accounts " &
"for persistent db":
if not cascaded or not persistent:
skip()
else:
let
cscBase = altBase
cscDesc = altDesc
var
cscStep: Table[NodeKey,(int,seq[Blob])]
for n,accList in inspectList:
# Accumulated storage
let
root = accList[0].root
rootKey = root.to(NodeKey)
desc = cscDesc.dup(root,Peer())
for w in accList:
Prep for full sync after snap make 6 (#1291) * Update log ticker, using time interval rather than ticker count why: Counting and logging ticker occurrences is inherently imprecise. So time intervals are used. * Use separate storage tables for snap sync data * Left boundary proof update why: Was not properly implemented, yet. * Capture pivot in peer worker (aka buddy) tasks why: The pivot environment is linked to the `buddy` descriptor. While there is a task switch, the pivot may change. So it is passed on as function argument `env` rather than retrieved from the buddy at the start of a sub-function. * Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart` * Remove obsolete account range returned from `GetAccountRange` message why: Handler returned the wrong right value of the range. This range was for convenience, only. * Prioritise storage slots if the queue becomes large why: Currently, accounts processing is prioritised up until all accounts are downloaded. The new prioritisation has two thresholds for + start processing storage slots with a new worker + stop account processing and switch to storage processing also: Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim` * Generalise left boundary proof for accounts or storage slots. why: Detailed explanation how this works is documented with `snapdb_accounts.importAccounts()`. Instead of enforcing a left boundary proof (which is still the default), the importer functions return a list of `holes` (aka node paths) found in the argument ranges of leaf nodes. This in turn is used by the book keeping software for data download. * Forgot to pass on variable in function wrapper also: + Start healing not before 99% accounts covered (previously 95%) + Logging updated/prettified
2022-11-08 18:56:04 +00:00
check desc.importAccounts(w.base,w.data,persistent) == OkImport
if cscStep.hasKeyOrPut(rootKey,(1,seq[Blob].default)):
cscStep[rootKey][0].inc
let
r0 = desc.inspectAccountsTrie(persistent=true)
Prep for full sync after snap (#1253) * Split fetch accounts into sub-modules details: There will be separated modules for accounts snapshot, storage snapshot, and healing for either. * Allow to rebase pivot before negotiated header why: Peers seem to have not too many snapshots available. By setting back the pivot block header slightly, the chances might be higher to find more peers to serve this pivot. Experiment on mainnet showed that setting back too much (tested with 1024), the chances to find matching snapshot peers seem to decrease. * Add accounts healing * Update variable/field naming in `worker_desc` for readability * Handle leaf nodes in accounts healing why: There is no need to fetch accounts when they had been added by the healing process. On the flip side, these accounts must be checked for storage data and the batch queue updated, accordingly. * Reorganising accounts hash ranges batch queue why: The aim is to formally cover as many accounts as possible for different pivot state root environments. Formerly, this was tried by starting the accounts batch queue at a random value for each pivot (and wrapping around.) Now, each pivot environment starts with an interval set mutually disjunct from any interval set retrieved with other pivot state roots. also: Stop fishing for more pivots in `worker` if 100% download is reached * Reorganise/update accounts healing why: Error handling was wrong and the (math. complexity of) whole process could be better managed. details: Much of the algorithm is now documented at the top of the file `heal_accounts.nim`
2022-10-08 17:20:50 +00:00
rc = desc.inspectAccountsTrie(cscStep[rootKey][1],persistent=true)
check rc.isOk
let
Prep for full sync after snap make 4 (#1282) * Re-arrange fetching storage slots in batch module why; Previously, fetching partial slot ranges first has a chance of terminating the worker peer 9due to network error) while there were many inheritable storage slots on the queue. Now, inheritance is checked first, then full slot ranges and finally partial ranges. * Update logging * Bundled node information for healing into single object `NodeSpecs` why: Previously, partial paths and node keys were kept in separate variables. This approach was error prone due to copying/reassembling function argument objects. As all partial paths, keys, and node data types are more or less handled as `Blob`s over the network (using Eth/6x, or Snap/1) it makes sense to hold these `Blob`s as named field in a single object (even if not all fields are active for the current purpose.) * For good housekeeping, using `NodeKey` type only for account keys why: previously, a mixture of `NodeKey` and `Hash256` was used. Now, only state or storage root keys use the `Hash256` type. * Always accept latest pivot (and not a slightly older one) why; For testing it was tried to use a slightly older pivot state root than available. Some anecdotal tests seemed to suggest an advantage so that more peers are willing to serve on that older pivot. But this could not be confirmed in subsequent tests (still anecdotal, though.) As a side note, the distance of the latest pivot to its predecessor is at least 128 (or whatever the constant `minPivotBlockDistance` is assigned to.) * Reshuffle name components for some file and function names why: Clarifies purpose: "storages" becomes: "storage slots" "store" becomes: "range fetch" * Stash away currently unused modules in sub-folder named "notused"
2022-10-27 13:49:28 +00:00
accumulated = r0.value.dangling.mapIt(it.partialPath).toHashSet
cascaded = rc.value.dangling.mapIt(it.partialPath).toHashSet
check accumulated == cascaded
# Make sure that there are no trivial cases
let trivialCases = toSeq(cscStep.values).filterIt(it[0] <= 1).len
check trivialCases == 0
# ------------------------------------------------------------------------------
# Test Runners: database timing tests
# ------------------------------------------------------------------------------
proc importRunner(noisy = true; persistent = true; capture = bChainCapture) =
  ## Database timing tests, import part: replays the block dump named by
  ## `capture` into a freshly created chain database and then extracts the
  ## stored key-value records into the module level tables `xTab32` and
  ## `xTab33` (keyed by 32 and 33 byte keys, respectively.)
  ##
  ## * `noisy` enables progress and statistics messages.
  ## * `persistent` selects the test databases created below the temporary
  ##   directory; otherwise `testDbs()` is called without a directory and
  ##   the chain database is backed by `newMemoryDB()`.
  ## * `capture` provides the dump file name, the network id and the
  ##   maximal block number to import.
  ##
  ## Side effects: `xTab32`, `xTab33` and the value length accumulators
  ## `xVal32Sum`, `xVal32SqSum`, `xVal33Sum`, `xVal33SqSum` are rebuilt from
  ## scratch by the extraction test.
  let
    fileInfo = capture.file.splitFile.name.split(".")[0]
    filePath = capture.file.findFilePath(baseDir,repoDir).value
    tmpDir = getTmpDir()
    db = if persistent: tmpDir.testDbs(capture.name) else: testDbs()
    numBlocksInfo = if capture.numBlocks == high(int): ""
                    else: $capture.numBlocks & " "
    loadNoise = noisy

  # Remove the database directory again when leaving this function
  defer:
    if db.persistent:
      tmpDir.flushDbDir(capture.name)

  suite &"SyncSnap: using {fileInfo} capture for testing db timings":
    var
      ddb: BaseChainDB         # persistent DB on disk
      chn: Chain

    test &"Create persistent BaseChainDB on {tmpDir}":
      let chainDb = if db.persistent: db.cdb[0].trieDB
                    else: newMemoryDB()

      # Constructor ...
      ddb = newBaseChainDB(
        chainDb,
        id = capture.network,
        pruneTrie = true,
        params = capture.network.networkParams)

      ddb.initializeEmptyDb
      chn = ddb.newChain

    test &"Storing {numBlocksInfo}persistent blocks from dump":
      for w in filePath.undumpNextGroup:
        let (fromBlock, toBlock) = (w[0][0].blockNumber, w[0][^1].blockNumber)
        if fromBlock == 0.u256:
          # The genesis block is not imported, only compared
          doAssert w[0][0] == ddb.getBlockHeader(0.u256)
          continue
        # Message if [fromBlock,toBlock] contains a multiple of 900, i.e. if
        # the largest multiple of 900 not exceeding `toBlock` is not smaller
        # than `fromBlock`
        if fromBlock + (toBlock mod 900) <= toBlock:
          loadNoise.say "***", &"processing ...[#{fromBlock},#{toBlock}]..."
        check chn.persistBlocks(w[0], w[1]).isOk
        # Stop when the requested number of blocks has been reached
        if capture.numBlocks.toBlockNumber <= w[0][^1].blockNumber:
          break

    test "Extract key-value records into memory tables via rocksdb iterator":
      # Implicit test: if not persistent => db.cdb[0] is nil
      if db.cdb[0].rocksStoreRef.isNil:
        skip()
      else:
        let
          rdb = db.cdb[0].rocksStoreRef
          rop = rdb.store.readOptions
          rit = rdb.store.db.rocksdb_create_iterator(rop)
        check not rit.isNil

        # Start over with empty tables *and* zeroed accumulators. Otherwise
        # a repeated run would report statistics that still include the
        # value lengths collected by the previous run.
        xTab32.clear
        xTab33.clear
        xVal32Sum = 0.0
        xVal32SqSum = 0.0
        xVal33Sum = 0.0
        xVal33SqSum = 0.0

        rit.rocksdb_iter_seek_to_first()
        while rit.rocksdb_iter_valid() != 0:
          let (key,val) = rit.thisRecord()
          rit.rocksdb_iter_next()
          if key.len == 32:
            xTab32[key.to(ByteArray32)] = val
            xVal32Sum += val.len.float
            xVal32SqSum += val.len.float * val.len.float
            # Verify that the key conversion is reversible
            check key.to(ByteArray32).to(Blob) == key
          elif key.len == 33:
            xTab33[key.to(ByteArray33)] = val
            xVal33Sum += val.len.float
            xVal33SqSum += val.len.float * val.len.float
            # Verify that the key conversion is reversible
            check key.to(ByteArray33).to(Blob) == key
          else:
            noisy.say "***", "ignoring key=", key.toHex

        rit.rocksdb_iter_destroy()

        # Mean and standard deviation of the value lengths, rounded to the
        # nearest integer for printing
        let
          (mean32, stdv32) = meanStdDev(xVal32Sum, xVal32SqSum, xTab32.len)
          (mean33, stdv33) = meanStdDev(xVal33Sum, xVal33SqSum, xTab33.len)
        noisy.say "***",
          "key 32 table: ",
          &"size={xTab32.len} valLen={(mean32+0.5).int}({(stdv32+0.5).int})",
          ", key 33 table: ",
          &"size={xTab33.len} valLen={(mean33+0.5).int}({(stdv33+0.5).int})"
proc storeRunner(noisy = true; persistent = true; cleanUp = true) =
  ## Database timing tests, storage part: stores the records from the module
  ## level tables `xTab32` and `xTab33` into the database slots of `xDbs`
  ## using different strategies (direct, transaction based, rocksdb bulk
  ## load) and prints the time needed per record.
  ##
  ## * `noisy` enables the timing messages.
  ## * `persistent` is only consulted if `xDbs` needs to be set up; it then
  ##   selects `xTmpDir.testDbs("store-runner")` over `testDbs()`.
  ## * `cleanUp` requests closing and removing persistent databases and
  ##   resetting `xDbs` when this function is left. Running with
  ##   `cleanUp = false` leaves `xDbs` populated so that the next run works
  ##   on "pre-loaded" databases.
  ##
  ## All tests are skipped unless both tables have been filled beforehand,
  ## which is what the extraction test of `importRunner()` does.
  let
    fullNoise = false
  var
    emptyDb = "empty"

  # Allows to repeat storing on existing data
  if not xDbs.cdb[0].isNil:
    emptyDb = "pre-loaded"
  elif persistent:
    xTmpDir = getTmpDir()
    xDbs = xTmpDir.testDbs("store-runner")
  else:
    xDbs = testDbs()

  defer:
    if xDbs.persistent and cleanUp:
      for n in 0 ..< nTestDbInstances:
        if xDbs.cdb[n].rocksStoreRef.isNil:
          break
        xDbs.cdb[n].rocksStoreRef.store.db.rocksdb_close
      xTmpDir.flushDbDir("store-runner")
      xDbs.reset

  suite &"SyncSnap: storage tests on {emptyDb} databases":
    #
    # `xDbs` instance slots layout:
    #
    # * cdb[0] -- direct db, key length 32, no transaction
    # * cdb[1] -- direct db, key length 32 as 33, no transaction
    #
    # * cdb[2] -- direct db, key length 32, transaction based
    # * cdb[3] -- direct db, key length 32 as 33, transaction based
    #
    # * cdb[4] -- direct db, key length 33, no transaction
    # * cdb[5] -- direct db, key length 33, transaction based
    #
    # * cdb[6] -- rocksdb, key length 32
    # * cdb[7] -- rocksdb, key length 32 as 33
    # * cdb[8] -- rocksdb, key length 33
    #
    doAssert 9 <= nTestDbInstances
    doAssert not xDbs.cdb[8].isNil

    if xTab32.len == 0 or xTab33.len == 0:
      test &"Both tables with 32 byte keys(size={xTab32.len}), " &
          &"33 byte keys(size={xTab33.len}) must be non-empty":
        skip()
    else:
      # cdb[0] -- direct db, key length 32, no transaction
      test &"Directly store {xTab32.len} records " &
          &"(key length 32) into {emptyDb} trie database":
        var ela: Duration
        let tdb = xDbs.cdb[0].trieDB

        if noisy: echo ""
        noisy.showElapsed("Standard db loader(keyLen 32)", ela):
          for (key,val) in xTab32.pairs:
            tdb.put(key, val)

        if ela.inNanoseconds != 0:
          let
            elaNs = ela.inNanoseconds.float
            perRec = ((elaNs / xTab32.len.float) + 0.5).int.initDuration
          noisy.say "***",
            "nRecords=", xTab32.len, ", ",
            "perRecord=", perRec.pp

      # cdb[1] -- direct db, key length 32 as 33, no transaction
      test &"Directly store {xTab32.len} records " &
          &"(key length 33) into {emptyDb} trie database":
        var ela: Duration
        let tdb = xDbs.cdb[1].trieDB

        if noisy: echo ""
        noisy.showElapsed("Standard db loader(keyLen 32 as 33)", ela):
          for (key,val) in xTab32.pairs:
            # Extend the 32 byte key by a constant prefix byte
            tdb.put(@[99.byte] & key.toSeq, val)

        if ela.inNanoseconds != 0:
          let
            elaNs = ela.inNanoseconds.float
            perRec = ((elaNs / xTab32.len.float) + 0.5).int.initDuration
          noisy.say "***",
            "nRecords=", xTab32.len, ", ",
            "perRecord=", perRec.pp

      # cdb[2] -- direct db, key length 32, transaction based
      test &"Transactionally store {xTab32.len} records " &
          &"(key length 32) into {emptyDb} trie database":
        var ela: Duration
        let tdb = xDbs.cdb[2].trieDB

        if noisy: echo ""
        noisy.showElapsed("Standard db loader(tx,keyLen 32)", ela):
          let dbTx = tdb.beginTransaction
          defer: dbTx.commit

          for (key,val) in xTab32.pairs:
            tdb.put(key, val)

        if ela.inNanoseconds != 0:
          let
            elaNs = ela.inNanoseconds.float
            perRec = ((elaNs / xTab32.len.float) + 0.5).int.initDuration
          noisy.say "***",
            "nRecords=", xTab32.len, ", ",
            "perRecord=", perRec.pp

      # cdb[3] -- direct db, key length 32 as 33, transaction based
      test &"Transactionally store {xTab32.len} records " &
          &"(key length 33) into {emptyDb} trie database":
        var ela: Duration
        let tdb = xDbs.cdb[3].trieDB

        if noisy: echo ""
        noisy.showElapsed("Standard db loader(tx,keyLen 32 as 33)", ela):
          let dbTx = tdb.beginTransaction
          defer: dbTx.commit

          for (key,val) in xTab32.pairs:
            # Extend the 32 byte key by a constant prefix byte
            tdb.put(@[99.byte] & key.toSeq, val)

        if ela.inNanoseconds != 0:
          let
            elaNs = ela.inNanoseconds.float
            perRec = ((elaNs / xTab32.len.float) + 0.5).int.initDuration
          noisy.say "***",
            "nRecords=", xTab32.len, ", ",
            "perRecord=", perRec.pp

      # cdb[4] -- direct db, key length 33, no transaction
      test &"Directly store {xTab33.len} records " &
          &"(key length 33) into {emptyDb} trie database":
        var ela: Duration
        let tdb = xDbs.cdb[4].trieDB

        if noisy: echo ""
        noisy.showElapsed("Standard db loader(keyLen 33)", ela):
          for (key,val) in xTab33.pairs:
            tdb.put(key, val)

        if ela.inNanoseconds != 0:
          let
            elaNs = ela.inNanoseconds.float
            perRec = ((elaNs / xTab33.len.float) + 0.5).int.initDuration
          noisy.say "***",
            "nRecords=", xTab33.len, ", ",
            "perRecord=", perRec.pp

      # cdb[5] -- direct db, key length 33, transaction based
      test &"Transactionally store {xTab33.len} records " &
          &"(key length 33) into {emptyDb} trie database":
        var ela: Duration
        let tdb = xDbs.cdb[5].trieDB

        if noisy: echo ""
        noisy.showElapsed("Standard db loader(tx,keyLen 33)", ela):
          let dbTx = tdb.beginTransaction
          defer: dbTx.commit

          for (key,val) in xTab33.pairs:
            tdb.put(key, val)

        if ela.inNanoseconds != 0:
          let
            elaNs = ela.inNanoseconds.float
            perRec = ((elaNs / xTab33.len.float) + 0.5).int.initDuration
          noisy.say "***",
            "nRecords=", xTab33.len, ", ",
            "perRecord=", perRec.pp

      if xDbs.cdb[0].rocksStoreRef.isNil:
        test "The rocksdb interface must be available": skip()
      else:
        # cdb[6] -- rocksdb, key length 32
        test &"Store {xTab32.len} records " &
            "(key length 32) into empty rocksdb table":
          var
            ela: array[4,Duration]
            size: int64
          let
            rdb = xDbs.cdb[6].rocksStoreRef

          # Note that 32 and 33 size keys cannot be usefully merged into the
          # same SST file. The keys must be added in a sorted mode. So playing
          # safe, key sizes should be of equal length.

          if noisy: echo ""
          # `ela[0]` covers the whole bulk load, `ela[1..3]` its sub-steps
          noisy.showElapsed("Rocky bulk loader(keyLen 32)", ela[0]):
            let bulker = RockyBulkLoadRef.init(rdb)
            defer: bulker.destroy()
            check bulker.begin("rocky-bulk-cache")

            var
              keyList = newSeq[NodeTag](xTab32.len)

            fullNoise.showElapsed("Rocky bulk loader/32, sorter", ela[1]):
              var inx = 0
              for key in xTab32.keys:
                keyList[inx] = key.to(NodeTag)
                inx.inc
              keyList.sort(cmp)

            fullNoise.showElapsed("Rocky bulk loader/32, append", ela[2]):
              for nodeTag in keyList:
                let key = nodeTag.to(Blob)
                check bulker.add(key, xTab32[key.to(ByteArray32)])

            fullNoise.showElapsed("Rocky bulk loader/32, slurp", ela[3]):
              let rc = bulker.finish()
              if rc.isOk:
                size = rc.value
              else:
                check bulker.lastError == "" # force printing error

          fullNoise.say "***", " ela[]=", $ela.toSeq.mapIt(it.pp)
          if ela[0].inNanoseconds != 0:
            let
              elaNs = ela.toSeq.mapIt(it.inNanoseconds.float)
              # Sub-step timings as percentage of the total `elaNs[0]`
              elaPc = elaNs.mapIt(((it / elaNs[0]) * 100 + 0.5).int)
              perRec = ((elaNs[0] / xTab32.len.float) + 0.5).int.initDuration
            noisy.say "***",
              "nRecords=", xTab32.len, ", ",
              "perRecord=", perRec.pp, ", ",
              "sstSize=", size.uint64.toSI, ", ",
              "perRecord=", ((size.float / xTab32.len.float) + 0.5).int, ", ",
              ["Total","Sorter","Append","Ingest"].zip(elaPc).ppKvPc

        # cdb[7] -- rocksdb, key length 32 as 33
        test &"Store {xTab32.len} records " &
            "(key length 33) into empty rocksdb table":
          var
            ela: array[4,Duration]
            size: int64
          let
            rdb = xDbs.cdb[7].rocksStoreRef

          # Note that 32 and 33 size keys cannot be usefully merged into the
          # same SST file. The keys must be added in a sorted mode. So playing
          # safe, key sizes should be of equal length.

          if noisy: echo ""
          # `ela[0]` covers the whole bulk load, `ela[1..3]` its sub-steps
          noisy.showElapsed("Rocky bulk loader(keyLen 32 as 33)", ela[0]):
            let bulker = RockyBulkLoadRef.init(rdb)
            defer: bulker.destroy()
            check bulker.begin("rocky-bulk-cache")

            var
              keyList = newSeq[NodeTag](xTab32.len)

            fullNoise.showElapsed("Rocky bulk loader/32 as 33, sorter", ela[1]):
              var inx = 0
              for key in xTab32.keys:
                keyList[inx] = key.to(NodeTag)
                inx.inc
              keyList.sort(cmp)

            fullNoise.showElapsed("Rocky bulk loader/32 as 33, append", ela[2]):
              for nodeTag in keyList:
                let key = nodeTag.to(Blob)
                # Extend the 32 byte key by a constant prefix byte
                check bulker.add(@[99.byte] & key, xTab32[key.to(ByteArray32)])

            fullNoise.showElapsed("Rocky bulk loader/32 as 33, slurp", ela[3]):
              let rc = bulker.finish()
              if rc.isOk:
                size = rc.value
              else:
                check bulker.lastError == "" # force printing error

          fullNoise.say "***", " ela[]=", $ela.toSeq.mapIt(it.pp)
          if ela[0].inNanoseconds != 0:
            let
              elaNs = ela.toSeq.mapIt(it.inNanoseconds.float)
              # Sub-step timings as percentage of the total `elaNs[0]`
              elaPc = elaNs.mapIt(((it / elaNs[0]) * 100 + 0.5).int)
              perRec = ((elaNs[0] / xTab32.len.float) + 0.5).int.initDuration
            noisy.say "***",
              "nRecords=", xTab32.len, ", ",
              "perRecord=", perRec.pp, ", ",
              "sstSize=", size.uint64.toSI, ", ",
              "perRecord=", ((size.float / xTab32.len.float) + 0.5).int, ", ",
              ["Total","Sorter","Append","Ingest"].zip(elaPc).ppKvPc

        # cdb[8] -- rocksdb, key length 33
        test &"Store {xTab33.len} records " &
            &"(key length 33) into {emptyDb} rocksdb table":
          var
            ela: array[4,Duration]
            size: int64
          let rdb = xDbs.cdb[8].rocksStoreRef

          # Note that 32 and 33 size keys cannot be usefully merged into the
          # same SST file. The keys must be added in a sorted mode. So playing
          # safe, key sizes should be of equal length.

          if noisy: echo ""
          # `ela[0]` covers the whole bulk load, `ela[1..3]` its sub-steps
          noisy.showElapsed("Rocky bulk loader(keyLen 33)", ela[0]):
            let bulker = RockyBulkLoadRef.init(rdb)
            defer: bulker.destroy()
            check bulker.begin("rocky-bulk-cache")

            var
              kKeys: seq[byte] # need to cascade
              kTab: Table[byte,seq[NodeTag]]

            fullNoise.showElapsed("Rocky bulk loader/33, sorter", ela[1]):
              # Group the keys by their first byte, then sort the prefix
              # bytes and the remaining 32 byte tags of each group
              for key in xTab33.keys:
                if kTab.hasKey(key[0]):
                  kTab[key[0]].add key.toOpenArray(1,32).to(NodeTag)
                else:
                  kTab[key[0]] = @[key.toOpenArray(1,32).to(NodeTag)]

              kKeys = toSeq(kTab.keys).sorted
              for w in kKeys:
                kTab[w].sort(cmp)

            fullNoise.showElapsed("Rocky bulk loader/33, append", ela[2]):
              for w in kKeys:
                fullNoise.say "***", " prefix=", w, " entries=", kTab[w].len
                for nodeTag in kTab[w]:
                  let key = (w,nodeTag).to(Blob)
                  check bulker.add(key, xTab33[key.to(ByteArray33)])

            fullNoise.showElapsed("Rocky bulk loader/33, slurp", ela[3]):
              let rc = bulker.finish()
              if rc.isOk:
                size = rc.value
              else:
                check bulker.lastError == "" # force printing error

          fullNoise.say "***", " ela[]=", $ela.toSeq.mapIt(it.pp)
          if ela[0].inNanoseconds != 0:
            let
              elaNs = ela.toSeq.mapIt(it.inNanoseconds.float)
              # Sub-step timings as percentage of the total `elaNs[0]`
              elaPc = elaNs.mapIt(((it / elaNs[0]) * 100 + 0.5).int)
              perRec = ((elaNs[0] / xTab33.len.float) + 0.5).int.initDuration
            noisy.say "***",
              "nRecords=", xTab33.len, ", ",
              "perRecord=", perRec.pp, ", ",
              "sstSize=", size.uint64.toSI, ", ",
              "perRecord=", ((size.float / xTab33.len.float) + 0.5).int, ", ",
              ["Total","Cascaded-Sorter","Append","Ingest"].zip(elaPc).ppKvPc
# ------------------------------------------------------------------------------
# Main function(s)
# ------------------------------------------------------------------------------
proc syncSnapMain*(noisy = defined(debug)) =
  ## Exported entry point: runs the accounts, block import, inspection and
  ## storage timing runners one after another, each with its default sample
  ## data and with `noisy` controlling the amount of messages printed.
  accountsRunner(noisy, persistent = true)
  #accountsRunner(noisy, persistent = false) # problems unless running stand-alone
  importRunner(noisy) # small sample, just verify functionality
  inspectionRunner(noisy)
  storeRunner(noisy)
when isMainModule:
  const
    noisy = defined(debug) or true

  #setTraceLevel()
  setErrorLevel()

  # The `accountsRunner()` tests a snap sync functionality for storing chain
  # data directly rather than derive them by executing the EVM. Here, only
  # accounts are considered.
  #
  # The `snap/1` protocol allows to fetch data for a certain account range. The
  # following boundary conditions apply to the received data:
  #
  # * `State root`: All data are relative to the same state root.
  #
  # * `Accounts`: There is an accounts interval sorted in strictly increasing
  #   order. The accounts are required consecutive, i.e. without holes in
  #   between although this cannot be verified immediately.
  #
  # * `Lower bound`: There is a start value which might be lower than the first
  #   account hash. There must be no other account between this start value and
  #   the first account (not verifiable yet.) For all practical purposes, this
  #   value is mostly ignored but carried through.
  #
  # * `Proof`: There is a list of hexary nodes which allow to build a partial
  #   Patricia-Merkle trie starting at the state root with all the account
  #   leaves. There are enough nodes that show that there is no account before
  #   the least account (which is currently ignored.)
  #
  # There are test data samples on the sub-directory `test_sync_snap`. These
  # are complete replies for some (admittedly small) test requests from a `kiln`
  # session.
  #
  # The `accountsRunner()` does three tests:
  #
  # 1. Run the `importAccounts()` function which is the all-in-one production
  #    function processing the data described above. The test applies it
  #    sequentially to about 20 data sets.
  #
  # 2. Test individual functional items which are hidden in test 1. while
  #    merging the sample data.
  #    * Load/accumulate `proofs` data from several samples
  #    * Load/accumulate accounts (needs some unique sorting)
  #    * Build/complete hexary trie for accounts
  #    * Save/bulk-store hexary trie on disk. If rocksdb is available, data
  #      are bulk stored via sst.
  #
  # 3. Traverse trie nodes stored earlier. The accounts from test 2 are
  #    re-visited using the account hash as access path.
  #

  # This one uses dumps from the external `nimbus-eth1-blobs` repo
  # (disabled by the `and false` clause)
  when true and false:
    import ./test_sync_snap/snap_other_xx
    noisy.showElapsed("accountsRunner()"):
      for n,sam in snapOtherList:
        false.accountsRunner(persistent=true, sam)
    noisy.showElapsed("inspectRunner()"):
      for n,sam in snapOtherHealingList:
        false.inspectionRunner(persistent=true, cascaded=false, sam)

  # This one uses dumps from the external `nimbus-eth1-blobs` repo
  # (disabled by the `and false` clause)
  when true and false:
    import ./test_sync_snap/snap_storage_xx
    let knownFailures = @[
      ("storages3__18__25_dump#11", @[( 233, RightBoundaryProofFailed)]),
      ("storages4__26__33_dump#11", @[(1193, RightBoundaryProofFailed)]),
      ("storages5__34__41_dump#10", @[( 508, RootNodeMismatch)]),
      ("storagesB__84__92_dump#6",  @[( 325, RightBoundaryProofFailed)]),
      ("storagesD_102_109_dump#17", @[(1102, RightBoundaryProofFailed)]),
    ]
    noisy.showElapsed("storageRunner()"):
      for n,sam in snapStorageList:
        false.storagesRunner(persistent=true, sam, knownFailures)

  # This one uses readily available dumps
  when true: # and false:
    false.inspectionRunner()
    for sam in snapTestList:
      false.accountsRunner(persistent=true, sam)
    for sam in snapTestStorageList:
      false.accountsRunner(persistent=true, sam)
      false.storagesRunner(persistent=true, sam)

  # This one uses readily available dumps
  # (disabled by the `and false` clause)
  when true and false:
    # ---- database storage timings -------

    noisy.showElapsed("importRunner()"):
      noisy.importRunner(capture = bulkTest0)

    # First run keeps the databases so that the second one stores into
    # pre-loaded databases
    noisy.showElapsed("storeRunner()"):
      true.storeRunner(cleanUp = false)
      true.storeRunner()
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------