diff --git a/nimbus/sync/handlers/snap.nim b/nimbus/sync/handlers/snap.nim index 66bd7b24b..c364f6cb7 100644 --- a/nimbus/sync/handlers/snap.nim +++ b/nimbus/sync/handlers/snap.nim @@ -26,6 +26,14 @@ type chain: ChainRef peerPool: PeerPool +const + transportAccountSizeMax = 110 + ## Account record with `high(UInt256)` hashes and balance, and maximal + ## nonce within RLP list + + transportProofNodeSizeMax = 536 + ## Branch node with all branches `high(UInt256)` within RLP list + # ------------------------------------------------------------------------------ # Private functions: helper functions # ------------------------------------------------------------------------------ @@ -73,6 +81,45 @@ proc init*( #ctx.setupPeerObserver() ctx +# ------------------------------------------------------------------------------ +# Public functions: helpers +# ------------------------------------------------------------------------------ + +proc accountRangeSize*(n: int): int = + ## Max number of bytes needed to store `n` RLP encoded `Account()` type + ## entries. Note that this is an *approximate* upper bound. + ## + ## The maximum size of a single RLP encoded account item can be determined + ## by setting every field of `Account()` to `high()` or `0xff`. + ## + ## Note: Public function subject to unit tests + # Experimentally derived, see `test_calc` unit test module + if 595 < n: + 4 + n * transportAccountSizeMax + elif 2 < n: + 3 + n * transportAccountSizeMax + elif 0 < n: + 2 + n * transportAccountSizeMax + else: + 1 + +proc proofNodesSize*(n: int): int = + ## Ditto for proof nodes + ## + ## Note: Public function subject to unit tests + # Experimentally derived, see `test_calc` unit test module + if 125 < n: + 4 + n * transportProofNodeSizeMax + elif 0 < n: + 3 + n * transportProofNodeSizeMax + else: + 1 + +proc accountRangeNumEntries*(size: int): int = + ## Number of entries with size guaranteed to not exceed the argument `size`. 
+ if transportAccountSizeMax + 3 <= size: + result = (size - 3) div transportAccountSizeMax + # ------------------------------------------------------------------------------ # Public functions: snap wire protocol handlers # ------------------------------------------------------------------------------ diff --git a/nimbus/sync/snap/worker/db/hexary_range.nim b/nimbus/sync/snap/worker/db/hexary_range.nim index 4f2b7a66d..17e9a27a7 100644 --- a/nimbus/sync/snap/worker/db/hexary_range.nim +++ b/nimbus/sync/snap/worker/db/hexary_range.nim @@ -40,9 +40,9 @@ proc convertTo(key: RepairKey; T: type NodeKey): T = # ------------------------------------------------------------------------------ template collectLeafs( - iv: NodeTagRange; # Proofed range of leaf paths - rootKey: NodeKey|RepairKey; # State root db: HexaryGetFn|HexaryTreeDbRef; # Database abstraction + rootKey: NodeKey|RepairKey; # State root + iv: NodeTagRange; # Proofed range of leaf paths nLeafs: int; # Implies maximal data size ): auto = ## Collect trie database leafs prototype. This directive is provided as @@ -90,12 +90,12 @@ template collectLeafs( template updateProof( + db: HexaryGetFn|HexaryTreeDbRef; # Database abstraction + rootKey: NodeKey|RepairKey; # State root baseTag: NodeTag; # Left boundary leafList: seq[RangeLeaf]; # Set of collected leafs - rootKey: NodeKey|RepairKey; # State root - db: HexaryGetFn|HexaryTreeDbRef; # Database abstraction ): auto = - ## Update leafs list by adding proof nodes. This directive is provided as + ## Complement leafs list by adding proof nodes. This directive is provided as ## `template` for avoiding varying exceprion annotations. 
var proof = baseTag.hexaryPath(rootKey, db) .path @@ -120,53 +120,53 @@ template updateProof( # ------------------------------------------------------------------------------ proc hexaryRangeLeafsProof*( - iv: NodeTagRange; # Proofed range of leaf paths - rootKey: NodeKey; # State root - db: HexaryGetFn; # Database abstraction - nLeafs = high(int); # Implies maximal data size - ): Result[RangeProof,HexaryError] - {.gcsafe, raises: [Defect,RlpError]} = - ## ... - let rc = iv.collectLeafs(rootKey, db, nLeafs) - if rc.isErr: - err(rc.error) - else: - ok(iv.minPt.updateProof(rc.value, rootKey, db)) - -proc hexaryRangeLeafsProof*( - baseTag: NodeTag; # Left boundary - leafList: seq[RangeLeaf]; # Set of already collected leafs - rootKey: NodeKey; # State root - db: HexaryGetFn; # Database abstraction - ): RangeProof - {.gcsafe, raises: [Defect,RlpError]} = - ## ... - baseTag.updateProof(leafList, rootKey, db) - - -proc hexaryRangeLeafsProof*( - iv: NodeTagRange; # Proofed range of leaf paths - rootKey: NodeKey; # State root db: HexaryTreeDbRef; # Database abstraction + rootKey: NodeKey; # State root + iv: NodeTagRange; # Proofed range of leaf paths nLeafs = high(int); # Implies maximal data size ): Result[RangeProof,HexaryError] {.gcsafe, raises: [Defect,KeyError]} = - ## ... - let rc = iv.collectLeafs(rootKey, db, nLeafs) + ## Collect trie database leafs prototype and add proof. + let rc = db.collectLeafs(rootKey, iv, nLeafs) if rc.isErr: err(rc.error) else: - ok(iv.minPt.updateProof(rc.value, rootKey, db)) + ok(db.updateProof(rootKey, iv.minPt, rc.value)) proc hexaryRangeLeafsProof*( + db: HexaryTreeDbRef; # Database abstraction + rootKey: NodeKey; # State root baseTag: NodeTag; # Left boundary leafList: seq[RangeLeaf]; # Set of already collected leafs - rootKey: NodeKey; # State root - db: HexaryTreeDbRef; # Database abstraction ): RangeProof {.gcsafe, raises: [Defect,KeyError]} = - ## ... 
- baseTag.updateProof(leafList, rootKey, db) + ## Complement leafs list by adding proof nodes to the argument list + ## `leafList`. + db.updateProof(rootKey, baseTag, leafList) + +proc hexaryRangeLeafsProof*( + db: HexaryGetFn; # Database abstraction + rootKey: NodeKey; # State root + iv: NodeTagRange; # Proofed range of leaf paths + nLeafs = high(int); # Implies maximal data size + ): Result[RangeProof,HexaryError] + {.gcsafe, raises: [Defect,RlpError]} = + ## Variant of `hexaryRangeLeafsProof()` for persistent database. + let rc = db.collectLeafs(rootKey, iv, nLeafs) + if rc.isErr: + err(rc.error) + else: + ok(db.updateProof(rootKey, iv.minPt, rc.value)) + +proc hexaryRangeLeafsProof*( + db: HexaryGetFn; # Database abstraction + rootKey: NodeKey; # State root + baseTag: NodeTag; # Left boundary + leafList: seq[RangeLeaf]; # Set of already collected leafs + ): RangeProof + {.gcsafe, raises: [Defect,RlpError]} = + ## Variant of `hexaryRangeLeafsProof()` for persistent database. + db.updateProof(rootKey, baseTag, leafList) # ------------------------------------------------------------------------------ # End diff --git a/tests/test_sync_snap.nim b/tests/test_sync_snap.nim index 251ca892a..11200eebd 100644 --- a/tests/test_sync_snap.nim +++ b/tests/test_sync_snap.nim @@ -27,8 +27,8 @@ import ./replay/[pp, undump_accounts, undump_storages], ./test_sync_snap/[ bulk_test_xx, snap_test_xx, - test_accounts, test_helpers, test_node_range, test_inspect, test_pivot, - test_storage, test_db_timing, test_types] + test_accounts, test_calc, test_helpers, test_node_range, test_inspect, + test_pivot, test_storage, test_db_timing, test_types] const baseDir = [".", "..", ".."/"..", $DirSep] @@ -174,9 +174,19 @@ proc snapDbAccountsRef(cdb:ChainDb; root:Hash256; pers:bool):SnapDbAccountsRef = # Test Runners: accounts and accounts storages # ------------------------------------------------------------------------------ +proc miscRunner(noisy = true) = + + suite "SyncSnap: Verify setup, 
constants, limits": + + test "RLP accounts list sizes": + test_calcAccountsListSizes() + + test "RLP proofs list sizes": + test_calcProofsListSizes() + + proc accountsRunner(noisy = true; persistent = true; sample = accSample) = let - peer = Peer.new accLst = sample.to(seq[UndumpAccounts]) root = accLst[0].root tmpDir = getTmpDir() @@ -490,6 +500,7 @@ proc dbTimingRunner(noisy = true; persistent = true; cleanUp = true) = # ------------------------------------------------------------------------------ proc syncSnapMain*(noisy = defined(debug)) = + noisy.miscRunner() noisy.accountsRunner(persistent=true) noisy.accountsRunner(persistent=false) noisy.importRunner() # small sample, just verify functionality @@ -503,50 +514,8 @@ when isMainModule: #setTraceLevel() setErrorLevel() - # The `accountsRunner()` tests a snap sync functionality for storing chain - # chain data directly rather than derive them by executing the EVM. Here, - # only accounts are considered. - # - # The `snap/1` protocol allows to fetch data for a certain account range. The - # following boundary conditions apply to the received data: - # - # * `State root`: All data are relaive to the same state root. - # - # * `Accounts`: There is an accounts interval sorted in strictly increasing - # order. The accounts are required consecutive, i.e. without holes in - # between although this cannot be verified immediately. - # - # * `Lower bound`: There is a start value which might be lower than the first - # account hash. There must be no other account between this start value and - # the first account (not verifyable yet.) For all practicat purposes, this - # value is mostly ignored but carried through. - # - # * `Proof`: There is a list of hexary nodes which allow to build a partial - # Patricia-Merkle trie starting at the state root with all the account - # leaves. There are enough nodes that show that there is no account before - # the least account (which is currently ignored.) 
- # - # There are test data samples on the sub-directory `test_sync_snap`. These - # are complete replies for some (admittedly smapp) test requests from a `kiln` - # session. - # - # The `accountsRunner()` does three tests: - # - # 1. Run the `importAccounts()` function which is the all-in-one production - # function processoing the data described above. The test applies it - # sequentially to about 20 data sets. - # - # 2. Test individual functional items which are hidden in test 1. while - # merging the sample data. - # * Load/accumulate `proofs` data from several samples - # * Load/accumulate accounts (needs some unique sorting) - # * Build/complete hexary trie for accounts - # * Save/bulk-store hexary trie on disk. If rocksdb is available, data - # are bulk stored via sst. - # - # 3. Traverse trie nodes stored earlier. The accounts from test 2 are - # re-visted using the account hash as access path. - # + # Test constant, calculations etc. + noisy.miscRunner() # This one uses dumps from the external `nimbus-eth1-blob` repo when true and false: diff --git a/tests/test_sync_snap/test_accounts.nim b/tests/test_sync_snap/test_accounts.nim index 368df725d..8aeb661af 100644 --- a/tests/test_sync_snap/test_accounts.nim +++ b/tests/test_sync_snap/test_accounts.nim @@ -10,6 +10,52 @@ # distributed except according to those terms. ## Snap sync components tester and TDD environment +## +## This module provides test bodies for storing chain chain data directly +## rather than derive them by executing the EVM. Here, only accounts are +## considered. +## +## The `snap/1` protocol allows to fetch data for a certain account range. The +## following boundary conditions apply to the received data: +## +## * `State root`: All data are relaive to the same state root. +## +## * `Accounts`: There is an accounts interval sorted in strictly increasing +## order. The accounts are required consecutive, i.e. without holes in +## between although this cannot be verified immediately. 
+## +## * `Lower bound`: There is a start value which might be lower than the first +## account hash. There must be no other account between this start value and +## the first account (not verifiable yet.) For all practical purposes, this +## value is mostly ignored but carried through. +## +## * `Proof`: There is a list of hexary nodes which allow building a partial +## Patricia-Merkle trie starting at the state root with all the account +## leaves. There are enough nodes that show that there is no account before +## the least account (which is currently ignored.) +## +## There are test data samples in the sub-directory `test_sync_snap`. These +## are complete replies for some (admittedly small) test requests from a `kiln` +## session. +## +## There are three tests: +## +## 1. Run the `test_accountsImport()` function which is the all-in-one +## production function processing the data described above. The test +## applies it sequentially to all argument data sets. +## +## 2. With `test_accountsMergeProofs()` individual items are tested which are +## hidden in test 1. while merging the sample data. +## * Load/accumulate `proofs` data from several samples +## * Load/accumulate accounts (needs some unique sorting) +## * Build/complete hexary trie for accounts +## * Save/bulk-store hexary trie on disk. If rocksdb is available, data +## are bulk stored via sst. +## +## 3. The function `test_accountsRevisitStoredItems()` traverses trie nodes +## stored earlier. The accounts from test 2 are re-visited using the account +## hash as access path. 
+## import std/algorithm, diff --git a/tests/test_sync_snap/test_calc.nim b/tests/test_sync_snap/test_calc.nim new file mode 100644 index 000000000..65019be7d --- /dev/null +++ b/tests/test_sync_snap/test_calc.nim @@ -0,0 +1,65 @@ +# Nimbus - Types, data structures and shared utilities used in network sync +# +# Copyright (c) 2018-2021 Status Research & Development GmbH +# Licensed under either of +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or +# http://www.apache.org/licenses/LICENSE-2.0) +# * MIT license ([LICENSE-MIT](LICENSE-MIT) or +# http://opensource.org/licenses/MIT) +# at your option. This file may not be copied, modified, or +# distributed except according to those terms. + +## Snap sync components tester and TDD environment + +import + std/sequtils, + eth/common, + unittest2, + ../../nimbus/sync/handlers/snap, + ../../nimbus/sync/snap/[range_desc, worker/db/hexary_desc], + ./test_helpers + +# ------------------------------------------------------------------------------ +# Public test function +# ------------------------------------------------------------------------------ + +proc test_calcAccountsListSizes*() = + ## RLP does not allow static check .. + + let sample = Account( + storageRoot: Hash256(data: high(UInt256).toBytesBE), + codeHash: Hash256(data: high(UInt256).toBytesBE), + nonce: high(uint64), + balance: high(UInt256)) + + let tryLst = [0, 1, 2, 3, 594, 595, 596] + + for n in tryLst: + #echo ">>> ", n, " ", sample.repeat(n).encode.len + check n.accountRangeSize == sample.repeat(n).encode.len + block: + let n = tryLst[^1] + check 4 + n * sample.encode.len == sample.repeat(n).encode.len + + +proc test_calcProofsListSizes*() = + ## RLP does not allow static check .. + + let sample = block: + var xNode = XNodeObj(kind: Branch) + for n in 0 .. 
15: + xNode.bLink[n] = high(NodeTag).to(Blob) + xNode + + let tryLst = [0, 1, 2, 126, 127] + + for n in tryLst: + #echo ">>> ", n, " ", sample.repeat(n).encode.len + check n.proofNodesSize == sample.repeat(n).encode.len + block: + let n = tryLst[^1] + check 4 + n * sample.encode.len == sample.repeat(n).encode.len + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tests/test_sync_snap/test_helpers.nim b/tests/test_sync_snap/test_helpers.nim index c98e46243..9dcf4b7ee 100644 --- a/tests/test_sync_snap/test_helpers.nim +++ b/tests/test_sync_snap/test_helpers.nim @@ -110,27 +110,6 @@ proc say*(noisy = false; pfx = "***"; args: varargs[string, `$`]) = else: echo pfx, args.toSeq.join -# ------------------------------------------------------------------------------ -# Public free parking -# ------------------------------------------------------------------------------ - -proc rangeAccountSizeMax*(n: int): int = - ## Max number of bytes needed to store `n` RLP encoded `Account()` type - ## entries. Note that this is an upper bound. - ## - ## The maximum size of a single RLP encoded account item can be determined - ## by setting every field of `Account()` to `high()` or `0xff`. - if 127 < n: - 3 + n * 110 - elif 0 < n: - 2 + n * 110 - else: - 1 - -proc rangeNumAccounts*(size: int): int = - ## .. 
- (size - 3) div 110 - # ------------------------------------------------------------------------------ # End # ------------------------------------------------------------------------------ diff --git a/tests/test_sync_snap/test_node_range.nim b/tests/test_sync_snap/test_node_range.nim index 632b51f26..63744ee67 100644 --- a/tests/test_sync_snap/test_node_range.nim +++ b/tests/test_sync_snap/test_node_range.nim @@ -13,7 +13,7 @@ import std/[sequtils, sets, strformat, strutils], - eth/[common, p2p, rlp, trie/nibbles], + eth/[common, p2p, trie/nibbles], stew/[byteutils, interval_set, results], unittest2, ../../nimbus/sync/types, @@ -189,16 +189,6 @@ proc printCompareLeftNearby( "\n" -proc verifyAccountListSizes() = - ## RLP does not allow static check .. - for n in [0, 1, 128, 129, 200]: - check n.rangeAccountSizeMax == Account( - storageRoot: Hash256(data: high(UInt256).toBytesBE), - codeHash: Hash256(data: high(UInt256).toBytesBE), - nonce: high(uint64), - balance: high(UInt256)).repeat(n).encode.len - - proc verifyRangeProof( rootKey: NodeKey; leafs: seq[RangeLeaf]; @@ -387,53 +377,70 @@ proc test_NodeRangeProof*( noisy = not dbg.isNil maxLen = high(int) - # RLP does not allow static check - verifyAccountListSizes() - # Assuming the `inLst` entries have been stored in the DB already for n,w in inLst: let - accounts = w.data.accounts[0 .. min(w.data.accounts.len,maxLen)-1] + accounts = w.data.accounts[0 ..< min(w.data.accounts.len,maxLen)] iv = NodeTagRange.new(w.base, accounts[^1].accKey.to(NodeTag)) - rc = iv.hexaryRangeLeafsProof(rootKey, db, accounts.len) + rc = db.hexaryRangeLeafsProof(rootKey, iv, accounts.len) check rc.isOk if rc.isErr: return - let leafs = rc.value.leafs - if leafs.len != accounts.len or accounts[^1].accKey != leafs[^1].key: - noisy.say "***", "n=", n, " something went wrong .." 
- check (n,leafs.len) == (n,accounts.len) - rootKey.printCompareRightLeafs(w.base, accounts, leafs, db, dbg) - return + # Run over sub-samples of the given account range + var subCount = 0 + for cutOff in {0, 2, 5, 10, 16, 23, 77}: - # Import proof nodes and build trie - var rx = rootKey.verifyRangeProof(leafs, rc.value.proof) - if rx.isErr: - rx = rootKey.verifyRangeProof(leafs, rc.value.proof, dbg) - let - baseNbls = iv.minPt.to(NodeKey).to(NibblesSeq) - lastNbls = iv.maxPt.to(NodeKey).to(NibblesSeq) - nPfxNblsLen = baseNbls.sharedPrefixLen lastNbls - pfxNbls = baseNbls.slice(0, nPfxNblsLen) - noisy.say "***", "n=", n, - " leafs=", leafs.len, - " proof=", rc.value.proof.ppNodeKeys(dbg), - "\n\n ", - " base=", iv.minPt, - "\n ", iv.minPt.hexaryPath(rootKey,db).pp(dbg), - "\n\n ", - " pfx=", pfxNbls, - " nPfx=", nPfxNblsLen, - "\n ", pfxNbls.hexaryPath(rootKey,db).pp(dbg), - "\n" + # Take sub-samples but not too small + if 0 < cutOff and rc.value.leafs.len < cutOff + 5: + break # rest cases ignored + subCount.inc - check rx == typeof(rx).ok() - return + let leafs = rc.value.leafs[0 ..< rc.value.leafs.len - cutOff] + var proof: seq[Blob] + + # Calculate proof + if cutOff == 0: + if leafs.len != accounts.len or accounts[^1].accKey != leafs[^1].key: + noisy.say "***", "n=", n, " something went wrong .." 
+ check (n,leafs.len) == (n,accounts.len) + rootKey.printCompareRightLeafs(w.base, accounts, leafs, db, dbg) + return + proof = rc.value.proof + else: + # Re-adjust proof + proof = db.hexaryRangeLeafsProof(rootKey, iv.minPt, leafs).proof + + # Import proof nodes and build trie + block: + var rx = rootKey.verifyRangeProof(leafs, proof) + if rx.isErr: + rx = rootKey.verifyRangeProof(leafs, proof, dbg) + let + baseNbls = iv.minPt.to(NodeKey).to(NibblesSeq) + lastNbls = iv.maxPt.to(NodeKey).to(NibblesSeq) + nPfxNblsLen = baseNbls.sharedPrefixLen lastNbls + pfxNbls = baseNbls.slice(0, nPfxNblsLen) + noisy.say "***", "n=", n, + " cutOff=", cutOff, + " leafs=", leafs.len, + " proof=", proof.ppNodeKeys(dbg), + "\n\n ", + " base=", iv.minPt, + "\n ", iv.minPt.hexaryPath(rootKey,db).pp(dbg), + "\n\n ", + " pfx=", pfxNbls, + " nPfx=", nPfxNblsLen, + "\n ", pfxNbls.hexaryPath(rootKey,db).pp(dbg), + "\n" + + check rx == typeof(rx).ok() + return noisy.say "***", "n=", n, - " leafs=", leafs.len, - " proof=", rc.value.proof.len, "/", w.data.proof.len + " leafs=", rc.value.leafs.len, + " proof=", rc.value.proof.len, "/", w.data.proof.len, + " sub-samples=", subCount proc test_NodeRangeLeftBoundary*( @@ -468,3 +475,6 @@ proc test_NodeRangeLeftBoundary*( # ------------------------------------------------------------------------------ # End # ------------------------------------------------------------------------------ + +proc xxx(inLst: seq[UndumpAccounts]; db: HexaryGetFn; dbg: HexaryTreeDbRef) = + inLst.test_NodeRangeProof(db, dbg)