Snap sync extractor test sub range proofs (#1460)

* Unit tests to verify calculations based on hard coded constants why: Sizes of RLP encoded objects are available at run time only. * Changed argument order for `hexaryRangeLeafsProof()` prototype why: Better to read as a stand-alone function (arguments were optimised for functional pipelines) * Run sub-range proof tests for extracted ranges
2023-02-02 13:27:09 +00:00 · 2023-02-02 13:27:09 +00:00 · c2fc46a99a
parent ba92c53624
commit c2fc46a99a
7 changed files with 269 additions and 153 deletions
--- a/nimbus/sync/handlers/snap.nim
+++ b/nimbus/sync/handlers/snap.nim
@ -26,6 +26,14 @@ type
    chain: ChainRef
    peerPool: PeerPool
 const
  # Per-item size limits in bytes. They are multiplied by the number of items
  # when estimating the size of an RLP encoded list.
  transportAccountSizeMax = 110
    ## Account record with `high(UInt256)` hashes and balance, and maximal
    ## nonce within RLP list
  transportProofNodeSizeMax = 536
    ## Branch node with all branches `high(UInt256)` within RLP list
 # ------------------------------------------------------------------------------
 # Private functions: helper functions
 # ------------------------------------------------------------------------------
@ -73,6 +81,45 @@ proc init*(
  #ctx.setupPeerObserver()
  ctx
 # ------------------------------------------------------------------------------
 # Public functions: helpers
 # ------------------------------------------------------------------------------
 proc accountRangeSize*(n: int): int =
  ## Estimated number of bytes needed to store a list of `n` RLP encoded
  ## `Account()` type entries. Note that this is an *approximate* upper bound.
  ##
  ## Every entry is accounted for with `transportAccountSizeMax` bytes, which
  ## is the size of a single RLP encoded account item when every field of
  ## `Account()` is set to `high()` or `0xff`. A non-positive `n` yields `1`.
  ##
  ## Note: Public function subject to unit tests
  if n <= 0:
    return 1

  # Per-list overhead added on top of the entries. The limits were
  # experimentally derived, see `test_calc` unit test module.
  let overhead =
    if n <= 2: 2
    elif n <= 595: 3
    else: 4
  overhead + n * transportAccountSizeMax
 proc proofNodesSize*(n: int): int =
  ## Estimated number of bytes needed to store a list of `n` RLP encoded proof
  ## nodes, each accounted for with `transportProofNodeSizeMax` bytes. As with
  ## `accountRangeSize()`, this is an *approximate* upper bound. A
  ## non-positive `n` yields `1`.
  ##
  ## Note: Public function subject to unit tests
  if n <= 0:
    return 1

  let payload = n * transportProofNodeSizeMax
  # An RLP list header is one prefix byte plus the big-endian payload length.
  # A single node already exceeds 255 bytes, so the length needs at least two
  # bytes (3 byte header). It needs three bytes (4 byte header) as soon as the
  # payload exceeds 0xffff, which is the case for 123 or more nodes rather
  # than for 126 or more.
  if 0xffff < payload:
    4 + payload
  else:
    3 + payload
 proc accountRangeNumEntries*(size: int): int =
  ## Number of entries with size guaranteed to not exceed the argument `size`.
  ##
  ## The size of a list of entries is the one estimated by
  ## `accountRangeSize()`. The result is `0` if `size` cannot hold a single
  ## entry plus 3 bytes of list overhead.
  if transportAccountSizeMax + 3 <= size:
    result = (size - 3) div transportAccountSizeMax
    # The division assumes 3 bytes of list overhead, but long lists need 4
    # bytes. Dropping a single entry is always enough to get back below `size`.
    if size < result.accountRangeSize:
      result.dec
 # ------------------------------------------------------------------------------
 # Public functions: snap wire protocol handlers
 # ------------------------------------------------------------------------------
--- a/nimbus/sync/snap/worker/db/hexary_range.nim
+++ b/nimbus/sync/snap/worker/db/hexary_range.nim
@ -40,9 +40,9 @@ proc convertTo(key: RepairKey; T: type NodeKey): T =
 # ------------------------------------------------------------------------------
 template collectLeafs(
    iv: NodeTagRange;                # Proofed range of leaf paths
    rootKey: NodeKey|RepairKey;      # State root
    db: HexaryGetFn|HexaryTreeDbRef; # Database abstraction
    rootKey: NodeKey|RepairKey;      # State root
    iv: NodeTagRange;                # Proofed range of leaf paths
    nLeafs: int;                     # Implies maximal data size
      ): auto =
  ## Collect trie database leafs prototype. This directive is provided as
@ -90,12 +90,12 @@ template collectLeafs(
 template updateProof(
    db: HexaryGetFn|HexaryTreeDbRef; # Database abstraction
    rootKey: NodeKey|RepairKey;      # State root
    baseTag: NodeTag;                # Left boundary
    leafList: seq[RangeLeaf];        # Set of collected leafs
    rootKey: NodeKey|RepairKey;      # State root
    db: HexaryGetFn|HexaryTreeDbRef; # Database abstraction
      ): auto =
-  ## Update leafs list by adding proof nodes. This directive is provided as
+  ## Complement leafs list by adding proof nodes. This directive is provided as
  ## `template` for avoiding varying exception annotations.
  var proof = baseTag.hexaryPath(rootKey, db)
        .path
@ -120,53 +120,53 @@ template updateProof(
 # ------------------------------------------------------------------------------
 proc hexaryRangeLeafsProof*(
    iv: NodeTagRange;                # Proofed range of leaf paths
    rootKey: NodeKey;                # State root
    db: HexaryGetFn;                 # Database abstraction
    nLeafs = high(int);              # Implies maximal data size
      ): Result[RangeProof,HexaryError]
      {.gcsafe, raises: [Defect,RlpError]} =
  ## Collect leafs for the range `iv` via `collectLeafs()` and add proof nodes
  ## for the left boundary `iv.minPt` via `updateProof()`. If collecting the
  ## leafs fails, the error is passed on unchanged.
  let rc = iv.collectLeafs(rootKey, db, nLeafs)
  if rc.isErr:
    err(rc.error)
  else:
    ok(iv.minPt.updateProof(rc.value, rootKey, db))
 proc hexaryRangeLeafsProof*(
    baseTag: NodeTag;                # Left boundary
    leafList: seq[RangeLeaf];        # Set of already collected leafs
    rootKey: NodeKey;                # State root
    db: HexaryGetFn;                 # Database abstraction
      ): RangeProof
      {.gcsafe, raises: [Defect,RlpError]} =
  ## Add proof nodes to the already collected `leafList` by passing all
  ## arguments on to `updateProof()`, with `baseTag` as the left boundary.
  baseTag.updateProof(leafList, rootKey, db)
 proc hexaryRangeLeafsProof*(
    iv: NodeTagRange;                # Proofed range of leaf paths
    rootKey: NodeKey;                # State root
    db: HexaryTreeDbRef;             # Database abstraction
    rootKey: NodeKey;                # State root
    iv: NodeTagRange;                # Proofed range of leaf paths
    nLeafs = high(int);              # Implies maximal data size
      ): Result[RangeProof,HexaryError]
      {.gcsafe, raises: [Defect,KeyError]} =
-  ## ...
+  ## Collect trie database leafs prototype and add proof.
-  let rc = iv.collectLeafs(rootKey, db, nLeafs)
+  let rc = db.collectLeafs(rootKey, iv, nLeafs)
  if rc.isErr:
    err(rc.error)
  else:
-    ok(iv.minPt.updateProof(rc.value, rootKey, db))
+    ok(db.updateProof(rootKey, iv.minPt, rc.value))
 proc hexaryRangeLeafsProof*(
    db: HexaryTreeDbRef;             # Database abstraction
    rootKey: NodeKey;                # State root
    baseTag: NodeTag;                # Left boundary
    leafList: seq[RangeLeaf];        # Set of already collected leafs
    rootKey: NodeKey;                # State root
    db: HexaryTreeDbRef;             # Database abstraction
      ): RangeProof
      {.gcsafe, raises: [Defect,KeyError]} =
-  ## ...
+  ## Complement leafs list by adding proof nodes to the argument list
-  baseTag.updateProof(leafList, rootKey, db)
+  ## `leafList`.
  db.updateProof(rootKey, baseTag, leafList)
 proc hexaryRangeLeafsProof*(
    db: HexaryGetFn;                 # Database abstraction
    rootKey: NodeKey;                # State root
    iv: NodeTagRange;                # Proofed range of leaf paths
    nLeafs = high(int);              # Implies maximal data size
      ): Result[RangeProof,HexaryError]
      {.gcsafe, raises: [Defect,RlpError]} =
  ## Variant of `hexaryRangeLeafsProof()` for persistent database.
  ##
  ## Collects the leafs for the range `iv` and, on success, adds the proof
  ## nodes for the left boundary `iv.minPt`. A failure while collecting the
  ## leafs is returned as is.
  let collected = db.collectLeafs(rootKey, iv, nLeafs)
  if collected.isOk:
    ok(db.updateProof(rootKey, iv.minPt, collected.value))
  else:
    err(collected.error)
 proc hexaryRangeLeafsProof*(
    db: HexaryGetFn;                 # Database abstraction
    rootKey: NodeKey;                # State root
    baseTag: NodeTag;                # Left boundary
    leafList: seq[RangeLeaf];        # Set of already collected leafs
      ): RangeProof
      {.gcsafe, raises: [Defect,RlpError]} =
  ## Variant of `hexaryRangeLeafsProof()` for persistent database.
  ##
  ## Adds proof nodes to the already collected `leafList`, using `baseTag`
  ## as the left boundary.
  updateProof(db, rootKey, baseTag, leafList)
 # ------------------------------------------------------------------------------
 # End
--- a/tests/test_sync_snap.nim
+++ b/tests/test_sync_snap.nim
@ -27,8 +27,8 @@ import
  ./replay/[pp, undump_accounts, undump_storages],
  ./test_sync_snap/[
    bulk_test_xx, snap_test_xx,
-    test_accounts, test_helpers, test_node_range, test_inspect, test_pivot,
+    test_accounts, test_calc, test_helpers, test_node_range, test_inspect,
-    test_storage, test_db_timing, test_types]
+    test_pivot, test_storage, test_db_timing, test_types]
 const
  baseDir = [".", "..", ".."/"..", $DirSep]
@ -174,9 +174,19 @@ proc snapDbAccountsRef(cdb:ChainDb; root:Hash256; pers:bool):SnapDbAccountsRef =
 # Test Runners: accounts and accounts storages
 # ------------------------------------------------------------------------------
 proc miscRunner(noisy = true) =
  ## Test runner for the size calculations that are based on hard coded
  ## constants. It runs the accounts list and the proofs list size checks as
  ## separate tests of one suite. The `noisy` argument is not used by the body.
  suite "SyncSnap: Verify setup, constants, limits":
    test "RLP accounts list sizes":
      test_calcAccountsListSizes()
    test "RLP proofs list sizes":
      test_calcProofsListSizes()
 proc accountsRunner(noisy = true;  persistent = true; sample = accSample) =
  let
    peer = Peer.new
    accLst = sample.to(seq[UndumpAccounts])
    root = accLst[0].root
    tmpDir = getTmpDir()
@ -490,6 +500,7 @@ proc dbTimingRunner(noisy = true;  persistent = true; cleanUp = true) =
 # ------------------------------------------------------------------------------
 proc syncSnapMain*(noisy = defined(debug)) =
  noisy.miscRunner()
  noisy.accountsRunner(persistent=true)
  noisy.accountsRunner(persistent=false)
  noisy.importRunner() # small sample, just verify functionality
@ -503,50 +514,8 @@ when isMainModule:
  #setTraceLevel()
  setErrorLevel()
-  # The `accountsRunner()` tests a snap sync functionality for storing chain
+  # Test constant, calculations etc.
-  # chain data directly rather than derive them by executing the EVM. Here,
+  noisy.miscRunner()
  # only accounts are considered.
  #
  # The `snap/1` protocol allows to fetch data for a certain account range. The
  # following boundary conditions apply to the received data:
  #
  # * `State root`: All data are relative to the same state root.
  #
  # * `Accounts`: There is an accounts interval sorted in strictly increasing
  #   order. The accounts are required consecutive, i.e. without holes in
  #   between although this cannot be verified immediately.
  #
  # * `Lower bound`: There is a start value which might be lower than the first
  #   account hash. There must be no other account between this start value and
  #   the first account (not verifiable yet.) For all practical purposes, this
  #   value is mostly ignored but carried through.
  #
  # * `Proof`: There is a list of hexary nodes which allow to build a partial
  #   Patricia-Merkle trie starting at the state root with all the account
  #   leaves. There are enough nodes that show that there is no account before
  #   the least account (which is currently ignored.)
  #
  # There are test data samples on the sub-directory `test_sync_snap`. These
  # are complete replies for some (admittedly small) test requests from a `kiln`
  # session.
  #
  # The `accountsRunner()` does three tests:
  #
  # 1. Run the `importAccounts()` function which is the all-in-one production
  #    function processing the data described above. The test applies it
  #    sequentially to about 20 data sets.
  #
  # 2. Test individual functional items which are hidden in test 1. while
  #    merging the sample data.
  #    * Load/accumulate `proofs` data from several samples
  #    * Load/accumulate accounts (needs some unique sorting)
  #    * Build/complete hexary trie for accounts
  #    * Save/bulk-store hexary trie on disk. If rocksdb is available, data
  #      are bulk stored via sst.
  #
  # 3. Traverse trie nodes stored earlier. The accounts from test 2 are
  #    re-visited using the account hash as access path.
  #
  # This one uses dumps from the external `nimbus-eth1-blob` repo
  when true and false:
--- a/tests/test_sync_snap/test_accounts.nim
+++ b/tests/test_sync_snap/test_accounts.nim
@ -10,6 +10,52 @@
 # distributed except according to those terms.
 ## Snap sync components tester and TDD environment
 ##
 ## This module provides test bodies for storing chain data directly
 ## rather than derive them by executing the EVM. Here, only accounts are
 ## considered.
 ##
 ## The `snap/1` protocol allows to fetch data for a certain account range. The
 ## following boundary conditions apply to the received data:
 ##
 ## * `State root`: All data are relative to the same state root.
 ##
 ## * `Accounts`: There is an accounts interval sorted in strictly increasing
 ##   order. The accounts are required consecutive, i.e. without holes in
 ##   between although this cannot be verified immediately.
 ##
 ## * `Lower bound`: There is a start value which might be lower than the first
 ##   account hash. There must be no other account between this start value and
 ##   the first account (not verifiable yet.) For all practical purposes, this
 ##   value is mostly ignored but carried through.
 ##
 ## * `Proof`: There is a list of hexary nodes which allow to build a partial
 ##   Patricia-Merkle trie starting at the state root with all the account
 ##   leaves. There are enough nodes that show that there is no account before
 ##   the least account (which is currently ignored.)
 ##
 ## There are test data samples on the sub-directory `test_sync_snap`. These
 ## are complete replies for some (admittedly small) test requests from a `kiln`
 ## session.
 ##
 ## There are three tests:
 ##
 ## 1. Run the `test_accountsImport()` function which is the all-in-one
 ##    production function processing the data described above. The test
 ##    applies it sequentially to all argument data sets.
 ##
 ## 2. With `test_accountsMergeProofs()` individual items are tested which are
 ##    hidden in test 1. while merging the sample data.
 ##    * Load/accumulate `proofs` data from several samples
 ##    * Load/accumulate accounts (needs some unique sorting)
 ##    * Build/complete hexary trie for accounts
 ##    * Save/bulk-store hexary trie on disk. If rocksdb is available, data
 ##      are bulk stored via sst.
 ##
 ## 3. The function `test_accountsRevisitStoredItems()` traverses trie nodes
 ##    stored earlier. The accounts from test 2 are re-visited using the account
 ##    hash as access path.
 ##
 import
  std/algorithm,
--- a/tests/test_sync_snap/test_calc.nim
+++ b/tests/test_sync_snap/test_calc.nim
@ -0,0 +1,65 @@
 # Nimbus - Types, data structures and shared utilities used in network sync
 #
 # Copyright (c) 2018-2021 Status Research & Development GmbH
 # Licensed under either of
 #  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
 #    http://www.apache.org/licenses/LICENSE-2.0)
 #  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
 #    http://opensource.org/licenses/MIT)
 # at your option. This file may not be copied, modified, or
 # distributed except according to those terms.
 ## Snap sync components tester and TDD environment
 import
  std/sequtils,
  eth/common,
  unittest2,
  ../../nimbus/sync/handlers/snap,
  ../../nimbus/sync/snap/[range_desc, worker/db/hexary_desc],
  ./test_helpers
 # ------------------------------------------------------------------------------
 # Public test function
 # ------------------------------------------------------------------------------
 proc test_calcAccountsListSizes*() =
  ## Compare `accountRangeSize()` with the length of the real RLP encoding of
  ## a list of maximal sized accounts. RLP does not allow static check, so
  ## this is verified at run time.
  let
    allOnes = Hash256(data: high(UInt256).toBytesBE)
    sample = Account(
      storageRoot: allOnes,
      codeHash:    allOnes,
      nonce:       high(uint64),
      balance:     high(UInt256))
    # List lengths on either side of the points where the overhead changes
    tryLst = [0, 1, 2, 3, 594, 595, 596]

  for n in tryLst:
    #echo ">>> ", n, " ", sample.repeat(n).encode.len
    check n.accountRangeSize == sample.repeat(n).encode.len

  # The longest list is made up of the encoded items plus 4 bytes of overhead
  let n = tryLst[tryLst.high]
  check 4 + n * sample.encode.len == sample.repeat(n).encode.len
 proc test_calcProofsListSizes*() =
  ## Compare `proofNodesSize()` with the length of the real RLP encoding of a
  ## list of branch nodes that have all 16 links set to `high(NodeTag)`. RLP
  ## does not allow static check, so this is verified at run time.
  var sample = XNodeObj(kind: Branch)
  for i in 0 ..< 16:
    sample.bLink[i] = high(NodeTag).to(Blob)

  let tryLst = [0, 1, 2, 126, 127]
  for n in tryLst:
    #echo ">>> ", n, " ", sample.repeat(n).encode.len
    check n.proofNodesSize == sample.repeat(n).encode.len

  # The longest list is made up of the encoded items plus 4 bytes of overhead
  let n = tryLst[tryLst.high]
  check 4 + n * sample.encode.len == sample.repeat(n).encode.len
 # ------------------------------------------------------------------------------
 # End
 # ------------------------------------------------------------------------------
--- a/tests/test_sync_snap/test_helpers.nim
+++ b/tests/test_sync_snap/test_helpers.nim
@ -110,27 +110,6 @@ proc say*(noisy = false; pfx = "***"; args: varargs[string, `$`]) =
    else:
      echo pfx, args.toSeq.join
 # ------------------------------------------------------------------------------
 # Public free parking
 # ------------------------------------------------------------------------------
 proc rangeAccountSizeMax*(n: int): int =
  ## Max number of bytes needed to store `n` RLP encoded `Account()` type
  ## entries. Note that this is an upper bound.
  ##
  ## The maximum size of a single RLP encoded account item can be determined
  ## by setting every field of `Account()` to `high()` or `0xff`, which gives
  ## 110 bytes. A non-positive `n` yields `1`.
  if n <= 0:
    return 1

  let payload = n * 110
  # An RLP list header is one prefix byte plus the big-endian payload length.
  # The length takes one byte up to 0xff (1..2 entries), two bytes up to
  # 0xffff (3..595 entries) and three bytes beyond that.
  if 0xffff < payload:
    4 + payload
  elif 0xff < payload:
    3 + payload
  else:
    2 + payload
 proc rangeNumAccounts*(size: int): int =
  ## Number of 110 byte entries that fit into `size` bytes after setting
  ## aside 3 bytes (presumably for the list overhead, to be confirmed.)
  let available = size - 3
  result = available div 110
 # ------------------------------------------------------------------------------
 # End
 # ------------------------------------------------------------------------------
--- a/tests/test_sync_snap/test_node_range.nim
+++ b/tests/test_sync_snap/test_node_range.nim
@ -13,7 +13,7 @@
 import
  std/[sequtils, sets, strformat, strutils],
-  eth/[common, p2p, rlp, trie/nibbles],
+  eth/[common, p2p, trie/nibbles],
  stew/[byteutils, interval_set, results],
  unittest2,
  ../../nimbus/sync/types,
@ -189,16 +189,6 @@ proc printCompareLeftNearby(
    "\n"
 proc verifyAccountListSizes() =
  ## Compare `rangeAccountSizeMax()` with the length of the real RLP encoding
  ## of a list of maximal sized accounts. RLP does not allow static check ..
  let
    allOnes = Hash256(data: high(UInt256).toBytesBE)
    sample = Account(
      storageRoot: allOnes,
      codeHash:    allOnes,
      nonce:       high(uint64),
      balance:     high(UInt256))
  for n in [0, 1, 128, 129, 200]:
    check n.rangeAccountSizeMax == sample.repeat(n).encode.len
 proc verifyRangeProof(
    rootKey: NodeKey;
    leafs: seq[RangeLeaf];
@ -387,38 +377,54 @@ proc test_NodeRangeProof*(
    noisy = not dbg.isNil
    maxLen = high(int)
  # RLP does not allow static check
  verifyAccountListSizes()
  # Assuming the `inLst` entries have been stored in the DB already
  for n,w in inLst:
    let
-      accounts = w.data.accounts[0 .. min(w.data.accounts.len,maxLen)-1]
+      accounts = w.data.accounts[0 ..< min(w.data.accounts.len,maxLen)]
      iv = NodeTagRange.new(w.base, accounts[^1].accKey.to(NodeTag))
-      rc = iv.hexaryRangeLeafsProof(rootKey, db, accounts.len)
+      rc = db.hexaryRangeLeafsProof(rootKey, iv, accounts.len)
    check rc.isOk
    if rc.isErr:
      return
-    let leafs = rc.value.leafs
+    # Run over sub-samples of the given account range
    var subCount = 0
    for cutOff in {0, 2, 5, 10, 16, 23, 77}:
      # Take sub-samples but not too small
      if 0 < cutOff and rc.value.leafs.len < cutOff + 5:
        break # rest cases ignored
      subCount.inc
      let leafs = rc.value.leafs[0 ..< rc.value.leafs.len - cutOff]
      var proof: seq[Blob]
      # Calculate proof
      if cutOff == 0:
        if leafs.len != accounts.len or accounts[^1].accKey != leafs[^1].key:
          noisy.say "***", "n=", n, " something went wrong .."
          check (n,leafs.len) == (n,accounts.len)
          rootKey.printCompareRightLeafs(w.base, accounts, leafs, db, dbg)
          return
        proof = rc.value.proof
      else:
        # Re-adjust proof
        proof = db.hexaryRangeLeafsProof(rootKey, iv.minPt, leafs).proof
      # Import proof nodes and build trie
-    var rx = rootKey.verifyRangeProof(leafs, rc.value.proof)
+      block:
        var rx = rootKey.verifyRangeProof(leafs, proof)
        if rx.isErr:
-      rx = rootKey.verifyRangeProof(leafs, rc.value.proof, dbg)
+          rx = rootKey.verifyRangeProof(leafs, proof, dbg)
          let
            baseNbls =  iv.minPt.to(NodeKey).to(NibblesSeq)
            lastNbls =  iv.maxPt.to(NodeKey).to(NibblesSeq)
            nPfxNblsLen = baseNbls.sharedPrefixLen lastNbls
            pfxNbls = baseNbls.slice(0, nPfxNblsLen)
          noisy.say "***", "n=", n,
            " cutOff=", cutOff,
            " leafs=", leafs.len,
-        " proof=", rc.value.proof.ppNodeKeys(dbg),
+            " proof=", proof.ppNodeKeys(dbg),
            "\n\n   ",
            " base=", iv.minPt,
            "\n    ", iv.minPt.hexaryPath(rootKey,db).pp(dbg),
@ -432,8 +438,9 @@ proc test_NodeRangeProof*(
          return
    noisy.say "***", "n=", n,
-      " leafs=", leafs.len,
+      " leafs=", rc.value.leafs.len,
-      " proof=", rc.value.proof.len, "/", w.data.proof.len
+      " proof=", rc.value.proof.len, "/", w.data.proof.len,
      " sub-samples=", subCount
 proc test_NodeRangeLeftBoundary*(
@ -468,3 +475,6 @@ proc test_NodeRangeLeftBoundary*(
 # ------------------------------------------------------------------------------
 # End
 # ------------------------------------------------------------------------------
 proc xxx(inLst: seq[UndumpAccounts]; db: HexaryGetFn; dbg: HexaryTreeDbRef) =
  ## Non-exported wrapper passing its arguments on to `test_NodeRangeProof()`
  ## with `db` typed as `HexaryGetFn`. NOTE(review): presumably kept as a
  ## compile check for that argument type -- confirm.
  test_NodeRangeProof(inLst, db, dbg)