Snap sync extractor test sub range proofs (#1460)

* Unit tests to verify calculations based on hard coded constants why: Sizes of RLP encoded objects are available at run time only. * Changed argument order for `hexaryRangeLeafsProof()` prototype why: Better to read as a stand-alone function (arguments were optimised for functional pipelines) * Run sub-range proof tests for extracted ranges
2025-02-23 09:18:29 +00:00 · 2023-02-02 13:27:09 +00:00 · 2023-02-02 13:27:09 +00:00 · c2fc46a99a
commit c2fc46a99a
parent ba92c53624
7 changed files with 269 additions and 153 deletions
--- a/nimbus/sync/handlers/snap.nim
+++ b/nimbus/sync/handlers/snap.nim
@ -26,6 +26,14 @@ type
    chain: ChainRef
    peerPool: PeerPool

+const
+  transportAccountSizeMax = 110
+    ## Account record with `high(UInt256)` hashes and balance, and maximal
+    ## nonce within RLP list
+    ##
+    ## Used as the per-entry size (in bytes) by `accountRangeSize()` and
+    ## `accountRangeNumEntries()`.
+
+  transportProofNodeSizeMax = 536
+    ## Branch node with all branches `high(UInt256)` within RLP list
+    ##
+    ## Used as the per-entry size (in bytes) by `proofNodesSize()`.
+
 # ------------------------------------------------------------------------------
 # Private functions: helper functions
 # ------------------------------------------------------------------------------
@ -73,6 +81,45 @@ proc init*(
  #ctx.setupPeerObserver()
  ctx

+# ------------------------------------------------------------------------------
+# Public functions: helpers
+# ------------------------------------------------------------------------------
+
+proc accountRangeSize*(n: int): int =
+  ## Size estimate (in bytes) for a list of `n` RLP encoded `Account()` type
+  ## entries. Note that this is an *approximate* upper bound.
+  ##
+  ## A single entry is accounted for with `transportAccountSizeMax` bytes,
+  ## which is what one gets when every field of `Account()` is set to
+  ## `high()` or `0xff`. A non-positive argument `n` gives the value `1`.
+  ##
+  ## Note: Public function subject to unit tests
+  if n <= 0:
+    return 1
+
+  # Additive list overhead, experimentally derived (see `test_calc` unit
+  # test module.) NOTE(review): the steps look like the RLP list header
+  # growing from 2 to 3 to 4 bytes with the payload length -- confirm.
+  let overhead =
+    if n <= 2: 2
+    elif n <= 595: 3
+    else: 4
+
+  overhead + n * transportAccountSizeMax
+
+proc proofNodesSize*(n: int): int =
+  ## Max number of bytes needed to store a list of `n` RLP encoded proof
+  ## nodes where each node takes `transportProofNodeSizeMax` bytes. Note that
+  ## this is an *approximate* upper bound. A non-positive argument `n` gives
+  ## the value `1` (size of the empty list.)
+  ##
+  ## Note: Public function subject to unit tests
+  if n <= 0:
+    return 1
+
+  let payload = n * transportProofNodeSizeMax
+
+  # The additive constant is the length of the RLP list header. A payload
+  # of 256..65535 bytes needs a 3 byte header, a longer payload needs (at
+  # least) 4 bytes. With 536 bytes per node, the switch-over takes place
+  # between `n = 122` (payload 65392) and `n = 123` (payload 65928) rather
+  # than at `n = 126` as the list sizes probed by the unit test might
+  # suggest.
+  if 0xffff < payload:
+    4 + payload
+  else:
+    3 + payload
+
+proc accountRangeNumEntries*(size: int): int =
+  ## Number of entries with size guaranteed to not exceed the argument `size`,
+  ## i.e. `result.accountRangeSize <= size` unless the result is zero. The
+  ## function returns zero if `size` is smaller than the size of a single
+  ## entry plus three bytes of list overhead.
+  if transportAccountSizeMax + 3 <= size:
+    result = (size - 3) div transportAccountSizeMax
+
+    # The estimate above assumes three bytes of list overhead while
+    # `accountRangeSize()` uses four bytes for long lists. So the estimate
+    # is one entry too many if `size` is exactly three bytes more than the
+    # accumulated size of the entries of such a long list.
+    if size < result.accountRangeSize:
+      result.dec
+
 # ------------------------------------------------------------------------------
 # Public functions: snap wire protocol handlers
 # ------------------------------------------------------------------------------
--- a/nimbus/sync/snap/worker/db/hexary_range.nim
+++ b/nimbus/sync/snap/worker/db/hexary_range.nim
@ -40,9 +40,9 @@ proc convertTo(key: RepairKey; T: type NodeKey): T =
 # ------------------------------------------------------------------------------

 template collectLeafs(
-    iv: NodeTagRange;                # Proofed range of leaf paths
-    rootKey: NodeKey|RepairKey;      # State root
    db: HexaryGetFn|HexaryTreeDbRef; # Database abstraction
+    rootKey: NodeKey|RepairKey;      # State root
+    iv: NodeTagRange;                # Proofed range of leaf paths
    nLeafs: int;                     # Implies maximal data size
      ): auto =
  ## Collect trie database leafs prototype. This directive is provided as
@ -90,12 +90,12 @@ template collectLeafs(


 template updateProof(
+    db: HexaryGetFn|HexaryTreeDbRef; # Database abstraction
+    rootKey: NodeKey|RepairKey;      # State root
    baseTag: NodeTag;                # Left boundary
    leafList: seq[RangeLeaf];        # Set of collected leafs
-    rootKey: NodeKey|RepairKey;      # State root
-    db: HexaryGetFn|HexaryTreeDbRef; # Database abstraction
      ): auto =
-  ## Update leafs list by adding proof nodes. This directive is provided as
+  ## Complement leafs list by adding proof nodes. This directive is provided as
  ## `template` for avoiding varying exceprion annotations.
  var proof = baseTag.hexaryPath(rootKey, db)
        .path
@ -120,53 +120,53 @@ template updateProof(
 # ------------------------------------------------------------------------------

 proc hexaryRangeLeafsProof*(
-    iv: NodeTagRange;                # Proofed range of leaf paths
-    rootKey: NodeKey;                # State root
-    db: HexaryGetFn;                 # Database abstraction
-    nLeafs = high(int);              # Implies maximal data size
-      ): Result[RangeProof,HexaryError]
-      {.gcsafe, raises: [Defect,RlpError]} =
-  ## ...
-  let rc = iv.collectLeafs(rootKey, db, nLeafs)
-  if rc.isErr:
-    err(rc.error)
-  else:
-    ok(iv.minPt.updateProof(rc.value, rootKey, db))
-
-proc hexaryRangeLeafsProof*(
-    baseTag: NodeTag;                # Left boundary
-    leafList: seq[RangeLeaf];        # Set of already collected leafs
-    rootKey: NodeKey;                # State root
-    db: HexaryGetFn;                 # Database abstraction
-      ): RangeProof
-      {.gcsafe, raises: [Defect,RlpError]} =
-  ## ...
-  baseTag.updateProof(leafList, rootKey, db)
-
-
-proc hexaryRangeLeafsProof*(
-    iv: NodeTagRange;                # Proofed range of leaf paths
-    rootKey: NodeKey;                # State root
    db: HexaryTreeDbRef;             # Database abstraction
+    rootKey: NodeKey;                # State root
+    iv: NodeTagRange;                # Proofed range of leaf paths
    nLeafs = high(int);              # Implies maximal data size
      ): Result[RangeProof,HexaryError]
      {.gcsafe, raises: [Defect,KeyError]} =
-  ## ...
-  let rc = iv.collectLeafs(rootKey, db, nLeafs)
+  ## Collect trie database leafs prototype and add proof.
+  let rc = db.collectLeafs(rootKey, iv, nLeafs)
  if rc.isErr:
    err(rc.error)
  else:
-    ok(iv.minPt.updateProof(rc.value, rootKey, db))
+    ok(db.updateProof(rootKey, iv.minPt, rc.value))

 proc hexaryRangeLeafsProof*(
+    db: HexaryTreeDbRef;             # Database abstraction
+    rootKey: NodeKey;                # State root
    baseTag: NodeTag;                # Left boundary
    leafList: seq[RangeLeaf];        # Set of already collected leafs
-    rootKey: NodeKey;                # State root
-    db: HexaryTreeDbRef;             # Database abstraction
      ): RangeProof
      {.gcsafe, raises: [Defect,KeyError]} =
-  ## ...
-  baseTag.updateProof(leafList, rootKey, db)
+  ## Complement leafs list by adding proof nodes to the argument list
+  ## `leafList`.
+  db.updateProof(rootKey, baseTag, leafList)
+
+proc hexaryRangeLeafsProof*(
+    db: HexaryGetFn;                 # Database abstraction
+    rootKey: NodeKey;                # State root
+    iv: NodeTagRange;                # Proofed range of leaf paths
+    nLeafs = high(int);              # Implies maximal data size
+      ): Result[RangeProof,HexaryError]
+      {.gcsafe, raises: [Defect,RlpError]} =
+  ## Variant of `hexaryRangeLeafsProof()` for persistent database.
+  ##
+  ## The leafs are collected via `collectLeafs()` for the range `iv`. If this
+  ## fails, the error is passed on to the caller. Otherwise the collected
+  ## leafs are complemented by `updateProof()` using `iv.minPt` as left
+  ## boundary.
+  let rc = db.collectLeafs(rootKey, iv, nLeafs)
+  if rc.isErr:
+    err(rc.error)
+  else:
+    ok(db.updateProof(rootKey, iv.minPt, rc.value))
+
+proc hexaryRangeLeafsProof*(
+    db: HexaryGetFn;                 # Database abstraction
+    rootKey: NodeKey;                # State root
+    baseTag: NodeTag;                # Left boundary
+    leafList: seq[RangeLeaf];        # Set of already collected leafs
+      ): RangeProof
+      {.gcsafe, raises: [Defect,RlpError]} =
+  ## Variant of `hexaryRangeLeafsProof()` for persistent database.
+  ##
+  ## Here, the leafs are provided by the caller as `leafList`. All the work
+  ## is delegated to `updateProof()`, there is no error return code.
+  db.updateProof(rootKey, baseTag, leafList)

 # ------------------------------------------------------------------------------
 # End
--- a/tests/test_sync_snap.nim
+++ b/tests/test_sync_snap.nim
@ -27,8 +27,8 @@ import
  ./replay/[pp, undump_accounts, undump_storages],
  ./test_sync_snap/[
    bulk_test_xx, snap_test_xx,
-    test_accounts, test_helpers, test_node_range, test_inspect, test_pivot,
-    test_storage, test_db_timing, test_types]
+    test_accounts, test_calc, test_helpers, test_node_range, test_inspect,
+    test_pivot, test_storage, test_db_timing, test_types]

 const
  baseDir = [".", "..", ".."/"..", $DirSep]
@ -174,9 +174,19 @@ proc snapDbAccountsRef(cdb:ChainDb; root:Hash256; pers:bool):SnapDbAccountsRef =
 # Test Runners: accounts and accounts storages
 # ------------------------------------------------------------------------------

+proc miscRunner(noisy = true) =
+  ## Test runner for the hard coded size calculation helpers. The test bodies
+  ## are provided by the `test_calc` module. The argument `noisy` is not
+  ## used in the body of this function.
+
+  suite "SyncSnap: Verify setup, constants, limits":
+
+    # Account list sizes as calculated vs. RLP encoded
+    test "RLP accounts list sizes":
+      test_calcAccountsListSizes()
+
+    # Ditto for proof node lists
+    test "RLP proofs list sizes":
+      test_calcProofsListSizes()
+
+
 proc accountsRunner(noisy = true;  persistent = true; sample = accSample) =
  let
-    peer = Peer.new
    accLst = sample.to(seq[UndumpAccounts])
    root = accLst[0].root
    tmpDir = getTmpDir()
@ -490,6 +500,7 @@ proc dbTimingRunner(noisy = true;  persistent = true; cleanUp = true) =
 # ------------------------------------------------------------------------------

 proc syncSnapMain*(noisy = defined(debug)) =
+  noisy.miscRunner()
  noisy.accountsRunner(persistent=true)
  noisy.accountsRunner(persistent=false)
  noisy.importRunner() # small sample, just verify functionality
@ -503,50 +514,8 @@ when isMainModule:
  #setTraceLevel()
  setErrorLevel()

-  # The `accountsRunner()` tests a snap sync functionality for storing chain
-  # chain data directly rather than derive them by executing the EVM. Here,
-  # only accounts are considered.
-  #
-  # The `snap/1` protocol allows to fetch data for a certain account range. The
-  # following boundary conditions apply to the received data:
-  #
-  # * `State root`: All data are relaive to the same state root.
-  #
-  # * `Accounts`: There is an accounts interval sorted in strictly increasing
-  #   order. The accounts are required consecutive, i.e. without holes in
-  #   between although this cannot be verified immediately.
-  #
-  # * `Lower bound`: There is a start value which might be lower than the first
-  #   account hash. There must be no other account between this start value and
-  #   the first account (not verifyable yet.) For all practicat purposes, this
-  #   value is mostly ignored but carried through.
-  #
-  # * `Proof`: There is a list of hexary nodes which allow to build a partial
-  #   Patricia-Merkle trie starting at the state root with all the account
-  #   leaves. There are enough nodes that show that there is no account before
-  #   the least account (which is currently ignored.)
-  #
-  # There are test data samples on the sub-directory `test_sync_snap`. These
-  # are complete replies for some (admittedly smapp) test requests from a `kiln`
-  # session.
-  #
-  # The `accountsRunner()` does three tests:
-  #
-  # 1. Run the `importAccounts()` function which is the all-in-one production
-  #    function processoing the data described above. The test applies it
-  #    sequentially to about 20 data sets.
-  #
-  # 2. Test individual functional items which are hidden in test 1. while
-  #    merging the sample data.
-  #    * Load/accumulate `proofs` data from several samples
-  #    * Load/accumulate accounts (needs some unique sorting)
-  #    * Build/complete hexary trie for accounts
-  #    * Save/bulk-store hexary trie on disk. If rocksdb is available, data
-  #      are bulk stored via sst.
-  #
-  # 3. Traverse trie nodes stored earlier. The accounts from test 2 are
-  #    re-visted using the account hash as access path.
-  #
+  # Test constant, calculations etc.
+  noisy.miscRunner()

  # This one uses dumps from the external `nimbus-eth1-blob` repo
  when true and false:
--- a/tests/test_sync_snap/test_accounts.nim
+++ b/tests/test_sync_snap/test_accounts.nim
@ -10,6 +10,52 @@
 # distributed except according to those terms.

 ## Snap sync components tester and TDD environment
+##
+## This module provides test bodies for storing chain data directly rather
+## than deriving them by executing the EVM. Here, only accounts are
+## considered.
+##
+## The `snap/1` protocol allows to fetch data for a certain account range. The
+## following boundary conditions apply to the received data:
+##
+## * `State root`: All data are relative to the same state root.
+##
+## * `Accounts`: There is an accounts interval sorted in strictly increasing
+##   order. The accounts are required consecutive, i.e. without holes in
+##   between although this cannot be verified immediately.
+##
+## * `Lower bound`: There is a start value which might be lower than the first
+##   account hash. There must be no other account between this start value and
+##   the first account (not verifiable yet.) For all practical purposes, this
+##   value is mostly ignored but carried through.
+##
+## * `Proof`: There is a list of hexary nodes which allow to build a partial
+##   Patricia-Merkle trie starting at the state root with all the account
+##   leaves. There are enough nodes that show that there is no account before
+##   the least account (which is currently ignored.)
+##
+## There are test data samples on the sub-directory `test_sync_snap`. These
+## are complete replies for some (admittedly small) test requests from a `kiln`
+## session.
+##
+## There are three tests:
+##
+## 1. Run the `test_accountsImport()` function which is the all-in-one
+##    production function processing the data described above. The test
+##    applies it sequentially to all argument data sets.
+##
+## 2. With `test_accountsMergeProofs()` individual items are tested which are
+##    hidden in test 1. while merging the sample data.
+##    * Load/accumulate `proofs` data from several samples
+##    * Load/accumulate accounts (needs some unique sorting)
+##    * Build/complete hexary trie for accounts
+##    * Save/bulk-store hexary trie on disk. If rocksdb is available, data
+##      are bulk stored via sst.
+##
+## 3. The function `test_accountsRevisitStoredItems()` traverses trie nodes
+##    stored earlier. The accounts from test 2 are re-visited using the account
+##    hash as access path.
+##

 import
  std/algorithm,
--- a/tests/test_sync_snap/test_calc.nim
+++ b/tests/test_sync_snap/test_calc.nim
@ -0,0 +1,65 @@
+# Nimbus - Types, data structures and shared utilities used in network sync
+#
+# Copyright (c) 2018-2021 Status Research & Development GmbH
+# Licensed under either of
+#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
+#    http://www.apache.org/licenses/LICENSE-2.0)
+#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
+#    http://opensource.org/licenses/MIT)
+# at your option. This file may not be copied, modified, or
+# distributed except according to those terms.
+
+## Snap sync components tester and TDD environment
+
+import
+  std/sequtils,
+  eth/common,
+  unittest2,
+  ../../nimbus/sync/handlers/snap,
+  ../../nimbus/sync/snap/[range_desc, worker/db/hexary_desc],
+  ./test_helpers
+
+# ------------------------------------------------------------------------------
+# Public test function
+# ------------------------------------------------------------------------------
+
+proc test_calcAccountsListSizes*() =
+  ## Compare `accountRangeSize()` against the length of the RLP encoding of
+  ## lists of maximal sized accounts. This has to be verified at run time as
+  ## RLP does not allow a static check.
+
+  let
+    maxAccount = Account(
+      storageRoot: Hash256(data: high(UInt256).toBytesBE),
+      codeHash:    Hash256(data: high(UInt256).toBytesBE),
+      nonce:       high(uint64),
+      balance:     high(UInt256))
+
+    # List lengths to probe
+    listLens = [0, 1, 2, 3, 594, 595, 596]
+
+  for listLen in listLens:
+    let encodedLen = maxAccount.repeat(listLen).encode.len
+    check listLen.accountRangeSize == encodedLen
+
+  # Cross check for the longest list: four bytes on top of the accumulated
+  # sizes of the single entry encodings
+  let
+    longest = listLens[^1]
+    entryLen = maxAccount.encode.len
+  check 4 + longest * entryLen == maxAccount.repeat(longest).encode.len
+
+
+proc test_calcProofsListSizes*() =
+  ## Compare `proofNodesSize()` against the length of the RLP encoding of
+  ## lists of branch nodes with all links set to the maximal value. This has
+  ## to be verified at run time as RLP does not allow a static check.
+
+  # Branch node with all 16 links set to `high(NodeTag)`
+  var maxNode = XNodeObj(kind: Branch)
+  let maxLink = high(NodeTag).to(Blob)
+  for inx in 0 .. 15:
+    maxNode.bLink[inx] = maxLink
+
+  # List lengths to probe
+  let listLens = [0, 1, 2, 126, 127]
+
+  for listLen in listLens:
+    let encodedLen = maxNode.repeat(listLen).encode.len
+    check listLen.proofNodesSize == encodedLen
+
+  # Cross check for the longest list: four bytes on top of the accumulated
+  # sizes of the single node encodings
+  let
+    longest = listLens[^1]
+    nodeLen = maxNode.encode.len
+  check 4 + longest * nodeLen == maxNode.repeat(longest).encode.len
+
+# ------------------------------------------------------------------------------
+# End
+# ------------------------------------------------------------------------------
--- a/tests/test_sync_snap/test_helpers.nim
+++ b/tests/test_sync_snap/test_helpers.nim
@ -110,27 +110,6 @@ proc say*(noisy = false; pfx = "***"; args: varargs[string, `$`]) =
    else:
      echo pfx, args.toSeq.join

-# ------------------------------------------------------------------------------
-# Public free parking
-# ------------------------------------------------------------------------------
-
-proc rangeAccountSizeMax*(n: int): int =
-  ## Max number of bytes needed to store `n` RLP encoded `Account()` type
-  ## entries. Note that this is an upper bound.
-  ##
-  ## The maximum size of a single RLP encoded account item can be determined
-  ## by setting every field of `Account()` to `high()` or `0xff`.
-  if 127 < n:
-    3 + n * 110
-  elif 0 < n:
-    2 + n * 110
-  else:
-    1
-
-proc rangeNumAccounts*(size: int): int =
-  ## ..
-  (size - 3) div 110
-
 # ------------------------------------------------------------------------------
 # End
 # ------------------------------------------------------------------------------
--- a/tests/test_sync_snap/test_node_range.nim
+++ b/tests/test_sync_snap/test_node_range.nim
@ -13,7 +13,7 @@

 import
  std/[sequtils, sets, strformat, strutils],
-  eth/[common, p2p, rlp, trie/nibbles],
+  eth/[common, p2p, trie/nibbles],
  stew/[byteutils, interval_set, results],
  unittest2,
  ../../nimbus/sync/types,
@ -189,16 +189,6 @@ proc printCompareLeftNearby(
    "\n"


-proc verifyAccountListSizes() =
-  ## RLP does not allow static check ..
-  for n in [0, 1, 128, 129, 200]:
-    check n.rangeAccountSizeMax == Account(
-      storageRoot: Hash256(data: high(UInt256).toBytesBE),
-      codeHash:    Hash256(data: high(UInt256).toBytesBE),
-      nonce:       high(uint64),
-      balance:     high(UInt256)).repeat(n).encode.len
-
-
 proc verifyRangeProof(
    rootKey: NodeKey;
    leafs: seq[RangeLeaf];
@ -387,53 +377,70 @@ proc test_NodeRangeProof*(
    noisy = not dbg.isNil
    maxLen = high(int)

-  # RLP does not allow static check
-  verifyAccountListSizes()
-
  # Assuming the `inLst` entries have been stored in the DB already
  for n,w in inLst:
    let
-      accounts = w.data.accounts[0 .. min(w.data.accounts.len,maxLen)-1]
+      accounts = w.data.accounts[0 ..< min(w.data.accounts.len,maxLen)]
      iv = NodeTagRange.new(w.base, accounts[^1].accKey.to(NodeTag))
-      rc = iv.hexaryRangeLeafsProof(rootKey, db, accounts.len)
+      rc = db.hexaryRangeLeafsProof(rootKey, iv, accounts.len)
    check rc.isOk
    if rc.isErr:
      return

-    let leafs = rc.value.leafs
-    if leafs.len != accounts.len or accounts[^1].accKey != leafs[^1].key:
-      noisy.say "***", "n=", n, " something went wrong .."
-      check (n,leafs.len) == (n,accounts.len)
-      rootKey.printCompareRightLeafs(w.base, accounts, leafs, db, dbg)
-      return
+    # Run over sub-samples of the given account range
+    var subCount = 0
+    for cutOff in {0, 2, 5, 10, 16, 23, 77}:

-    # Import proof nodes and build trie
-    var rx = rootKey.verifyRangeProof(leafs, rc.value.proof)
-    if rx.isErr:
-      rx = rootKey.verifyRangeProof(leafs, rc.value.proof, dbg)
-      let
-        baseNbls =  iv.minPt.to(NodeKey).to(NibblesSeq)
-        lastNbls =  iv.maxPt.to(NodeKey).to(NibblesSeq)
-        nPfxNblsLen = baseNbls.sharedPrefixLen lastNbls
-        pfxNbls = baseNbls.slice(0, nPfxNblsLen)
-      noisy.say "***", "n=", n,
-        " leafs=", leafs.len,
-        " proof=", rc.value.proof.ppNodeKeys(dbg),
-        "\n\n   ",
-        " base=", iv.minPt,
-        "\n    ", iv.minPt.hexaryPath(rootKey,db).pp(dbg),
-        "\n\n   ",
-        " pfx=", pfxNbls,
-        " nPfx=", nPfxNblsLen,
-        "\n    ", pfxNbls.hexaryPath(rootKey,db).pp(dbg),
-        "\n"
+      # Take sub-samples but not too small
+      if 0 < cutOff and rc.value.leafs.len < cutOff + 5:
+        break # rest cases ignored
+      subCount.inc

-      check rx == typeof(rx).ok()
-      return
+      let leafs = rc.value.leafs[0 ..< rc.value.leafs.len - cutOff]
+      var proof: seq[Blob]
+
+      # Calculate proof
+      if cutOff == 0:
+        if leafs.len != accounts.len or accounts[^1].accKey != leafs[^1].key:
+          noisy.say "***", "n=", n, " something went wrong .."
+          check (n,leafs.len) == (n,accounts.len)
+          rootKey.printCompareRightLeafs(w.base, accounts, leafs, db, dbg)
+          return
+        proof = rc.value.proof
+      else:
+        # Re-adjust proof
+        proof = db.hexaryRangeLeafsProof(rootKey, iv.minPt, leafs).proof
+
+      # Import proof nodes and build trie
+      block:
+        var rx = rootKey.verifyRangeProof(leafs, proof)
+        if rx.isErr:
+          rx = rootKey.verifyRangeProof(leafs, proof, dbg)
+          let
+            baseNbls =  iv.minPt.to(NodeKey).to(NibblesSeq)
+            lastNbls =  iv.maxPt.to(NodeKey).to(NibblesSeq)
+            nPfxNblsLen = baseNbls.sharedPrefixLen lastNbls
+            pfxNbls = baseNbls.slice(0, nPfxNblsLen)
+          noisy.say "***", "n=", n,
+            " cutOff=", cutOff,
+            " leafs=", leafs.len,
+            " proof=", proof.ppNodeKeys(dbg),
+            "\n\n   ",
+            " base=", iv.minPt,
+            "\n    ", iv.minPt.hexaryPath(rootKey,db).pp(dbg),
+            "\n\n   ",
+            " pfx=", pfxNbls,
+            " nPfx=", nPfxNblsLen,
+            "\n    ", pfxNbls.hexaryPath(rootKey,db).pp(dbg),
+            "\n"
+
+          check rx == typeof(rx).ok()
+          return

    noisy.say "***", "n=", n,
-      " leafs=", leafs.len,
-      " proof=", rc.value.proof.len, "/", w.data.proof.len
+      " leafs=", rc.value.leafs.len,
+      " proof=", rc.value.proof.len, "/", w.data.proof.len,
+      " sub-samples=", subCount


 proc test_NodeRangeLeftBoundary*(
@ -468,3 +475,6 @@ proc test_NodeRangeLeftBoundary*(
 # ------------------------------------------------------------------------------
 # End
 # ------------------------------------------------------------------------------
+
+proc xxx(inLst: seq[UndumpAccounts]; db: HexaryGetFn; dbg: HexaryTreeDbRef) =
+  ## Forwards its arguments to `test_NodeRangeProof()`. This function is
+  ## neither exported nor called from any code visible here.
+  ##
+  ## NOTE(review): presumably a left-over stub for checking that
+  ## `test_NodeRangeProof()` compiles with a `HexaryGetFn` type database
+  ## argument -- confirm, and remove or rename it if so.
+  inLst.test_NodeRangeProof(db, dbg)