Snap sync extractor test sub range proofs (#1460)

* Unit tests to verify calculations based on hard coded constants

why:
  Sizes of RLP encoded objects are available at run time only.

* Changed argument order for `hexaryRangeLeafsProof()` prototype

why:
  Better to read as a stand-alone function (arguments were optimised
  for functional pipelines)

* Run sub-range proof tests for extracted ranges
This commit is contained in:
Jordan Hrycaj 2023-02-02 13:27:09 +00:00 committed by GitHub
parent ba92c53624
commit c2fc46a99a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 269 additions and 153 deletions

View File

@ -26,6 +26,14 @@ type
chain: ChainRef
peerPool: PeerPool
const
transportAccountSizeMax = 110
## Account record with `high(UInt256)` hashes and balance, and maximal
## nonce within RLP list
transportProofNodeSizeMax = 536
## Branch node with all branches `high(UInt256)` within RLP list
# ------------------------------------------------------------------------------
# Private functions: helper functions
# ------------------------------------------------------------------------------
@ -73,6 +81,45 @@ proc init*(
#ctx.setupPeerObserver()
ctx
# ------------------------------------------------------------------------------
# Public functions: helpers
# ------------------------------------------------------------------------------
proc accountRangeSize*(n: int): int =
  ## Number of bytes used by an RLP encoded list of `n` maximal size
  ## `Account()` entries. Note that this is an *approximate* upper bound.
  ##
  ## A single entry is accounted for with `transportAccountSizeMax` bytes,
  ## i.e. the size of an RLP encoded `Account()` with every field set to
  ## `high()` or `0xff`. On top of the `n` entries comes the list header
  ## which grows with the number of entries.
  ##
  ## Note: Public function subject to unit tests
  # Header sizes experimentally derived, see `test_calc` unit test module
  if n <= 0:
    # Empty list
    result = 1
  else:
    let listHeaderLen =
      if n <= 2: 2
      elif n <= 595: 3
      else: 4
    result = listHeaderLen + n * transportAccountSizeMax
proc proofNodesSize*(n: int): int =
  ## Number of bytes used by an RLP encoded list of `n` maximal size proof
  ## nodes, each of which is accounted for with `transportProofNodeSizeMax`
  ## bytes.
  ##
  ## The result is the list payload `n * transportProofNodeSizeMax` plus the
  ## RLP list header. For a non-empty list the payload always exceeds 255
  ## bytes, so the header takes 3 bytes as long as the payload length fits
  ## into two bytes (i.e. does not exceed `0xffff`), and 4 bytes otherwise.
  ## An empty list is encoded as a single byte.
  ##
  ## Note: Public function subject to unit tests
  # The switch-over point is derived from the item size rather than hard
  # coded. A fixed `125 < n` would under-estimate the size by one byte for
  # `n` in `123..125` when the item size is 536 (123 * 536 > 0xffff.)
  #
  # NOTE(review): payloads beyond `0xffffff` bytes (5 byte header) are not
  # considered here -- presumably out of scope for snap messages.
  const maxItemsShortHeader = 0xffff div transportProofNodeSizeMax
  if maxItemsShortHeader < n:
    4 + n * transportProofNodeSizeMax
  elif 0 < n:
    3 + n * transportProofNodeSizeMax
  else:
    1
proc accountRangeNumEntries*(size: int): int =
  ## Number of entries with size guaranteed to not exceed the argument `size`.
  ##
  ## The result is `0` unless `size` can hold at least one account entry of
  ## `transportAccountSizeMax` bytes plus a 3 byte list header.
  if transportAccountSizeMax + 3 <= size:
    result = (size - 3) div transportAccountSizeMax
    # A list payload longer than `0xffff` bytes needs a 4 byte RLP header
    # rather than a 3 byte one. Without re-calculating, the encoded list
    # could exceed `size` by one byte (e.g. for `size == 65563`.)
    if 0xffff < result * transportAccountSizeMax:
      result = (size - 4) div transportAccountSizeMax
# ------------------------------------------------------------------------------
# Public functions: snap wire protocol handlers
# ------------------------------------------------------------------------------

View File

@ -40,9 +40,9 @@ proc convertTo(key: RepairKey; T: type NodeKey): T =
# ------------------------------------------------------------------------------
template collectLeafs(
iv: NodeTagRange; # Proofed range of leaf paths
rootKey: NodeKey|RepairKey; # State root
db: HexaryGetFn|HexaryTreeDbRef; # Database abstraction
rootKey: NodeKey|RepairKey; # State root
iv: NodeTagRange; # Proofed range of leaf paths
nLeafs: int; # Implies maximal data size
): auto =
## Collect trie database leafs prototype. This directive is provided as
@ -90,12 +90,12 @@ template collectLeafs(
template updateProof(
db: HexaryGetFn|HexaryTreeDbRef; # Database abstraction
rootKey: NodeKey|RepairKey; # State root
baseTag: NodeTag; # Left boundary
leafList: seq[RangeLeaf]; # Set of collected leafs
rootKey: NodeKey|RepairKey; # State root
db: HexaryGetFn|HexaryTreeDbRef; # Database abstraction
): auto =
## Update leafs list by adding proof nodes. This directive is provided as
## Complement leafs list by adding proof nodes. This directive is provided as
## `template` for avoiding varying exception annotations.
var proof = baseTag.hexaryPath(rootKey, db)
.path
@ -120,53 +120,53 @@ template updateProof(
# ------------------------------------------------------------------------------
proc hexaryRangeLeafsProof*(
iv: NodeTagRange; # Proofed range of leaf paths
rootKey: NodeKey; # State root
db: HexaryGetFn; # Database abstraction
nLeafs = high(int); # Implies maximal data size
): Result[RangeProof,HexaryError]
{.gcsafe, raises: [Defect,RlpError]} =
## ...
let rc = iv.collectLeafs(rootKey, db, nLeafs)
if rc.isErr:
err(rc.error)
else:
ok(iv.minPt.updateProof(rc.value, rootKey, db))
proc hexaryRangeLeafsProof*(
baseTag: NodeTag; # Left boundary
leafList: seq[RangeLeaf]; # Set of already collected leafs
rootKey: NodeKey; # State root
db: HexaryGetFn; # Database abstraction
): RangeProof
{.gcsafe, raises: [Defect,RlpError]} =
## ...
baseTag.updateProof(leafList, rootKey, db)
proc hexaryRangeLeafsProof*(
iv: NodeTagRange; # Proofed range of leaf paths
rootKey: NodeKey; # State root
db: HexaryTreeDbRef; # Database abstraction
rootKey: NodeKey; # State root
iv: NodeTagRange; # Proofed range of leaf paths
nLeafs = high(int); # Implies maximal data size
): Result[RangeProof,HexaryError]
{.gcsafe, raises: [Defect,KeyError]} =
## ...
let rc = iv.collectLeafs(rootKey, db, nLeafs)
## Collect trie database leafs prototype and add proof.
let rc = db.collectLeafs(rootKey, iv, nLeafs)
if rc.isErr:
err(rc.error)
else:
ok(iv.minPt.updateProof(rc.value, rootKey, db))
ok(db.updateProof(rootKey, iv.minPt, rc.value))
proc hexaryRangeLeafsProof*(
db: HexaryTreeDbRef; # Database abstraction
rootKey: NodeKey; # State root
baseTag: NodeTag; # Left boundary
leafList: seq[RangeLeaf]; # Set of already collected leafs
rootKey: NodeKey; # State root
db: HexaryTreeDbRef; # Database abstraction
): RangeProof
{.gcsafe, raises: [Defect,KeyError]} =
## ...
baseTag.updateProof(leafList, rootKey, db)
## Complement leafs list by adding proof nodes to the argument list
## `leafList`.
db.updateProof(rootKey, baseTag, leafList)
proc hexaryRangeLeafsProof*(
    db: HexaryGetFn;                 # Database abstraction
    rootKey: NodeKey;                # State root
    iv: NodeTagRange;                # Proofed range of leaf paths
    nLeafs = high(int);              # Implies maximal data size
      ): Result[RangeProof,HexaryError]
      {.gcsafe, raises: [Defect,RlpError]} =
  ## Variant of `hexaryRangeLeafsProof()` for persistent database.
  ##
  ## Leafs are collected for the range `iv` (via `collectLeafs()`) and, on
  ## success, complemented by proof nodes relative to the left boundary
  ## `iv.minPt` (via `updateProof()`.) An error from collecting the leafs
  ## is passed on unchanged.
  let collected = db.collectLeafs(rootKey, iv, nLeafs)
  if collected.isOk:
    ok(db.updateProof(rootKey, iv.minPt, collected.value))
  else:
    err(collected.error)
proc hexaryRangeLeafsProof*(
    db: HexaryGetFn;                 # Database abstraction
    rootKey: NodeKey;                # State root
    baseTag: NodeTag;                # Left boundary
    leafList: seq[RangeLeaf];        # Set of already collected leafs
      ): RangeProof
      {.gcsafe, raises: [Defect,RlpError]} =
  ## Variant of `hexaryRangeLeafsProof()` for persistent database.
  ##
  ## The already collected `leafList` is handed over to `updateProof()`
  ## together with the left boundary `baseTag`, and the outcome is returned
  ## as-is.
  updateProof(db, rootKey, baseTag, leafList)
# ------------------------------------------------------------------------------
# End

View File

@ -27,8 +27,8 @@ import
./replay/[pp, undump_accounts, undump_storages],
./test_sync_snap/[
bulk_test_xx, snap_test_xx,
test_accounts, test_helpers, test_node_range, test_inspect, test_pivot,
test_storage, test_db_timing, test_types]
test_accounts, test_calc, test_helpers, test_node_range, test_inspect,
test_pivot, test_storage, test_db_timing, test_types]
const
baseDir = [".", "..", ".."/"..", $DirSep]
@ -174,9 +174,19 @@ proc snapDbAccountsRef(cdb:ChainDb; root:Hash256; pers:bool):SnapDbAccountsRef =
# Test Runners: accounts and accounts storages
# ------------------------------------------------------------------------------
proc miscRunner(noisy = true) =
  ## Test runner verifying hard coded constants and size calculations
  ## against run-time RLP encodings. The `noisy` argument is not used by
  ## the tests registered here.
  suite "SyncSnap: Verify setup, constants, limits":

    # Encoded size of lists of maximal size account records
    test "RLP accounts list sizes":
      test_calcAccountsListSizes()

    # Encoded size of lists of maximal size proof nodes
    test "RLP proofs list sizes":
      test_calcProofsListSizes()
proc accountsRunner(noisy = true; persistent = true; sample = accSample) =
let
peer = Peer.new
accLst = sample.to(seq[UndumpAccounts])
root = accLst[0].root
tmpDir = getTmpDir()
@ -490,6 +500,7 @@ proc dbTimingRunner(noisy = true; persistent = true; cleanUp = true) =
# ------------------------------------------------------------------------------
proc syncSnapMain*(noisy = defined(debug)) =
noisy.miscRunner()
noisy.accountsRunner(persistent=true)
noisy.accountsRunner(persistent=false)
noisy.importRunner() # small sample, just verify functionality
@ -503,50 +514,8 @@ when isMainModule:
#setTraceLevel()
setErrorLevel()
# The `accountsRunner()` tests a snap sync functionality for storing chain
# chain data directly rather than derive them by executing the EVM. Here,
# only accounts are considered.
#
# The `snap/1` protocol allows to fetch data for a certain account range. The
# following boundary conditions apply to the received data:
#
# * `State root`: All data are relaive to the same state root.
#
# * `Accounts`: There is an accounts interval sorted in strictly increasing
# order. The accounts are required consecutive, i.e. without holes in
# between although this cannot be verified immediately.
#
# * `Lower bound`: There is a start value which might be lower than the first
# account hash. There must be no other account between this start value and
# the first account (not verifyable yet.) For all practicat purposes, this
# value is mostly ignored but carried through.
#
# * `Proof`: There is a list of hexary nodes which allow to build a partial
# Patricia-Merkle trie starting at the state root with all the account
# leaves. There are enough nodes that show that there is no account before
# the least account (which is currently ignored.)
#
# There are test data samples on the sub-directory `test_sync_snap`. These
# are complete replies for some (admittedly smapp) test requests from a `kiln`
# session.
#
# The `accountsRunner()` does three tests:
#
# 1. Run the `importAccounts()` function which is the all-in-one production
# function processoing the data described above. The test applies it
# sequentially to about 20 data sets.
#
# 2. Test individual functional items which are hidden in test 1. while
# merging the sample data.
# * Load/accumulate `proofs` data from several samples
# * Load/accumulate accounts (needs some unique sorting)
# * Build/complete hexary trie for accounts
# * Save/bulk-store hexary trie on disk. If rocksdb is available, data
# are bulk stored via sst.
#
# 3. Traverse trie nodes stored earlier. The accounts from test 2 are
# re-visted using the account hash as access path.
#
# Test constant, calculations etc.
noisy.miscRunner()
# This one uses dumps from the external `nimbus-eth1-blob` repo
when true and false:

View File

@ -10,6 +10,52 @@
# distributed except according to those terms.
## Snap sync components tester and TDD environment
##
## This module provides test bodies for storing chain data directly
## rather than deriving them by executing the EVM. Here, only accounts are
## considered.
##
## The `snap/1` protocol allows to fetch data for a certain account range. The
## following boundary conditions apply to the received data:
##
## * `State root`: All data are relative to the same state root.
##
## * `Accounts`: There is an accounts interval sorted in strictly increasing
## order. The accounts are required consecutive, i.e. without holes in
## between although this cannot be verified immediately.
##
## * `Lower bound`: There is a start value which might be lower than the first
## account hash. There must be no other account between this start value and
## the first account (not verifiable yet.) For all practical purposes, this
## value is mostly ignored but carried through.
##
## * `Proof`: There is a list of hexary nodes which allow to build a partial
## Patricia-Merkle trie starting at the state root with all the account
## leaves. There are enough nodes that show that there is no account before
## the least account (which is currently ignored.)
##
## There are test data samples on the sub-directory `test_sync_snap`. These
## are complete replies for some (admittedly small) test requests from a `kiln`
## session.
##
## There are three tests:
##
## 1. Run the `test_accountsImport()` function which is the all-in-one
## production function processing the data described above. The test
## applies it sequentially to all argument data sets.
##
## 2. With `test_accountsMergeProofs()` individual items are tested which are
## hidden in test 1. while merging the sample data.
## * Load/accumulate `proofs` data from several samples
## * Load/accumulate accounts (needs some unique sorting)
## * Build/complete hexary trie for accounts
## * Save/bulk-store hexary trie on disk. If rocksdb is available, data
## are bulk stored via sst.
##
## 3. The function `test_accountsRevisitStoredItems()` traverses trie nodes
## stored earlier. The accounts from test 2 are re-visited using the account
## hash as access path.
##
import
std/algorithm,

View File

@ -0,0 +1,65 @@
# Nimbus - Types, data structures and shared utilities used in network sync
#
# Copyright (c) 2018-2021 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or
# distributed except according to those terms.
## Snap sync components tester and TDD environment
import
std/sequtils,
eth/common,
unittest2,
../../nimbus/sync/handlers/snap,
../../nimbus/sync/snap/[range_desc, worker/db/hexary_desc],
./test_helpers
# ------------------------------------------------------------------------------
# Public test function
# ------------------------------------------------------------------------------
proc test_calcAccountsListSizes*() =
  ## Compare `accountRangeSize()` against the size of RLP encoded lists of
  ## maximal size accounts. This has to be done at run time as the RLP
  ## encoder does not allow for a static check.
  let
    allBitsSet = Hash256(data: high(UInt256).toBytesBE)
    maxAccount = Account(
      storageRoot: allBitsSet,
      codeHash:    allBitsSet,
      nonce:       high(uint64),
      balance:     high(UInt256))
    # List lengths to verify
    nItemsList = [0, 1, 2, 3, 594, 595, 596]

  for nItems in nItemsList:
    let encodedLen = maxAccount.repeat(nItems).encode.len
    #echo ">>> ", nItems, " ", encodedLen
    check nItems.accountRangeSize == encodedLen

  # Cross check the longest list against the size of a single encoded item
  block:
    let
      nItems = nItemsList[^1]
      itemLen = maxAccount.encode.len
    check 4 + nItems * itemLen == maxAccount.repeat(nItems).encode.len
proc test_calcProofsListSizes*() =
  ## Compare `proofNodesSize()` against the size of RLP encoded lists of
  ## branch nodes with all links set to `high(NodeTag)`. This has to be done
  ## at run time as the RLP encoder does not allow for a static check.
  let maxBranch = block:
    var node = XNodeObj(kind: Branch)
    for inx in 0 ..< 16:
      node.bLink[inx] = high(NodeTag).to(Blob)
    node

  # List lengths to verify
  let nItemsList = [0, 1, 2, 126, 127]

  for nItems in nItemsList:
    let encodedLen = maxBranch.repeat(nItems).encode.len
    #echo ">>> ", nItems, " ", encodedLen
    check nItems.proofNodesSize == encodedLen

  # Cross check the longest list against the size of a single encoded item
  block:
    let
      nItems = nItemsList[^1]
      itemLen = maxBranch.encode.len
    check 4 + nItems * itemLen == maxBranch.repeat(nItems).encode.len
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------

View File

@ -110,27 +110,6 @@ proc say*(noisy = false; pfx = "***"; args: varargs[string, `$`]) =
else:
echo pfx, args.toSeq.join
# ------------------------------------------------------------------------------
# Public free parking
# ------------------------------------------------------------------------------
proc rangeAccountSizeMax*(n: int): int =
## Max number of bytes needed to store `n` RLP encoded `Account()` type
## entries. Note that this is an upper bound.
##
## The maximum size of a single RLP encoded account item can be determined
## by setting every field of `Account()` to `high()` or `0xff`.
if 127 < n:
3 + n * 110
elif 0 < n:
2 + n * 110
else:
1
proc rangeNumAccounts*(size: int): int =
## ..
(size - 3) div 110
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------

View File

@ -13,7 +13,7 @@
import
std/[sequtils, sets, strformat, strutils],
eth/[common, p2p, rlp, trie/nibbles],
eth/[common, p2p, trie/nibbles],
stew/[byteutils, interval_set, results],
unittest2,
../../nimbus/sync/types,
@ -189,16 +189,6 @@ proc printCompareLeftNearby(
"\n"
proc verifyAccountListSizes() =
## RLP does not allow static check ..
for n in [0, 1, 128, 129, 200]:
check n.rangeAccountSizeMax == Account(
storageRoot: Hash256(data: high(UInt256).toBytesBE),
codeHash: Hash256(data: high(UInt256).toBytesBE),
nonce: high(uint64),
balance: high(UInt256)).repeat(n).encode.len
proc verifyRangeProof(
rootKey: NodeKey;
leafs: seq[RangeLeaf];
@ -387,53 +377,70 @@ proc test_NodeRangeProof*(
noisy = not dbg.isNil
maxLen = high(int)
# RLP does not allow static check
verifyAccountListSizes()
# Assuming the `inLst` entries have been stored in the DB already
for n,w in inLst:
let
accounts = w.data.accounts[0 .. min(w.data.accounts.len,maxLen)-1]
accounts = w.data.accounts[0 ..< min(w.data.accounts.len,maxLen)]
iv = NodeTagRange.new(w.base, accounts[^1].accKey.to(NodeTag))
rc = iv.hexaryRangeLeafsProof(rootKey, db, accounts.len)
rc = db.hexaryRangeLeafsProof(rootKey, iv, accounts.len)
check rc.isOk
if rc.isErr:
return
let leafs = rc.value.leafs
if leafs.len != accounts.len or accounts[^1].accKey != leafs[^1].key:
noisy.say "***", "n=", n, " something went wrong .."
check (n,leafs.len) == (n,accounts.len)
rootKey.printCompareRightLeafs(w.base, accounts, leafs, db, dbg)
return
# Run over sub-samples of the given account range
var subCount = 0
for cutOff in {0, 2, 5, 10, 16, 23, 77}:
# Import proof nodes and build trie
var rx = rootKey.verifyRangeProof(leafs, rc.value.proof)
if rx.isErr:
rx = rootKey.verifyRangeProof(leafs, rc.value.proof, dbg)
let
baseNbls = iv.minPt.to(NodeKey).to(NibblesSeq)
lastNbls = iv.maxPt.to(NodeKey).to(NibblesSeq)
nPfxNblsLen = baseNbls.sharedPrefixLen lastNbls
pfxNbls = baseNbls.slice(0, nPfxNblsLen)
noisy.say "***", "n=", n,
" leafs=", leafs.len,
" proof=", rc.value.proof.ppNodeKeys(dbg),
"\n\n ",
" base=", iv.minPt,
"\n ", iv.minPt.hexaryPath(rootKey,db).pp(dbg),
"\n\n ",
" pfx=", pfxNbls,
" nPfx=", nPfxNblsLen,
"\n ", pfxNbls.hexaryPath(rootKey,db).pp(dbg),
"\n"
# Take sub-samples but not too small
if 0 < cutOff and rc.value.leafs.len < cutOff + 5:
break # rest cases ignored
subCount.inc
check rx == typeof(rx).ok()
return
let leafs = rc.value.leafs[0 ..< rc.value.leafs.len - cutOff]
var proof: seq[Blob]
# Calculate proof
if cutOff == 0:
if leafs.len != accounts.len or accounts[^1].accKey != leafs[^1].key:
noisy.say "***", "n=", n, " something went wrong .."
check (n,leafs.len) == (n,accounts.len)
rootKey.printCompareRightLeafs(w.base, accounts, leafs, db, dbg)
return
proof = rc.value.proof
else:
# Re-adjust proof
proof = db.hexaryRangeLeafsProof(rootKey, iv.minPt, leafs).proof
# Import proof nodes and build trie
block:
var rx = rootKey.verifyRangeProof(leafs, proof)
if rx.isErr:
rx = rootKey.verifyRangeProof(leafs, proof, dbg)
let
baseNbls = iv.minPt.to(NodeKey).to(NibblesSeq)
lastNbls = iv.maxPt.to(NodeKey).to(NibblesSeq)
nPfxNblsLen = baseNbls.sharedPrefixLen lastNbls
pfxNbls = baseNbls.slice(0, nPfxNblsLen)
noisy.say "***", "n=", n,
" cutOff=", cutOff,
" leafs=", leafs.len,
" proof=", proof.ppNodeKeys(dbg),
"\n\n ",
" base=", iv.minPt,
"\n ", iv.minPt.hexaryPath(rootKey,db).pp(dbg),
"\n\n ",
" pfx=", pfxNbls,
" nPfx=", nPfxNblsLen,
"\n ", pfxNbls.hexaryPath(rootKey,db).pp(dbg),
"\n"
check rx == typeof(rx).ok()
return
noisy.say "***", "n=", n,
" leafs=", leafs.len,
" proof=", rc.value.proof.len, "/", w.data.proof.len
" leafs=", rc.value.leafs.len,
" proof=", rc.value.proof.len, "/", w.data.proof.len,
" sub-samples=", subCount
proc test_NodeRangeLeftBoundary*(
@ -468,3 +475,6 @@ proc test_NodeRangeLeftBoundary*(
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------
proc xxx(inLst: seq[UndumpAccounts]; db: HexaryGetFn; dbg: HexaryTreeDbRef) =
  ## Forwards all arguments to `test_NodeRangeProof()`.
  # NOTE(review): private, placed after the `End` marker and with a
  # placeholder name -- looks like a scratch wrapper, confirm it is needed.
  test_NodeRangeProof(inLst, db, dbg)