Snap sync extractor and sub range proofs cont1 (#1468)

* Redefine `seq[Blob]` => `seq[SnapProof]` for `snap/1` protocol

why:
  Proof nodes are traded as `Blob` type items rather than Nim objects. So
  the RLP transcoder must not extra wrap proofs which are of type
  seq[Blob]. Without custom encoding one would produce a
  `list(blob(item1), blob(item2) ..)` instead of `list(item1, item2 ..)`.

* Limit leaf extractor by RLP size rather than number of items

why:
  To be used serving `snap/1` requests, the result of function
  `hexaryRangeLeafsProof()` is limited by the maximal space
  needed to serialise the result which will be part of the
  `snap/1` response.

* Let the range extractor `hexaryRangeLeafsProof()` return RLP list sizes

why:
  When collecting accounts, the size of the accounts list when encoded
  as RLP is continually updated. So the summed up value is available
  anyway. For the proof nodes list, there are not many (~ 10) so summing
  up is not expensive here.
This commit is contained in:
Jordan Hrycaj 2023-02-15 11:14:40 +01:00 committed by GitHub
parent 880313d7a4
commit b793f0de8d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 446 additions and 255 deletions

View File

@ -10,10 +10,11 @@
import
./handlers/eth as handlers_eth,
./handlers/setup as handlers_setup
./handlers/setup as handlers_setup,
./handlers/snap as handlers_snap
export
handlers_eth, handlers_setup
handlers_eth, handlers_setup, handlers_snap
static:
type

View File

@ -12,8 +12,8 @@
import
chronicles,
chronos,
eth/p2p,
../snap/worker/db/hexary_range,
../protocol,
../protocol/snap/snap_types,
../../core/chain
@ -27,11 +27,7 @@ type
peerPool: PeerPool
const
transportAccountSizeMax = 110
## Account record with `high(UInt256)` hashes and balance, and maximal
## nonce within RLP list
transportProofNodeSizeMax = 536
proofNodeSizeMax = 532
## Branch node with all branches `high(UInt256)` within RLP list
# ------------------------------------------------------------------------------
@ -41,6 +37,10 @@ const
proc notImplemented(name: string) =
  ## Log a debug notice for an unimplemented `snap/1` wire handler method.
  ## `name` is the handler's name as it should appear in the log.
  # Fixed typo in the log message: "hHandler" -> "handler".
  debug "snapWire: handler method not implemented", meth=name
proc append(writer: var RlpWriter; t: SnapProof; node: Blob) =
  ## RLP mixin, encoding a `SnapProof` item. Delegates to `snapAppend()`
  ## so that the already RLP encoded proof `node` is emitted verbatim
  ## instead of being wrapped in a second blob layer.
  writer.snapAppend node
# ------------------------------------------------------------------------------
# Private functions: peer observer
# ------------------------------------------------------------------------------
@ -85,40 +85,18 @@ proc init*(
# Public functions: helpers
# ------------------------------------------------------------------------------
proc accountRangeSize*(n: int): int =
## Max number of bytes needed to store `n` RLP encoded `Account()` type
## entries. Note that this is an *approximate* upper bound.
##
## The maximum size of a single RLP encoded account item can be determined
## by setting every field of `Account()` to `high()` or `0xff`.
##
## Note: Public function subject to unit tests
# Experimentally derived, see `test_calc` unit test module
if 595 < n:
4 + n * transportAccountSizeMax
elif 2 < n:
3 + n * transportAccountSizeMax
elif 0 < n:
2 + n * transportAccountSizeMax
proc proofNodesSizeMax*(n: int): int =
## Max number of bytes needed to store a list of `n` RLP encoded hexary
## nodes which is a `Branch` node where every link reference is set to
## `high(UInt256)`.
const nMax = high(int) div proofNodeSizeMax
if n <= nMax:
hexaryRangeRlpSize(n * proofNodeSizeMax)
else:
1
high(int)
proc proofNodesSize*(n: int): int =
## Ditto for proof nodes
##
## Note: Public function subject to unit tests
# Experimentally derived, see `test_calc` unit test module
if 125 < n:
4 + n * transportProofNodeSizeMax
elif 0 < n:
3 + n * transportProofNodeSizeMax
else:
1
proc accountRangeNumEntries*(size: int): int =
## Number of entries with size guaranteed to not exceed the argument `size`.
if transportAccountSizeMax + 3 <= size:
result = (size - 3) div transportAccountSizeMax
proc proofEncode*(proof: seq[SnapProof]): Blob =
  ## RLP encode a proof node list. Relies on the `SnapProof` RLP mixin so
  ## the result is `list(item1, item2, ..)` rather than a list of
  ## re-wrapped blobs.
  rlp.encode proof
# ------------------------------------------------------------------------------
# Public functions: snap wire protocol handlers
@ -130,7 +108,7 @@ method getAccountRange*(
origin: Hash256;
limit: Hash256;
replySizeMax: uint64;
): (seq[SnapAccount], SnapAccountProof)
): (seq[SnapAccount], seq[SnapProof])
{.gcsafe.} =
notImplemented("getAccountRange")
@ -141,7 +119,7 @@ method getStorageRanges*(
origin: openArray[byte];
limit: openArray[byte];
replySizeMax: uint64;
): (seq[seq[SnapStorage]], SnapStorageProof)
): (seq[seq[SnapStorage]], seq[SnapProof])
{.gcsafe.} =
notImplemented("getStorageRanges")

View File

@ -10,7 +10,9 @@
import
chronicles,
eth/common
eth/common,
stew/endians2,
../../../constants
{.push raises: [].}
@ -19,18 +21,124 @@ type
accHash*: Hash256
accBody* {.rlpCustomSerialization.}: Account
SnapAccountProof* = seq[Blob]
SnapProof* = object
data* {.rlpCustomSerialization.}: Blob
SnapStorage* = object
slotHash*: Hash256
slotData*: Blob
SnapStorageProof* = seq[Blob]
SnapWireBase* = ref object of RootRef
SnapPeerState* = ref object of RootRef
# ------------------------------------------------------------------------------
# Public serialisation helpers
# ------------------------------------------------------------------------------
# The `snap` protocol represents `Account` differently from the regular RLP
# serialisation used in `eth` protocol as well as the canonical Merkle hash
# over all accounts. In `snap`, empty storage hash and empty code hash are
# each represented by an RLP zero-length string instead of the full hash. This
# avoids transmitting these hashes in about 90% of accounts. We need to
# recognise or set these hashes in `Account` when serialising RLP for `snap`.
proc snapRead*(
    rlp: var Rlp;
    T: type Account;
    strict: static[bool] = false;
      ): T
      {.gcsafe, raises: [RlpError]} =
  ## RLP decoding for `Account`. The `snap` RLP representation of the account
  ## differs from standard `Account` RLP. Empty storage hash and empty code
  ## hash are each represented by an RLP zero-length string instead of the
  ## full hash.
  ##
  ## Normally, this read function will silently handle both the standard
  ## encoding and the `snap` encoding. Setting the argument `strict` to
  ## `true` makes the function throw an exception if the `snap` encoding
  ## is violated (i.e. an empty hash was transmitted in full.)
  rlp.tryEnterList()
  result.nonce = rlp.read(typeof(result.nonce))
  result.balance = rlp.read(typeof(result.balance))
  # Non-empty blob (or nested list) => full storage root was transmitted
  if rlp.blobLen != 0 or not rlp.isBlob:
    result.storageRoot = rlp.read(typeof(result.storageRoot))
    when strict:
      if result.storageRoot == EMPTY_ROOT_HASH:
        raise newException(RlpTypeMismatch,
          "EMPTY_ROOT_HASH not encoded as empty string in Snap protocol")
  else:
    # Zero-length string stands for the well-known empty trie root
    rlp.skipElem()
    result.storageRoot = EMPTY_ROOT_HASH
  if rlp.blobLen != 0 or not rlp.isBlob:
    result.codeHash = rlp.read(typeof(result.codeHash))
    when strict:
      if result.codeHash == EMPTY_SHA3:
        raise newException(RlpTypeMismatch,
          "EMPTY_SHA3 not encoded as empty string in Snap protocol")
  else:
    # Zero-length string stands for the hash of empty code
    rlp.skipElem()
    result.codeHash = EMPTY_SHA3
proc snapAppend*(
    writer: var RlpWriter;
    account: Account;
      ) =
  ## RLP encoding for `Account`. The snap RLP representation of the account
  ## differs from standard `Account` RLP. Empty storage hash and empty code
  ## hash are each represented by an RLP zero-length string instead of the
  ## full hash, saving transmission space for the common empty case.
  writer.startList(4)
  writer.append(account.nonce)
  writer.append(account.balance)
  if account.storageRoot == EMPTY_ROOT_HASH:
    writer.append("")
  else:
    writer.append(account.storageRoot)
  if account.codeHash == EMPTY_SHA3:
    writer.append("")
  else:
    writer.append(account.codeHash)
# ---------------------
proc snapRead*(
    rlp: var Rlp;
    T: type Blob;
      ): T
      {.gcsafe, raises: [RlpError]} =
  ## RLP decoding for a proof node. Proof nodes travel as opaque blobs,
  ## so this is a plain blob read with no extra unwrapping.
  rlp.read Blob
proc snapAppend*(
    writer: var RlpWriter;
    proofNode: Blob;
      ) =
  ## RLP encoding for a proof node. The `proofNode` blob is ALREADY RLP
  ## encoded, so its outer list header is stripped and the payload is
  ## emitted raw; the writer's enclosing list re-introduces the wrapper.
  var start = 0u8
  # Need some magic to strip an extra layer that will be re-introduced by
  # the RLP encoder as object wrapper. The problem is that the `proofNode`
  # argument blob is encoded already and a second encoding must be avoided.
  #
  # This extra work is not an issue as the number of proof nodes in a list
  # is typically small.
  if proofNode.len < 57:
    # <c0> + data(max 55): short list, single header byte
    start = 1u8
  elif 0xf7 < proofNode[0]:
    # <f7+sizeLen> + size + data ..: long list, 1 + sizeLen header bytes
    start = proofNode[0] - 0xf7 + 1
  else:
    # Oops, unexpected data -- encode as is
    discard
  # NOTE(review): a zero-length `proofNode` would take the first branch and
  # make the slice below raise -- presumably callers never pass one; confirm.
  writer.appendRawBytes proofNode[start ..< proofNode.len]
# ------------------------------------------------------------------------------
# Public service stubs
# ------------------------------------------------------------------------------
proc notImplemented(name: string) =
  ## Log a debug notice for a service stub method `name` that has no
  ## implementation yet.
  debug "Method not implemented", meth = name
@ -40,7 +148,7 @@ method getAccountRange*(
origin: Hash256;
limit: Hash256;
replySizeMax: uint64;
): (seq[SnapAccount], SnapAccountProof)
): (seq[SnapAccount], seq[SnapProof])
{.base.} =
notImplemented("getAccountRange")
@ -51,7 +159,7 @@ method getStorageRanges*(
origin: openArray[byte];
limit: openArray[byte];
replySizeMax: uint64;
): (seq[seq[SnapStorage]], SnapStorageProof)
): (seq[seq[SnapStorage]], seq[SnapProof])
{.base.} =
notImplemented("getStorageRanges")
@ -72,4 +180,6 @@ method getTrieNodes*(
{.base.} =
notImplemented("getTrieNodes")
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------

View File

@ -138,7 +138,6 @@ import
chronicles,
chronos,
eth/[common, p2p, p2p/private/p2p_types],
nimcrypto/hash,
./snap/snap_types,
../../constants
@ -167,69 +166,22 @@ const
trSnapSendReplying* =
">> " & prettySnapProtoName & " Replying "
# The `snap` protocol represents `Account` differently from the regular RLP
# serialisation used in `eth` protocol as well as the canonical Merkle hash
# over all accounts. In `snap`, empty storage hash and empty code hash are
# each represented by an RLP zero-length string instead of the full hash. This
# avoids transmitting these hashes in about 90% of accounts. We need to
# recognise or set these hashes in `Account` when serialising RLP for `snap`.
proc snapRead*(rlp: var Rlp; T: type Account; strict: static[bool] = false): T
{.gcsafe, raises: [RlpError]} =
## RLP decoding for `Account`. The `snap` RLP representation of the account
## differs from standard `Account` RLP. Empty storage hash and empty code
## hash are each represented by an RLP zero-length string instead of the
## full hash.
##
## Normally, this read function will silently handle both the standard
## encoding and the `snap` encoding. Setting the argument `strict` to `true` the function will
## throw an exception if `snap` encoding is violated.
rlp.tryEnterList()
result.nonce = rlp.read(typeof(result.nonce))
result.balance = rlp.read(typeof(result.balance))
if rlp.blobLen != 0 or not rlp.isBlob:
result.storageRoot = rlp.read(typeof(result.storageRoot))
when strict:
if result.storageRoot == EMPTY_ROOT_HASH:
raise newException(RlpTypeMismatch,
"EMPTY_ROOT_HASH not encoded as empty string in Snap protocol")
else:
rlp.skipElem()
result.storageRoot = EMPTY_ROOT_HASH
if rlp.blobLen != 0 or not rlp.isBlob:
result.codeHash = rlp.read(typeof(result.codeHash))
when strict:
if result.codeHash == EMPTY_SHA3:
raise newException(RlpTypeMismatch,
"EMPTY_SHA3 not encoded as empty string in Snap protocol")
else:
rlp.skipElem()
result.codeHash = EMPTY_SHA3
proc snapAppend*(writer: var RlpWriter; account: Account) =
## RLP encoding for `Account`. The snap RLP representation of the account
## differs from standard `Account` RLP. Empty storage hash and empty code
## hash are each represented by an RLP zero-length string instead of the
## full hash.
writer.startList(4)
writer.append(account.nonce)
writer.append(account.balance)
if account.storageRoot == EMPTY_ROOT_HASH:
writer.append("")
else:
writer.append(account.storageRoot)
if account.codeHash == EMPTY_SHA3:
writer.append("")
else:
writer.append(account.codeHash)
proc read(rlp: var Rlp, t: var SnapAccount, T: type Account): T =
## RLP Mixin: decoding for `SnapAccount`.
result = rlp.snapRead(T)
## RLP mixin, decoding
rlp.snapRead T
proc append(rlpWriter: var RlpWriter, t: SnapAccount, account: Account) =
## RLP Mixin: encoding for `SnapAccount`.
rlpWriter.snapAppend(account)
proc read(rlp: var Rlp; t: var SnapProof; T: type Blob): T =
## RLP mixin, decoding
rlp.snapRead T
proc append(writer: var RlpWriter, t: SnapAccount, account: Account) =
## RLP mixin, encoding
writer.snapAppend account
proc append(writer: var RlpWriter; t: SnapProof; node: Blob) =
## RLP mixin, encoding
writer.snapAppend node
p2pProtocol snap1(version = snapVersion,
@ -271,8 +223,8 @@ p2pProtocol snap1(version = snapVersion,
# User message 0x01: AccountRange.
proc accountRange(
peer: Peer;
accounts: seq[SnapAccount];
proof: SnapAccountProof)
accounts: openArray[SnapAccount];
proof: openArray[SnapProof])
requestResponse:
@ -312,7 +264,7 @@ p2pProtocol snap1(version = snapVersion,
proc storageRanges(
peer: Peer;
slotLists: openArray[seq[SnapStorage]];
proof: SnapStorageProof)
proof: openArray[SnapProof])
requestResponse:

View File

@ -57,7 +57,7 @@ type
## used for storing in the database. So the `PackedAccount` is `BaseDB`
## trie compatible.
accounts*: seq[PackedAccount] ## List of re-packed accounts data
proof*: SnapAccountProof ## Boundary proofs
proof*: seq[SnapProof] ## Boundary proofs
PackedAccount* = object
## In fact, the `snap/1` driver returns the `Account` structure which is
@ -75,7 +75,7 @@ type
## List of storage descriptors, the last `AccountSlots` storage data might
## be incomplete and the `proof` is needed for proving validity.
storages*: seq[AccountSlots] ## List of accounts and storage data
proof*: SnapStorageProof ## Boundary proofs for last entry
proof*: seq[SnapProof] ## Boundary proofs for last entry
base*: NodeTag ## Lower limit for last entry w/proof
AccountSlots* = object

View File

@ -30,7 +30,7 @@ type
#
# SnapStorageRanges* = object
# slotLists*: seq[seq[SnapStorage]]
# proof*: SnapStorageProof
# proof*: seq[SnapProof]
GetStorageRanges* = object
leftOver*: seq[AccountSlotsHeader]

View File

@ -179,6 +179,52 @@ proc initImpl(key: var RepairKey; data: openArray[byte]): bool =
trg.copyMem(unsafeAddr data[0], data.len)
return true
proc append(writer: var RlpWriter, node: RNodeRef) =
  ## Mixin for RLP writer, serialising a repair-database hexary node.
  proc appendOk(writer: var RlpWriter; key: RepairKey): bool =
    ## Append the link `key`: zero keys become the empty node blob, node
    ## keys become a 32 byte hash. Returns `false` for any other key kind.
    if key.isZero:
      writer.append(EmptyNodeBlob)
    elif key.isNodeKey:
      var hash: Hash256
      # Drop the leading marker byte of the 33 byte repair key
      (addr hash.data[0]).copyMem(unsafeAddr key.ByteArray33[1], 32)
      writer.append(hash)
    else:
      return false
    true

  case node.kind:
  of Branch:
    writer.startList(17)
    for n in 0 ..< 16:
      if not writer.appendOk(node.bLink[n]):
        return # empty `Blob`
    writer.append(node.bData)
  of Extension:
    writer.startList(2)
    writer.append(node.ePfx.hexPrefixEncode(isleaf = false))
    if not writer.appendOk(node.eLink):
      return # empty `Blob`
  of Leaf:
    writer.startList(2)
    writer.append(node.lPfx.hexPrefixEncode(isleaf = true))
    writer.append(node.lData)
proc append(writer: var RlpWriter, node: XNodeObj) =
  ## Mixin for RLP writer, serialising an `XNodeObj` hexary node.
  case node.kind:
  of Branch:
    # `bLink` is appended wholesale -- presumably it already holds all
    # branch entries ready for encoding; confirm against the `XNodeObj`
    # type declaration.
    writer.append(node.bLink)
  of Extension:
    writer.startList(2)
    writer.append(node.ePfx.hexPrefixEncode(isleaf = false))
    writer.append(node.eLink)
  of Leaf:
    writer.startList(2)
    writer.append(node.lPfx.hexPrefixEncode(isleaf = true))
    writer.append(node.lData)
# ------------------------------------------------------------------------------
# Private debugging helpers
# ------------------------------------------------------------------------------
@ -393,55 +439,22 @@ proc convertTo*(data: Blob; T: type RepairKey): T =
proc convertTo*(node: RNodeRef; T: type Blob): T =
## Write the node as an RLP-encoded blob
var writer = initRlpWriter()
proc appendOk(writer: var RlpWriter; key: RepairKey): bool =
if key.isZero:
writer.append(EmptyNodeBlob)
elif key.isNodeKey:
var hash: Hash256
(addr hash.data[0]).copyMem(unsafeAddr key.ByteArray33[1], 32)
writer.append(hash)
else:
return false
true
case node.kind:
of Branch:
writer.startList(17)
for n in 0 ..< 16:
if not writer.appendOk(node.bLink[n]):
return # empty `Blob`
writer.append(node.bData)
of Extension:
writer.startList(2)
writer.append(node.ePfx.hexPrefixEncode(isleaf = false))
if not writer.appendOk(node.eLink):
return # empty `Blob`
of Leaf:
writer.startList(2)
writer.append(node.lPfx.hexPrefixEncode(isleaf = true))
writer.append(node.lData)
writer.append node
writer.finish()
proc convertTo*(node: XNodeObj; T: type Blob): T =
## Variant of above `convertTo()` for `XNodeObj` nodes.
## Variant of `convertTo()` for `XNodeObj` nodes.
var writer = initRlpWriter()
case node.kind:
of Branch:
writer.append(node.bLink)
of Extension:
writer.startList(2)
writer.append(node.ePfx.hexPrefixEncode(isleaf = false))
writer.append(node.eLink)
of Leaf:
writer.startList(2)
writer.append(node.lPfx.hexPrefixEncode(isleaf = true))
writer.append(node.lData)
writer.append node
writer.finish()
proc convertTo*(nodeList: openArray[XNodeObj]; T: type Blob): T =
  ## Variant of `convertTo()` for a list of `XNodeObj` nodes, RLP encoded
  ## as one list of the individual node encodings.
  var writer = initRlpList(nodeList.len)
  for w in nodeList:
    writer.append w
  writer.finish
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------

View File

@ -13,6 +13,7 @@ import
chronicles,
eth/[common, p2p, trie/nibbles],
stew/[byteutils, interval_set],
../../../protocol,
../../range_desc,
"."/[hexary_desc, hexary_error, hexary_nearby, hexary_paths]
@ -25,7 +26,12 @@ type
RangeProof* = object
leafs*: seq[RangeLeaf]
proof*: seq[Blob]
leafsSize*: int
proof*: seq[SnapProof]
proofSize*: int
proc hexaryRangeRlpLeafListSize*(blobLen: int; lstLen = 0): (int,int) {.gcsafe.}
proc hexaryRangeRlpSize*(blobLen: int): int {.gcsafe.}
# ------------------------------------------------------------------------------
# Private helpers
@ -35,6 +41,15 @@ proc convertTo(key: RepairKey; T: type NodeKey): T =
## Might be lossy, check before use (if at all, unless debugging)
(addr result.ByteArray32[0]).copyMem(unsafeAddr key.ByteArray33[1], 32)
proc rlpPairSize(aLen: int; bRlpLen: int): int =
  ## Size calculation for an RLP encoded pair `[<a>,<rb>]` where the blob
  ## `a` has payload length `aLen` and `rb` is already RLP encoded with
  ## total length `bRlpLen`. Returns `high(int)` when the sum would
  ## overflow.
  let aRlpLen = hexaryRangeRlpSize(aLen)
  if bRlpLen < high(int) - aRlpLen:
    hexaryRangeRlpSize(aRlpLen + bRlpLen)
  else:
    high(int)
# ------------------------------------------------------------------------------
# Private functions
# ------------------------------------------------------------------------------
@ -43,7 +58,8 @@ template collectLeafs(
db: HexaryGetFn|HexaryTreeDbRef; # Database abstraction
rootKey: NodeKey|RepairKey; # State root
iv: NodeTagRange; # Proofed range of leaf paths
nLeafs: int; # Implies maximal data size
nSizeLimit: int; # List of RLP encoded data must be smaller
nSizeUsed: var int; # Updated size counter for the raw list
): auto =
## Collect trie database leafs prototype. This directive is provided as
`template` for avoiding varying exception annotations.
@ -55,8 +71,8 @@ template collectLeafs(
prevTag: NodeTag
rls: seq[RangeLeaf]
# Fill at most `nLeafs` leaf nodes from interval range
while rls.len < nLeafs and nodeTag <= maxPt(iv):
# Fill leaf nodes from interval range unless size reached
while nodeTag <= maxPt(iv):
# The following logic might be sub-optimal. A strict version of the
# `next()` function that stops with an error at dangling links could
# be faster if the leaf nodes are not too far apart on the hexary trie.
@ -76,6 +92,13 @@ template collectLeafs(
rc = typeof(rc).err(FailedNextNode)
break body # stop here
let (pairLen,listLen) =
hexaryRangeRlpLeafListSize(xPath.leafData.len, nSizeUsed)
if listLen < nSizeLimit:
nSizeUsed += pairLen
else:
break
rls.add RangeLeaf(
key: rightKey,
data: xPath.leafData)
@ -94,6 +117,7 @@ template updateProof(
rootKey: NodeKey|RepairKey; # State root
baseTag: NodeTag; # Left boundary
leafList: seq[RangeLeaf]; # Set of collected leafs
nSizeUsed: int; # To be stored into the result
): auto =
## Complement leafs list by adding proof nodes. This directive is provided as
## `template` for avoiding varying exception annotations.
@ -106,57 +130,40 @@ template updateProof(
if 0 < leafList.len:
proof.incl leafList[^1].key.to(NodeTag).hexaryPath(rootKey, db)
.path
.mapIt(it.node)
.mapIt(it.node)
.filterIt(it.kind != Leaf)
.mapIt(it.convertTo(Blob))
.toHashSet
RangeProof(
var rp = RangeProof(
leafs: leafList,
proof: proof.toSeq)
proof: proof.toSeq.mapIt(SnapProof(data: it)))
if 0 < nSizeUsed:
rp.leafsSize = hexaryRangeRlpSize nSizeUsed
if 0 < rp.proof.len:
rp.proofSize = hexaryRangeRlpSize rp.proof.foldl(a + b.data.len, 0)
rp
# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------
#proc hexaryRangeLeafsProof*(
# db: HexaryTreeDbRef; # Database abstraction
# rootKey: NodeKey; # State root
# iv: NodeTagRange; # Proofed range of leaf paths
# nLeafs = high(int); # Implies maximal data size
# ): Result[RangeProof,HexaryError]
# {.gcsafe, raises: [KeyError]} =
# ## Collect trie database leafs prototype and add proof.
# let rc = db.collectLeafs(rootKey, iv, nLeafs)
# if rc.isErr:
# err(rc.error)
# else:
# ok(db.updateProof(rootKey, iv.minPt, rc.value))
#
#proc hexaryRangeLeafsProof*(
# db: HexaryTreeDbRef; # Database abstraction
# rootKey: NodeKey; # State root
# baseTag: NodeTag; # Left boundary
# leafList: seq[RangeLeaf]; # Set of already collected leafs
# ): RangeProof
# {.gcsafe, raises: [KeyError]} =
# ## Complement leafs list by adding proof nodes to the argument list
# ## `leafList`.
# db.updateProof(rootKey, baseTag, leafList)
proc hexaryRangeLeafsProof*(
db: HexaryGetFn|HexaryTreeDbRef; # Database abstraction
rootKey: NodeKey; # State root
iv: NodeTagRange; # Proofed range of leaf paths
nLeafs = high(int); # Implies maximal data size
nSizeLimit = high(int); # List of RLP encoded data must be smaller
): Result[RangeProof,HexaryError]
{.gcsafe, raises: [CatchableError]} =
## Collect trie database leafs prototype and add proof.
let rc = db.collectLeafs(rootKey, iv, nLeafs)
var accSize = 0
let rc = db.collectLeafs(rootKey, iv, nSizeLimit, accSize)
if rc.isErr:
err(rc.error)
else:
ok(db.updateProof(rootKey, iv.minPt, rc.value))
ok(db.updateProof(rootKey, iv.minPt, rc.value, accSize))
proc hexaryRangeLeafsProof*(
db: HexaryGetFn|HexaryTreeDbRef; # Database abstraction
@ -167,7 +174,54 @@ proc hexaryRangeLeafsProof*(
{.gcsafe, raises: [CatchableError]} =
## Complement leafs list by adding proof nodes to the argument list
## `leafList`.
db.updateProof(rootKey, baseTag, leafList)
db.updateProof(rootKey, baseTag, leafList, 0)
# ------------------------------------------------------------------------------
# Public helpers
# ------------------------------------------------------------------------------
proc hexaryRangeRlpSize*(blobLen: int): int =
  ## Size of an RLP encoded <blob> with payload length `blobLen`, i.e.
  ## payload plus RLP header bytes. Saturates at `high(int)` instead of
  ## overflowing.
  if blobLen < 56:
    blobLen + 1                     # single header byte
  elif blobLen < (1 shl (8 * 1)):
    blobLen + 2                     # header byte + 1 length byte
  elif blobLen < (1 shl (8 * 2)):
    blobLen + 3                     # header byte + 2 length bytes
  elif blobLen < (1 shl (8 * 3)):
    blobLen + 4                     # header byte + 3 length bytes
  else:
    when sizeof(int) < 8:
      # 32 bit target: walk the remaining length-field widths
      if blobLen < (1 shl (8 * 4)):
        blobLen + 5
      elif blobLen < (1 shl (8 * 5)):
        blobLen + 6
      elif blobLen < (1 shl (8 * 6)):
        blobLen + 7
      elif blobLen < (1 shl (8 * 7)):
        blobLen + 8
      elif blobLen < high(int) - (1 + sizeof(int)):
        blobLen + 1 + sizeof(int)
      else:
        high(int)
    else:
      # 64 bit target: anything bigger gets the maximal header estimate
      if blobLen < high(int) - (1 + sizeof(int)):
        blobLen + 1 + sizeof(int)
      else:
        high(int)
proc hexaryRangeRlpLeafListSize*(blobLen: int; lstLen = 0): (int,int) =
  ## Size calculation for an RLP encoded list `[[<key>,<blob>],a,b,..]`
  ## where a,b,.. are from a sequence of the same format `[<keyA>,<blobA>]`,
  ## `[<keyB>,<blobB>]`,... The size of blob is the argument size `blobLen`,
  ## and the total size of the already accumulated sequence is `lstLen`.
  ##
  ## The function returns `(x,y)`, the size `x` of the RLP encoded pair
  ## `[<key>,<blob>]` and the total size `y` of the complete RLP encoded list
  ## `[[<key>,<blob>],a,b,..]`.
  let pairLen = blobLen.rlpPairSize(33)   # 33 = RLP size of a 32 byte key
  if lstLen == 0:
    (pairLen, hexaryRangeRlpSize(pairLen))
  elif pairLen < high(int) - lstLen:
    # Overflow guard fixed: the sum computed below is `pairLen + lstLen`,
    # so the guard must involve `pairLen` (was `lstLen < high(int) - lstLen`.)
    (pairLen, hexaryRangeRlpSize(pairLen + lstLen))
  else:
    (pairLen, high(int))
# ------------------------------------------------------------------------------
# End

View File

@ -13,6 +13,7 @@ import
chronicles,
eth/[common, p2p, trie/db, trie/nibbles],
../../../../db/[select_backend, storage_types],
../../../protocol,
../../range_desc,
"."/[hexary_desc, hexary_error, hexary_import, hexary_nearby,
hexary_paths, rocky_bulk_load]
@ -211,7 +212,7 @@ proc dbBackendRocksDb*(ps: SnapDbBaseRef): bool =
proc mergeProofs*(
ps: SnapDbBaseRef; ## Session database
peer: Peer; ## For log messages
proof: seq[Blob]; ## Node records
proof: seq[SnapProof]; ## Node records
freeStandingOk = false; ## Remove freestanding nodes
): Result[void,HexaryError]
{.gcsafe, raises: [RlpError,KeyError].} =
@ -225,7 +226,7 @@ proc mergeProofs*(
refs = @[ps.root.to(RepairKey)].toHashSet
for n,rlpRec in proof:
let report = db.hexaryImport(rlpRec, nodes, refs)
let report = db.hexaryImport(rlpRec.data, nodes, refs)
if report.error != NothingSerious:
let error = report.error
trace "mergeProofs()", peer, item=n, proofs=proof.len, error

View File

@ -113,7 +113,7 @@ proc importStorageSlots(
ps: SnapDbStorageSlotsRef; ## Re-usable session descriptor
base: NodeTag; ## before or at first account entry in `data`
data: AccountSlots; ## Account storage descriptor
proof: SnapStorageProof; ## Storage slots proof data
proof: seq[SnapProof]; ## Storage slots proof data
noBaseBoundCheck = false; ## Ignore left boundary proof check if `true`
): Result[seq[NodeSpecs],HexaryError]
{.gcsafe, raises: [RlpError,KeyError].} =

View File

@ -13,7 +13,7 @@ import
eth/common,
nimcrypto/utils,
stew/byteutils,
../../nimbus/sync/snap/range_desc,
../../nimbus/sync/[protocol, snap/range_desc],
./gunzip
type
@ -68,6 +68,9 @@ proc dumpAccounts*(
proc ppStr(blob: Blob): string =
blob.mapIt(it.toHex(2)).join.toLowerAscii
proc ppStr(proof: SnapProof): string =
proof.data.ppStr
proc ppStr(hash: Hash256): string =
hash.data.mapIt(it.toHex(2)).join.toLowerAscii
@ -180,7 +183,7 @@ iterator undumpNextAccount*(gzFile: string): UndumpAccounts =
of UndumpProofs:
if flds.len == 1:
data.data.proof.add flds[0].toByteSeq
data.data.proof.add SnapProof(data: flds[0].toByteSeq)
nProofs.dec
if nProofs <= 0:
state = UndumpCommit

View File

@ -13,8 +13,7 @@ import
eth/common,
nimcrypto/utils,
stew/byteutils,
../../nimbus/sync/snap/range_desc,
../../nimbus/sync/protocol,
../../nimbus/sync/[protocol, snap/range_desc],
./gunzip
type
@ -69,6 +68,9 @@ proc dumpStorages*(
proc ppStr(blob: Blob): string =
blob.mapIt(it.toHex(2)).join.toLowerAscii
proc ppStr(proof: SnapProof): string =
proof.data.ppStr
proc ppStr(hash: Hash256): string =
hash.data.mapIt(it.toHex(2)).join.toLowerAscii
@ -207,7 +209,7 @@ iterator undumpNextStorages*(gzFile: string): UndumpStorages =
of UndumpProofs:
if flds.len == 1:
data.data.proof.add flds[0].toByteSeq
data.data.proof.add SnapProof(data: flds[0].toByteSeq)
nProofs.dec
if nProofs <= 0:
state = UndumpCommit

View File

@ -511,7 +511,8 @@ when isMainModule:
setErrorLevel()
# Test constant, calculations etc.
noisy.miscRunner()
when true: # and false:
noisy.miscRunner()
# This one uses dumps from the external `nimbus-eth1-blob` repo
when true and false:
@ -544,16 +545,18 @@ when isMainModule:
false.accountsRunner(persistent=true, sam)
false.storagesRunner(persistent=true, sam)
# This one uses readily available dumps
# This one uses the readily available dump: `bulkTest0` and some huge replay
# dumps `bulkTest1`, `bulkTest2`, .. from the `nimbus-eth1-blobs` package
when true and false:
# ---- database storage timings -------
noisy.showElapsed("importRunner()"):
noisy.importRunner(capture = bulkTest0)
for test in @[bulkTest0] & @[bulkTest1, bulkTest2, bulkTest3]:
noisy.showElapsed("importRunner()"):
noisy.importRunner(capture = test)
noisy.showElapsed("dbTimingRunner()"):
true.dbTimingRunner(cleanUp = false)
true.dbTimingRunner()
noisy.showElapsed("dbTimingRunner()"):
true.dbTimingRunner(cleanUp = false)
true.dbTimingRunner()
# ------------------------------------------------------------------------------
# End

View File

@ -1,4 +1,4 @@
#
#
# Copyright (c) 2018-2021 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or

View File

@ -62,6 +62,7 @@ import
eth/[common, p2p],
unittest2,
../../nimbus/db/select_backend,
../../nimbus/sync/protocol,
../../nimbus/sync/snap/range_desc,
../../nimbus/sync/snap/worker/db/[snapdb_accounts, snapdb_desc],
../replay/[pp, undump_accounts],
@ -71,7 +72,7 @@ import
# Private helpers
# ------------------------------------------------------------------------------
proc flatten(list: openArray[seq[Blob]]): seq[Blob] =
proc flatten(list: openArray[seq[SnapProof]]): seq[SnapProof] =
  ## Concatenate a list of proof-node sequences into a single flat sequence,
  ## preserving order.
  for w in list:
    result.add w

View File

@ -12,53 +12,111 @@
## Snap sync components tester and TDD environment
import
std/sequtils,
std/[random, sequtils],
eth/common,
stew/byteutils,
unittest2,
../../nimbus/sync/handlers/snap,
../../nimbus/sync/snap/[range_desc, worker/db/hexary_desc],
../../nimbus/sync/[handlers, protocol],
../../nimbus/sync/snap/range_desc,
../../nimbus/sync/snap/worker/db/[hexary_desc, hexary_range],
./test_helpers
const
accObjRlpMin = 70 # min size of an encoded `Account()` obj
accObjRlpMax = 110 # max size of an encoded `Account()` obj
var
accBlobs: array[accObjRlpMax - accObjRlpMin + 1, Blob]
brNode = XNodeObj(kind: Branch)
nodeBlob: Blob
# ------------------------------------------------------------------------------
# Private helpers for `test_calcAccountsListSizes()`
# ------------------------------------------------------------------------------
proc randAccSize(r: var Rand): int =
  ## Pick a random account size within `[accObjRlpMin, accObjRlpMax]`
  accObjRlpMin + r.rand(accBlobs.len - 1)
proc accBlob(n: int): Blob =
  ## Return the pre-initialised dummy account blob of exact size `n`, or
  ## an empty blob when `n` lies outside `[accObjRlpMin, accObjRlpMax]`.
  let inx = n - accObjRlpMin
  if 0 <= inx and inx < accBlobs.len:
    accBlobs[inx]
  else:
    @[]
proc initAccBlobs() =
  ## Lazily initialise the `accBlobs` table with dummy blobs of every size
  ## in `[accObjRlpMin, accObjRlpMax]` and verify the size bound constants.
  ## A no-op after the first call (guarded by `accBlobs[0].len`.)
  if accBlobs[0].len == 0:
    # Encoded size of an account with every field set to its maximum
    let ffAccLen = Account(
      storageRoot: Hash256(data: high(UInt256).toBytesBE),
      codeHash: Hash256(data: high(UInt256).toBytesBE),
      nonce: high(uint64),
      balance: high(UInt256)).encode.len

    # The constants must match the all-zero and all-max encodings
    check accObjRlpMin == Account().encode.len
    check accObjRlpMax == ffAccLen

    # Initialise
    for n in 0 ..< accBlobs.len:
      accBlobs[n] = 5.byte.repeat(accObjRlpMin + n)

    # Verify
    for n in 0 .. (accObjRlpMax + 2):
      if accObjRlpMin <= n and n <= accObjRlpMax:
        check n == accBlob(n).len
      else:
        check 0 == accBlob(n).len
proc accRndChain(r: var Rand; nItems: int): seq[RangeLeaf] =
  ## Generate `nItems` range leafs with random-size dummy account data and
  ## deterministic keys derived from the item index.
  for n in 0 ..< nItems:
    result.add RangeLeaf(data: accBlob(r.randAccSize()))
    discard result[^1].key.init (n mod 256).byte.repeat(32)
proc accRndChain(seed: int; nItems: int): seq[RangeLeaf] =
  ## Variant of `accRndChain()` using a fresh PRNG seeded with `seed`, for
  ## reproducible test chains.
  var prng = initRand(seed)
  prng.accRndChain(nItems)
# ------------------------------------------------------------------------------
# Private helpers for `test_calcProofsListSizes()`
# ------------------------------------------------------------------------------
proc initBranchNodeSample() =
  ## Lazily build the maximal `Branch` node sample (all 16 links set to
  ## `high(NodeTag)`) and cache its RLP blob in `nodeBlob`. A no-op after
  ## the first call.
  if nodeBlob.len == 0:
    for n in 0 .. 15:
      brNode.bLink[n] = high(NodeTag).to(Blob)
    nodeBlob = brNode.convertTo(Blob)
# ------------------------------------------------------------------------------
# Public test function
# ------------------------------------------------------------------------------
proc test_calcAccountsListSizes*() =
  ## RLP does not allow a static check, so verify at runtime that the
  ## accounts size arithmetic used by `hexaryRangeLeafsProof()` agrees
  ## with the actual RLP encoding lengths.
  initAccBlobs()

  # A maximal account entry: every field at its upper bound.
  let sample = Account(
    storageRoot: Hash256(data: high(UInt256).toBytesBE),
    codeHash:    Hash256(data: high(UInt256).toBytesBE),
    nonce:       high(uint64),
    balance:     high(UInt256))

  let chain = 42.accRndChain(123)

  # Spot-check `accountRangeSize()` against real RLP list encodings.
  let tryLst = [0, 1, 2, 3, 594, 595, 596]
  for num in tryLst:
    #echo ">>> ", num, " ", sample.repeat(num).encode.len
    check num.accountRangeSize == sample.repeat(num).encode.len
  block:
    # For the largest sample the RLP list header takes 4 bytes.
    let num = tryLst[^1]
    check 4 + num * sample.encode.len == sample.repeat(num).encode.len

  # Emulate the running size bookkeeping done by `hexaryRangeLeafsProof()`.
  var accu = 0
  for inx in 0 ..< chain.len:
    let (pairLen, listLen) =
      chain[inx].data.len.hexaryRangeRlpLeafListSize(accu)
    check listLen == chain[0 .. inx].encode.len
    accu += pairLen
proc test_calcProofsListSizes*() =
  ## RLP does not allow a static check, so verify at runtime that the
  ## proof-node size arithmetic agrees with the actual RLP encoding
  ## lengths.
  initBranchNodeSample()

  # A maximal branch node: all 16 links carry the biggest possible tag.
  let sample = block:
    var node = XNodeObj(kind: Branch)
    for inx in 0 .. 15:
      node.bLink[inx] = high(NodeTag).to(Blob)
    node

  let tryLst = [0, 1, 2, 126, 127]
  for num in tryLst:
    #echo ">>> ", num, " ", sample.repeat(num).encode.len
    check num.proofNodesSize == sample.repeat(num).encode.len
  block:
    # For the largest sample the RLP list header takes 4 bytes.
    let num = tryLst[^1]
    check 4 + num * sample.encode.len == sample.repeat(num).encode.len

  # `proofEncode` of a `SnapProof` list must match the plain blob
  # serialisation of the same branch nodes, and stay within the
  # `proofNodesSizeMax` bound.
  for num in [0, 1, 2, 126, 127]:
    let
      nodeBlobsEncoded = SnapProof(data: nodeBlob).repeat(num).proofEncode
      nodeBlobsHex = nodeBlobsEncoded.toHex
      brNodesHex = brNode.repeat(num).convertTo(Blob).toHex
    #echo ">>> ", num, " ", nodeBlobsHex
    #echo "<<< ", num, " ", brNodesHex
    check nodeBlobsEncoded.len == num.proofNodesSizeMax
    check nodeBlobsHex == brNodesHex
# ------------------------------------------------------------------------------
# End

View File

@ -16,7 +16,7 @@ import
eth/[common, p2p, trie/nibbles],
stew/[byteutils, interval_set, results],
unittest2,
../../nimbus/sync/types,
../../nimbus/sync/[protocol, types],
../../nimbus/sync/snap/range_desc,
../../nimbus/sync/snap/worker/db/[
hexary_desc, hexary_envelope, hexary_error, hexary_interpolate,
@ -192,7 +192,7 @@ proc printCompareLeftNearby(
proc verifyRangeProof(
rootKey: NodeKey;
leafs: seq[RangeLeaf];
proof: seq[Blob];
proof: seq[SnapProof];
dbg = HexaryTreeDbRef(nil);
): Result[void,HexaryError] =
## Re-build temporary database and prove or disprove
@ -206,7 +206,7 @@ proc verifyRangeProof(
# Import proof nodes
var unrefs, refs: HashSet[RepairKey] # values ignored
for rlpRec in proof:
let importError = xDb.hexaryImport(rlpRec, unrefs, refs).error
let importError = xDb.hexaryImport(rlpRec.data, unrefs, refs).error
if importError != HexaryError(0):
check importError == HexaryError(0)
return err(importError)
@ -224,7 +224,7 @@ proc verifyRangeProof(
#"\n",
#"\n unrefs=[", unrefs.toSeq.mapIt(it.pp(dbg)).join(","), "]",
#"\n refs=[", refs.toSeq.mapIt(it.pp(dbg)).join(","), "]",
"\n\n proof=", proof.ppNodeKeys(dbg),
"\n\n proof=", proof.mapIt(it.data).ppNodeKeys(dbg),
"\n\n first=", leafs[0].key,
"\n ", leafs[0].key.hexaryPath(rootKey,xDb).pp(dbg),
"\n\n last=", leafs[^1].key,
@ -375,14 +375,14 @@ proc test_NodeRangeProof*(
let
rootKey = inLst[0].root.to(NodeKey)
noisy = not dbg.isNil
maxLen = high(int)
maxLen = high(int) # set it lower for debugging (eg. 5 for a small sample)
# Assuming the `inLst` entries have been stored in the DB already
for n,w in inLst:
let
accounts = w.data.accounts[0 ..< min(w.data.accounts.len,maxLen)]
iv = NodeTagRange.new(w.base, accounts[^1].accKey.to(NodeTag))
rc = db.hexaryRangeLeafsProof(rootKey, iv, accounts.len)
rc = db.hexaryRangeLeafsProof(rootKey, iv)
check rc.isOk
if rc.isErr:
return
@ -393,11 +393,14 @@ proc test_NodeRangeProof*(
# Take sub-samples but not too small
if 0 < cutOff and rc.value.leafs.len < cutOff + 5:
break # rest cases ignored
break # remaining cases ignored
subCount.inc
let leafs = rc.value.leafs[0 ..< rc.value.leafs.len - cutOff]
var proof: seq[Blob]
let
leafs = rc.value.leafs[0 ..< rc.value.leafs.len - cutOff]
leafsRlpLen = leafs.encode.len
var
proof: seq[SnapProof]
# Calculate proof
if cutOff == 0:
@ -407,7 +410,22 @@ proc test_NodeRangeProof*(
rootKey.printCompareRightLeafs(w.base, accounts, leafs, db, dbg)
return
proof = rc.value.proof
# Some sizes to verify (full data list)
check rc.value.proofSize == proof.encode.len
check rc.value.leafsSize == leafsRlpLen
else:
# Make sure that the size calculation deliver the expected number
# of entries.
let rx = db.hexaryRangeLeafsProof(rootKey, iv, leafsRlpLen + 1)
check rx.isOk
if rx.isErr:
return
check rx.value.leafs.len == leafs.len
# Some size to verify (truncated data list)
check rx.value.proofSize == rx.value.proof.encode.len
# Re-adjust proof
proof = db.hexaryRangeLeafsProof(rootKey, iv.minPt, leafs).proof
@ -424,7 +442,7 @@ proc test_NodeRangeProof*(
noisy.say "***", "n=", n,
" cutOff=", cutOff,
" leafs=", leafs.len,
" proof=", proof.ppNodeKeys(dbg),
" proof=", proof.mapIt(it.data).ppNodeKeys(dbg),
"\n\n ",
" base=", iv.minPt,
"\n ", iv.minPt.hexaryPath(rootKey,db).pp(dbg),
@ -475,6 +493,3 @@ proc test_NodeRangeLeftBoundary*(
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------
proc xxx(inLst: seq[UndumpAccounts]; db: HexaryGetFn; dbg: HexaryTreeDbRef) =
  ## Compile-time anchor: forwards to `test_NodeRangeProof()` so the
  ## signatures are type-checked even when the proc is otherwise unused.
  test_NodeRangeProof(inLst, db, dbg)