nimbus-eth1/nimbus/sync/snap/worker/com/get_trie_nodes.nim

# Nimbus
# Copyright (c) 2018-2021 Status Research & Development GmbH
# Licensed and distributed under either of
#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
#    http://www.apache.org/licenses/LICENSE-2.0)
#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
#    http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.

import
  std/[options, sequtils],
  chronos,
  eth/[common, p2p],
  "../../.."/[protocol, protocol/trace_config],
  ../../worker_desc,
  ./com_error

{.push raises: [Defect].}

logScope:
  topics = "snap-fetch"

type
  # Disabled local declaration, kept for reference only. The `SnapTrieNodes`
  # type used further down is not declared in this module (presumably it
  # comes with the imported `protocol` module -- TODO confirm.)
  #
  # SnapTrieNodes = object
  #   nodes*: seq[Blob]

  GetTrieNodes* = object
    ## Return value of `getTrieNodes()`.
    leftOver*: seq[seq[Blob]] ## Request path groups to be fetched again
    nodes*: seq[Blob]         ## Nodes as received (may contain empty blobs)

# ------------------------------------------------------------------------------
# Private functions
# ------------------------------------------------------------------------------

proc getTrieNodesReq(
    buddy: SnapBuddyRef;
    stateRoot: Hash256;
    paths: seq[seq[Blob]];
      ): Future[Result[Option[SnapTrieNodes],void]]
      {.async.} =
  ## Send a `GetTrieNodes` request for `paths` relative to `stateRoot` to the
  ## peer of `buddy` and wait for the reply.
  ##
  ## Returns `ok(reply)` with whatever `peer.getTrieNodes()` delivered (the
  ## caller handles a `none` reply as a timeout), or `err()` if the request
  ## raised a `CatchableError`. The exception is logged and not propagated.
  let
    peer = buddy.peer
  try:
    let reply = await peer.getTrieNodes(stateRoot, paths, snapRequestBytesLimit)
    return ok(reply)

  except CatchableError as e:
    # The log entry must name the request that was actually sent
    trace trSnapRecvError & "waiting for GetTrieNodes reply", peer,
      error=e.msg
    return err()

# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------

proc getTrieNodes*(
    buddy: SnapBuddyRef;
    stateRoot: Hash256;
    paths: seq[seq[Blob]],
      ): Future[Result[GetTrieNodes,ComError]]
      {.async.} =
  ## Fetch data using the `snap#` protocol, returns the trie nodes requested
  ## (if any.)
  ##
  ## Each entry of `paths` is a request group. A group with fewer than two
  ## elements is handled as an account node request. A longer group is
  ## handled as a storage request where the first element addresses the
  ## account and the remaining elements are the storage node paths.
  ##
  ## On success, the result holds the nodes as received (field `nodes`) and
  ## the request groups that still need to be fetched (field `leftOver`),
  ## i.e. paths answered with an empty node and paths not covered by the
  ## reply at all.
  ##
  ## Errors:
  ## * `ComEmptyRequestArguments` -- `paths` is empty
  ## * `ComNetworkProblem` -- the request raised an exception
  ## * `ComResponseTimeout` -- no reply was available
  ## * `ComTooManyTrieNodes` -- more nodes received than path elements sent
  ## * `ComNoByteCodesAvailable` -- the reply contains no nodes at all
  let
    peer = buddy.peer
    nPaths = paths.len

  if nPaths == 0:
    return err(ComEmptyRequestArguments)

  # Sum of all group lengths. For storage groups this also counts the leading
  # account element, so it is only an upper bound for the number of nodes
  # that can be matched against the request.
  let nTotal = paths.mapIt(it.len).foldl(a+b, 0)

  if trSnapTracePacketsOk:
    trace trSnapSendSending & "GetTrieNodes", peer,
      nPaths, nTotal, bytesLimit=snapRequestBytesLimit

  let trieNodes = block:
    let rc = await buddy.getTrieNodesReq(stateRoot, paths)
    if rc.isErr:
      return err(ComNetworkProblem)
    if rc.value.isNone:
      trace trSnapRecvTimeoutWaiting & "for reply to GetTrieNodes", peer, nPaths
      return err(ComResponseTimeout)
    let blobs = rc.value.get.nodes
    if nTotal < blobs.len:
      # Ooops, makes no sense
      return err(ComTooManyTrieNodes)
    blobs

  let
    nNodes = trieNodes.len

  if nNodes == 0:
    # github.com/ethereum/devp2p/blob/master/caps/snap.md#gettrienodes-0x06
    #
    # Notes:
    # * Nodes must always respond to the query.
    # * The returned nodes must be in the request order.
    # * If the node does not have the state for the requested state root or for
    #   any requested account paths, it must return an empty reply. It is the
    #   responsibility of the caller to query a state not older than 128
    #   blocks; and the caller is expected to only ever query existing trie
    #   nodes.
    # * The responding node is allowed to return less data than requested
    #   (serving QoS limits), but the node must return at least one trie node.
    trace trSnapRecvReceived & "empty TrieNodes", peer, nPaths, nNodes
    # NOTE(review): this error code refers to byte codes although trie nodes
    # were requested. It is kept unchanged here as callers might match on
    # it -- consider a trie node specific code if `ComError` provides one.
    return err(ComNoByteCodesAvailable)

  # Assemble return value
  var dd = GetTrieNodes(nodes: trieNodes)

  # For each request group/sub-sequence, analyse the results. The index
  # `nInx` walks the reply in parallel to the requested paths, every account
  # path and every storage path consumes exactly one reply node.
  var nInx = 0
  block loop:
    for n in 0 ..< nPaths:
      let pathLen = paths[n].len

      # Account node request
      if pathLen < 2:
        if trieNodes[nInx].len == 0:
          dd.leftOver.add paths[n]
        nInx.inc
        if nInx < nNodes:
          continue
        # all the rest needs to be re-processed
        dd.leftOver = dd.leftOver & paths[n+1 ..< nPaths]
        break loop

      # Storage request for account (here, `1 < pathLen` always holds as the
      # account branch above either continues or leaves the loop)
      var pushBack: seq[Blob]
      for i in 1 ..< pathLen:
        if trieNodes[nInx].len == 0:
          pushBack.add paths[n][i]
        # Advance for empty and non-empty nodes alike, otherwise all the
        # following paths would be checked against the same reply node
        nInx.inc
        if nInx < nNodes:
          continue
        # Reply exhausted, all the rest needs to be re-processed
        #
        # add:              account & pushBack & rest  ...
        let redo = pushBack & paths[n][i+1 ..< pathLen]
        if 0 < redo.len:
          # Never queue a group consisting of the account element only, it
          # would be mistaken for an account node request
          dd.leftOver.add(paths[n][0] & redo)
        dd.leftOver = dd.leftOver & paths[n+1 ..< nPaths]
        break loop
      if 0 < pushBack.len:
        dd.leftOver.add(paths[n][0] & pushBack)

  trace trSnapRecvReceived & "TrieNodes", peer,
    nPaths, nNodes, nLeftOver=dd.leftOver.len

  return ok(dd)

# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------
Prep for full sync after snap (#1253) * Split fetch accounts into sub-modules details: There will be separated modules for accounts snapshot, storage snapshot, and healing for either. * Allow to rebase pivot before negotiated header why: Peers seem to have not too many snapshots available. By setting back the pivot block header slightly, the chances might be higher to find more peers to serve this pivot. Experiment on mainnet showed that setting back too much (tested with 1024), the chances to find matching snapshot peers seem to decrease. * Add accounts healing * Update variable/field naming in `worker_desc` for readability * Handle leaf nodes in accounts healing why: There is no need to fetch accounts when they had been added by the healing process. On the flip side, these accounts must be checked for storage data and the batch queue updated, accordingly. * Reorganising accounts hash ranges batch queue why: The aim is to formally cover as many accounts as possible for different pivot state root environments. Formerly, this was tried by starting the accounts batch queue at a random value for each pivot (and wrapping around.) Now, each pivot environment starts with an interval set mutually disjunct from any interval set retrieved with other pivot state roots. also: Stop fishing for more pivots in `worker` if 100% download is reached * Reorganise/update accounts healing why: Error handling was wrong and the (math. complexity of) whole process could be better managed. details: Much of the algorithm is now documented at the top of the file `heal_accounts.nim` 2022-10-08 17:20:50 +00:00			`# Nimbus`
			`# Copyright (c) 2018-2021 Status Research & Development GmbH`
			`# Licensed and distributed under either of`
Snap sync accounts healing (#1225) * Added inspect module why: Find dangling references for trie healing support. details: + This patch set provides only the inspect module and some unit tests. + There are also extensive unit tests which need bulk data from the `nimbus-eth1-blob` module. * Alternative pivot finder why: Attempt to be faster on start up. Also tying to decouple pivot finder somehow by providing different mechanisms (this one runs in `single` mode.) * Use inspect module for healing details: + After some progress with account and storage data, the inspect facility is used to find dangling links in the database to be filled nose-wise. + This is a crude attempt to cobble together functional elements. The set up needs to be honed. * fix scheduler to avoid starting dead peers why: Some peers drop out while in `sleepAsync()`. So extra `if` clauses make sure that this event is detected early. * Bug fixes causing crashes details: + prettify.toPC(): int/intToStr() numeric range over/underflow + hexary_inspect.hexaryInspectPath(): take care of half initialised step with branch but missing index into branch array * improve handling of dropped peers in alternaive pivot finder why: Strange things may happen while querying data from the network. Additional checks make sure that the state of other peers is updated immediately. * Update trace messages * reorganise snap fetch & store schedule 2022-09-16 07:24:12 +00:00			`# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or`
			`# http://www.apache.org/licenses/LICENSE-2.0)`
			`# * MIT license ([LICENSE-MIT](LICENSE-MIT) or`
			`# http://opensource.org/licenses/MIT)`
			`# at your option. This file may not be copied, modified, or distributed`
			`# except according to those terms.`

			`import`
			`std/[options, sequtils],`
			`chronos,`
Remodel persistent snapdb access (#1274) * Re-model persistent database access why: Storage slots healing just run on the wrong sub-trie (i.e. the wrong key mapping). So get/put and bulk functions now use the definitions in `snapdb_desc` (earlier there were some shortcuts for `get()`.) * Fixes: missing return code, typo, redundant imports etc. * Remove obsolete debugging directives from `worker_desc` module * Correct failing unit tests for storage slots trie inspection why: Some pathological cases for the extended tests do not produce any hexary trie data. This is rightly detected by the trie inspection and the result checks needed to adjusted. 2022-10-20 16:59:54 +00:00			`eth/[common, p2p],`
Snap sync accounts healing (#1225) * Added inspect module why: Find dangling references for trie healing support. details: + This patch set provides only the inspect module and some unit tests. + There are also extensive unit tests which need bulk data from the `nimbus-eth1-blob` module. * Alternative pivot finder why: Attempt to be faster on start up. Also tying to decouple pivot finder somehow by providing different mechanisms (this one runs in `single` mode.) * Use inspect module for healing details: + After some progress with account and storage data, the inspect facility is used to find dangling links in the database to be filled nose-wise. + This is a crude attempt to cobble together functional elements. The set up needs to be honed. * fix scheduler to avoid starting dead peers why: Some peers drop out while in `sleepAsync()`. So extra `if` clauses make sure that this event is detected early. * Bug fixes causing crashes details: + prettify.toPC(): int/intToStr() numeric range over/underflow + hexary_inspect.hexaryInspectPath(): take care of half initialised step with branch but missing index into branch array * improve handling of dropped peers in alternaive pivot finder why: Strange things may happen while querying data from the network. Additional checks make sure that the state of other peers is updated immediately. * Update trace messages * reorganise snap fetch & store schedule 2022-09-16 07:24:12 +00:00			`"../../.."/[protocol, protocol/trace_config],`
			`../../worker_desc,`
Prep for full sync after snap (#1253) * Split fetch accounts into sub-modules details: There will be separated modules for accounts snapshot, storage snapshot, and healing for either. * Allow to rebase pivot before negotiated header why: Peers seem to have not too many snapshots available. By setting back the pivot block header slightly, the chances might be higher to find more peers to serve this pivot. Experiment on mainnet showed that setting back too much (tested with 1024), the chances to find matching snapshot peers seem to decrease. * Add accounts healing * Update variable/field naming in `worker_desc` for readability * Handle leaf nodes in accounts healing why: There is no need to fetch accounts when they had been added by the healing process. On the flip side, these accounts must be checked for storage data and the batch queue updated, accordingly. * Reorganising accounts hash ranges batch queue why: The aim is to formally cover as many accounts as possible for different pivot state root environments. Formerly, this was tried by starting the accounts batch queue at a random value for each pivot (and wrapping around.) Now, each pivot environment starts with an interval set mutually disjunct from any interval set retrieved with other pivot state roots. also: Stop fishing for more pivots in `worker` if 100% download is reached * Reorganise/update accounts healing why: Error handling was wrong and the (math. complexity of) whole process could be better managed. details: Much of the algorithm is now documented at the top of the file `heal_accounts.nim` 2022-10-08 17:20:50 +00:00			`./com_error`
Snap sync accounts healing (#1225) * Added inspect module why: Find dangling references for trie healing support. details: + This patch set provides only the inspect module and some unit tests. + There are also extensive unit tests which need bulk data from the `nimbus-eth1-blob` module. * Alternative pivot finder why: Attempt to be faster on start up. Also tying to decouple pivot finder somehow by providing different mechanisms (this one runs in `single` mode.) * Use inspect module for healing details: + After some progress with account and storage data, the inspect facility is used to find dangling links in the database to be filled nose-wise. + This is a crude attempt to cobble together functional elements. The set up needs to be honed. * fix scheduler to avoid starting dead peers why: Some peers drop out while in `sleepAsync()`. So extra `if` clauses make sure that this event is detected early. * Bug fixes causing crashes details: + prettify.toPC(): int/intToStr() numeric range over/underflow + hexary_inspect.hexaryInspectPath(): take care of half initialised step with branch but missing index into branch array * improve handling of dropped peers in alternaive pivot finder why: Strange things may happen while querying data from the network. Additional checks make sure that the state of other peers is updated immediately. * Update trace messages * reorganise snap fetch & store schedule 2022-09-16 07:24:12 +00:00
			`{.push raises: [Defect].}`

			`logScope:`
			`topics = "snap-fetch"`

			`type`
			`# SnapTrieNodes = object`
			`# nodes*: seq[Blob]`

			`GetTrieNodes* = object`
			`leftOver*: seq[seq[Blob]]`
			`nodes*: seq[Blob]`

			`# ------------------------------------------------------------------------------`
			`# Private functions`
			`# ------------------------------------------------------------------------------`

			`proc getTrieNodesReq(`
			`buddy: SnapBuddyRef;`
			`stateRoot: Hash256;`
			`paths: seq[seq[Blob]];`
			`): Future[Result[Option[SnapTrieNodes],void]]`
			`{.async.} =`
			`let`
			`peer = buddy.peer`
			`try:`
			`let reply = await peer.getTrieNodes(stateRoot, paths, snapRequestBytesLimit)`
			`return ok(reply)`

			`except CatchableError as e:`
			`trace trSnapRecvError & "waiting for GetByteCodes reply", peer,`
			`error=e.msg`
			`return err()`

			`# ------------------------------------------------------------------------------`
			`# Public functions`
			`# ------------------------------------------------------------------------------`

			`proc getTrieNodes*(`
			`buddy: SnapBuddyRef;`
			`stateRoot: Hash256;`
			`paths: seq[seq[Blob]],`
			`): Future[Result[GetTrieNodes,ComError]]`
			`{.async.} =`
			## Fetch data using the `snap#` protocol, returns the trie nodes requested
			`## (if any.)`
			`let`
			`peer = buddy.peer`
			`nPaths = paths.len`

			`if nPaths == 0:`
			`return err(ComEmptyRequestArguments)`

			`let nTotal = paths.mapIt(it.len).foldl(a+b, 0)`

			`if trSnapTracePacketsOk:`
			`trace trSnapSendSending & "GetTrieNodes", peer,`
			`nPaths, nTotal, bytesLimit=snapRequestBytesLimit`

			`let trieNodes = block:`
			`let rc = await buddy.getTrieNodesReq(stateRoot, paths)`
			`if rc.isErr:`
			`return err(ComNetworkProblem)`
			`if rc.value.isNone:`
			`trace trSnapRecvTimeoutWaiting & "for reply to GetTrieNodes", peer, nPaths`
			`return err(ComResponseTimeout)`
			`let blobs = rc.value.get.nodes`
			`if nTotal < blobs.len:`
			`# Ooops, makes no sense`
			`return err(ComTooManyTrieNodes)`
			`blobs`

			`let`
			`nNodes = trieNodes.len`

			`if nNodes == 0:`
			`# github.com/ethereum/devp2p/blob/master/caps/snap.md#gettrienodes-0x06`
			`#`
			`# Notes:`
			`# * Nodes must always respond to the query.`
			`# * The returned nodes must be in the request order.`
			`# * If the node does not have the state for the requested state root or for`
			`# any requested account paths, it must return an empty reply. It is the`
			`# responsibility of the caller to query an state not older than 128`
			`# blocks; and the caller is expected to only ever query existing trie`
			`# nodes.`
			`# * The responding node is allowed to return less data than requested`
			`# (serving QoS limits), but the node must return at least one trie node.`
			`trace trSnapRecvReceived & "empty TrieNodes", peer, nPaths, nNodes`
			`return err(ComNoByteCodesAvailable)`

			`# Assemble return value`
			`var dd = GetTrieNodes(nodes: trieNodes)`

			`# For each request group/sub-sequence, analyse the results`
			`var nInx = 0`
			`block loop:`
			`for n in 0 ..< nPaths:`
			`let pathLen = paths[n].len`

			`# Account node request`
			`if pathLen < 2:`
			`if trieNodes[nInx].len == 0:`
			`dd.leftOver.add paths[n]`
			`nInx.inc`
			`if nInx < nNodes:`
			`continue`
			`# all the rest needs to be re-processed`
			`dd.leftOver = dd.leftOver & paths[n+1 ..< nPaths]`
			`break loop`

			`# Storage request for account`
			`if 1 < pathLen:`
			`var pushBack: seq[Blob]`
			`for i in 1 ..< pathLen:`
			`if trieNodes[nInx].len == 0:`
			`pushBack.add paths[n][i]`
			`nInx.inc`
			`if nInx < nNodes:`
			`continue`
			`# all the rest needs to be re-processed`
			`#`
			`# add: account & pushBack & rest ...`
			`dd.leftOver.add paths[n][0] & pushBack & paths[n][i+1 ..< pathLen]`
			`dd.leftOver = dd.leftOver & paths[n+1 ..< nPaths]`
			`break loop`
			`if 0 < pushBack.len:`
			`dd.leftOver.add paths[n][0] & pushBack`

			`trace trSnapRecvReceived & "TrieNodes", peer,`
			`nPaths, nNodes, nLeftOver=dd.leftOver.len`

			`return ok(dd)`

			`# ------------------------------------------------------------------------------`
			`# End`
			`# ------------------------------------------------------------------------------`