# Nimbus
# Copyright (c) 2021 Status Research & Development GmbH
# Licensed under either of
#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
#    http://www.apache.org/licenses/LICENSE-2.0)
#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
#    http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.

## Heal storage DB:
## ================
##
## This module works similarly to `heal_accounts`, applied to each
## per-account storage slots hexary trie.
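##
## In a nutshell, a single healing pass over one storage slots trie works
## as follows:
##
## * Re-check the `missingNodes` list first: nodes may have arrived in the
##   database in the meantime via other, semi-parallel processes.
##
## * Inspect the nodes on the `checkNodes` list for dangling child links
##   and append those links to the `missingNodes` list.
##
## * Fetch a batch of missing nodes from the network with `getTrieNodes()`
##   and import them into the persistent database.
##
## * Classify the imported nodes: leaf nodes are registered as processed
##   storage slots, all other nodes are queued on `checkNodes` for the
##   next pass.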

import
  std/sequtils,
  chronicles,
  chronos,
  eth/[common, p2p, trie/nibbles, trie/trie_defs, rlp],
  stew/[interval_set, keyed_queue],
  ../../../utils/prettify,
  ../../sync_desc,
  ".."/[range_desc, worker_desc],
  ./com/[com_error, get_trie_nodes],
  ./db/[hexary_desc, hexary_error, snapdb_storage_slots]

{.push raises: [Defect].}

logScope:
  topics = "snap-heal"

const
  extraTraceMessages = false or true
    ## Enabled additional logging noise

# ------------------------------------------------------------------------------
# Private logging helpers
# ------------------------------------------------------------------------------

proc healingCtx(
    buddy: SnapBuddyRef;
    kvp: SnapSlotsQueuePair;
      ): string =
  let
    slots = kvp.data.slots
  "{" &
    "covered=" & slots.unprocessed.emptyFactor.toPC(0) & "," &
    "nCheckNodes=" & $slots.checkNodes.len & "," &
    "nMissingNodes=" & $slots.missingNodes.len & "}"

# ------------------------------------------------------------------------------
# Private functions
# ------------------------------------------------------------------------------

proc acceptWorkItemAsIs(
    buddy: SnapBuddyRef;
    kvp: SnapSlotsQueuePair;
      ): Result[bool,HexaryDbError] =
  ## Check whether this work item is done and the corresponding storage trie
  ## can be completely inherited.
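  ##
  ## For work items not marked `inherit`, `ok(false)` is returned without
  ## inspecting the database.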
  if kvp.data.inherit:
    let
      ctx = buddy.ctx
      peer = buddy.peer
      db = ctx.data.snapDb
      accHash = kvp.data.accHash
      storageRoot = kvp.key.to(Hash256)
      rc = db.inspectStorageSlotsTrie(peer, accHash, storageRoot)

    # Check whether the hexary trie is complete
    if rc.isOk:
      return ok(rc.value.dangling.len == 0)

    return err(rc.error)

  ok(false)

proc updateMissingNodesList(
    buddy: SnapBuddyRef;
    kvp: SnapSlotsQueuePair) =
  ## Check whether previously missing nodes from the `missingNodes` list
  ## have been magically added to the database since it was checked last
  ## time. These nodes will be moved to `checkNodes` for further processing.
  let
    ctx = buddy.ctx
    db = ctx.data.snapDb
    peer = buddy.peer
    env = buddy.data.pivotEnv
    accHash = kvp.data.accHash
    storageRoot = kvp.key.to(Hash256)
    slots = kvp.data.slots
  var
    nodes: seq[Blob]

  when extraTraceMessages:
    trace "Start storage slots healing", peer, ctx=buddy.healingCtx(kvp),
      nSlotLists=env.nSlotLists, nStorageQueue=env.fetchStorage.len

  for slotKey in slots.missingNodes:
    let rc = db.getStorageSlotsNodeKey(peer, accHash, storageRoot, slotKey)
    if rc.isOk:
      # Node is available in the database, so check it for dangling links
      slots.checkNodes.add slotKey
    else:
      # Node is still missing
      nodes.add slotKey

  slots.missingNodes = nodes

proc appendMoreDanglingNodesToMissingNodesList(
    buddy: SnapBuddyRef;
    kvp: SnapSlotsQueuePair;
      ): bool =
  ## Starting with a given set of potentially dangling intermediate trie nodes
  ## `checkNodes`, this set is filtered and processed. Nodes with dangling
  ## child links are appended to the `missingNodes` list, and `checkNodes`
  ## is cleared.
  let
    ctx = buddy.ctx
    db = ctx.data.snapDb
    peer = buddy.peer
    env = buddy.data.pivotEnv
    accHash = kvp.data.accHash
    storageRoot = kvp.key.to(Hash256)
    slots = kvp.data.slots
    rc = db.inspectStorageSlotsTrie(
      peer, accHash, storageRoot, slots.checkNodes)

  if rc.isErr:
    when extraTraceMessages:
      error "Storage slots healing failed => stop", peer,
        ctx=buddy.healingCtx(kvp), nSlotLists=env.nSlotLists,
        nStorageQueue=env.fetchStorage.len, error=rc.error
    # Attempt to switch peers, there is not much else we can do here
    buddy.ctrl.zombie = true
    return false

  # Update batch lists
  slots.checkNodes.setLen(0)
  slots.missingNodes = slots.missingNodes & rc.value.dangling

  true

proc getMissingNodesFromNetwork(
    buddy: SnapBuddyRef;
    kvp: SnapSlotsQueuePair;
      ): Future[seq[Blob]]
      {.async.} =
  ## Extract from `missingNodes` the next batch of nodes that need
  ## to be merged into the database.
  let
    ctx = buddy.ctx
    peer = buddy.peer
    env = buddy.data.pivotEnv
    accHash = kvp.data.accHash
    storageRoot = kvp.key.to(Hash256)
    slots = kvp.data.slots

    nMissingNodes = slots.missingNodes.len
    inxLeft = max(0, nMissingNodes - maxTrieNodeFetch)

  # There is no point in processing too many nodes at the same time. So leave
  # the rest on the `missingNodes` queue to be handled later.
  let fetchNodes = slots.missingNodes[inxLeft ..< nMissingNodes]
  slots.missingNodes.setLen(inxLeft)

  # Fetch nodes from the network. Note that the remainder of the `missingNodes`
  # list might be used by another process that runs semi-parallel.
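  # The leading path group of the request addresses the account; each
  # following (single element) group holds one missing storage-node path.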
  let
    req = @[accHash.data.toSeq] & fetchNodes.mapIt(@[it])
    rc = await buddy.getTrieNodes(storageRoot, req)
  if rc.isOk:
    # Register unfetched missing nodes for the next pass
    slots.missingNodes = slots.missingNodes & rc.value.leftOver.mapIt(it[0])
    return rc.value.nodes

  # Restore missing nodes list now so that a task switch in the error checker
  # allows other processes to access the full `missingNodes` list.
  slots.missingNodes = slots.missingNodes & fetchNodes

  let error = rc.error
  if await buddy.ctrl.stopAfterSeriousComError(error, buddy.data.errors):
    discard
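    # (the `discard` keeps this branch non-empty when the `when` clause
    # below is compiled out)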
    when extraTraceMessages:
      trace "Error fetching storage slots nodes for healing => stop", peer,
        ctx=buddy.healingCtx(kvp), nSlotLists=env.nSlotLists,
        nStorageQueue=env.fetchStorage.len, error
  else:
    discard
    when extraTraceMessages:
      trace "Error fetching storage slots nodes for healing", peer,
        ctx=buddy.healingCtx(kvp), nSlotLists=env.nSlotLists,
        nStorageQueue=env.fetchStorage.len, error

  return @[]

proc kvStorageSlotsLeaf(
    buddy: SnapBuddyRef;
    kvp: SnapSlotsQueuePair;
    partialPath: Blob;
    node: Blob;
      ): (bool,NodeKey)
      {.gcsafe, raises: [Defect,RlpError].} =
  ## Read leaf node from persistent database (if any)
  let
    peer = buddy.peer
    env = buddy.data.pivotEnv

    nodeRlp = rlpFromBytes node
    (_,prefix) = hexPrefixDecode partialPath
    (_,segment) = hexPrefixDecode nodeRlp.listElem(0).toBytes
    nibbles = prefix & segment
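  # A storage slot path is a 32 byte hash, i.e. 64 nibbles, so only a
  # joined path of that full length identifies a proper slot leaf.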
  if nibbles.len == 64:
    return (true, nibbles.getBytes.convertTo(NodeKey))

  when extraTraceMessages:
    trace "Isolated node path for healing => ignored", peer,
      ctx=buddy.healingCtx(kvp), nSlotLists=env.nSlotLists,
      nStorageQueue=env.fetchStorage.len

proc registerStorageSlotsLeaf(
    buddy: SnapBuddyRef;
    kvp: SnapSlotsQueuePair;
    slotKey: NodeKey) =
  ## Process a single trie node as would be done with an interval by
  ## the `storeStorageSlots()` function
  let
    peer = buddy.peer
    env = buddy.data.pivotEnv
    slots = kvp.data.slots
    pt = slotKey.to(NodeTag)

  # Find the range set (from the list) containing `pt`
  var ivSet: NodeTagRangeSet
  block foundCoveringRange:
    for w in slots.unprocessed:
      if 0 < w.covered(pt,pt):
        ivSet = w
        break foundCoveringRange
    return # already processed, forget this account leaf

  # Register this isolated leaf node that was added
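  # (`reduce` removes the one-point interval `[pt,pt]` from the range set,
  # so this slot no longer counts as unprocessed)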
  discard ivSet.reduce(pt,pt)

  when extraTraceMessages:
    trace "Isolated storage slot for healing", peer,
      ctx=buddy.healingCtx(kvp), nSlotLists=env.nSlotLists,
      nStorageQueue=env.fetchStorage.len, slotKey=pt

# ------------------------------------------------------------------------------
# Private functions: do the healing for one work item (sub-trie)
# ------------------------------------------------------------------------------

proc storageSlotsHealing(
    buddy: SnapBuddyRef;
    kvp: SnapSlotsQueuePair;
      ): Future[bool]
      {.async.} =
  ## Returns `true` if the sub-trie is complete (probably inherited), and
  ## `false` if there are nodes left to be completed.
  let
    ctx = buddy.ctx
    db = ctx.data.snapDb
    peer = buddy.peer
    env = buddy.data.pivotEnv
    accHash = kvp.data.accHash
    slots = kvp.data.slots

  # Update for changes since last visit
  buddy.updateMissingNodesList(kvp)

  # Check the nodes from `checkNodes` for dangling child links, adding any
  # such links to the `missingNodes` list
  if slots.checkNodes.len != 0:
    if not buddy.appendMoreDanglingNodesToMissingNodesList(kvp):
      return false

  # Check whether the trie is complete.
  if slots.missingNodes.len == 0:
    trace "Storage slots healing complete", peer, ctx=buddy.healingCtx(kvp)
    return true

  # Get the next batch of nodes that need to be merged into the database
  let nodesData = await buddy.getMissingNodesFromNetwork(kvp)
  if nodesData.len == 0:
    return

  # Store nodes to disk
  let report = db.importRawStorageSlotsNodes(peer, accHash, nodesData)
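
  # An import report entry without a slot index signals a database-level
  # failure rather than a problem with an individual node.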
  if 0 < report.len and report[^1].slot.isNone:
    # Storage error, just run the next lap (not much else that can be done)
    error "Storage slots healing, error updating persistent database", peer,
      ctx=buddy.healingCtx(kvp), nSlotLists=env.nSlotLists,
      nStorageQueue=env.fetchStorage.len, nNodes=nodesData.len,
      error=report[^1].error
    slots.missingNodes = slots.missingNodes & nodesData
    return false

  when extraTraceMessages:
    trace "Storage slots healing, nodes merged into database", peer,
      ctx=buddy.healingCtx(kvp), nSlotLists=env.nSlotLists,
      nStorageQueue=env.fetchStorage.len, nNodes=nodesData.len

  # Filter out error and leaf nodes
  for w in report:
    if w.slot.isSome: # non-indexed entries appear typically at the end, though
      let
        inx = w.slot.unsafeGet
        nodePath = nodesData[inx]

      if w.error != NothingSerious or w.kind.isNone:
        # Error, try downloading again
        slots.missingNodes.add nodePath

      elif w.kind.unsafeGet != Leaf:
        # Re-check this node
        slots.checkNodes.add nodePath

      else:
        # Node has been stored, double check
        let (isLeaf, slotKey) =
          buddy.kvStorageSlotsLeaf(kvp, nodePath, nodesData[inx])
        if isLeaf:
          # Update the `unprocessed` registry, collect storage roots (if any)
          buddy.registerStorageSlotsLeaf(kvp, slotKey)
        else:
          slots.checkNodes.add nodePath

  when extraTraceMessages:
    trace "Storage slots healing job done", peer,
      ctx=buddy.healingCtx(kvp), nSlotLists=env.nSlotLists,
      nStorageQueue=env.fetchStorage.len

proc healingIsComplete(
    buddy: SnapBuddyRef;
    kvp: SnapSlotsQueuePair;
      ): Future[bool]
      {.async.} =
  ## Check whether the storage trie can be completely inherited and prepare for
  ## healing if not.
  ##
  ## Returns `true` if the sub-trie is complete (probably inherited), and
  ## `false` if there are nodes left to be completed.
  let
    ctx = buddy.ctx
    db = ctx.data.snapDb
    peer = buddy.peer
    env = buddy.data.pivotEnv
    accHash = kvp.data.accHash
    storageRoot = kvp.key.to(Hash256)

  # Check whether this work item can be completely inherited
  if kvp.data.inherit:
    let rc = db.inspectStorageSlotsTrie(peer, accHash, storageRoot)

    if rc.isErr:
      # Oops, not much we can do here (looping trie?)
      error "Problem inspecting storage trie", peer,
        nSlotLists=env.nSlotLists, nStorageQueue=env.fetchStorage.len,
        storageRoot, error=rc.error
      return false

    # Check whether the hexary trie can be inherited as-is.
    if rc.value.dangling.len == 0:
      return true # done

    # Set up healing structure for this work item
    let slots = SnapTrieRangeBatchRef(
      missingNodes: rc.value.dangling)
    kvp.data.slots = slots

    # Full range covered by unprocessed items
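    # (the batch keeps several `unprocessed` range sets; priming the first
    # one with the whole interval marks every slot as still to be fetched)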
    for n in 0 ..< kvp.data.slots.unprocessed.len:
      slots.unprocessed[n] = NodeTagRangeSet.init()
    discard slots.unprocessed[0].merge(
      NodeTagRange.new(low(NodeTag),high(NodeTag)))

  # Proceed with healing
  return await buddy.storageSlotsHealing(kvp)

# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------

proc healStoragesDb*(buddy: SnapBuddyRef) {.async.} =
  ## Fetching and merging missing storage slots trie database nodes.
  let
    ctx = buddy.ctx
    db = ctx.data.snapDb
    peer = buddy.peer
    env = buddy.data.pivotEnv
  var
    toBeHealed: seq[SnapSlotsQueuePair]
    nAcceptedAsIs = 0

  # Search the current slot item batch list for items to complete via healing
  for kvp in env.fetchStorage.nextPairs:

    # Marked items indicate that a partial sub-trie exists which might have
    # been inherited from an earlier state root.
    if not kvp.data.inherit:
      let slots = kvp.data.slots

      # Otherwise check partially fetched sub-tries only if they have a
      # certain degree of completeness.
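      # (`emptyFactor` of the `unprocessed` range set is the share of the
      # slot range already fetched, logged as `covered` above)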
      if slots.isNil or slots.unprocessed.emptyFactor < healSlorageSlotsTrigger:
        continue

    # Remove `kvp` work item from the queue object (which is allowed within a
    # `for` loop over a `KeyedQueue` object type.)
    env.fetchStorage.del(kvp.key)

    # With some luck, the `kvp` work item refers to a complete storage trie
    # that can be accepted as-is, in which case `kvp` can just be dropped.
    block:
      let rc = buddy.acceptWorkItemAsIs(kvp)
      if rc.isOk and rc.value:
        env.nSlotLists.inc
        nAcceptedAsIs.inc # for logging
        continue # dropping `kvp`

    # Add to local batch to be processed, below
    toBeHealed.add kvp
    if maxStoragesHeal <= toBeHealed.len:
      break

  # Run against local batch
  let nHealerQueue = toBeHealed.len
  if 0 < nHealerQueue:
    when extraTraceMessages:
      trace "Processing storage healing items", peer,
        nSlotLists=env.nSlotLists, nStorageQueue=env.fetchStorage.len,
        nHealerQueue, nAcceptedAsIs

    for n in 0 ..< toBeHealed.len:
      let
        kvp = toBeHealed[n]
        isComplete = await buddy.healingIsComplete(kvp)
      if isComplete:
        env.nSlotLists.inc
        nAcceptedAsIs.inc
      else:
        env.fetchStorage.merge kvp

      if buddy.ctrl.stopped:
        # Oops, peer has gone
        env.fetchStorage.merge toBeHealed[n+1 ..< toBeHealed.len]
        break

  when extraTraceMessages:
    if 0 < nHealerQueue or 0 < nAcceptedAsIs:
      trace "Done storage healing items", peer,
        nSlotLists=env.nSlotLists, nStorageQueue=env.fetchStorage.len,
        nHealerQueue, nAcceptedAsIs

# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------