# nimbus-eth1/nimbus/sync/snap/worker/pivot.nim

# Nimbus
# Copyright (c) 2021 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.
{.push raises: [].}
import
std/[math, sets, sequtils],
chronicles,
chronos,
eth/p2p, # trie/trie_defs],
stew/[interval_set, keyed_queue, sorted_set],
"../.."/[sync_desc, types],
".."/[constants, range_desc, worker_desc],
./db/[hexary_error, snapdb_accounts, snapdb_contracts, snapdb_pivot],
./pivot/[heal_accounts, heal_storage_slots, range_fetch_accounts,
range_fetch_contracts, range_fetch_storage_slots,
storage_queue_helper],
./ticker
logScope:
topics = "snap-pivot"
const
extraTraceMessages = false or true
## Enable additional logging noise
proc pivotMothball*(env: SnapPivotRef) {.gcsafe.}
# ------------------------------------------------------------------------------
# Private helpers, logging
# ------------------------------------------------------------------------------
template logTxt(info: static[string]): static[string] =
"Pivot " & info
template ignExceptionOops(info: static[string]; code: untyped) =
try:
code
except CatchableError as e:
trace logTxt "Ooops", `info`=info, name=($e.name), msg=(e.msg)
# ------------------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------------------
proc accountsHealingOk(
env: SnapPivotRef; # Current pivot environment
ctx: SnapCtxRef; # Some global context
): bool =
## Returns `true` if accounts healing is enabled for this pivot.
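##
## That is, some accounts range must have been processed already and the
## accumulated coverage `ctx.pivotAccountsCoverage()` must have reached
## `healAccountsCoverageTrigger`.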
not env.fetchAccounts.processed.isEmpty and
healAccountsCoverageTrigger <= ctx.pivotAccountsCoverage()
proc init(
T: type SnapRangeBatchRef; # Collection of sets of account ranges
ctx: SnapCtxRef; # Some global context
): T =
## Account ranges constructor
new result
result.unprocessed.init() # full range on the first set of the pair
result.processed = NodeTagRangeSet.init()
# Update coverage level roll over
ctx.pivotAccountsCoverage100PcRollOver()
# Initialise accounts range fetch batch, the pair of `fetchAccounts[]` range
# sets. Deprioritise already processed ranges by moving them to the second set.
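#
# Illustration: if `coveredAccounts` already contains a range `[a,b]`, the
# loop below yields
#   unprocessed[0] = full range minus [a,b]  -- fetched first
#   unprocessed[1] = [a,b]                   -- re-fetched only afterwards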
for iv in ctx.pool.coveredAccounts.increasing:
discard result.unprocessed[0].reduce iv
discard result.unprocessed[1].merge iv
proc init(
T: type SnapPivotRef; # Pivot descriptor type
ctx: SnapCtxRef; # Some global context
header: BlockHeader; # Header to generate new pivot from
): T =
## Pivot constructor.
result = T(
stateHeader: header,
fetchAccounts: SnapRangeBatchRef.init(ctx))
result.storageAccounts.init()
# ------------------------------------------------------------------------------
# Public functions: pivot table related
# ------------------------------------------------------------------------------
proc beforeTopMostlyClean*(pivotTable: var SnapPivotTable) =
## Clean up the pivot queues of the entry before the top one. These queues
## hold the pivot data that needs most of the memory. The cleaned pivot is
## no longer usable afterwards but may remain useful as a historic record.
let rc = pivotTable.beforeLastValue
if rc.isOk:
rc.value.pivotMothball
proc topNumber*(pivotTable: var SnapPivotTable): BlockNumber =
## Return the block number of the top pivot entry, or zero if there is none.
let rc = pivotTable.lastValue
if rc.isOk:
return rc.value.stateHeader.blockNumber
proc reverseUpdate*(
pivotTable: var SnapPivotTable; # Pivot table
header: BlockHeader; # Header to generate new pivot from
ctx: SnapCtxRef; # Some global context
) =
## Activate environment for earlier state root implied by `header` argument.
##
## Note that the pivot table is assumed to be sorted by the block numbers of
## the pivot header.
##
# Append per-state root environment to LRU queue
discard pivotTable.prepend(
header.stateRoot, SnapPivotRef.init(ctx, header))
# Make sure that the LRU table does not grow too big.
if max(3, ctx.buddiesMax) < pivotTable.len:
# Delete second entry rather than the first which might currently
# be needed.
let rc = pivotTable.secondKey
if rc.isOk:
pivotTable.del rc.value
proc tickerStats*(
pivotTable: var SnapPivotTable; # Pivot table
ctx: SnapCtxRef; # Some global context
): TickerSnapStatsUpdater =
## This function returns a closure of type `TickerSnapStatsUpdater` that
## collects pivot table statistics. The returned function is supposed to
## drive the `ticker` module.
proc meanStdDev(sum, sqSum: float; length: int): (float,float) =
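## Sample mean and standard deviation, via Var(x) = E[x^2] - E[x]^2.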
if 0 < length:
result[0] = sum / length.float
let
sqSumAv = sqSum / length.float
rSq = result[0] * result[0]
if rSq < sqSumAv:
result[1] = sqrt(sqSum / length.float - result[0] * result[0])
result = proc: TickerSnapStats =
var
aSum, aSqSum, uSum, uSqSum, sSum, sSqSum, cSum, cSqSum: float
count = 0
for kvp in ctx.pool.pivotTable.nextPairs:
# Accounts mean & variance
let aLen = kvp.data.nAccounts.float
if 0 < aLen:
count.inc
aSum += aLen
aSqSum += aLen * aLen
# Fill utilisation mean & variance
let fill = kvp.data.fetchAccounts.processed.fullFactor
uSum += fill
uSqSum += fill * fill
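# Slot lists mean & variance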
let sLen = kvp.data.nSlotLists.float
sSum += sLen
sSqSum += sLen * sLen
# Lists of missing contracts
let cLen = kvp.data.nContracts.float
cSum += cLen
cSqSum += cLen * cLen
let
env = ctx.pool.pivotTable.lastValue.get(otherwise = nil)
accCoverage = (ctx.pool.coveredAccounts.fullFactor +
ctx.pool.covAccTimesFull.float)
accFill = meanStdDev(uSum, uSqSum, count)
var
beaconBlock = none(BlockNumber)
pivotBlock = none(BlockNumber)
stoQuLen = none(int)
ctraQuLen = none(int)
procChunks = 0
if not env.isNil:
pivotBlock = some(env.stateHeader.blockNumber)
procChunks = env.fetchAccounts.processed.chunks
stoQuLen = some(env.storageQueueTotal())
ctraQuLen = some(env.fetchContracts.len)
if 0 < ctx.pool.beaconHeader.blockNumber:
beaconBlock = some(ctx.pool.beaconHeader.blockNumber)
TickerSnapStats(
beaconBlock: beaconBlock,
pivotBlock: pivotBlock,
nQueues: ctx.pool.pivotTable.len,
nAccounts: meanStdDev(aSum, aSqSum, count),
nSlotLists: meanStdDev(sSum, sSqSum, count),
nContracts: meanStdDev(cSum, cSqSum, count),
accountsFill: (accFill[0], accFill[1], accCoverage),
nAccountStats: procChunks,
nStorageQueue: stoQuLen,
nContractQueue: ctraQuLen)
# ------------------------------------------------------------------------------
# Public functions: particular pivot
# ------------------------------------------------------------------------------
proc pivotCompleteOk*(env: SnapPivotRef): bool =
## Returns `true` iff the pivot covers a complete set of accounts and
## storage slots.
env.fetchAccounts.processed.isFull and
env.storageQueueTotal() == 0 and
env.fetchContracts.len == 0
proc pivotMothball*(env: SnapPivotRef) =
## Clean up most of this argument `env` pivot record and mark it `archived`.
## Note that archived pivots will be checked for swapping in already known
## accounts and storage slots.
env.fetchAccounts.unprocessed.init()
# Simplify storage slots queues by resolving partial slots into full list
for kvp in env.fetchStoragePart.nextPairs:
discard env.fetchStorageFull.append(
kvp.key, SnapSlotsQueueItemRef(acckey: kvp.data.accKey))
env.fetchStoragePart.clear()
# Provide index into `fetchStorageFull`
env.storageAccounts.clear()
for kvp in env.fetchStorageFull.nextPairs:
let rc = env.storageAccounts.insert(kvp.data.accKey.to(NodeTag))
# Note that `rc.isErr` (a duplicate entry) should not happen as each
# `accKey` maps to a unique storage root
if rc.isOk:
rc.value.data = kvp.key
# Finally, mark this pivot record `archived`
env.archived = true
proc execSnapSyncAction*(
env: SnapPivotRef; # Current pivot environment
buddy: SnapBuddyRef; # Worker peer
) {.async.} =
## Execute a synchronisation run.
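##
## Stages are run in order: trim oversized storage slots and contracts
## queues, fetch accounts ranges, fetch storage slots and contracts, heal
## accounts, fetch whatever healing has uncovered, and finally heal storage
## slots.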
let
ctx = buddy.ctx
if env.savedFullPivotOk:
return # no need to do anything
block:
# Trim the storage slots and contracts queues first if they have grown too large
if storageSlotsQuPrioThresh < env.storageQueueAvail():
await buddy.rangeFetchStorageSlots(env)
if buddy.ctrl.stopped or env.archived:
return
if contractsQuPrioThresh < env.fetchContracts.len:
await buddy.rangeFetchContracts(env)
if buddy.ctrl.stopped or env.archived:
return
var rangeFetchOk = true
if not env.fetchAccounts.processed.isFull:
await buddy.rangeFetchAccounts(env)
# Update 100% accounting
ctx.pivotAccountsCoverage100PcRollOver()
# Run at least one round fetching storage slots and contracts even if
# the `archived` flag is set in order to keep the batch queue small.
if buddy.ctrl.running:
await buddy.rangeFetchStorageSlots(env)
await buddy.rangeFetchContracts(env)
else:
rangeFetchOk = false
if env.archived or (buddy.ctrl.zombie and buddy.only.errors.peerDegraded):
return
# Unconditionally try healing if enabled.
if env.accountsHealingOk(ctx):
# Let this procedure decide whether to ditch this peer (if any). The idea
# is that the healing process might address different peer resources
# than the fetch procedure, so the peer might still be useful unless
# physically disconnected.
buddy.ctrl.forceRun = true
await buddy.healAccounts(env)
if env.archived or (buddy.ctrl.zombie and buddy.only.errors.peerDegraded):
return
# Some additional storage slots and contracts might have been popped up
if rangeFetchOk:
await buddy.rangeFetchStorageSlots(env)
await buddy.rangeFetchContracts(env)
if env.archived:
return
# Don't bother with storage slots healing before accounts healing takes
# place. This saves communication bandwidth. The pivot might change soon,
# anyway.
if env.accountsHealingOk(ctx):
buddy.ctrl.forceRun = true
await buddy.healStorageSlots(env)
proc saveCheckpoint*(
env: SnapPivotRef; # Current pivot environment
ctx: SnapCtxRef; # Some global context
): Result[int,HexaryError] =
## Save current sync admin data. On success, the size of the data record
## saved is returned (e.g. for logging.)
##
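## Saving is refused while the data is too bulky or too fragmented, see the
## checks against `saveAccountsProcessedChunksMax`, `saveStorageSlotsMax`,
## and `saveContactsMax` below.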
if env.savedFullPivotOk:
return ok(0) # no need to do anything
let fa = env.fetchAccounts
if fa.processed.isEmpty:
return err(NoAccountsYet)
if saveAccountsProcessedChunksMax < fa.processed.chunks:
return err(TooManyChunksInAccountsQueue)
if saveStorageSlotsMax < env.storageQueueTotal():
return err(TooManyQueuedStorageSlots)
if saveContactsMax < env.fetchContracts.len:
return err(TooManyQueuedContracts)
result = ctx.pool.snapDb.pivotSaveDB SnapDbPivotRegistry(
header: env.stateHeader,
nAccounts: env.nAccounts,
nSlotLists: env.nSlotLists,
processed: toSeq(env.fetchAccounts.processed.increasing)
.mapIt((it.minPt,it.maxPt)),
slotAccounts: (toSeq(env.fetchStorageFull.nextKeys) &
toSeq(env.fetchStoragePart.nextKeys)).mapIt(it.to(NodeKey)) &
toSeq(env.parkedStorage.items),
ctraAccounts: (toSeq(env.fetchContracts.nextValues)))
if result.isOk and env.pivotCompleteOk():
env.savedFullPivotOk = true
proc pivotRecoverFromCheckpoint*(
env: SnapPivotRef; # Current pivot environment
ctx: SnapCtxRef; # Global context (containing save state)
topLevel: bool; # Full data set on top level only
) =
## Recover some pivot variables and global list `coveredAccounts` from
## checkpoint data. If the argument `topLevel` is set `true`, also the
## `processed`, `unprocessed`, and the `fetchStorageFull` lists are
## initialised.
##
let recov = ctx.pool.recovery
if recov.isNil:
return
env.nAccounts = recov.state.nAccounts
env.nSlotLists = recov.state.nSlotLists
# Import processed interval
for (minPt,maxPt) in recov.state.processed:
if topLevel:
env.fetchAccounts.unprocessed.reduce NodeTagRange.new(minPt, maxPt)
discard env.fetchAccounts.processed.merge(minPt, maxPt)
discard ctx.pool.coveredAccounts.merge(minPt, maxPt)
ctx.pivotAccountsCoverage100PcRollOver() # update coverage level roll over
# Handle storage slots
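# Only re-queue the storage root of an account that is already covered as
# processed; slots of accounts still to be downloaded are skipped here as
# they will be re-queued when those accounts are fetched.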
let stateRoot = recov.state.header.stateRoot
for w in recov.state.slotAccounts:
let pt = NodeTagRange.new(w.to(NodeTag),w.to(NodeTag)) # => `pt.len == 1`
if 0 < env.fetchAccounts.processed.covered(pt):
# Ignoring slots that have accounts to be downloaded, anyway
let rc = ctx.pool.snapDb.getAccountsData(stateRoot, w)
if rc.isErr:
# Oops, how did that account get lost?
discard env.fetchAccounts.processed.reduce pt
env.fetchAccounts.unprocessed.merge pt
elif rc.value.storageRoot != EMPTY_ROOT_HASH:
env.storageQueueAppendFull(rc.value.storageRoot, w)
# Handle contracts
for w in recov.state.ctraAccounts:
let pt = NodeTagRange.new(w.to(NodeTag),w.to(NodeTag)) # => `pt.len == 1`
if 0 < env.fetchAccounts.processed.covered(pt):
# Ignoring contracts that have accounts to be downloaded, anyway
let rc = ctx.pool.snapDb.getAccountsData(stateRoot, w)
if rc.isErr:
# Oops, how did that account get lost?
discard env.fetchAccounts.processed.reduce pt
env.fetchAccounts.unprocessed.merge pt
elif rc.value.codeHash != EMPTY_CODE_HASH:
env.fetchContracts[rc.value.codeHash] = w
# Handle mothballed pivots for swapping in (see `pivotMothball()`)
if topLevel:
env.savedFullPivotOk = env.pivotCompleteOk()
when extraTraceMessages:
trace logTxt "recovered top level record",
pivot=env.stateHeader.blockNumber.toStr,
savedFullPivotOk=env.savedFullPivotOk,
processed=env.fetchAccounts.processed.fullPC3,
nStoQ=env.storageQueueTotal()
else:
for kvp in env.fetchStorageFull.nextPairs:
let rc = env.storageAccounts.insert(kvp.data.accKey.to(NodeTag))
if rc.isOk:
rc.value.data = kvp.key
env.archived = true
# ------------------------------------------------------------------------------
# Public function, manage new peer and pivot update
# ------------------------------------------------------------------------------
proc pivotApprovePeer*(buddy: SnapBuddyRef) {.async.} =
## Approve peer and update pivot. On failure, the `buddy` will be stopped so
## it will not proceed to the next scheduler task.
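##
## The pivot is moved forward only if the beacon header is at least
## `pivotBlockDistanceMin` blocks ahead of the current pivot header.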
let
ctx = buddy.ctx
beaconHeader = ctx.pool.beaconHeader
var
pivotHeader: BlockHeader
block:
let rc = ctx.pool.pivotTable.lastValue
if rc.isOk:
pivotHeader = rc.value.stateHeader
# Check whether the pivot needs to be updated
if pivotHeader.blockNumber+pivotBlockDistanceMin <= beaconHeader.blockNumber:
# If the entry before the previous entry is unused, then run a pool mode
# based session (which should enable a pivot table purge).
block:
let rc = ctx.pool.pivotTable.beforeLast
if rc.isOk and rc.value.data.fetchAccounts.processed.isEmpty:
ctx.poolMode = true
when extraTraceMessages:
trace logTxt "new pivot from beacon chain", peer=buddy.peer,
pivot=pivotHeader.blockNumber.toStr,
beacon=beaconHeader.blockNumber.toStr, poolMode=ctx.poolMode
discard ctx.pool.pivotTable.lruAppend(
beaconHeader.stateRoot, SnapPivotRef.init(ctx, beaconHeader),
pivotTableLruEntriesMax)
pivotHeader = beaconHeader
# Not ready yet?
if pivotHeader.blockNumber == 0:
buddy.ctrl.stopped = true
proc pivotUpdateBeaconHeaderCB*(ctx: SnapCtxRef): SyncReqNewHeadCB =
## Update beacon header. This function is intended as a call back function
## for the RPC module.
result = proc(h: BlockHeader) {.gcsafe.} =
if ctx.pool.beaconHeader.blockNumber < h.blockNumber:
# when extraTraceMessages:
# trace logTxt "external beacon info update", header=h.blockNumber.toStr
ctx.pool.beaconHeader = h
# ------------------------------------------------------------------------------
# Public function, debugging
# ------------------------------------------------------------------------------
import
db/[hexary_desc, hexary_inspect, hexary_nearby, hexary_paths,
snapdb_storage_slots]
const
pivotVerifyExtraBlurb = false # or true
inspectSuspendAfter = 10_000
inspectExtraNap = 100.milliseconds
proc pivotVerifyComplete*(
env: SnapPivotRef; # Current pivot environment
ctx: SnapCtxRef; # Some global context
inspectAccountsTrie = false; # Check for dangling links
walkAccountsDB = true; # Walk accounts db
inspectSlotsTries = true; # Check dangling links (if `walkAccountsDB`)
verifyContracts = true; # Verify that code hashes are in database
): Future[bool]
{.async,discardable.} =
## Check the database whether the pivot is complete -- not advised on a
## production system as the process takes a lot of resources.
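##
## Verification runs up to three passes: trie inspection for dangling
## links (`inspectAccountsTrie`), a leaf-by-leaf walk over the accounts
## database (`walkAccountsDB`) with optional inspection of each storage
## slots trie, and a lookup of every non-trivial contract code hash.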
let
rootKey = env.stateHeader.stateRoot.to(NodeKey)
accFn = ctx.pool.snapDb.getAccountFn
ctraFn = ctx.pool.snapDb.getContractsFn
# Verify consistency of the accounts trie database. This should not be
# needed if `walkAccountsDB` is set: if there were a dangling link that
# `hexaryInspectTrie()` would detect, the `hexaryNearbyRight()` function
# should fail at that point as well.
if inspectAccountsTrie:
var
stats = accFn.hexaryInspectTrie(rootKey,
suspendAfter=inspectSuspendAfter,
maxDangling=1)
nVisited = stats.count
nRetryCount = 0
while stats.dangling.len == 0 and not stats.resumeCtx.isNil:
when pivotVerifyExtraBlurb:
trace logTxt "accounts db inspect ..", nVisited, nRetryCount
await sleepAsync inspectExtraNap
nRetryCount.inc
stats = accFn.hexaryInspectTrie(rootKey,
resumeCtx=stats.resumeCtx,
suspendAfter=inspectSuspendAfter,
maxDangling=1)
nVisited += stats.count
# End while
if stats.dangling.len != 0:
error logTxt "accounts trie has danglig links", nVisited, nRetryCount
return false
trace logTxt "accounts trie ok", nVisited, nRetryCount
# End `if inspectAccountsTrie`
# Visit accounts and make sense of storage slots
if walkAccountsDB:
var
nAccounts = 0
nStorages = 0
nContracts = 0
nRetryTotal = 0
nodeTag = low(NodeTag)
while true:
if (nAccounts mod inspectSuspendAfter) == 0 and 0 < nAccounts:
when pivotVerifyExtraBlurb:
trace logTxt "accounts db walk ..",
nAccounts, nStorages, nContracts, nRetryTotal,
inspectSlotsTries, verifyContracts
await sleepAsync inspectExtraNap
# Find next account key => `nodeTag`
let rc = nodeTag.hexaryPath(rootKey,accFn).hexaryNearbyRight(accFn)
if rc.isErr:
if rc.error == NearbyBeyondRange:
break # No more accounts
error logTxt "accounts db problem", nodeTag,
nAccounts, nStorages, nContracts, nRetryTotal,
inspectSlotsTries, verifyContracts, error=rc.error
return false
nodeTag = rc.value.getPartialPath.convertTo(NodeKey).to(NodeTag)
nAccounts.inc
# Decode accounts data
var accData: Account
try:
accData = rc.value.leafData.decode(Account)
except RlpError as e:
error logTxt "account data problem", nodeTag,
nAccounts, nStorages, nContracts, nRetryTotal,
inspectSlotsTries, verifyContracts, name=($e.name), msg=(e.msg)
return false
# Check for storage slots for this account
if accData.storageRoot != EMPTY_ROOT_HASH:
nStorages.inc
if inspectSlotsTries:
let
slotFn = ctx.pool.snapDb.getStorageSlotsFn(nodeTag.to(NodeKey))
stoKey = accData.storageRoot.to(NodeKey)
var
stats = slotFn.hexaryInspectTrie(stoKey,
suspendAfter=inspectSuspendAfter,
maxDangling=1)
nVisited = stats.count
nRetryCount = 0
while stats.dangling.len == 0 and not stats.resumeCtx.isNil:
when pivotVerifyExtraBlurb:
trace logTxt "storage slots inspect ..", nodeTag,
nAccounts, nStorages, nContracts, nRetryTotal,
inspectSlotsTries, verifyContracts, nVisited, nRetryCount
await sleepAsync inspectExtraNap
nRetryCount.inc
nRetryTotal.inc
stats = slotFn.hexaryInspectTrie(stoKey,
resumeCtx=stats.resumeCtx,
suspendAfter=inspectSuspendAfter,
maxDangling=1)
nVisited += stats.count
if stats.dangling.len != 0:
error logTxt "storage slots trie has dangling link", nodeTag,
nAccounts, nStorages, nContracts, nRetryTotal,
inspectSlotsTries, nVisited, nRetryCount
return false
if nVisited == 0:
error logTxt "storage slots trie is empty", nodeTag,
nAccounts, nStorages, nContracts, nRetryTotal,
inspectSlotsTries, verifyContracts, nVisited, nRetryCount
return false
# Check for contract codes for this account
if accData.codeHash != EMPTY_CODE_HASH:
nContracts.inc
if verifyContracts:
let codeKey = accData.codeHash.to(NodeKey)
if codeKey.to(Blob).ctraFn.len == 0:
error logTxt "Contract code missing", nodeTag,
codeKey=codeKey.to(NodeTag),
nAccounts, nStorages, nContracts, nRetryTotal,
inspectSlotsTries, verifyContracts
return false
# Set up next node key for looping
if nodeTag == high(NodeTag):
break
nodeTag = nodeTag + 1.u256
# End while
trace logTxt "accounts db walk ok",
nAccounts, nStorages, nContracts, nRetryTotal, inspectSlotsTries
# End `if walkAccountsDB`
return true
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------