nimbus-eth1/nimbus/sync/flare/worker.nim

# Nimbus
# Copyright (c) 2023-2024 Status Research & Development GmbH
# Licensed and distributed under either of
#   * MIT license (license terms in the root directory or at
#     https://opensource.org/licenses/MIT).
#   * Apache v2 license (license terms in the root directory or at
#     https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.


{.push raises:[].}

import
  pkg/[chronicles, chronos],
  pkg/eth/[common, p2p],
  pkg/stew/[interval_set, sorted_set],
  ../../common,
  ./worker/[blocks_staged, db, headers_staged, headers_unproc, helpers,
            start_stop, update],
  ./worker_desc

logScope:
  topics = "flare"

# ------------------------------------------------------------------------------
# Private functions
# ------------------------------------------------------------------------------

proc headersToFetchOk(buddy: FlareBuddyRef): bool =
  0 < buddy.ctx.headersUnprocTotal() and
    buddy.ctrl.running and
    not buddy.ctx.poolMode

proc bodiesToFetchOk(buddy: FlareBuddyRef): bool =
  buddy.ctx.blocksStagedFetchOk() and
    buddy.ctrl.running and
    not buddy.ctx.poolMode

proc napUnlessSomethingToFetch(
    buddy: FlareBuddyRef;
    info: static[string];
      ): Future[bool] {.async.} =
  ## When idle, save cpu cycles waiting for something to do.
  if buddy.ctx.pool.importRunningOk or
     not (buddy.headersToFetchOk() or
          buddy.bodiesToFetchOk()):
    debug info & ": idly wasting time", peer=buddy.peer
    await sleepAsync workerIdleWaitInterval
    return true
  return false

# ------------------------------------------------------------------------------
# Public start/stop and admin functions
# ------------------------------------------------------------------------------

proc setup*(ctx: FlareCtxRef): bool =
  ## Global set up
  debug "RUNSETUP"
  ctx.setupRpcMagic()

  # Load initial state from database if there is any
  ctx.setupDatabase()

  # Debugging stuff, might be an empty template
  ctx.setupTicker()

  # Enable background daemon
  ctx.daemon = true
  true

proc release*(ctx: FlareCtxRef) =
  ## Global clean up
  debug "RUNRELEASE"
  ctx.destroyRpcMagic()
  ctx.destroyTicker()


proc start*(buddy: FlareBuddyRef): bool =
  ## Initialise worker peer
  const info = "RUNSTART"

  if runsThisManyPeersOnly <= buddy.ctx.pool.nBuddies:
    debug info & " peer limit reached", peer=buddy.peer
    return false

  if not buddy.startBuddy():
    debug info & " failed", peer=buddy.peer
    return false

  debug info, peer=buddy.peer
  true

proc stop*(buddy: FlareBuddyRef) =
  ## Clean up this peer
  debug "RUNSTOP", peer=buddy.peer, nInvocations=buddy.only.nMultiLoop,
    lastIdleGap=buddy.only.multiRunIdle.toStr
  buddy.stopBuddy()

# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------

proc runDaemon*(ctx: FlareCtxRef) {.async.} =
  ## Global background job that will be re-started as long as the variable
  ## `ctx.daemon` is set `true`. If that job was stopped due to re-setting
  ## `ctx.daemon` to `false`, it will be restarted next after it was reset
  ## as `true` not before there is some activity on the `runPool()`,
  ## `runSingle()`, or `runMulti()` functions.
  ##
  const info = "RUNDAEMON"
  debug info

  # Check for a possible header layout and body request changes
  discard ctx.updateLinkedHChainsLayout()
  discard ctx.updateBlockRequests()

  # Execute staged block records.
  if ctx.blocksStagedCanImportOk():

    block:
      # Set advisory flag telling that a slow/long running process will take
      # place. This works a bit like `runSingle()` only that in the case here
      # we might have no peer.
      ctx.pool.importRunningOk = true
      defer: ctx.pool.importRunningOk = false

      # Import from staged queue.
      while ctx.blocksStagedImport info:
        ctx.updateMetrics()

        # Allow pseudo/async thread switch
        await sleepAsync asyncThreadSwitchTimeSlot

  # At the end of the cycle, leave time to refill
  await sleepAsync daemonWaitInterval

  ctx.updateMetrics()


proc runPool*(buddy: FlareBuddyRef; last: bool; laps: int): bool =
  ## Once started, the function `runPool()` is called for all worker peers in
  ## sequence as long as this function returns `false`. There will be no other
  ## `runPeer()` functions activated while `runPool()` is active.
  ##
  ## This procedure is started if the global flag `buddy.ctx.poolMode` is set
  ## `true` (default is `false`.) The flag will be automatically reset before
  ## the loop starts. Re-setting it again results in repeating the loop. The
  ## argument `laps` (starting with `0`) indicated the currend lap of the
  ## repeated loops.
  ##
  ## The argument `last` is set `true` if the last entry is reached.
  ##
  ## Note that this function does not run in `async` mode.
  ##
  const info = "RUNPOOL"
  #debug info, peer=buddy.peer, laps
  buddy.ctx.headersStagedReorg info # reorg
  true # stop


proc runPeer*(buddy: FlareBuddyRef) {.async.} =
  ## This peer worker method is repeatedly invoked (exactly one per peer) while
  ## the `buddy.ctrl.poolMode` flag is set `false`.
  ##
  const info = "RUNPEER"
  let peer = buddy.peer

  if 0 < buddy.only.nMultiLoop:                 # statistics/debugging
    buddy.only.multiRunIdle = Moment.now() - buddy.only.stoppedMultiRun
  buddy.only.nMultiLoop.inc                     # statistics/debugging

  trace info, peer, nInvocations=buddy.only.nMultiLoop,
    lastIdleGap=buddy.only.multiRunIdle.toStr

  # Update beacon header when needed. For the beacon header, a hash will be
  # auto-magically made available via RPC. The corresponding header is then
  # fetched from the current peer.
  await buddy.headerStagedUpdateBeacon info

  if not await buddy.napUnlessSomethingToFetch info:
    #
    # Layout of a triple of linked header chains (see `README.md`)
    # ::
    #   G                B                     L                F
    #   | <--- [G,B] --> | <----- (B,L) -----> | <-- [L,F] ---> |
    #   o----------------o---------------------o----------------o--->
    #   | <-- linked --> | <-- unprocessed --> | <-- linked --> |
    #
    # This function is run concurrently for fetching the next batch of
    # headers and stashing them on the database. Each concurrently running
    # actor works as follows:
    #
    # * Get a range of block numbers from the `unprocessed` range `(B,L)`.
    # * Fetch headers for this range (as much as one can get).
    # * Stash then on the database.
    # * Rinse and repeat.
    #
    # The block numbers range concurrently taken from `(B,L)` are chosen
    # from the upper range. So exactly one of the actors has a range
    # `[whatever,L-1]` adjacent to `[L,F]`. Call this actor the lead actor.
    #
    # For the lead actor, headers can be downloaded all by the hashes as
    # the parent hash for the header with block number `L` is known. All
    # other non-lead actors will download headers by the block number only
    # and stage it to be re-ordered and stashed on the database when ready.
    #
    # Once the lead actor stashes the dowloaded headers, the other staged
    # headers will also be stashed on the database until there is a gap or
    # the stashed haeders are exhausted.
    #
    # Due to the nature of the `async` logic, the current lead actor will
    # stay lead when fetching the next range of block numbers.
    #
    while buddy.headersToFetchOk():

      # * Get unprocessed range from pool
      # * Fetch headers for this range (as much as one can get)
      # * Verify that a block is contiguous, chained by parent hash, etc.
      # * Stash this range on the staged queue on the pool
      if await buddy.headersStagedCollect info:

        # * Save updated state and headers
        # * Decrease the left boundary `L` of the trusted range `[L,F]`
        discard buddy.ctx.headersStagedProcess info

    # Fetch bodies and combine them with headers to blocks to be staged. These
    # staged blocks are then excuted by the daemon process (no `peer` needed.)
    while buddy.bodiesToFetchOk():
      discard await buddy.blocksStagedCollect info

    # Note that it is important **not** to leave this function to be
    # re-invoked by the scheduler unless necessary. While the time gap
    # until restarting is typically a few millisecs, there are always
    # outliers which well exceed several seconds. This seems to let
    # remote peers run into timeouts.

  buddy.only.stoppedMultiRun = Moment.now()     # statistics/debugging

# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------