# nimbus-eth2/beacon_chain/gossip_processing/block_processor.nim

# beacon_chain
# Copyright (c) 2018-2022 Status Research & Development GmbH
# Licensed and distributed under either of
#   * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
#   * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

{.push raises: [Defect].}

import
  std/math,
  stew/results,
  chronicles, chronos, metrics,
  eth/async_utils,
  web3/engine_api_types,
  ../spec/datatypes/[phase0, altair, bellatrix],
  ../spec/[forks, signatures_batch],
  ../consensus_object_pools/[
    attestation_pool, block_clearance, blockchain_dag, block_quarantine,
    spec_cache],
  ../eth1/eth1_monitor,
  ./consensus_manager,
  ../beacon_clock,
  ../sszdump

export sszdump, signatures_batch

# Block Processor
# ------------------------------------------------------------------------------
# The block processor moves blocks from "Incoming" to "Consensus verified"

# Histogram fed by `storeBlock()`: the time, in seconds, between entering
# `storeBlock()` and the point right before the head is updated.
declareHistogram beacon_store_block_duration_seconds,
  "storeBlock() duration", buckets = [0.25, 0.5, 1, 2, 4, 8, Inf]

type
  BlockEntry* = object
    ## A block waiting in the processing queue, together with the bookkeeping
    ## needed to report the outcome and to log timing information
    blck*: ForkedSignedBeaconBlock
    # Completed with the processing outcome; nil when the producer does not
    # wait for the result
    resfut*: Future[Result[void, BlockError]]
    queueTick*: Moment # Moment when block was enqueued
    validationDur*: Duration # Time it took to perform gossip validation
    src*: MsgSource # Where the block came from

  BlockProcessor* = object
    ## This manages the processing of blocks from different sources
    ## Blocks and attestations are enqueued in a gossip-validated state
    ##
    ## from:
    ## - Gossip (when synced)
    ## - SyncManager (during sync)
    ## - RequestManager (missing ancestor blocks)
    ##
    ## are then consensus-verified and added to:
    ## - the blockchain DAG
    ## - database
    ## - attestation pool
    ## - fork choice
    ##
    ## The processor will also reinsert blocks from the quarantine, should a
    ## parent be found.

    # Config
    # ----------------------------------------------------------------
    dumpEnabled: bool # When false, no blocks are dumped to disk
    dumpDirInvalid: string # Target directory for blocks rejected as invalid
    dumpDirIncoming: string # Target directory for blocks with a missing parent

    # Producers
    # ----------------------------------------------------------------
    blockQueue: AsyncQueue[BlockEntry]

    # Consumer
    # ----------------------------------------------------------------
    consensusManager: ref ConsensusManager
      ## Gives access to the blockchain DAG, AttestationPool, Quarantine and
      ## Eth1Monitor
    validatorMonitor: ref ValidatorMonitor
    getBeaconTime: GetBeaconTimeFn # Wall clock source used when processing

    verifier: BatchVerifier # Passed to the DAG when adding head blocks

# Forward declaration: `storeBlock` re-enqueues quarantined children through
# `addBlock`, whose implementation lives in the "Enqueue" section further down.
proc addBlock*(
    self: var BlockProcessor, src: MsgSource, blck: ForkedSignedBeaconBlock,
    resfut: Future[Result[void, BlockError]] = nil,
    validationDur = Duration())

# Initialization
# ------------------------------------------------------------------------------

proc new*(T: type BlockProcessor,
          dumpEnabled: bool,
          dumpDirInvalid, dumpDirIncoming: string,
          rng: ref BrHmacDrbgContext, taskpool: TaskPoolPtr,
          consensusManager: ref ConsensusManager,
          validatorMonitor: ref ValidatorMonitor,
          getBeaconTime: GetBeaconTimeFn): ref BlockProcessor =
  ## Allocates a block processor that starts out with an empty block queue
  ## and a batch verifier built from `rng` and `taskpool`; every other
  ## argument is stored as given.
  let pendingBlocks = newAsyncQueue[BlockEntry]()

  result = (ref BlockProcessor)(
    # Config
    dumpEnabled: dumpEnabled,
    dumpDirInvalid: dumpDirInvalid,
    dumpDirIncoming: dumpDirIncoming,
    # Producers
    blockQueue: pendingBlocks,
    # Consumer
    consensusManager: consensusManager,
    validatorMonitor: validatorMonitor,
    getBeaconTime: getBeaconTime,
    verifier: BatchVerifier(rng: rng, taskpool: taskpool))

# Sync callbacks
# ------------------------------------------------------------------------------

func hasBlocks*(self: BlockProcessor): bool =
  ## True while at least one block is waiting in the processing queue.
  let pending = len(self.blockQueue)
  pending > 0

# Storage
# ------------------------------------------------------------------------------

proc dumpInvalidBlock*(
    self: BlockProcessor, signedBlock: ForkySignedBeaconBlock) =
  ## Writes `signedBlock` to the "invalid" dump directory; does nothing when
  ## dumping is disabled.
  if not self.dumpEnabled:
    return

  dump(self.dumpDirInvalid, signedBlock)

proc dumpBlock[T](
    self: BlockProcessor,
    signedBlock: ForkySignedBeaconBlock,
    res: Result[T, BlockError]) =
  ## Dumps `signedBlock` according to the failure recorded in `res`:
  ## `Invalid` blocks go to the invalid directory, blocks with a
  ## `MissingParent` go to the incoming directory. Successful results, other
  ## errors and a disabled dump configuration leave the disk untouched.
  if not self.dumpEnabled or res.isOk:
    return

  let failure = res.error
  if failure == BlockError.Invalid:
    self.dumpInvalidBlock(signedBlock)
  elif failure == BlockError.MissingParent:
    dump(self.dumpDirIncoming, signedBlock)

proc storeBackfillBlock(
    self: var BlockProcessor,
    signedBlock: ForkySignedBeaconBlock): Result[void, BlockError] =
  ## Hands `signedBlock` to the DAG as a backfill block and keeps the
  ## quarantine's bookkeeping of missing / unviable roots up to date.
  ##
  ## Returns the DAG's verdict, except that a `MissingParent` whose parent is
  ## already known to be unviable is reported as `UnviableFork`.
  let quarantine = self.consensusManager.quarantine

  # Now that the block is here, it no longer needs to be requested
  quarantine[].missing.del(signedBlock.root)

  let outcome = self.consensusManager.dag.addBackfillBlock(signedBlock)
  if outcome.isOk():
    return outcome

  if outcome.error == BlockError.MissingParent:
    if signedBlock.message.parent_root in quarantine[].unviable:
      # The DAG has no notion of unviable ancestors, but the quarantine does:
      # report the block as unviable so that sync etc stops retrying it
      quarantine[].addUnviable(signedBlock.root)
      return err(BlockError.UnviableFork)
  elif outcome.error == BlockError.UnviableFork:
    # Remember the root so that descendants can be discarded as well
    quarantine[].addUnviable(signedBlock.root)

  outcome

proc storeBlock*(
    self: var BlockProcessor,
    src: MsgSource, wallTime: BeaconTime,
    signedBlock: ForkySignedBeaconBlock, queueTick: Moment = Moment.now(),
    validationDur = Duration()): Result[BlockRef, BlockError] =
  ## storeBlock is the main entry point for unvalidated blocks - all untrusted
  ## blocks, regardless of origin, pass through here. When storing a block,
  ## we will add it to the dag and pass it to all block consumers that need
  ## to know about it, such as the fork choice and the monitoring
  ##
  ## `queueTick` and `validationDur` only feed the timing information of the
  ## "Block processed" log line.
  ##
  ## Returns the `BlockRef` of the newly added block. On failure the
  ## `BlockError` of the DAG is returned, except that a `MissingParent` whose
  ## parent is known to be unviable is reported as `UnviableFork`.
  let
    attestationPool = self.consensusManager.attestationPool
    startTick = Moment.now()
    wallSlot = wallTime.slotOrZero()
    vm = self.validatorMonitor
    dag = self.consensusManager.dag

  # The block is certainly not missing any more
  self.consensusManager.quarantine[].missing.del(signedBlock.root)

  # We'll also remove the block as an orphan: it's unlikely the parent is
  # missing if we get this far - should that be the case, the block will
  # be re-added later
  self.consensusManager.quarantine[].removeOrphan(signedBlock)

  type Trusted = typeof signedBlock.asTrusted()
  let blck = dag.addHeadBlock(self.verifier, signedBlock) do (
      blckRef: BlockRef, trustedBlock: Trusted, epochRef: EpochRef):
    # Callback add to fork choice if valid
    attestationPool[].addForkChoice(
      epochRef, blckRef, trustedBlock.message, wallTime)

    vm[].registerBeaconBlock(
      src, wallTime, trustedBlock.message)

    # Let the validator monitor know about every attester included in the block
    for attestation in trustedBlock.message.body.attestations:
      for validator_index in get_attesting_indices(
          epochRef, attestation.data.slot,
          CommitteeIndex.init(attestation.data.index).expect(
            "index has been checked"),
          attestation.aggregation_bits):
        vm[].registerAttestationInBlock(attestation.data, validator_index,
          trustedBlock.message.slot)

    # ... and about every sync committee participant (altair and later only)
    withState(dag[].clearanceState):
      when stateFork >= BeaconStateFork.Altair and
          Trusted isnot phase0.TrustedSignedBeaconBlock: # altair+
        for i in trustedBlock.message.body.sync_aggregate.sync_committee_bits.oneIndices():
          vm[].registerSyncAggregateInBlock(
            trustedBlock.message.slot, trustedBlock.root,
            state.data.current_sync_committee.pubkeys.data[i])

  self.dumpBlock(signedBlock, blck)

  # There can be a scenario where we receive a block we already received.
  # However this block was before the last finalized epoch and so its parent
  # was pruned from the ForkChoice.
  if blck.isErr():
    case blck.error()
    of BlockError.MissingParent:
      if signedBlock.message.parent_root in
          self.consensusManager.quarantine[].unviable:
        # DAG doesn't know about unviable ancestor blocks - we do! Translate
        # this to the appropriate error so that sync etc doesn't retry the block
        self.consensusManager.quarantine[].addUnviable(signedBlock.root)

        return err(BlockError.UnviableFork)

      # Park the block until its parent shows up
      if not self.consensusManager.quarantine[].addOrphan(
          dag.finalizedHead.slot, ForkedSignedBeaconBlock.init(signedBlock)):
        debug "Block quarantine full",
          blockRoot = shortLog(signedBlock.root),
          blck = shortLog(signedBlock.message),
          signature = shortLog(signedBlock.signature)
    of BlockError.UnviableFork:
      # Track unviables so that descendants can be discarded properly
      self.consensusManager.quarantine[].addUnviable(signedBlock.root)
    else: discard

    return blck

  let storeBlockTick = Moment.now()

  # Eagerly update head: the incoming block "should" get selected
  self.consensusManager[].updateHead(wallSlot)

  let
    updateHeadTick = Moment.now()
    queueDur = startTick - queueTick
    storeBlockDur = storeBlockTick - startTick
    updateHeadDur = updateHeadTick - storeBlockTick

  beacon_store_block_duration_seconds.observe(storeBlockDur.toFloatSeconds())

  debug "Block processed",
    localHeadSlot = self.consensusManager.dag.head.slot,
    blockSlot = blck.get().slot,
    validationDur, queueDur, storeBlockDur, updateHeadDur

  for quarantined in self.consensusManager.quarantine[].pop(blck.get().root):
    # Process the blocks that had the newly accepted block as parent
    self.addBlock(MsgSource.gossip, quarantined)

  blck

# Enqueue
# ------------------------------------------------------------------------------

proc addBlock*(
    self: var BlockProcessor, src: MsgSource, blck: ForkedSignedBeaconBlock,
    resfut: Future[Result[void, BlockError]] = nil,
    validationDur = Duration()) =
  ## Accepts a gossip-validated block for consensus verification.
  ##
  ## Blocks at or before the finalized head slot are stored right away as
  ## backfill blocks; everything else is appended to the processing queue.
  ## When `resfut` is given, it is completed with the outcome.
  # Backpressure:
  #   None is applied here - a producer that wants to limit its own pace has
  #   to wait for `resfut`
  # Producers:
  # - Gossip (when synced)
  # - SyncManager (during sync)
  # - RequestManager (missing ancestor blocks)

  withBlck(blck):
    let finalizedSlot = self.consensusManager.dag.finalizedHead.slot
    if blck.message.slot <= finalizedSlot:
      # Backfill blocks bypass the queue: they never require a state rewind
      # and are therefore always "fast" to process
      let backfillOutcome = self.storeBackfillBlock(blck)

      if not resfut.isNil:
        resfut.complete(backfillOutcome)
      return

  try:
    self.blockQueue.addLastNoWait(
      BlockEntry(
        blck: blck,
        resfut: resfut,
        queueTick: Moment.now(),
        validationDur: validationDur,
        src: src))
  except AsyncQueueFullError:
    raiseAssert "unbounded queue"

# Event Loop
# ------------------------------------------------------------------------------

proc processBlock(self: var BlockProcessor, entry: BlockEntry) =
  ## Consensus-verifies and stores a single dequeued block via `storeBlock`,
  ## then completes `entry.resfut` (when present) with the outcome.
  ##
  ## Terminates the process when the wall clock reports a time before
  ## genesis.
  logScope:
    blockRoot = shortLog(entry.blck.root)

  let
    wallTime = self.getBeaconTime()
    # Only the "after genesis" flag is of interest here - the slot part of
    # the tuple is not needed because `storeBlock` is given `wallTime` itself
    (afterGenesis, _) = wallTime.toSlot()

  if not afterGenesis:
    error "Processing block before genesis, clock turned back?"
    quit 1

  let
    res = withBlck(entry.blck):
      self.storeBlock(
        entry.src, wallTime, blck, entry.queueTick, entry.validationDur)

  if entry.resfut != nil:
    # The producer only cares about success / failure, not about the BlockRef
    entry.resfut.complete(
      if res.isOk(): Result[void, BlockError].ok()
      else: Result[void, BlockError].err(res.error()))

proc runForkchoiceUpdated(
    self: ref BlockProcessor, headBlockRoot, finalizedBlockRoot: Eth2Digest)
    {.async.} =
  ## Best-effort `forkchoiceUpdated` notification to the execution engine.
  ## A timeout or a `CatchableError` is logged at debug level and otherwise
  ## ignored; nothing is sent when `headBlockRoot` is zero.
  #
  # A zero `finalizedBlockRoot` is deliberately let through, to avoid sync
  # deadlocks:
  #
  # https://github.com/ethereum/EIPs/blob/master/EIPS/eip-3675.md#pos-events
  # has "Before the first finalized block occurs in the system the finalized
  # block hash provided by this event is stubbed with
  # `0x0000000000000000000000000000000000000000000000000000000000000000`."
  # and
  # https://github.com/ethereum/consensus-specs/blob/v1.1.10/specs/bellatrix/validator.md#executionpayload
  # notes "`finalized_block_hash` is the hash of the latest finalized execution
  # payload (`Hash32()` if none yet finalized)"
  if not headBlockRoot.isZero:
    try:
      # Keep the window in which the Eth1 monitor might close the connection
      # as small as possible
      await self.consensusManager.eth1Monitor.ensureDataProvider()

      debug "runForkChoiceUpdated: running forkchoiceUpdated",
        headBlockRoot,
        finalizedBlockRoot

      let pendingUpdate = forkchoiceUpdated(
        self.consensusManager.eth1Monitor, headBlockRoot, finalizedBlockRoot)

      # The response itself is of no interest, only that the call was made
      discard awaitWithTimeout(pendingUpdate, FORKCHOICEUPDATED_TIMEOUT):
        debug "runForkChoiceUpdated: forkchoiceUpdated timed out"
        default(ForkchoiceUpdatedResponse)
    except CatchableError as err:
      debug "runForkChoiceUpdated: forkchoiceUpdated failed",
        err = err.msg

proc newExecutionPayload*(
    eth1Monitor: Eth1Monitor, executionPayload: bellatrix.ExecutionPayload):
    Future[PayloadExecutionStatus] {.async.} =
  ## Submits `executionPayload` to the execution engine through `eth1Monitor`
  ## and returns the status reported in the engine's response.
  ##
  ## `PayloadExecutionStatus.syncing` is returned when `eth1Monitor` is nil,
  ## when the request does not finish within `NEWPAYLOAD_TIMEOUT` or when it
  ## fails with a `CatchableError`.
  debug "newPayload: inserting block into execution engine",
    parentHash = executionPayload.parent_hash,
    blockHash = executionPayload.block_hash,
    stateRoot = shortLog(executionPayload.state_root),
    receiptsRoot = shortLog(executionPayload.receipts_root),
    prevRandao = shortLog(executionPayload.prev_randao),
    blockNumber = executionPayload.block_number,
    gasLimit = executionPayload.gas_limit,
    gasUsed = executionPayload.gas_used,
    timestamp = executionPayload.timestamp,
    extraDataLen = executionPayload.extra_data.len,
    baseFeePerGas =
      UInt256.fromBytesLE(executionPayload.base_fee_per_gas.data),
    numTransactions = executionPayload.transactions.len

  if eth1Monitor.isNil:
    info "newPayload: attempting to process execution payload without an Eth1Monitor. Ensure --web3-url setting is correct."
    return PayloadExecutionStatus.syncing

  try:
    let
      payloadResponse =
        awaitWithTimeout(
            eth1Monitor.newPayload(
              executionPayload.asEngineExecutionPayload),
            NEWPAYLOAD_TIMEOUT):
          # No answer in time: treat the payload as not yet verifiable
          info "newPayload: newPayload timed out"
          PayloadStatusV1(status: PayloadExecutionStatus.syncing)
      payloadStatus = payloadResponse.status

    return payloadStatus
  except CatchableError as err:
    debug "newPayload failed", msg = err.msg
    return PayloadExecutionStatus.syncing

proc runQueueProcessingLoop*(self: ref BlockProcessor) {.async.} =
  ## Endless consumer loop of the block queue. For every dequeued block the
  ## execution payload (if the block carries a non-default one) is first
  ## handed to the execution engine; only blocks whose payload status is
  ## `valid` go on to `processBlock`. On every other path a non-nil `resfut`
  ## is completed here with `Invalid` or `MissingParent`.
  # Don't want to vacillate between "optimistic" sync and non-optimistic
  # sync heads. Relies on runQueueProcessingLoop() being the only place,
  # in Nimbus, which does this.
  # NOTE(review): neither of the two variables below is read or written again
  # in the code visible here - confirm whether they are still needed.
  var
    optForkchoiceHeadSlot = GENESIS_SLOT # safe default
    optForkchoiceHeadRoot: Eth2Digest

  while true:
    # Cooperative concurrency: one block per loop iteration - because
    # we run both networking and CPU-heavy things like block processing
    # on the same thread, we need to make sure that there is steady progress
    # on the networking side or we get long lockups that lead to timeouts.
    const
      # We cap waiting for an idle slot in case there's a lot of network traffic
      # taking up all CPU - we don't want to _completely_ stop processing blocks
      # in this case - doing so also allows us to benefit from more batching /
      # larger network reads when under load.
      idleTimeout = 10.milliseconds

      # Reference value used to detect blocks without a real execution payload
      defaultBellatrixPayload = default(bellatrix.ExecutionPayload)

    discard await idleAsync().withTimeout(idleTimeout)

    let
      blck = await self[].blockQueue.popFirst()
      hasExecutionPayload = blck.blck.kind >= BeaconBlockFork.Bellatrix
      executionPayloadStatus =
        if  hasExecutionPayload and
            # Allow local testnets to run without requiring an execution layer
            blck.blck.bellatrixData.message.body.execution_payload !=
              defaultBellatrixPayload:
          try:
            # Minimize window for Eth1 monitor to shut down connection
            await self.consensusManager.eth1Monitor.ensureDataProvider()

            await newExecutionPayload(
              self.consensusManager.eth1Monitor,
              blck.blck.bellatrixData.message.body.execution_payload)
          except CatchableError as err:
            debug "runQueueProcessingLoop: newPayload failed",
              err = err.msg
            PayloadExecutionStatus.syncing
        else:
          # Vacuously
          PayloadExecutionStatus.valid

    # The execution engine rejected the payload: report the block as invalid
    # and skip consensus processing altogether
    if executionPayloadStatus in [
        PayloadExecutionStatus.invalid,
        PayloadExecutionStatus.invalid_block_hash]:
      debug "runQueueProcessingLoop: execution payload invalid",
        executionPayloadStatus
      if not blck.resfut.isNil:
        blck.resfut.complete(Result[void, BlockError].err(BlockError.Invalid))
      continue

    if  executionPayloadStatus in
          [PayloadExecutionStatus.accepted, PayloadExecutionStatus.syncing] and
        hasExecutionPayload:
      # The EL client doesn't know here whether the payload is valid, because,
      # for example, in Geth's case, its parent isn't known. When Geth logs an
      # "Ignoring payload with missing parent" message, this is the result. It
      # is distinct from the invalid cases above, and shouldn't cause the same
      # BlockError.Invalid error, because it doesn't reflect badly on the peer
      # sending it, it's just not fully verifiable yet for this node.
      # Furthermore, the EL client can, e.g. via Geth, "rely on the beacon
      # client to forcefully update the head with a forkchoice update request".
      # This can occur when an EL client is substantially more synced than a CL
      # client, and when a CL client in that position attempts to serially sync
      # it will encounter potential for this message until it nearly catches up,
      # unless using an approach such as forkchoiceUpdated to trigger sync.
      #
      # Since the code doesn't reach here unless it's not optimistic sync, and
      # it doesn't set the finalized block, it's safe enough to try this chain
      # until either the EL client resyncs or returns invalid on the chain.
      #
      # Returning the MissingParent error causes the sync manager to loop in
      # place until the EL does resync/catch up, then the normal process can
      # resume where there's a hybrid serial and optimistic sync model.
      #
      # When this occurs within a couple of epochs of the Merge, before there
      # has been a chance to justify and finalize a post-merge block this can
      # cause a sync deadlock unless the EL can be convinced to sync back, or
      # the CL is rather more open-endedly optimistic (potentially for entire
      # weak subjectivity periods) than seems optimal.
      debug "runQueueProcessingLoop: execution payload accepted or syncing",
        executionPayloadStatus

      # Point the execution engine at this block's payload hash as head
      await self.runForkchoiceUpdated(
        blck.blck.bellatrixData.message.body.execution_payload.block_hash,
        self.consensusManager.dag.finalizedHead.blck.executionBlockRoot)

      if not blck.resfut.isNil:
        blck.resfut.complete(Result[void, BlockError].err(
          BlockError.MissingParent))
      continue

    if executionPayloadStatus == PayloadExecutionStatus.valid:
      # `processBlock` takes care of completing `resfut`
      self[].processBlock(blck)
    else:
      # Every non-nil future must be completed here, but don't want to process
      # the block any further in CL terms. Also don't want to specify Invalid,
      # as if it gets here, it's something more like MissingParent (except, on
      # the EL side).
      if not blck.resfut.isNil:
        blck.resfut.complete(
          Result[void, BlockError].err(BlockError.MissingParent))

    # After a block with a valid payload was processed, tell the execution
    # engine about the DAG's current head and finalized execution block roots
    if  executionPayloadStatus == PayloadExecutionStatus.valid and
        hasExecutionPayload:
      await self.runForkchoiceUpdated(
        self.consensusManager.dag.head.executionBlockRoot,
        self.consensusManager.dag.finalizedHead.blck.executionBlockRoot)