nimbus-eth2/beacon_chain/gossip_processing/eth2_processor.nim

566 lines
21 KiB
Nim
Raw Normal View History

# beacon_chain
# Copyright (c) 2018-2022 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
{.push raises: [Defect].}
# References to `vFuture` refer to the pre-release proposal of the libp2p based
# light client sync protocol. Conflicting release versions are not in use.
# https://github.com/ethereum/consensus-specs/pull/2802
import
std/tables,
stew/results, bearssl,
chronicles, chronos, metrics, taskpools,
../spec/[helpers, forks],
../spec/datatypes/[altair, phase0],
../consensus_object_pools/[
block_clearance, block_quarantine, blockchain_dag, exit_pool, attestation_pool,
light_client_pool, sync_committee_msg_pool],
../validators/validator_pool,
../beacon_clock,
"."/[gossip_validation, block_processor, batch_validation]
export
results, bearssl, taskpools, block_clearance, blockchain_dag, exit_pool, attestation_pool,
light_client_pool, sync_committee_msg_pool, validator_pool, beacon_clock,
gossip_validation, block_processor, batch_validation, block_quarantine
logScope: topics = "gossip_eth2"
# Metrics for tracking attestation and beacon block loss
declareCounter beacon_attestations_received,
"Number of valid unaggregated attestations processed by this node"
declareCounter beacon_attestations_dropped,
"Number of invalid unaggregated attestations dropped by this node", labels = ["reason"]
declareCounter beacon_aggregates_received,
"Number of valid aggregated attestations processed by this node"
declareCounter beacon_aggregates_dropped,
"Number of invalid aggregated attestations dropped by this node", labels = ["reason"]
declareCounter beacon_blocks_received,
"Number of valid blocks processed by this node"
declareCounter beacon_blocks_dropped,
"Number of invalid blocks dropped by this node", labels = ["reason"]
declareCounter beacon_attester_slashings_received,
"Number of valid attester slashings processed by this node"
declareCounter beacon_attester_slashings_dropped,
"Number of invalid attester slashings dropped by this node", labels = ["reason"]
declareCounter beacon_proposer_slashings_received,
"Number of valid proposer slashings processed by this node"
declareCounter beacon_proposer_slashings_dropped,
"Number of invalid proposer slashings dropped by this node", labels = ["reason"]
declareCounter beacon_voluntary_exits_received,
"Number of valid voluntary exits processed by this node"
declareCounter beacon_voluntary_exits_dropped,
"Number of invalid voluntary exits dropped by this node", labels = ["reason"]
declareCounter beacon_sync_committee_messages_received,
"Number of valid sync committee messages processed by this node"
declareCounter beacon_sync_committee_messages_dropped,
"Number of invalid sync committee messages dropped by this node", labels = ["reason"]
declareCounter beacon_sync_committee_contributions_received,
"Number of valid sync committee contributions processed by this node"
declareCounter beacon_sync_committee_contributions_dropped,
"Number of invalid sync committee contributions dropped by this node", labels = ["reason"]
const delayBuckets = [2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, Inf]
declareHistogram beacon_attestation_delay,
"Time(s) between slot start and attestation reception", buckets = delayBuckets
declareHistogram beacon_aggregate_delay,
"Time(s) between slot start and aggregate reception", buckets = delayBuckets
declareHistogram beacon_block_delay,
"Time(s) between slot start and beacon block reception", buckets = delayBuckets
type
DoppelgangerProtection = object
broadcastStartEpoch*: Epoch ##\
## Set anew, each time gossip is re-enabled after syncing completes, so
## might reset multiple times per instance. This allows some safe level
## of gossip interleaving between nodes so long as they don't gossip at
## the same time.
nodeLaunchSlot*: Slot ##\
## Set once, at node launch. This functions as a basic protection against
## false positives from attestations persisting within the gossip network
## across quick restarts.
Eth2Processor* = object
## The Eth2Processor is the entry point for untrusted message processing -
## when we receive messages from various sources, we pass them to the
## processor for validation and routing - the messages are generally
## validated, and if valid, passed on to the various pools, monitors and
## managers to update the state of the application.
##
## Block processing is special in that part of it is done in the
## `BlockProcessor` instead, via a special block processing queue.
##
## Each validating function generally will do a sanity check on the message
## whose purpose is to quickly filter out spam, then will (usually) delegate
## full validation to the proper manager - finally, metrics and monitoring
## are updated.
doppelgangerDetectionEnabled*: bool
# Local sources of truth for validation
# ----------------------------------------------------------------
dag*: ChainDAGRef
attestationPool*: ref AttestationPool
validatorPool: ref ValidatorPool
syncCommitteeMsgPool: ref SyncCommitteeMsgPool
lightClientPool: ref LightClientPool
doppelgangerDetection*: DoppelgangerProtection
# Gossip validated -> enqueue for further verification
# ----------------------------------------------------------------
blockProcessor: ref BlockProcessor
# Validator monitoring
validatorMonitor: ref ValidatorMonitor
# Validated with no further verification required
# ----------------------------------------------------------------
exitPool: ref ExitPool
# Almost validated, pending cryptographic signature check
# ----------------------------------------------------------------
2021-04-03 01:50:47 +00:00
batchCrypto*: ref BatchCrypto
# Missing information
# ----------------------------------------------------------------
quarantine*: ref Quarantine
# Application-provided current time provider (to facilitate testing)
getCurrentBeaconTime*: GetBeaconTimeFn
ValidationRes* = Result[void, ValidationError]
func toValidationResult*(res: ValidationRes): ValidationResult =
if res.isOk(): ValidationResult.Accept else: res.error()[0]
# Initialization
# ------------------------------------------------------------------------------
proc new*(T: type Eth2Processor,
doppelgangerDetectionEnabled: bool,
blockProcessor: ref BlockProcessor,
validatorMonitor: ref ValidatorMonitor,
dag: ChainDAGRef,
attestationPool: ref AttestationPool,
exitPool: ref ExitPool,
validatorPool: ref ValidatorPool,
syncCommitteeMsgPool: ref SyncCommitteeMsgPool,
lightClientPool: ref LightClientPool,
quarantine: ref Quarantine,
rng: ref BrHmacDrbgContext,
getBeaconTime: GetBeaconTimeFn,
taskpool: TaskPoolPtr
): ref Eth2Processor =
(ref Eth2Processor)(
doppelgangerDetectionEnabled: doppelgangerDetectionEnabled,
doppelgangerDetection: DoppelgangerProtection(
nodeLaunchSlot: getBeaconTime().slotOrZero,
broadcastStartEpoch: FAR_FUTURE_EPOCH),
blockProcessor: blockProcessor,
validatorMonitor: validatorMonitor,
dag: dag,
attestationPool: attestationPool,
exitPool: exitPool,
validatorPool: validatorPool,
syncCommitteeMsgPool: syncCommitteeMsgPool,
lightClientPool: lightClientPool,
quarantine: quarantine,
getCurrentBeaconTime: getBeaconTime,
batchCrypto: BatchCrypto.new(
rng = rng,
# Only run eager attestation signature verification if we're not
# processing blocks in order to give priority to block processing
eager = proc(): bool = not blockProcessor[].hasBlocks(),
taskpool)
)
# Each validator logs, validates then passes valid data to its destination
# further down the line - in particular, validation should generally not have
# any side effects until the message is fully validated, or invalid messages
# could be used to push out valid messages.
proc blockValidator*(
self: var Eth2Processor, src: MsgSource,
signedBlock: ForkySignedBeaconBlock): ValidationRes =
let
wallTime = self.getCurrentBeaconTime()
(afterGenesis, wallSlot) = wallTime.toSlot()
logScope:
blockRoot = shortLog(signedBlock.root)
blck = shortLog(signedBlock.message)
signature = shortLog(signedBlock.signature)
wallSlot
if not afterGenesis:
notice "Block before genesis"
return errIgnore("Block before genesis")
# Potential under/overflows are fine; would just create odd metrics and logs
let delay = wallTime - signedBlock.message.slot.start_beacon_time
# Start of block processing - in reality, we have already gone through SSZ
# decoding at this stage, which may be significant
debug "Block received", delay
let v =
self.dag.validateBeaconBlock(self.quarantine, signedBlock, wallTime, {})
if v.isOk():
# Block passed validation - enqueue it for processing. The block processing
# queue is effectively unbounded as we use a freestanding task to enqueue
# the block - this is done so that when blocks arrive concurrently with
# sync, we don't lose the gossip blocks, but also don't block the gossip
# propagation of seemingly good blocks
trace "Block validated"
self.blockProcessor[].addBlock(
src, ForkedSignedBeaconBlock.init(signedBlock),
validationDur = nanoseconds(
(self.getCurrentBeaconTime() - wallTime).nanoseconds))
# Validator monitor registration for blocks is done by the processor
beacon_blocks_received.inc()
beacon_block_delay.observe(delay.toFloatSeconds())
else:
debug "Dropping block", error = v.error()
self.blockProcessor[].dumpInvalidBlock(signedBlock)
beacon_blocks_dropped.inc(1, [$v.error[0]])
v
proc setupDoppelgangerDetection*(self: var Eth2Processor, slot: Slot) =
# When another client's already running, this is very likely to detect
# potential duplicate validators, which can trigger slashing.
#
# Every missed attestation costs approximately 3*get_base_reward(), which
# can be up to around 10,000 Wei. Thus, skipping attestations isn't cheap
# and one should gauge the likelihood of this simultaneous launch to tune
# the epoch delay to one's perceived risk.
if self.validatorPool[].count() > 0:
const duplicateValidatorEpochs = 2
self.doppelgangerDetection.broadcastStartEpoch =
slot.epoch + duplicateValidatorEpochs
if self.doppelgangerDetectionEnabled:
notice "Setting up doppelganger detection",
epoch = slot.epoch,
broadcastStartEpoch = self.doppelgangerDetection.broadcastStartEpoch
proc checkForPotentialDoppelganger(
self: var Eth2Processor, attestation: Attestation,
attesterIndices: openArray[ValidatorIndex]) =
# Only check for attestations after node launch. There might be one slot of
# overlap in quick intra-slot restarts so trade off a few true negatives in
# the service of avoiding more likely false positives.
limit by-root requests to non-finalized blocks (#3293) * limit by-root requests to non-finalized blocks Presently, we keep a mapping from block root to `BlockRef` in memory - this has simplified reasoning about the dag, but is not sustainable with the chain growing. We can distinguish between two cases where by-root access is useful: * unfinalized blocks - this is where the beacon chain is operating generally, by validating incoming data as interesting for future fork choice decisions - bounded by the length of the unfinalized period * finalized blocks - historical access in the REST API etc - no bounds, really In this PR, we limit the by-root block index to the first use case: finalized chain data can more efficiently be addressed by slot number. Future work includes: * limiting the `BlockRef` horizon in general - each instance is 40 bytes+overhead which adds up - this needs further refactoring to deal with the tail vs state problem * persisting the finalized slot-to-hash index - this one also keeps growing unbounded (albeit slowly) Anyway, this PR easily shaves ~128mb of memory usage at the time of writing. * No longer honor `BeaconBlocksByRoot` requests outside of the non-finalized period - previously, Nimbus would generously return any block through this libp2p request - per the spec, finalized blocks should be fetched via `BeaconBlocksByRange` instead. * return `Opt[BlockRef]` instead of `nil` when blocks can't be found - this becomes a lot more common now and thus deserves more attention * `dag.blocks` -> `dag.forkBlocks` - this index only carries unfinalized blocks from now - `finalizedBlocks` covers the other `BlockRef` instances * in backfill, verify that the last backfilled block leads back to genesis, or panic * add backfill timings to log * fix missing check that `BlockRef` block can be fetched with `getForkedBlock` reliably * shortcut doppelganger check when feature is not enabled * in REST/JSON-RPC, fetch blocks without involving `BlockRef` * fix dag.blocks ref
2022-01-21 11:33:16 +00:00
if not self.doppelgangerDetectionEnabled:
return
if attestation.data.slot <= self.doppelgangerDetection.nodeLaunchSlot + 1:
return
if attestation.data.slot.epoch <
self.doppelgangerDetection.broadcastStartEpoch and
self.doppelgangerDetection.nodeLaunchSlot > GENESIS_SLOT:
for validatorIndex in attesterIndices:
limit by-root requests to non-finalized blocks (#3293) * limit by-root requests to non-finalized blocks Presently, we keep a mapping from block root to `BlockRef` in memory - this has simplified reasoning about the dag, but is not sustainable with the chain growing. We can distinguish between two cases where by-root access is useful: * unfinalized blocks - this is where the beacon chain is operating generally, by validating incoming data as interesting for future fork choice decisions - bounded by the length of the unfinalized period * finalized blocks - historical access in the REST API etc - no bounds, really In this PR, we limit the by-root block index to the first use case: finalized chain data can more efficiently be addressed by slot number. Future work includes: * limiting the `BlockRef` horizon in general - each instance is 40 bytes+overhead which adds up - this needs further refactoring to deal with the tail vs state problem * persisting the finalized slot-to-hash index - this one also keeps growing unbounded (albeit slowly) Anyway, this PR easily shaves ~128mb of memory usage at the time of writing. * No longer honor `BeaconBlocksByRoot` requests outside of the non-finalized period - previously, Nimbus would generously return any block through this libp2p request - per the spec, finalized blocks should be fetched via `BeaconBlocksByRange` instead. * return `Opt[BlockRef]` instead of `nil` when blocks can't be found - this becomes a lot more common now and thus deserves more attention * `dag.blocks` -> `dag.forkBlocks` - this index only carries unfinalized blocks from now - `finalizedBlocks` covers the other `BlockRef` instances * in backfill, verify that the last backfilled block leads back to genesis, or panic * add backfill timings to log * fix missing check that `BlockRef` block can be fetched with `getForkedBlock` reliably * shortcut doppelganger check when feature is not enabled * in REST/JSON-RPC, fetch blocks without involving `BlockRef` * fix dag.blocks ref
2022-01-21 11:33:16 +00:00
let validatorPubkey = self.dag.validatorKey(validatorIndex).get().toPubKey()
if not isNil(self.validatorPool[].getValidator(validatorPubkey)):
warn "We believe you are currently running another instance of the same validator. We've disconnected you from the network as this presents a significant slashing risk. Possible next steps are (a) making sure you've disconnected your validator from your old machine before restarting the client; and (b) running the client again with the gossip-slashing-protection option disabled, only if you are absolutely sure this is the only instance of your validator running, and reporting the issue at https://github.com/status-im/nimbus-eth2/issues.",
validatorIndex,
validatorPubkey,
attestation = shortLog(attestation)
quit QuitFailure
proc attestationValidator*(
self: ref Eth2Processor, src: MsgSource,
attestation: Attestation, subnet_id: SubnetId,
checkSignature: bool = true): Future[ValidationRes] {.async.} =
var wallTime = self.getCurrentBeaconTime()
let (afterGenesis, wallSlot) = wallTime.toSlot()
logScope:
attestation = shortLog(attestation)
subnet_id
wallSlot
if not afterGenesis:
notice "Attestation before genesis"
return errIgnore("Attestation before genesis")
# Potential under/overflows are fine; would just create odd metrics and logs
let delay = wallTime - attestation.data.slot.start_beacon_time
debug "Attestation received", delay
# Now proceed to validation
let v =
await self.attestationPool.validateAttestation(
self.batchCrypto, attestation, wallTime, subnet_id, checkSignature)
return if v.isOk():
# Due to async validation the wallTime here might have changed
wallTime = self.getCurrentBeaconTime()
let (attester_index, sig) = v.get()
self[].checkForPotentialDoppelganger(attestation, [attester_index])
trace "Attestation validated"
self.attestationPool[].addAttestation(
attestation, [attester_index], sig, wallTime)
self.validatorMonitor[].registerAttestation(
src, wallTime, attestation, attester_index)
beacon_attestations_received.inc()
beacon_attestation_delay.observe(delay.toFloatSeconds())
ok()
else:
debug "Dropping attestation", validationError = v.error
beacon_attestations_dropped.inc(1, [$v.error[0]])
err(v.error())
proc aggregateValidator*(
self: ref Eth2Processor, src: MsgSource,
signedAggregateAndProof: SignedAggregateAndProof): Future[ValidationRes] {.async.} =
var wallTime = self.getCurrentBeaconTime()
let (afterGenesis, wallSlot) = wallTime.toSlot()
logScope:
aggregate = shortLog(signedAggregateAndProof.message.aggregate)
aggregator_index = signedAggregateAndProof.message.aggregator_index
selection_proof = shortLog(signedAggregateAndProof.message.selection_proof)
signature = shortLog(signedAggregateAndProof.signature)
wallSlot
if not afterGenesis:
notice "Aggregate before genesis"
return errIgnore("Aggreagte before genesis")
# Potential under/overflows are fine; would just create odd logs
let delay =
wallTime - signedAggregateAndProof.message.aggregate.data.slot.start_beacon_time
debug "Aggregate received", delay
let v =
await self.attestationPool.validateAggregate(
self.batchCrypto, signedAggregateAndProof, wallTime)
return if v.isOk():
# Due to async validation the wallTime here might have changed
wallTime = self.getCurrentBeaconTime()
let (attesting_indices, sig) = v.get()
self[].checkForPotentialDoppelganger(
signedAggregateAndProof.message.aggregate, attesting_indices)
trace "Aggregate validated"
self.attestationPool[].addAttestation(
signedAggregateAndProof.message.aggregate, attesting_indices, sig,
wallTime)
self.validatorMonitor[].registerAggregate(
src, wallTime, signedAggregateAndProof.message, attesting_indices)
beacon_aggregates_received.inc()
beacon_aggregate_delay.observe(delay.toFloatSeconds())
ok()
else:
debug "Dropping aggregate", error = v.error
beacon_aggregates_dropped.inc(1, [$v.error[0]])
err(v.error())
proc attesterSlashingValidator*(
self: var Eth2Processor, src: MsgSource,
attesterSlashing: AttesterSlashing): ValidationRes =
logScope:
attesterSlashing = shortLog(attesterSlashing)
debug "Attester slashing received"
let v = self.exitPool[].validateAttesterSlashing(attesterSlashing)
if v.isOk():
trace "Attester slashing validated"
self.exitPool[].addMessage(attesterSlashing)
self.validatorMonitor[].registerAttesterSlashing(src, attesterSlashing)
beacon_attester_slashings_received.inc()
else:
2021-06-10 08:39:53 +00:00
debug "Dropping attester slashing", validationError = v.error
beacon_attester_slashings_dropped.inc(1, [$v.error[0]])
v
proc proposerSlashingValidator*(
self: var Eth2Processor, src: MsgSource,
proposerSlashing: ProposerSlashing): Result[void, ValidationError] =
logScope:
proposerSlashing = shortLog(proposerSlashing)
debug "Proposer slashing received"
let v = self.exitPool[].validateProposerSlashing(proposerSlashing)
if v.isOk():
trace "Proposer slashing validated"
self.exitPool[].addMessage(proposerSlashing)
self.validatorMonitor[].registerProposerSlashing(src, proposerSlashing)
beacon_proposer_slashings_received.inc()
else:
debug "Dropping proposer slashing", validationError = v.error
beacon_proposer_slashings_dropped.inc(1, [$v.error[0]])
v
proc voluntaryExitValidator*(
self: var Eth2Processor, src: MsgSource,
signedVoluntaryExit: SignedVoluntaryExit): Result[void, ValidationError] =
logScope:
signedVoluntaryExit = shortLog(signedVoluntaryExit)
debug "Voluntary exit received"
let v = self.exitPool[].validateVoluntaryExit(signedVoluntaryExit)
if v.isOk():
trace "Voluntary exit validated"
self.exitPool[].addMessage(signedVoluntaryExit)
self.validatorMonitor[].registerVoluntaryExit(
src, signedVoluntaryExit.message)
beacon_voluntary_exits_received.inc()
else:
debug "Dropping voluntary exit", error = v.error
beacon_voluntary_exits_dropped.inc(1, [$v.error[0]])
v
proc syncCommitteeMessageValidator*(
self: ref Eth2Processor, src: MsgSource,
syncCommitteeMsg: SyncCommitteeMessage,
subcommitteeIdx: SyncSubcommitteeIndex,
checkSignature: bool = true): Future[Result[void, ValidationError]] {.async.} =
let
wallTime = self.getCurrentBeaconTime()
wallSlot = wallTime.slotOrZero()
logScope:
syncCommitteeMsg = shortLog(syncCommitteeMsg)
subcommitteeIdx
wallSlot
# Potential under/overflows are fine; would just create odd metrics and logs
let delay = wallTime - syncCommitteeMsg.slot.start_beacon_time
debug "Sync committee message received", delay
# Now proceed to validation
let v = await validateSyncCommitteeMessage(
self.dag, self.batchCrypto, self.syncCommitteeMsgPool,
syncCommitteeMsg, subcommitteeIdx, wallTime, checkSignature)
return if v.isOk():
trace "Sync committee message validated"
let (positions, cookedSig) = v.get()
self.syncCommitteeMsgPool[].addSyncCommitteeMessage(
syncCommitteeMsg.slot,
syncCommitteeMsg.beacon_block_root,
syncCommitteeMsg.validator_index,
cookedSig,
subcommitteeIdx,
positions)
self.validatorMonitor[].registerSyncCommitteeMessage(
src, wallTime, syncCommitteeMsg)
beacon_sync_committee_messages_received.inc()
ok()
else:
debug "Dropping sync committee message", error = v.error
beacon_sync_committee_messages_dropped.inc(1, [$v.error[0]])
err(v.error())
proc contributionValidator*(
self: ref Eth2Processor, src: MsgSource,
contributionAndProof: SignedContributionAndProof,
checkSignature: bool = true): Future[Result[void, ValidationError]] {.async.} =
let
wallTime = self.getCurrentBeaconTime()
wallSlot = wallTime.slotOrZero()
logScope:
contribution = shortLog(contributionAndProof.message.contribution)
signature = shortLog(contributionAndProof.signature)
aggregator_index = contributionAndProof.message.aggregator_index
selection_proof = contributionAndProof.message.selection_proof
wallSlot
# Potential under/overflows are fine; would just create odd metrics and logs
let delay = wallTime - contributionAndProof.message.contribution.slot.start_beacon_time
debug "Contribution received", delay
# Now proceed to validation
let v = await validateContribution(
self.dag, self.batchCrypto, self.syncCommitteeMsgPool,
contributionAndProof, wallTime, checkSignature)
return if v.isOk():
trace "Contribution validated"
self.syncCommitteeMsgPool[].addContribution(
contributionAndProof, v.get()[0])
self.validatorMonitor[].registerSyncContribution(
src, wallTime, contributionAndProof.message, v.get()[1])
beacon_sync_committee_contributions_received.inc()
ok()
else:
debug "Dropping contribution", error = v.error
beacon_sync_committee_contributions_dropped.inc(1, [$v.error[0]])
err(v.error())
# https://github.com/ethereum/consensus-specs/blob/vFuture/specs/altair/sync-protocol.md#light_client_finality_update
proc lightClientFinalityUpdateValidator*(
self: var Eth2Processor, src: MsgSource,
finality_update: altair.LightClientFinalityUpdate
): Result[void, ValidationError] =
let
wallTime = self.getCurrentBeaconTime()
v = validateLightClientFinalityUpdate(
self.lightClientPool[], self.dag, finality_update, wallTime)
v
# https://github.com/ethereum/consensus-specs/blob/vFuture/specs/altair/sync-protocol.md#light_client_optimistic_update
proc lightClientOptimisticUpdateValidator*(
self: var Eth2Processor, src: MsgSource,
optimistic_update: altair.LightClientOptimisticUpdate
): Result[void, ValidationError] =
let
wallTime = self.getCurrentBeaconTime()
v = validateLightClientOptimisticUpdate(
self.lightClientPool[], self.dag, optimistic_update, wallTime)
v