Always-on optimistic mode (#4458)

With https://github.com/status-im/nimbus-eth2/pull/4420 implemented, the
checks that we perform are equivalent to those of a `SYNCING` EL - as
such, we can treat a missing EL the same as a `SYNCING` one and proceed
with optimistic sync.
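
To make the new behaviour concrete, here is a minimal sketch of the
dispatch, under simplified assumptions - `PayloadStatus` and
`queryExecutionClient` are illustrative stand-ins, not the actual
nimbus-eth2 types:

```nim
# Illustrative sketch only: an unreachable EL is mapped to the same
# status as an EL that answers SYNCING, so optimistic sync continues
# instead of block processing stalling.
type
  PayloadStatus = enum
    psValid, psInvalid, psSyncing

proc queryExecutionClient(elAvailable: bool): PayloadStatus =
  # Stand-in for a newPayload/forkchoiceUpdated round-trip
  if not elAvailable:
    # No EL connected (or it errored): same signal as a syncing EL
    return psSyncing
  psValid # pretend the EL validated the payload

when isMainModule:
  doAssert queryExecutionClient(elAvailable = false) == psSyncing
```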

This mode of operation significantly speeds up recovery after an offline
EL event because the CL is already synced and can immediately inform the
EL of the latest head.

It also allows using a beacon node for consensus archival queries
without an execution client.

* deprecate `--optimistic` flag
* log block details on EL error, soften log level because we can now
continue to operate
* `UnviableFork` -> `Invalid` when block hash verification fails -
  failed hash verification is not a fork-related block issue (see the
  sketch after this list)
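
To illustrate the last point: with no EL attached, the CL itself checks
that the declared execution block hash matches the hash computed from
the header (Keccak256 over the RLP-encoded header, per the Engine API
spec). A simplified sketch, where `HeaderStub` and `computeBlockHash`
are illustrative placeholders rather than the real types:

```nim
# Sketch, not nimbus-eth2 code: a block whose declared execution block
# hash does not match the hash computed from its header is malformed in
# itself (`Invalid`) - it is not evidence of an unviable fork.
import std/hashes

type
  HeaderStub = object
    parentHash: string
    blockNumber: uint64

proc computeBlockHash(h: HeaderStub): Hash =
  # Placeholder for Keccak256(RLP(ExecutionBlockHeader))
  hash(h.parentHash & "/" & $h.blockNumber)

proc verifyBlockHash(h: HeaderStub, declared: Hash): bool =
  computeBlockHash(h) == declared

when isMainModule:
  let hdr = HeaderStub(parentHash: "0xaa", blockNumber: 100)
  doAssert verifyBlockHash(hdr, computeBlockHash(hdr))
```

A mismatch here means the block itself is bad, which is why the error
code changes from `UnviableFork` to `Invalid`.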
Jacek Sieka, 2023-01-04 16:51:14 +01:00, committed by GitHub
parent 8251cc223d
commit 7c2ed5c609

6 changed files with 38 additions and 52 deletions

@@ -180,9 +180,9 @@ type
       name: "web3-force-polling" .}: bool

     optimistic* {.
-      defaultValue: false
-      desc: "Run the node in optimistic mode, allowing it to optimistically sync without an execution client"
-      name: "optimistic".}: bool
+      hidden # deprecated > 22.12
+      desc: "Run the node in optimistic mode, allowing it to optimistically sync without an execution client (flag deprecated, always on)"
+      name: "optimistic".}: Option[bool]

     requireEngineAPI* {.
       hidden # Deprecated > 22.9

@@ -188,7 +188,10 @@ proc runForkchoiceUpdated*(
       forkchoiceUpdated(
         eth1Monitor, headBlockRoot, safeBlockRoot, finalizedBlockRoot),
       FORKCHOICEUPDATED_TIMEOUT):
-    debug "runForkchoiceUpdated: forkchoiceUpdated timed out"
+    debug "runForkchoiceUpdated: forkchoiceUpdated timed out",
+      headBlockRoot = shortLog(headBlockRoot),
+      safeBlockRoot = shortLog(safeBlockRoot),
+      finalizedBlockRoot = shortLog(finalizedBlockRoot)
     ForkchoiceUpdatedResponse(
       payloadStatus: PayloadStatusV1(
         status: PayloadExecutionStatus.syncing))
@@ -201,8 +204,11 @@ proc runForkchoiceUpdated*(
     return (fcuR.payloadStatus.status, fcuR.payloadStatus.latestValidHash)
   except CatchableError as err:
-    error "runForkchoiceUpdated: forkchoiceUpdated failed",
-      err = err.msg
+    warn "forkchoiceUpdated failed - check execution client",
+      err = err.msg,
+      headBlockRoot = shortLog(headBlockRoot),
+      safeBlockRoot = shortLog(safeBlockRoot),
+      finalizedBlockRoot = shortLog(finalizedBlockRoot)
     return (PayloadExecutionStatus.syncing, none BlockHash)

 proc runForkchoiceUpdatedDiscardResult*(

@@ -86,10 +86,6 @@ type
     verifier: BatchVerifier

-    optimistic: bool
-      ## Run block processor in optimistic mode allowing it to progress even
-      ## though execution client is offline
-
   NewPayloadStatus {.pure.} = enum
     valid
     notValid
@@ -114,8 +110,7 @@ proc new*(T: type BlockProcessor,
           rng: ref HmacDrbgContext, taskpool: TaskPoolPtr,
           consensusManager: ref ConsensusManager,
           validatorMonitor: ref ValidatorMonitor,
-          getBeaconTime: GetBeaconTimeFn,
-          optimistic: bool = false): ref BlockProcessor =
+          getBeaconTime: GetBeaconTimeFn): ref BlockProcessor =
   (ref BlockProcessor)(
     dumpEnabled: dumpEnabled,
     dumpDirInvalid: dumpDirInvalid,
@@ -124,8 +119,7 @@ proc new*(T: type BlockProcessor,
     consensusManager: consensusManager,
     validatorMonitor: validatorMonitor,
     getBeaconTime: getBeaconTime,
-    verifier: BatchVerifier(rng: rng, taskpool: taskpool),
-    optimistic: optimistic
+    verifier: BatchVerifier(rng: rng, taskpool: taskpool)
   )

 # Sync callbacks
@@ -286,7 +280,11 @@ proc newExecutionPayload*(
     return Opt.some payloadStatus
   except CatchableError as err:
-    error "newPayload failed", msg = err.msg
+    warn "newPayload failed - check execution client",
+      msg = err.msg,
+      parentHash = shortLog(executionPayload.parent_hash),
+      blockHash = shortLog(executionPayload.block_hash),
+      blockNumber = executionPayload.block_number
     return Opt.none PayloadExecutionStatus

 # TODO investigate why this seems to allow compilation even though it doesn't
@@ -300,12 +298,6 @@ proc newExecutionPayload*(
     Future[Opt[PayloadExecutionStatus]] {.async.} =
   debugRaiseAssert $eip4844ImplementationMissing & ": block_processor.nim:newExecutionPayload"

-proc getExecutionValidity(
-    eth1Monitor: Eth1Monitor,
-    blck: phase0.SignedBeaconBlock | altair.SignedBeaconBlock):
-    Future[NewPayloadStatus] {.async.} =
-  return NewPayloadStatus.valid # vacuously
-
 proc getExecutionValidity(
     eth1Monitor: Eth1Monitor,
     blck: bellatrix.SignedBeaconBlock | capella.SignedBeaconBlock):
@@ -366,7 +358,10 @@ proc storeBlock*(
     vm = self.validatorMonitor
     dag = self.consensusManager.dag
     payloadStatus =
-      await self.consensusManager.eth1Monitor.getExecutionValidity(signedBlock)
+      when typeof(signedBlock).toFork() >= BeaconBlockFork.Bellatrix:
+        await self.consensusManager.eth1Monitor.getExecutionValidity(signedBlock)
+      else:
+        NewPayloadStatus.valid # vacuously
     payloadValid = payloadStatus == NewPayloadStatus.valid

   # The block is certainly not missing any more
@@ -377,16 +372,8 @@ proc storeBlock*(
     return err((VerifierError.UnviableFork, ProcessingStatus.completed))

   if NewPayloadStatus.noResponse == payloadStatus:
-    if not self[].optimistic:
-      # Disallow the `MissingParent` from leaking to the sync/request managers
-      # as it will be descored. However sync and request managers interact via
-      # `processBlock` (indirectly). `validator_duties` does call `storeBlock`
-      # directly, so is exposed to this, but only cares about whether there is
-      # an error or not.
-      if self[].consensusManager.eth1Monitor.isNil:
-        warn "Attempting to process execution payload without execution client. Ensure --web3-url setting is correct and JWT is configured."
-      return err((VerifierError.MissingParent, ProcessingStatus.notCompleted))
+    # When the execution layer is not available to verify the payload, we do the
+    # required check on the CL side instead and proceed as if the EL was syncing

     # Client software MUST validate blockHash value as being equivalent to
     # Keccak256(RLP(ExecutionBlockHeader))
@@ -394,10 +381,10 @@ proc storeBlock*(
     when typeof(signedBlock).toFork() >= BeaconBlockFork.Bellatrix:
       template payload(): auto = signedBlock.message.body.execution_payload
       if payload.block_hash != payload.compute_execution_block_hash():
-        debug "EL block hash validation failed", execution_payload = payload
+        debug "Execution block hash validation failed", execution_payload = payload
         doAssert strictVerification notin dag.updateFlags
         self.consensusManager.quarantine[].addUnviable(signedBlock.root)
-        return err((VerifierError.UnviableFork, ProcessingStatus.completed))
+        return err((VerifierError.Invalid, ProcessingStatus.completed))
     else:
       discard

@@ -299,8 +299,7 @@ proc initFullNode(
       config.defaultFeeRecipient)
     blockProcessor = BlockProcessor.new(
       config.dumpEnabled, config.dumpDirInvalid, config.dumpDirIncoming,
-      rng, taskpool, consensusManager, node.validatorMonitor, getBeaconTime,
-      optimistic = config.optimistic)
+      rng, taskpool, consensusManager, node.validatorMonitor, getBeaconTime)
     blockVerifier = proc(signedBlock: ForkedSignedBeaconBlock):
         Future[Result[void, VerifierError]] =
       # The design with a callback for block verification is unusual compared
@@ -1784,6 +1783,7 @@ proc doRunBeaconNode(config: var BeaconNodeConf, rng: ref HmacDrbgContext) {.rai
   ignoreDeprecatedOption requireEngineAPI
   ignoreDeprecatedOption safeSlotsToImportOptimistically
   ignoreDeprecatedOption terminalTotalDifficultyOverride
+  ignoreDeprecatedOption optimistic

   createPidFile(config.dataDir.string / "beacon_node.pid")

@@ -1,11 +1,11 @@
 # Run an execution client

-In order to run a beacon node, you need to also be running an execution client - at least one for each beacon node.
+In order to perform validation duties, you need to also be running an execution client - at least one for each beacon node.

 Nimbus has been tested with all major execution clients - see the [execution client comparison](https://ethereum.org/en/developers/docs/nodes-and-clients/#execution-clients) for more information.

 !!! warning
-    You need to run your own execution client - relying on third-party services such as Infura, Alchemy and Pocket is no longer possible.
+    You need to run your own execution client - relying on third-party services such as Infura, Alchemy and Pocket is no longer possible. Sharing the same execution client between multiple beacon nodes is not supported.

 !!! info
     Syncing an execution client may take hours or even days, depending on your hardware! The backup providers will be synced only when the primary becomes unavailable, which may lead to a small gap in validation duties - this limitation may be lifted in future versions.
@@ -14,7 +14,7 @@ Nimbus has been tested with all major execution clients
 ### 1. Install execution client

-Select an execution client and install it, configuring it such that WebSockets are enabled and a JWT secret file is created.
+Select an execution client and install it, configuring it such that the authenticated JSON-RPC interface is enabled and a JWT secret file is created.

 === "Nimbus"
@@ -75,7 +75,7 @@ Select an execution client and install it
 ### 2. Leave the execution client running

-The execution client will be syncing the chain through the merge transition block. Once it reaches this point, it will wait for the beacon node to provide further sync instructions.
+The execution client needs to be running at all times in order for the beacon node to be able to support validators. It will start its syncing process as soon as the beacon node connects to it - once both are synced, they will continue to work in tandem to validate the latest Ethereum state.

 It is safe to start the beacon node even if the execution client is not yet fully synced and vice versa.

@@ -6,6 +6,11 @@ Once the execution client has caught up, the consensus and execution clients work
 Both execution and consensus clients must be fully synced to perform validation duties - while optimistically synced, validator duties (attestation, sync committee and block production work) are skipped.

+!!! info "Running without execution client"
+    Nimbus continues to sync optimistically when the execution client is not available thanks to its built-in execution payload verifier.
+
+    This feature is available from `v23.1.0` onwards. A preview of the feature could be enabled with `--optimistic` in earlier versions - this flag is no longer needed.
+
 ## Identifying optimistic sync

 An optimistically synced node can be identified by examining the "Slot start" log message - when optimistically synced, the `sync` key will have a `/opt` suffix, indicating that it's waiting for the execution client to catch up:
@@ -13,15 +18,3 @@ An optimistically synced node can be identified by examining the "Slot start" log
 ```
 INF 2022-10-26 18:57:35.000+02:00 Slot start topics="beacnde" slot=4998286 epoch=156196 sync=synced/opt peers=29 head=f21d399e:4998285 finalized=156194:91e2ebaf delay=467us953ns
 ```
-
-## Optimistic mode
-
-In "optimistic" mode, Nimbus will start syncing optimistically without an execution client present, as normally required:
-
-```sh
-# Start in optimistic mode which allows syncing the beacon chain without an execution client, albeit with reduced security and functionality
-./run-mainnet-beacon-node.sh --optimistic
-```
-
-!!! warning
-    An optimistically synced node is less secure than a fully synced node: it has not validated that the transactions in blocks received from the network are valid - as such, it is not suitable for validation duties (where block contents have not yet been validated by a supermajority of validators) and may be unsuitable for other uses.