Always-on optimistic mode (#4458)
With https://github.com/status-im/nimbus-eth2/pull/4420 implemented, the checks that we perform are equivalent to those of a `SYNCING` EL - as such, we can treat missing EL the same as SYNCING and proceed with an optimistic sync. This mode of operation significantly speeds up recovery after an offline EL event because the CL is already synced and can immediately inform the EL of the latest head. It also allows using a beacon node for consensus archival queries without an execution client. * deprecate `--optimistic` flag * log block details on EL error, soften log level because we can now continue to operate * `UnviableFork` -> `Invalid` when block hash verification fails - failed hash verification is not a fork-related block issue
This commit is contained in:
parent
8251cc223d
commit
7c2ed5c609
|
@ -180,9 +180,9 @@ type
|
|||
name: "web3-force-polling" .}: bool
|
||||
|
||||
optimistic* {.
|
||||
defaultValue: false
|
||||
desc: "Run the node in optimistic mode, allowing it to optimistically sync without an execution client"
|
||||
name: "optimistic".}: bool
|
||||
hidden # deprecated > 22.12
|
||||
desc: "Run the node in optimistic mode, allowing it to optimistically sync without an execution client (flag deprecated, always on)"
|
||||
name: "optimistic".}: Option[bool]
|
||||
|
||||
requireEngineAPI* {.
|
||||
hidden # Deprecated > 22.9
|
||||
|
|
|
@ -188,7 +188,10 @@ proc runForkchoiceUpdated*(
|
|||
forkchoiceUpdated(
|
||||
eth1Monitor, headBlockRoot, safeBlockRoot, finalizedBlockRoot),
|
||||
FORKCHOICEUPDATED_TIMEOUT):
|
||||
debug "runForkchoiceUpdated: forkchoiceUpdated timed out"
|
||||
debug "runForkchoiceUpdated: forkchoiceUpdated timed out",
|
||||
headBlockRoot = shortLog(headBlockRoot),
|
||||
safeBlockRoot = shortLog(safeBlockRoot),
|
||||
finalizedBlockRoot = shortLog(finalizedBlockRoot)
|
||||
ForkchoiceUpdatedResponse(
|
||||
payloadStatus: PayloadStatusV1(
|
||||
status: PayloadExecutionStatus.syncing))
|
||||
|
@ -201,8 +204,11 @@ proc runForkchoiceUpdated*(
|
|||
|
||||
return (fcuR.payloadStatus.status, fcuR.payloadStatus.latestValidHash)
|
||||
except CatchableError as err:
|
||||
error "runForkchoiceUpdated: forkchoiceUpdated failed",
|
||||
err = err.msg
|
||||
warn "forkchoiceUpdated failed - check execution client",
|
||||
err = err.msg,
|
||||
headBlockRoot = shortLog(headBlockRoot),
|
||||
safeBlockRoot = shortLog(safeBlockRoot),
|
||||
finalizedBlockRoot = shortLog(finalizedBlockRoot)
|
||||
return (PayloadExecutionStatus.syncing, none BlockHash)
|
||||
|
||||
proc runForkchoiceUpdatedDiscardResult*(
|
||||
|
|
|
@ -86,10 +86,6 @@ type
|
|||
|
||||
verifier: BatchVerifier
|
||||
|
||||
optimistic: bool
|
||||
## Run block processor in optimistic mode allowing it to progress even
|
||||
## though execution client is offline
|
||||
|
||||
NewPayloadStatus {.pure.} = enum
|
||||
valid
|
||||
notValid
|
||||
|
@ -114,8 +110,7 @@ proc new*(T: type BlockProcessor,
|
|||
rng: ref HmacDrbgContext, taskpool: TaskPoolPtr,
|
||||
consensusManager: ref ConsensusManager,
|
||||
validatorMonitor: ref ValidatorMonitor,
|
||||
getBeaconTime: GetBeaconTimeFn,
|
||||
optimistic: bool = false): ref BlockProcessor =
|
||||
getBeaconTime: GetBeaconTimeFn): ref BlockProcessor =
|
||||
(ref BlockProcessor)(
|
||||
dumpEnabled: dumpEnabled,
|
||||
dumpDirInvalid: dumpDirInvalid,
|
||||
|
@ -124,8 +119,7 @@ proc new*(T: type BlockProcessor,
|
|||
consensusManager: consensusManager,
|
||||
validatorMonitor: validatorMonitor,
|
||||
getBeaconTime: getBeaconTime,
|
||||
verifier: BatchVerifier(rng: rng, taskpool: taskpool),
|
||||
optimistic: optimistic
|
||||
verifier: BatchVerifier(rng: rng, taskpool: taskpool)
|
||||
)
|
||||
|
||||
# Sync callbacks
|
||||
|
@ -286,7 +280,11 @@ proc newExecutionPayload*(
|
|||
|
||||
return Opt.some payloadStatus
|
||||
except CatchableError as err:
|
||||
error "newPayload failed", msg = err.msg
|
||||
warn "newPayload failed - check execution client",
|
||||
msg = err.msg,
|
||||
parentHash = shortLog(executionPayload.parent_hash),
|
||||
blockHash = shortLog(executionPayload.block_hash),
|
||||
blockNumber = executionPayload.block_number
|
||||
return Opt.none PayloadExecutionStatus
|
||||
|
||||
# TODO investigate why this seems to allow compilation even though it doesn't
|
||||
|
@ -300,12 +298,6 @@ proc newExecutionPayload*(
|
|||
Future[Opt[PayloadExecutionStatus]] {.async.} =
|
||||
debugRaiseAssert $eip4844ImplementationMissing & ": block_processor.nim:newExecutionPayload"
|
||||
|
||||
proc getExecutionValidity(
|
||||
eth1Monitor: Eth1Monitor,
|
||||
blck: phase0.SignedBeaconBlock | altair.SignedBeaconBlock):
|
||||
Future[NewPayloadStatus] {.async.} =
|
||||
return NewPayloadStatus.valid # vacuously
|
||||
|
||||
proc getExecutionValidity(
|
||||
eth1Monitor: Eth1Monitor,
|
||||
blck: bellatrix.SignedBeaconBlock | capella.SignedBeaconBlock):
|
||||
|
@ -366,7 +358,10 @@ proc storeBlock*(
|
|||
vm = self.validatorMonitor
|
||||
dag = self.consensusManager.dag
|
||||
payloadStatus =
|
||||
when typeof(signedBlock).toFork() >= BeaconBlockFork.Bellatrix:
|
||||
await self.consensusManager.eth1Monitor.getExecutionValidity(signedBlock)
|
||||
else:
|
||||
NewPayloadStatus.valid # vacuously
|
||||
payloadValid = payloadStatus == NewPayloadStatus.valid
|
||||
|
||||
# The block is certainly not missing any more
|
||||
|
@ -377,16 +372,8 @@ proc storeBlock*(
|
|||
return err((VerifierError.UnviableFork, ProcessingStatus.completed))
|
||||
|
||||
if NewPayloadStatus.noResponse == payloadStatus:
|
||||
if not self[].optimistic:
|
||||
# Disallow the `MissingParent` from leaking to the sync/request managers
|
||||
# as it will be descored. However sync and request managers interact via
|
||||
# `processBlock` (indirectly). `validator_duties` does call `storeBlock`
|
||||
# directly, so is exposed to this, but only cares about whether there is
|
||||
# an error or not.
|
||||
if self[].consensusManager.eth1Monitor.isNil:
|
||||
warn "Attempting to process execution payload without execution client. Ensure --web3-url setting is correct and JWT is configured."
|
||||
|
||||
return err((VerifierError.MissingParent, ProcessingStatus.notCompleted))
|
||||
# When the execution layer is not available to verify the payload, we do the
|
||||
# required check on the CL side instead and proceed as if the EL was syncing
|
||||
|
||||
# Client software MUST validate blockHash value as being equivalent to
|
||||
# Keccak256(RLP(ExecutionBlockHeader))
|
||||
|
@ -394,10 +381,10 @@ proc storeBlock*(
|
|||
when typeof(signedBlock).toFork() >= BeaconBlockFork.Bellatrix:
|
||||
template payload(): auto = signedBlock.message.body.execution_payload
|
||||
if payload.block_hash != payload.compute_execution_block_hash():
|
||||
debug "EL block hash validation failed", execution_payload = payload
|
||||
debug "Execution block hash validation failed", execution_payload = payload
|
||||
doAssert strictVerification notin dag.updateFlags
|
||||
self.consensusManager.quarantine[].addUnviable(signedBlock.root)
|
||||
return err((VerifierError.UnviableFork, ProcessingStatus.completed))
|
||||
return err((VerifierError.Invalid, ProcessingStatus.completed))
|
||||
else:
|
||||
discard
|
||||
|
||||
|
|
|
@ -299,8 +299,7 @@ proc initFullNode(
|
|||
config.defaultFeeRecipient)
|
||||
blockProcessor = BlockProcessor.new(
|
||||
config.dumpEnabled, config.dumpDirInvalid, config.dumpDirIncoming,
|
||||
rng, taskpool, consensusManager, node.validatorMonitor, getBeaconTime,
|
||||
optimistic = config.optimistic)
|
||||
rng, taskpool, consensusManager, node.validatorMonitor, getBeaconTime)
|
||||
blockVerifier = proc(signedBlock: ForkedSignedBeaconBlock):
|
||||
Future[Result[void, VerifierError]] =
|
||||
# The design with a callback for block verification is unusual compared
|
||||
|
@ -1784,6 +1783,7 @@ proc doRunBeaconNode(config: var BeaconNodeConf, rng: ref HmacDrbgContext) {.rai
|
|||
ignoreDeprecatedOption requireEngineAPI
|
||||
ignoreDeprecatedOption safeSlotsToImportOptimistically
|
||||
ignoreDeprecatedOption terminalTotalDifficultyOverride
|
||||
ignoreDeprecatedOption optimistic
|
||||
|
||||
createPidFile(config.dataDir.string / "beacon_node.pid")
|
||||
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
# Run an execution client
|
||||
|
||||
In order to run a beacon node, you need to also be running an execution client - at least one for each beacon node.
|
||||
In order to perform validation duties, you need to also be running an execution client - at least one for each beacon node.
|
||||
|
||||
Nimbus has been tested with all major execution clients - see the [execution client comparison](https://ethereum.org/en/developers/docs/nodes-and-clients/#execution-clients) for more information.
|
||||
|
||||
!!! warning
|
||||
You need to run your own execution client - relying on third-party services such as Infura, Alchemy and Pocket is no longer possible.
|
||||
You need to run your own execution client - relying on third-party services such as Infura, Alchemy and Pocket is no longer possible. Sharing the same execution client between multiple beacon nodes is not supported.
|
||||
|
||||
!!! info
|
||||
Syncing an execution client may take hours or even days, depending on your hardware! The backup providers will be synced only when the primary becomes unavailable, which may lead to a small gap in validation duties - this limitation may be lifted in future versions.
|
||||
|
@ -14,7 +14,7 @@ Nimbus has been tested all major execution clients - see the [execution client c
|
|||
|
||||
### 1. Install execution client
|
||||
|
||||
Select an execution client and install it, configuring it such that that WebSockets are enabled and a JWT secret file is created.
|
||||
Select an execution client and install it, configuring it such that the authenticated JSON-RPC interface is enabled and a JWT secret file is created.
|
||||
|
||||
=== "Nimbus"
|
||||
|
||||
|
@ -75,7 +75,7 @@ Select an execution client and install it, configuring it such that that WebSock
|
|||
|
||||
### 2. Leave the execution client running
|
||||
|
||||
The execution client will be syncing the chain through the merge transition block. Once it reaches this point, it will wait for the beacon node to provide further sync instructions.
|
||||
The execution client needs to be running at all times in order for the beacon node to be able to support validators. It will start its syncing process as soon as the beacon node connects to it - once both are synced, they will continue to work in tandem to validate the latest Ethereum state.
|
||||
|
||||
It is safe to start the beacon node even if the execution client is not yet fully synced and vice versa.
|
||||
|
||||
|
|
|
@ -6,6 +6,11 @@ Once the execution client has caught up, the consensus and execution clients wor
|
|||
|
||||
Both execution and consensus clients must be fully synced to perform validation duties - while optimistically synced, validator duties (attestation, sync committee and block production work) are skipped.
|
||||
|
||||
!!! info "Running without execution client"
|
||||
Nimbus continues to sync optimistically when the execution client is not available thanks to its built-in execution payload verifier.
|
||||
|
||||
This feature is available from `v23.1.0` onwards. A preview of the feature could be enabled with `--optimistic` in earlier versions - this flag is no longer needed.
|
||||
|
||||
## Identifying optimistic sync
|
||||
|
||||
An optimistically synced node can be identified by examining the "Slot start" log message - when optimistically synced, the `sync` key will have a `/opt` suffix, indicating that it's waiting for the execution client to catch up:
|
||||
|
@ -13,15 +18,3 @@ An optimistically synced node can be identified by examining the "Slot start" lo
|
|||
```
|
||||
INF 2022-10-26 18:57:35.000+02:00 Slot start topics="beacnde" slot=4998286 epoch=156196 sync=synced/opt peers=29 head=f21d399e:4998285 finalized=156194:91e2ebaf delay=467us953ns
|
||||
```
|
||||
|
||||
## Optimistic mode
|
||||
|
||||
In "optimistic" mode, Nimbus will start syncing optimistically without an execution client present, as normally required:
|
||||
|
||||
```sh
|
||||
# Start in optimistic mode which allows syncing the beacon chain without an execution client, albeit with reduced security and functionality
|
||||
./run-mainnet-beacon-node.sh --optimistic
|
||||
```
|
||||
|
||||
!!! warning
|
||||
An optimistically synced node is less secure than a fully synced node: it has not validated that the transactions in blocks received from the network are valid - as such, it is not suitable for validation duties (where block contents have not yet been validated by a supermajority of validators) and may be unsuitable for other uses.
|
||||
|
|
Loading…
Reference in New Issue