BN: Fix el_manager timeouts issue in block processing. (#6665)
* Fix el_manager + block_processor NEWPAYLOAD_TIMEOUT timeouts issue. Use predefined array of exponential timeouts when all the requests to EL has been failed. * Increase timeout value to (next_slot.start_time - 1.second) * Address review comments. * Do not repeat requests when node is optimistically synced.
This commit is contained in:
parent
ead72deaa2
commit
a3d4a3ee5e
|
@ -34,6 +34,10 @@ export
|
||||||
logScope:
|
logScope:
|
||||||
topics = "elman"
|
topics = "elman"
|
||||||
|
|
||||||
|
const
|
||||||
|
SleepDurations =
|
||||||
|
[100.milliseconds, 200.milliseconds, 500.milliseconds, 1.seconds]
|
||||||
|
|
||||||
type
|
type
|
||||||
FixedBytes[N: static int] = web3.FixedBytes[N]
|
FixedBytes[N: static int] = web3.FixedBytes[N]
|
||||||
PubKeyBytes = DynamicBytes[48, 48]
|
PubKeyBytes = DynamicBytes[48, 48]
|
||||||
|
@ -43,6 +47,11 @@ type
|
||||||
WithoutTimeout* = distinct int
|
WithoutTimeout* = distinct int
|
||||||
Address = web3.Address
|
Address = web3.Address
|
||||||
|
|
||||||
|
DeadlineObject* = object
|
||||||
|
# TODO (cheatfate): This object declaration could be removed when
|
||||||
|
# `Raising()` macro starts to support procedure arguments.
|
||||||
|
future*: Future[void].Raising([CancelledError])
|
||||||
|
|
||||||
SomeEnginePayloadWithValue =
|
SomeEnginePayloadWithValue =
|
||||||
BellatrixExecutionPayloadWithValue |
|
BellatrixExecutionPayloadWithValue |
|
||||||
GetPayloadV2Response |
|
GetPayloadV2Response |
|
||||||
|
@ -233,6 +242,22 @@ declareCounter engine_api_last_minute_forkchoice_updates_sent,
|
||||||
"Number of last minute requests to the forkchoiceUpdated Engine API end-point just before block proposals",
|
"Number of last minute requests to the forkchoiceUpdated Engine API end-point just before block proposals",
|
||||||
labels = ["url"]
|
labels = ["url"]
|
||||||
|
|
||||||
|
proc init*(t: typedesc[DeadlineObject], d: Duration): DeadlineObject =
|
||||||
|
DeadlineObject(future: sleepAsync(d))
|
||||||
|
|
||||||
|
proc variedSleep*(
|
||||||
|
counter: var int,
|
||||||
|
durations: openArray[Duration]
|
||||||
|
): Future[void] {.async: (raises: [CancelledError], raw: true).} =
|
||||||
|
doAssert(len(durations) > 0, "Empty durations array!")
|
||||||
|
let index =
|
||||||
|
if (counter < 0) or (counter > high(durations)):
|
||||||
|
high(durations)
|
||||||
|
else:
|
||||||
|
counter
|
||||||
|
inc(counter)
|
||||||
|
sleepAsync(durations[index])
|
||||||
|
|
||||||
proc close(connection: ELConnection): Future[void] {.async: (raises: []).} =
|
proc close(connection: ELConnection): Future[void] {.async: (raises: []).} =
|
||||||
if connection.web3.isSome:
|
if connection.web3.isSome:
|
||||||
try:
|
try:
|
||||||
|
@ -942,14 +967,20 @@ proc lazyWait(futures: seq[FutureBase]) {.async: (raises: []).} =
|
||||||
|
|
||||||
proc sendNewPayload*(
|
proc sendNewPayload*(
|
||||||
m: ELManager,
|
m: ELManager,
|
||||||
blck: SomeForkyBeaconBlock
|
blck: SomeForkyBeaconBlock,
|
||||||
|
deadlineObj: DeadlineObject,
|
||||||
|
maxRetriesCount: int
|
||||||
): Future[PayloadExecutionStatus] {.async: (raises: [CancelledError]).} =
|
): Future[PayloadExecutionStatus] {.async: (raises: [CancelledError]).} =
|
||||||
|
doAssert maxRetriesCount > 0
|
||||||
|
|
||||||
let
|
let
|
||||||
startTime = Moment.now()
|
startTime = Moment.now()
|
||||||
deadline = sleepAsync(NEWPAYLOAD_TIMEOUT)
|
deadline = deadlineObj.future
|
||||||
payload = blck.body.asEngineExecutionPayload
|
payload = blck.body.asEngineExecutionPayload
|
||||||
var
|
var
|
||||||
responseProcessor = ELConsensusViolationDetector.init()
|
responseProcessor = ELConsensusViolationDetector.init()
|
||||||
|
sleepCounter = 0
|
||||||
|
retriesCount = 0
|
||||||
|
|
||||||
while true:
|
while true:
|
||||||
block mainLoop:
|
block mainLoop:
|
||||||
|
@ -1033,18 +1064,23 @@ proc sendNewPayload*(
|
||||||
return PayloadExecutionStatus.syncing
|
return PayloadExecutionStatus.syncing
|
||||||
|
|
||||||
if len(pendingRequests) == 0:
|
if len(pendingRequests) == 0:
|
||||||
# All requests failed, we will continue our attempts until deadline
|
# All requests failed.
|
||||||
# is not finished.
|
inc(retriesCount)
|
||||||
|
if retriesCount == maxRetriesCount:
|
||||||
|
return PayloadExecutionStatus.syncing
|
||||||
|
|
||||||
# To avoid continous spam of requests when EL node is offline we
|
# To avoid continous spam of requests when EL node is offline we
|
||||||
# going to sleep until next attempt for
|
# going to sleep until next attempt.
|
||||||
# (NEWPAYLOAD_TIMEOUT / 4) time (2.seconds).
|
await variedSleep(sleepCounter, SleepDurations)
|
||||||
let timeout =
|
|
||||||
chronos.nanoseconds(NEWPAYLOAD_TIMEOUT.nanoseconds div 4)
|
|
||||||
await sleepAsync(timeout)
|
|
||||||
|
|
||||||
break mainLoop
|
break mainLoop
|
||||||
|
|
||||||
|
proc sendNewPayload*(
|
||||||
|
m: ELManager,
|
||||||
|
blck: SomeForkyBeaconBlock
|
||||||
|
): Future[PayloadExecutionStatus] {.
|
||||||
|
async: (raises: [CancelledError], raw: true).} =
|
||||||
|
sendNewPayload(m, blck, DeadlineObject.init(NEWPAYLOAD_TIMEOUT), high(int))
|
||||||
|
|
||||||
proc forkchoiceUpdatedForSingleEL(
|
proc forkchoiceUpdatedForSingleEL(
|
||||||
connection: ELConnection,
|
connection: ELConnection,
|
||||||
state: ref ForkchoiceStateV1,
|
state: ref ForkchoiceStateV1,
|
||||||
|
@ -1072,11 +1108,14 @@ proc forkchoiceUpdated*(
|
||||||
headBlockHash, safeBlockHash, finalizedBlockHash: Eth2Digest,
|
headBlockHash, safeBlockHash, finalizedBlockHash: Eth2Digest,
|
||||||
payloadAttributes: Opt[PayloadAttributesV1] |
|
payloadAttributes: Opt[PayloadAttributesV1] |
|
||||||
Opt[PayloadAttributesV2] |
|
Opt[PayloadAttributesV2] |
|
||||||
Opt[PayloadAttributesV3]
|
Opt[PayloadAttributesV3],
|
||||||
|
deadlineObj: DeadlineObject,
|
||||||
|
maxRetriesCount: int
|
||||||
): Future[(PayloadExecutionStatus, Opt[BlockHash])] {.
|
): Future[(PayloadExecutionStatus, Opt[BlockHash])] {.
|
||||||
async: (raises: [CancelledError]).} =
|
async: (raises: [CancelledError]).} =
|
||||||
|
|
||||||
doAssert not headBlockHash.isZero
|
doAssert not headBlockHash.isZero
|
||||||
|
doAssert maxRetriesCount > 0
|
||||||
|
|
||||||
# Allow finalizedBlockHash to be 0 to avoid sync deadlocks.
|
# Allow finalizedBlockHash to be 0 to avoid sync deadlocks.
|
||||||
#
|
#
|
||||||
|
@ -1132,9 +1171,12 @@ proc forkchoiceUpdated*(
|
||||||
safeBlockHash: safeBlockHash.asBlockHash,
|
safeBlockHash: safeBlockHash.asBlockHash,
|
||||||
finalizedBlockHash: finalizedBlockHash.asBlockHash)
|
finalizedBlockHash: finalizedBlockHash.asBlockHash)
|
||||||
startTime = Moment.now
|
startTime = Moment.now
|
||||||
deadline = sleepAsync(FORKCHOICEUPDATED_TIMEOUT)
|
deadline = deadlineObj.future
|
||||||
|
|
||||||
var
|
var
|
||||||
responseProcessor = ELConsensusViolationDetector.init()
|
responseProcessor = ELConsensusViolationDetector.init()
|
||||||
|
sleepCounter = 0
|
||||||
|
retriesCount = 0
|
||||||
|
|
||||||
while true:
|
while true:
|
||||||
block mainLoop:
|
block mainLoop:
|
||||||
|
@ -1216,16 +1258,28 @@ proc forkchoiceUpdated*(
|
||||||
if len(pendingRequests) == 0:
|
if len(pendingRequests) == 0:
|
||||||
# All requests failed, we will continue our attempts until deadline
|
# All requests failed, we will continue our attempts until deadline
|
||||||
# is not finished.
|
# is not finished.
|
||||||
|
inc(retriesCount)
|
||||||
|
if retriesCount == maxRetriesCount:
|
||||||
|
return (PayloadExecutionStatus.syncing, Opt.none BlockHash)
|
||||||
|
|
||||||
# To avoid continous spam of requests when EL node is offline we
|
# To avoid continous spam of requests when EL node is offline we
|
||||||
# going to sleep until next attempt for
|
# going to sleep until next attempt.
|
||||||
# (FORKCHOICEUPDATED_TIMEOUT / 4) time (2.seconds).
|
await variedSleep(sleepCounter, SleepDurations)
|
||||||
let timeout =
|
|
||||||
chronos.nanoseconds(FORKCHOICEUPDATED_TIMEOUT.nanoseconds div 4)
|
|
||||||
await sleepAsync(timeout)
|
|
||||||
|
|
||||||
break mainLoop
|
break mainLoop
|
||||||
|
|
||||||
|
proc forkchoiceUpdated*(
|
||||||
|
m: ELManager,
|
||||||
|
headBlockHash, safeBlockHash, finalizedBlockHash: Eth2Digest,
|
||||||
|
payloadAttributes: Opt[PayloadAttributesV1] |
|
||||||
|
Opt[PayloadAttributesV2] |
|
||||||
|
Opt[PayloadAttributesV3]
|
||||||
|
): Future[(PayloadExecutionStatus, Opt[BlockHash])] {.
|
||||||
|
async: (raises: [CancelledError], raw: true).} =
|
||||||
|
forkchoiceUpdated(
|
||||||
|
m, headBlockHash, safeBlockHash, finalizedBlockHash,
|
||||||
|
payloadAttributes, DeadlineObject.init(FORKCHOICEUPDATED_TIMEOUT),
|
||||||
|
high(int))
|
||||||
|
|
||||||
# TODO can't be defined within exchangeConfigWithSingleEL
|
# TODO can't be defined within exchangeConfigWithSingleEL
|
||||||
func `==`(x, y: Quantity): bool {.borrow.}
|
func `==`(x, y: Quantity): bool {.borrow.}
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@ from ../consensus_object_pools/consensus_manager import
|
||||||
updateHeadWithExecution
|
updateHeadWithExecution
|
||||||
from ../consensus_object_pools/blockchain_dag import
|
from ../consensus_object_pools/blockchain_dag import
|
||||||
getBlockRef, getForkedBlock, getProposer, forkAtEpoch, loadExecutionBlockHash,
|
getBlockRef, getForkedBlock, getProposer, forkAtEpoch, loadExecutionBlockHash,
|
||||||
markBlockVerified, validatorKey
|
markBlockVerified, validatorKey, is_optimistic
|
||||||
from ../beacon_clock import GetBeaconTimeFn, toFloatSeconds
|
from ../beacon_clock import GetBeaconTimeFn, toFloatSeconds
|
||||||
from ../consensus_object_pools/block_dag import BlockRef, root, shortLog, slot
|
from ../consensus_object_pools/block_dag import BlockRef, root, shortLog, slot
|
||||||
from ../consensus_object_pools/block_pools_types import
|
from ../consensus_object_pools/block_pools_types import
|
||||||
|
@ -230,19 +230,24 @@ from web3/engine_api_types import
|
||||||
PayloadAttributesV1, PayloadAttributesV2, PayloadAttributesV3,
|
PayloadAttributesV1, PayloadAttributesV2, PayloadAttributesV3,
|
||||||
PayloadExecutionStatus, PayloadStatusV1
|
PayloadExecutionStatus, PayloadStatusV1
|
||||||
from ../el/el_manager import
|
from ../el/el_manager import
|
||||||
ELManager, forkchoiceUpdated, hasConnection, hasProperlyConfiguredConnection,
|
ELManager, DeadlineObject, forkchoiceUpdated, hasConnection,
|
||||||
sendNewPayload
|
hasProperlyConfiguredConnection, sendNewPayload, init
|
||||||
|
|
||||||
proc expectValidForkchoiceUpdated(
|
proc expectValidForkchoiceUpdated(
|
||||||
elManager: ELManager, headBlockPayloadAttributesType: typedesc,
|
elManager: ELManager, headBlockPayloadAttributesType: typedesc,
|
||||||
headBlockHash, safeBlockHash, finalizedBlockHash: Eth2Digest,
|
headBlockHash, safeBlockHash, finalizedBlockHash: Eth2Digest,
|
||||||
receivedBlock: ForkySignedBeaconBlock): Future[void] {.async: (raises: [CancelledError]).} =
|
receivedBlock: ForkySignedBeaconBlock,
|
||||||
|
deadlineObj: DeadlineObject,
|
||||||
|
maxRetriesCount: int
|
||||||
|
): Future[void] {.async: (raises: [CancelledError]).} =
|
||||||
let
|
let
|
||||||
(payloadExecutionStatus, _) = await elManager.forkchoiceUpdated(
|
(payloadExecutionStatus, _) = await elManager.forkchoiceUpdated(
|
||||||
headBlockHash = headBlockHash,
|
headBlockHash = headBlockHash,
|
||||||
safeBlockHash = safeBlockHash,
|
safeBlockHash = safeBlockHash,
|
||||||
finalizedBlockHash = finalizedBlockHash,
|
finalizedBlockHash = finalizedBlockHash,
|
||||||
payloadAttributes = Opt.none headBlockPayloadAttributesType)
|
payloadAttributes = Opt.none headBlockPayloadAttributesType,
|
||||||
|
deadlineObj = deadlineObj,
|
||||||
|
maxRetriesCount = maxRetriesCount)
|
||||||
receivedExecutionBlockHash =
|
receivedExecutionBlockHash =
|
||||||
when typeof(receivedBlock).kind >= ConsensusFork.Bellatrix:
|
when typeof(receivedBlock).kind >= ConsensusFork.Bellatrix:
|
||||||
receivedBlock.message.body.execution_payload.block_hash
|
receivedBlock.message.body.execution_payload.block_hash
|
||||||
|
@ -277,8 +282,11 @@ from ../consensus_object_pools/attestation_pool import
|
||||||
from ../consensus_object_pools/spec_cache import get_attesting_indices
|
from ../consensus_object_pools/spec_cache import get_attesting_indices
|
||||||
|
|
||||||
proc newExecutionPayload*(
|
proc newExecutionPayload*(
|
||||||
elManager: ELManager, blck: SomeForkyBeaconBlock):
|
elManager: ELManager,
|
||||||
Future[Opt[PayloadExecutionStatus]] {.async: (raises: [CancelledError]).} =
|
blck: SomeForkyBeaconBlock,
|
||||||
|
deadlineObj: DeadlineObject,
|
||||||
|
maxRetriesCount: int
|
||||||
|
): Future[Opt[PayloadExecutionStatus]] {.async: (raises: [CancelledError]).} =
|
||||||
|
|
||||||
template executionPayload: untyped = blck.body.execution_payload
|
template executionPayload: untyped = blck.body.execution_payload
|
||||||
|
|
||||||
|
@ -295,7 +303,8 @@ proc newExecutionPayload*(
|
||||||
executionPayload = shortLog(executionPayload)
|
executionPayload = shortLog(executionPayload)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
let payloadStatus = await elManager.sendNewPayload(blck)
|
let payloadStatus =
|
||||||
|
await elManager.sendNewPayload(blck, deadlineObj, maxRetriesCount)
|
||||||
|
|
||||||
debug "newPayload: succeeded",
|
debug "newPayload: succeeded",
|
||||||
parentHash = executionPayload.parent_hash,
|
parentHash = executionPayload.parent_hash,
|
||||||
|
@ -312,22 +321,34 @@ proc newExecutionPayload*(
|
||||||
blockNumber = executionPayload.block_number
|
blockNumber = executionPayload.block_number
|
||||||
return Opt.none PayloadExecutionStatus
|
return Opt.none PayloadExecutionStatus
|
||||||
|
|
||||||
|
proc newExecutionPayload*(
|
||||||
|
elManager: ELManager,
|
||||||
|
blck: SomeForkyBeaconBlock
|
||||||
|
): Future[Opt[PayloadExecutionStatus]] {.
|
||||||
|
async: (raises: [CancelledError], raw: true).} =
|
||||||
|
newExecutionPayload(
|
||||||
|
elManager, blck, DeadlineObject.init(FORKCHOICEUPDATED_TIMEOUT),
|
||||||
|
high(int))
|
||||||
|
|
||||||
proc getExecutionValidity(
|
proc getExecutionValidity(
|
||||||
elManager: ELManager,
|
elManager: ELManager,
|
||||||
blck: bellatrix.SignedBeaconBlock | capella.SignedBeaconBlock |
|
blck: bellatrix.SignedBeaconBlock | capella.SignedBeaconBlock |
|
||||||
deneb.SignedBeaconBlock | electra.SignedBeaconBlock):
|
deneb.SignedBeaconBlock | electra.SignedBeaconBlock,
|
||||||
Future[NewPayloadStatus] {.async: (raises: [CancelledError]).} =
|
deadlineObj: DeadlineObject,
|
||||||
|
maxRetriesCount: int
|
||||||
|
): Future[NewPayloadStatus] {.async: (raises: [CancelledError]).} =
|
||||||
if not blck.message.is_execution_block:
|
if not blck.message.is_execution_block:
|
||||||
return NewPayloadStatus.valid # vacuously
|
return NewPayloadStatus.valid # vacuously
|
||||||
|
|
||||||
try:
|
try:
|
||||||
let executionPayloadStatus = await elManager.newExecutionPayload(
|
let executionPayloadStatus = await elManager.newExecutionPayload(
|
||||||
blck.message)
|
blck.message, deadlineObj, maxRetriesCount)
|
||||||
if executionPayloadStatus.isNone:
|
if executionPayloadStatus.isNone:
|
||||||
return NewPayloadStatus.noResponse
|
return NewPayloadStatus.noResponse
|
||||||
|
|
||||||
case executionPayloadStatus.get
|
case executionPayloadStatus.get
|
||||||
of PayloadExecutionStatus.invalid, PayloadExecutionStatus.invalid_block_hash:
|
of PayloadExecutionStatus.invalid,
|
||||||
|
PayloadExecutionStatus.invalid_block_hash:
|
||||||
# Blocks come either from gossip or request manager requests. In the
|
# Blocks come either from gossip or request manager requests. In the
|
||||||
# former case, they've passed libp2p gosisp validation which implies
|
# former case, they've passed libp2p gosisp validation which implies
|
||||||
# correct signature for correct proposer,which makes spam expensive,
|
# correct signature for correct proposer,which makes spam expensive,
|
||||||
|
@ -413,6 +434,20 @@ proc storeBlock(
|
||||||
vm = self.validatorMonitor
|
vm = self.validatorMonitor
|
||||||
dag = self.consensusManager.dag
|
dag = self.consensusManager.dag
|
||||||
wallSlot = wallTime.slotOrZero
|
wallSlot = wallTime.slotOrZero
|
||||||
|
deadlineTime =
|
||||||
|
block:
|
||||||
|
let slotTime = (wallSlot + 1).start_beacon_time() - 1.seconds
|
||||||
|
if slotTime <= wallTime:
|
||||||
|
0.seconds
|
||||||
|
else:
|
||||||
|
chronos.nanoseconds((slotTime - wallTime).nanoseconds)
|
||||||
|
deadlineObj = DeadlineObject.init(deadlineTime)
|
||||||
|
|
||||||
|
func getRetriesCount(): int =
|
||||||
|
if dag.is_optimistic(dag.head.bid):
|
||||||
|
1
|
||||||
|
else:
|
||||||
|
high(int)
|
||||||
|
|
||||||
# If the block is missing its parent, it will be re-orphaned below
|
# If the block is missing its parent, it will be re-orphaned below
|
||||||
self.consensusManager.quarantine[].removeOrphan(signedBlock)
|
self.consensusManager.quarantine[].removeOrphan(signedBlock)
|
||||||
|
@ -518,7 +553,8 @@ proc storeBlock(
|
||||||
NewPayloadStatus.noResponse
|
NewPayloadStatus.noResponse
|
||||||
else:
|
else:
|
||||||
when typeof(signedBlock).kind >= ConsensusFork.Bellatrix:
|
when typeof(signedBlock).kind >= ConsensusFork.Bellatrix:
|
||||||
await self.consensusManager.elManager.getExecutionValidity(signedBlock)
|
await self.consensusManager.elManager.getExecutionValidity(
|
||||||
|
signedBlock, deadlineObj, getRetriesCount())
|
||||||
else:
|
else:
|
||||||
NewPayloadStatus.valid # vacuously
|
NewPayloadStatus.valid # vacuously
|
||||||
payloadValid = payloadStatus == NewPayloadStatus.valid
|
payloadValid = payloadStatus == NewPayloadStatus.valid
|
||||||
|
@ -685,7 +721,9 @@ proc storeBlock(
|
||||||
self.consensusManager[].optimisticExecutionBlockHash,
|
self.consensusManager[].optimisticExecutionBlockHash,
|
||||||
safeBlockHash = newHead.get.safeExecutionBlockHash,
|
safeBlockHash = newHead.get.safeExecutionBlockHash,
|
||||||
finalizedBlockHash = newHead.get.finalizedExecutionBlockHash,
|
finalizedBlockHash = newHead.get.finalizedExecutionBlockHash,
|
||||||
payloadAttributes = Opt.none attributes)
|
payloadAttributes = Opt.none attributes,
|
||||||
|
deadlineObj = deadlineObj,
|
||||||
|
maxRetriesCount = getRetriesCount())
|
||||||
|
|
||||||
let consensusFork = self.consensusManager.dag.cfg.consensusForkAtEpoch(
|
let consensusFork = self.consensusManager.dag.cfg.consensusForkAtEpoch(
|
||||||
newHead.get.blck.bid.slot.epoch)
|
newHead.get.blck.bid.slot.epoch)
|
||||||
|
@ -712,7 +750,9 @@ proc storeBlock(
|
||||||
headBlockHash = headExecutionBlockHash,
|
headBlockHash = headExecutionBlockHash,
|
||||||
safeBlockHash = newHead.get.safeExecutionBlockHash,
|
safeBlockHash = newHead.get.safeExecutionBlockHash,
|
||||||
finalizedBlockHash = newHead.get.finalizedExecutionBlockHash,
|
finalizedBlockHash = newHead.get.finalizedExecutionBlockHash,
|
||||||
receivedBlock = signedBlock)
|
receivedBlock = signedBlock,
|
||||||
|
deadlineObj = deadlineObj,
|
||||||
|
maxRetriesCount = getRetriesCount())
|
||||||
|
|
||||||
template callForkChoiceUpdated: auto =
|
template callForkChoiceUpdated: auto =
|
||||||
case self.consensusManager.dag.cfg.consensusForkAtEpoch(
|
case self.consensusManager.dag.cfg.consensusForkAtEpoch(
|
||||||
|
|
Loading…
Reference in New Issue