log elmanager timeouts (#5895)

Also:

* remove some unused metrics
* simplify execution payload fetching flow
This commit is contained in:
Jacek Sieka 2024-02-17 10:15:02 +01:00 committed by GitHub
parent e410fe0052
commit b5089ebf70
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 30 additions and 28 deletions

View File

@ -158,7 +158,7 @@ type
## reconnecting after a lost connection. You can wait on ## reconnecting after a lost connection. You can wait on
## the future below for the moment the connection is active. ## the future below for the moment the connection is active.
connectingFut: Future[Result[Web3, string]] connectingFut: Future[Result[Web3, string]].Raising([CancelledError])
## This future will be replaced when the connection is lost. ## This future will be replaced when the connection is lost.
etcStatus: EtcStatus etcStatus: EtcStatus
@ -580,8 +580,9 @@ proc newWeb3*(engineUrl: EngineApiUrl): Future[Web3] =
getJsonRpcRequestHeaders(engineUrl.jwtSecret), getJsonRpcRequestHeaders(engineUrl.jwtSecret),
httpFlags = {HttpClientFlag.NewConnectionAlways}) httpFlags = {HttpClientFlag.NewConnectionAlways})
proc establishEngineApiConnection*(url: EngineApiUrl): proc establishEngineApiConnection(url: EngineApiUrl):
Future[Result[Web3, string]] {.async.} = Future[Result[Web3, string]] {.
async: (raises: [CancelledError]).} =
try: try:
ok(await newWeb3(url).wait(engineApiConnectionTimeout)) ok(await newWeb3(url).wait(engineApiConnectionTimeout))
except AsyncTimeoutError: except AsyncTimeoutError:
@ -589,9 +590,10 @@ proc establishEngineApiConnection*(url: EngineApiUrl):
except CancelledError as exc: except CancelledError as exc:
raise exc raise exc
except CatchableError as exc: except CatchableError as exc:
err "Engine API connection failed: " & exc.msg err exc.msg
proc tryConnecting(connection: ELConnection): Future[bool] {.async.} = proc tryConnecting(connection: ELConnection): Future[bool] {.
async: (raises: [CancelledError]).} =
if connection.isConnected: if connection.isConnected:
return true return true
@ -601,12 +603,14 @@ proc tryConnecting(connection: ELConnection): Future[bool] {.async.} =
let web3Res = await connection.connectingFut let web3Res = await connection.connectingFut
if web3Res.isErr: if web3Res.isErr:
warn "Engine API connection failed", err = web3Res.error
return false return false
else: else:
connection.web3 = some web3Res.get connection.web3 = some web3Res.get
return true return true
proc connectedRpcClient(connection: ELConnection): Future[RpcClient] {.async.} = proc connectedRpcClient(connection: ELConnection): Future[RpcClient] {.
async: (raises: [CancelledError]).} =
while not connection.isConnected: while not connection.isConnected:
if not await connection.tryConnecting(): if not await connection.tryConnecting():
await sleepAsync(chronos.seconds(10)) await sleepAsync(chronos.seconds(10))
@ -796,9 +800,11 @@ proc getPayload*(m: ELManager,
var bestPayloadIdx = none int var bestPayloadIdx = none int
for idx, req in requests: for idx, req in requests:
if not req.finished: if not req.finished:
warn "Timeout while getting execution payload",
url = m.elConnections[idx].engineUrl.url
req.cancelSoon() req.cancelSoon()
elif req.failed: elif req.failed:
error "Failed to get execution payload from EL", warn "Failed to get execution payload from EL",
url = m.elConnections[idx].engineUrl.url, url = m.elConnections[idx].engineUrl.url,
err = req.error.msg err = req.error.msg
else: else:
@ -826,10 +832,12 @@ proc getPayload*(m: ELManager,
withdrawals_from_el = withdrawals_from_el =
mapIt( mapIt(
req.value().executionPayload.withdrawals.maybeDeref, req.value().executionPayload.withdrawals.maybeDeref,
it.asConsensusWithdrawal) it.asConsensusWithdrawal),
url = m.elConnections[idx].engineUrl.url
if req.value().executionPayload.extraData.len > MAX_EXTRA_DATA_BYTES: if req.value().executionPayload.extraData.len > MAX_EXTRA_DATA_BYTES:
warn "Execution client provided a block with invalid extraData (size exceeds limit)", warn "Execution client provided a block with invalid extraData (size exceeds limit)",
url = m.elConnections[idx].engineUrl.url,
size = req.value().executionPayload.extraData.len, size = req.value().executionPayload.extraData.len,
limit = MAX_EXTRA_DATA_BYTES limit = MAX_EXTRA_DATA_BYTES
continue continue
@ -840,6 +848,8 @@ proc getPayload*(m: ELManager,
if cmpGetPayloadResponses(req.value(), requests[bestPayloadIdx.get].value()) > 0: if cmpGetPayloadResponses(req.value(), requests[bestPayloadIdx.get].value()) > 0:
bestPayloadIdx = some idx bestPayloadIdx = some idx
deadline.cancelSoon()
if bestPayloadIdx.isSome: if bestPayloadIdx.isSome:
return ok requests[bestPayloadIdx.get].value().asConsensusType return ok requests[bestPayloadIdx.get].value().asConsensusType
else: else:

View File

@ -59,12 +59,6 @@ declareCounter beacon_blocks_proposed,
declareCounter beacon_block_production_errors, declareCounter beacon_block_production_errors,
"Number of times we failed to produce a block" "Number of times we failed to produce a block"
declareCounter beacon_block_payload_errors,
"Number of times execution client failed to produce block payload"
declareCounter beacon_blobs_sidecar_payload_errors,
"Number of times execution client failed to produce blobs sidecar"
# Metrics for tracking external block builder usage # Metrics for tracking external block builder usage
declareCounter beacon_block_builder_missed_with_fallback, declareCounter beacon_block_builder_missed_with_fallback,
"Number of beacon chain blocks where an attempt to use an external block builder failed with fallback" "Number of beacon chain blocks where an attempt to use an external block builder failed with fallback"
@ -369,12 +363,14 @@ from ../spec/beaconstate import get_expected_withdrawals
proc getExecutionPayload( proc getExecutionPayload(
PayloadType: type ForkyExecutionPayloadForSigning, PayloadType: type ForkyExecutionPayloadForSigning,
node: BeaconNode, proposalState: ref ForkedHashedBeaconState, node: BeaconNode, head: BlockRef, proposalState: ref ForkedHashedBeaconState,
epoch: Epoch, validator_index: ValidatorIndex): Future[Opt[PayloadType]] validator_index: ValidatorIndex): Future[Opt[PayloadType]]
{.async: (raises: [CancelledError]).} = {.async: (raises: [CancelledError], raw: true).} =
# https://github.com/ethereum/consensus-specs/blob/v1.3.0/specs/bellatrix/validator.md#executionpayload # https://github.com/ethereum/consensus-specs/blob/v1.3.0/specs/bellatrix/validator.md#executionpayload
let let
epoch = withState(proposalState[]):
forkyState.data.slot.epoch
feeRecipient = block: feeRecipient = block:
let pubkey = node.dag.validatorKey(validator_index) let pubkey = node.dag.validatorKey(validator_index)
if pubkey.isNone(): if pubkey.isNone():
@ -383,7 +379,7 @@ proc getExecutionPayload(
else: else:
node.getFeeRecipient(pubkey.get().toPubKey(), validator_index, epoch) node.getFeeRecipient(pubkey.get().toPubKey(), validator_index, epoch)
beaconHead = node.attestationPool[].getBeaconHead(node.dag.head) beaconHead = node.attestationPool[].getBeaconHead(head)
executionHead = withState(proposalState[]): executionHead = withState(proposalState[]):
when consensusFork >= ConsensusFork.Bellatrix: when consensusFork >= ConsensusFork.Bellatrix:
forkyState.data.latest_execution_payload_header.block_hash forkyState.data.latest_execution_payload_header.block_hash
@ -407,13 +403,9 @@ proc getExecutionPayload(
validatorIndex = validator_index, validatorIndex = validator_index,
feeRecipient = $feeRecipient feeRecipient = $feeRecipient
let payload = await node.elManager.getPayload( node.elManager.getPayload(
PayloadType, beaconHead.blck.bid.root, executionHead, latestSafe, PayloadType, beaconHead.blck.bid.root, executionHead, latestSafe,
latestFinalized, timestamp, random, feeRecipient, withdrawals) latestFinalized, timestamp, random, feeRecipient, withdrawals)
if payload.isNone():
warn "Failed to obtain execution payload from EL",
executionHeadBlock = executionHead
payload
proc makeBeaconBlockForHeadAndSlot*( proc makeBeaconBlockForHeadAndSlot*(
PayloadType: type ForkyExecutionPayloadForSigning, PayloadType: type ForkyExecutionPayloadForSigning,
@ -479,7 +471,7 @@ proc makeBeaconBlockForHeadAndSlot*(
fut fut
else: else:
# Create execution payload while packing attestations # Create execution payload while packing attestations
getExecutionPayload(PayloadType, node, state, slot.epoch, validator_index) getExecutionPayload(PayloadType, node, head, state, validator_index)
eth1Proposal = node.getBlockProposalEth1Data(state[]) eth1Proposal = node.getBlockProposalEth1Data(state[])
@ -1029,7 +1021,7 @@ proc collectBids(
if payloadBuilderBidFut.value().isOk: if payloadBuilderBidFut.value().isOk:
Opt.some(payloadBuilderBidFut.value().value()) Opt.some(payloadBuilderBidFut.value().value())
elif usePayloadBuilder: elif usePayloadBuilder:
info "Payload builder error", notice "Payload builder error",
slot, head = shortLog(head), validator = shortLog(validator_pubkey), slot, head = shortLog(head), validator = shortLog(validator_pubkey),
err = payloadBuilderBidFut.value().error() err = payloadBuilderBidFut.value().error()
Opt.none(BuilderBid[SBBB]) Opt.none(BuilderBid[SBBB])
@ -1037,7 +1029,7 @@ proc collectBids(
# Effectively the same case, but without the log message # Effectively the same case, but without the log message
Opt.none(BuilderBid[SBBB]) Opt.none(BuilderBid[SBBB])
else: else:
info "Payload builder bid future failed", notice "Payload builder bid request failed",
slot, head = shortLog(head), validator = shortLog(validator_pubkey), slot, head = shortLog(head), validator = shortLog(validator_pubkey),
err = payloadBuilderBidFut.error.msg err = payloadBuilderBidFut.error.msg
Opt.none(BuilderBid[SBBB]) Opt.none(BuilderBid[SBBB])
@ -1047,12 +1039,12 @@ proc collectBids(
if engineBlockFut.value.isOk: if engineBlockFut.value.isOk:
Opt.some(engineBlockFut.value().value()) Opt.some(engineBlockFut.value().value())
else: else:
info "Engine block building error", notice "Engine block building error",
slot, head = shortLog(head), validator = shortLog(validator_pubkey), slot, head = shortLog(head), validator = shortLog(validator_pubkey),
err = engineBlockFut.value.error() err = engineBlockFut.value.error()
Opt.none(EngineBid) Opt.none(EngineBid)
else: else:
info "Engine block building failed", notice "Engine block building failed",
slot, head = shortLog(head), validator = shortLog(validator_pubkey), slot, head = shortLog(head), validator = shortLog(validator_pubkey),
err = engineBlockFut.error.msg err = engineBlockFut.error.msg
Opt.none(EngineBid) Opt.none(EngineBid)