log elmanager timeouts (#5895)

Also:

* remove some unused metrics
* simplify execution payload fetching flow
This commit is contained in:
Jacek Sieka 2024-02-17 10:15:02 +01:00 committed by GitHub
parent e410fe0052
commit b5089ebf70
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 30 additions and 28 deletions

View File

@ -158,7 +158,7 @@ type
## reconnecting after a lost connetion. You can wait on
## the future below for the moment the connection is active.
connectingFut: Future[Result[Web3, string]]
connectingFut: Future[Result[Web3, string]].Raising([CancelledError])
## This future will be replaced when the connection is lost.
etcStatus: EtcStatus
@ -580,8 +580,9 @@ proc newWeb3*(engineUrl: EngineApiUrl): Future[Web3] =
getJsonRpcRequestHeaders(engineUrl.jwtSecret),
httpFlags = {HttpClientFlag.NewConnectionAlways})
proc establishEngineApiConnection*(url: EngineApiUrl):
Future[Result[Web3, string]] {.async.} =
proc establishEngineApiConnection(url: EngineApiUrl):
Future[Result[Web3, string]] {.
async: (raises: [CancelledError]).} =
try:
ok(await newWeb3(url).wait(engineApiConnectionTimeout))
except AsyncTimeoutError:
@ -589,9 +590,10 @@ proc establishEngineApiConnection*(url: EngineApiUrl):
except CancelledError as exc:
raise exc
except CatchableError as exc:
err "Engine API connection failed: " & exc.msg
err exc.msg
proc tryConnecting(connection: ELConnection): Future[bool] {.async.} =
proc tryConnecting(connection: ELConnection): Future[bool] {.
async: (raises: [CancelledError]).} =
if connection.isConnected:
return true
@ -601,12 +603,14 @@ proc tryConnecting(connection: ELConnection): Future[bool] {.async.} =
let web3Res = await connection.connectingFut
if web3Res.isErr:
warn "Engine API connection failed", err = web3Res.error
return false
else:
connection.web3 = some web3Res.get
return true
proc connectedRpcClient(connection: ELConnection): Future[RpcClient] {.async.} =
proc connectedRpcClient(connection: ELConnection): Future[RpcClient] {.
async: (raises: [CancelledError]).} =
while not connection.isConnected:
if not await connection.tryConnecting():
await sleepAsync(chronos.seconds(10))
@ -796,9 +800,11 @@ proc getPayload*(m: ELManager,
var bestPayloadIdx = none int
for idx, req in requests:
if not req.finished:
warn "Timeout while getting execution payload",
url = m.elConnections[idx].engineUrl.url
req.cancelSoon()
elif req.failed:
error "Failed to get execution payload from EL",
warn "Failed to get execution payload from EL",
url = m.elConnections[idx].engineUrl.url,
err = req.error.msg
else:
@ -826,10 +832,12 @@ proc getPayload*(m: ELManager,
withdrawals_from_el =
mapIt(
req.value().executionPayload.withdrawals.maybeDeref,
it.asConsensusWithdrawal)
it.asConsensusWithdrawal),
url = m.elConnections[idx].engineUrl.url
if req.value().executionPayload.extraData.len > MAX_EXTRA_DATA_BYTES:
warn "Execution client provided a block with invalid extraData (size exceeds limit)",
url = m.elConnections[idx].engineUrl.url,
size = req.value().executionPayload.extraData.len,
limit = MAX_EXTRA_DATA_BYTES
continue
@ -840,6 +848,8 @@ proc getPayload*(m: ELManager,
if cmpGetPayloadResponses(req.value(), requests[bestPayloadIdx.get].value()) > 0:
bestPayloadIdx = some idx
deadline.cancelSoon()
if bestPayloadIdx.isSome:
return ok requests[bestPayloadIdx.get].value().asConsensusType
else:

View File

@ -59,12 +59,6 @@ declareCounter beacon_blocks_proposed,
declareCounter beacon_block_production_errors,
"Number of times we failed to produce a block"
declareCounter beacon_block_payload_errors,
"Number of times execution client failed to produce block payload"
declareCounter beacon_blobs_sidecar_payload_errors,
"Number of times execution client failed to produce blobs sidecar"
# Metrics for tracking external block builder usage
declareCounter beacon_block_builder_missed_with_fallback,
"Number of beacon chain blocks where an attempt to use an external block builder failed with fallback"
@ -369,12 +363,14 @@ from ../spec/beaconstate import get_expected_withdrawals
proc getExecutionPayload(
PayloadType: type ForkyExecutionPayloadForSigning,
node: BeaconNode, proposalState: ref ForkedHashedBeaconState,
epoch: Epoch, validator_index: ValidatorIndex): Future[Opt[PayloadType]]
{.async: (raises: [CancelledError]).} =
node: BeaconNode, head: BlockRef, proposalState: ref ForkedHashedBeaconState,
validator_index: ValidatorIndex): Future[Opt[PayloadType]]
{.async: (raises: [CancelledError], raw: true).} =
# https://github.com/ethereum/consensus-specs/blob/v1.3.0/specs/bellatrix/validator.md#executionpayload
let
epoch = withState(proposalState[]):
forkyState.data.slot.epoch
feeRecipient = block:
let pubkey = node.dag.validatorKey(validator_index)
if pubkey.isNone():
@ -383,7 +379,7 @@ proc getExecutionPayload(
else:
node.getFeeRecipient(pubkey.get().toPubKey(), validator_index, epoch)
beaconHead = node.attestationPool[].getBeaconHead(node.dag.head)
beaconHead = node.attestationPool[].getBeaconHead(head)
executionHead = withState(proposalState[]):
when consensusFork >= ConsensusFork.Bellatrix:
forkyState.data.latest_execution_payload_header.block_hash
@ -407,13 +403,9 @@ proc getExecutionPayload(
validatorIndex = validator_index,
feeRecipient = $feeRecipient
let payload = await node.elManager.getPayload(
node.elManager.getPayload(
PayloadType, beaconHead.blck.bid.root, executionHead, latestSafe,
latestFinalized, timestamp, random, feeRecipient, withdrawals)
if payload.isNone():
warn "Failed to obtain execution payload from EL",
executionHeadBlock = executionHead
payload
proc makeBeaconBlockForHeadAndSlot*(
PayloadType: type ForkyExecutionPayloadForSigning,
@ -479,7 +471,7 @@ proc makeBeaconBlockForHeadAndSlot*(
fut
else:
# Create execution payload while packing attestations
getExecutionPayload(PayloadType, node, state, slot.epoch, validator_index)
getExecutionPayload(PayloadType, node, head, state, validator_index)
eth1Proposal = node.getBlockProposalEth1Data(state[])
@ -1029,7 +1021,7 @@ proc collectBids(
if payloadBuilderBidFut.value().isOk:
Opt.some(payloadBuilderBidFut.value().value())
elif usePayloadBuilder:
info "Payload builder error",
notice "Payload builder error",
slot, head = shortLog(head), validator = shortLog(validator_pubkey),
err = payloadBuilderBidFut.value().error()
Opt.none(BuilderBid[SBBB])
@ -1037,7 +1029,7 @@ proc collectBids(
# Effectively the same case, but without the log message
Opt.none(BuilderBid[SBBB])
else:
info "Payload builder bid future failed",
notice "Payload builder bid request failed",
slot, head = shortLog(head), validator = shortLog(validator_pubkey),
err = payloadBuilderBidFut.error.msg
Opt.none(BuilderBid[SBBB])
@ -1047,12 +1039,12 @@ proc collectBids(
if engineBlockFut.value.isOk:
Opt.some(engineBlockFut.value().value())
else:
info "Engine block building error",
notice "Engine block building error",
slot, head = shortLog(head), validator = shortLog(validator_pubkey),
err = engineBlockFut.value.error()
Opt.none(EngineBid)
else:
info "Engine block building failed",
notice "Engine block building failed",
slot, head = shortLog(head), validator = shortLog(validator_pubkey),
err = engineBlockFut.error.msg
Opt.none(EngineBid)