From b5089ebf704b9545f91500ab4e4a2bb5ac2e7a0e Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Sat, 17 Feb 2024 10:15:02 +0100 Subject: [PATCH] log elmanager timeouts (#5895) Also: * remove some unused metrics * simplify execution payload fetching flow --- beacon_chain/el/el_manager.nim | 26 ++++++++++----- beacon_chain/validators/beacon_validators.nim | 32 +++++++------------ 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/beacon_chain/el/el_manager.nim b/beacon_chain/el/el_manager.nim index e9e4aedfc..694b9fe91 100644 --- a/beacon_chain/el/el_manager.nim +++ b/beacon_chain/el/el_manager.nim @@ -158,7 +158,7 @@ type ## reconnecting after a lost connetion. You can wait on ## the future below for the moment the connection is active. - connectingFut: Future[Result[Web3, string]] + connectingFut: Future[Result[Web3, string]].Raising([CancelledError]) ## This future will be replaced when the connection is lost. etcStatus: EtcStatus @@ -580,8 +580,9 @@ proc newWeb3*(engineUrl: EngineApiUrl): Future[Web3] = getJsonRpcRequestHeaders(engineUrl.jwtSecret), httpFlags = {HttpClientFlag.NewConnectionAlways}) -proc establishEngineApiConnection*(url: EngineApiUrl): - Future[Result[Web3, string]] {.async.} = +proc establishEngineApiConnection(url: EngineApiUrl): + Future[Result[Web3, string]] {. + async: (raises: [CancelledError]).} = try: ok(await newWeb3(url).wait(engineApiConnectionTimeout)) except AsyncTimeoutError: @@ -589,9 +590,10 @@ proc establishEngineApiConnection*(url: EngineApiUrl): except CancelledError as exc: raise exc except CatchableError as exc: - err "Engine API connection failed: " & exc.msg + err exc.msg -proc tryConnecting(connection: ELConnection): Future[bool] {.async.} = +proc tryConnecting(connection: ELConnection): Future[bool] {. + async: (raises: [CancelledError]).} = if connection.isConnected: return true @@ -601,12 +603,14 @@ proc tryConnecting(connection: ELConnection): Future[bool] {.async.} = let web3Res = await connection.connectingFut if web3Res.isErr: + warn "Engine API connection failed", err = web3Res.error return false else: connection.web3 = some web3Res.get return true -proc connectedRpcClient(connection: ELConnection): Future[RpcClient] {.async.} = +proc connectedRpcClient(connection: ELConnection): Future[RpcClient] {. + async: (raises: [CancelledError]).} = while not connection.isConnected: if not await connection.tryConnecting(): await sleepAsync(chronos.seconds(10)) @@ -796,9 +800,11 @@ proc getPayload*(m: ELManager, var bestPayloadIdx = none int for idx, req in requests: if not req.finished: + warn "Timeout while getting execution payload", + url = m.elConnections[idx].engineUrl.url req.cancelSoon() elif req.failed: - error "Failed to get execution payload from EL", + warn "Failed to get execution payload from EL", url = m.elConnections[idx].engineUrl.url, err = req.error.msg else: @@ -826,10 +832,12 @@ proc getPayload*(m: ELManager, withdrawals_from_el = mapIt( req.value().executionPayload.withdrawals.maybeDeref, - it.asConsensusWithdrawal) + it.asConsensusWithdrawal), + url = m.elConnections[idx].engineUrl.url if req.value().executionPayload.extraData.len > MAX_EXTRA_DATA_BYTES: warn "Execution client provided a block with invalid extraData (size exceeds limit)", + url = m.elConnections[idx].engineUrl.url, size = req.value().executionPayload.extraData.len, limit = MAX_EXTRA_DATA_BYTES continue @@ -840,6 +848,8 @@ proc getPayload*(m: ELManager, if cmpGetPayloadResponses(req.value(), requests[bestPayloadIdx.get].value()) > 0: bestPayloadIdx = some idx + deadline.cancelSoon() + if bestPayloadIdx.isSome: return ok requests[bestPayloadIdx.get].value().asConsensusType else: diff --git a/beacon_chain/validators/beacon_validators.nim b/beacon_chain/validators/beacon_validators.nim index 05fa49210..0a1d18436 100644 --- a/beacon_chain/validators/beacon_validators.nim +++ b/beacon_chain/validators/beacon_validators.nim @@ -59,12 +59,6 @@ declareCounter beacon_blocks_proposed, declareCounter beacon_block_production_errors, "Number of times we failed to produce a block" -declareCounter beacon_block_payload_errors, - "Number of times execution client failed to produce block payload" - -declareCounter beacon_blobs_sidecar_payload_errors, - "Number of times execution client failed to produce blobs sidecar" - # Metrics for tracking external block builder usage declareCounter beacon_block_builder_missed_with_fallback, "Number of beacon chain blocks where an attempt to use an external block builder failed with fallback" @@ -369,12 +363,14 @@ from ../spec/beaconstate import get_expected_withdrawals proc getExecutionPayload( PayloadType: type ForkyExecutionPayloadForSigning, - node: BeaconNode, proposalState: ref ForkedHashedBeaconState, - epoch: Epoch, validator_index: ValidatorIndex): Future[Opt[PayloadType]] - {.async: (raises: [CancelledError]).} = + node: BeaconNode, head: BlockRef, proposalState: ref ForkedHashedBeaconState, + validator_index: ValidatorIndex): Future[Opt[PayloadType]] + {.async: (raises: [CancelledError], raw: true).} = # https://github.com/ethereum/consensus-specs/blob/v1.3.0/specs/bellatrix/validator.md#executionpayload let + epoch = withState(proposalState[]): + forkyState.data.slot.epoch feeRecipient = block: let pubkey = node.dag.validatorKey(validator_index) if pubkey.isNone(): @@ -383,7 +379,7 @@ proc getExecutionPayload( else: node.getFeeRecipient(pubkey.get().toPubKey(), validator_index, epoch) - beaconHead = node.attestationPool[].getBeaconHead(node.dag.head) + beaconHead = node.attestationPool[].getBeaconHead(head) executionHead = withState(proposalState[]): when consensusFork >= ConsensusFork.Bellatrix: forkyState.data.latest_execution_payload_header.block_hash @@ -407,13 +403,9 @@ proc getExecutionPayload( validatorIndex = validator_index, feeRecipient = $feeRecipient - let payload = await node.elManager.getPayload( + node.elManager.getPayload( PayloadType, beaconHead.blck.bid.root, executionHead, latestSafe, latestFinalized, timestamp, random, feeRecipient, withdrawals) - if payload.isNone(): - warn "Failed to obtain execution payload from EL", - executionHeadBlock = executionHead - payload proc makeBeaconBlockForHeadAndSlot*( PayloadType: type ForkyExecutionPayloadForSigning, @@ -479,7 +471,7 @@ proc makeBeaconBlockForHeadAndSlot*( fut else: # Create execution payload while packing attestations - getExecutionPayload(PayloadType, node, state, slot.epoch, validator_index) + getExecutionPayload(PayloadType, node, head, state, validator_index) eth1Proposal = node.getBlockProposalEth1Data(state[]) @@ -1029,7 +1021,7 @@ proc collectBids( if payloadBuilderBidFut.value().isOk: Opt.some(payloadBuilderBidFut.value().value()) elif usePayloadBuilder: - info "Payload builder error", + notice "Payload builder error", slot, head = shortLog(head), validator = shortLog(validator_pubkey), err = payloadBuilderBidFut.value().error() Opt.none(BuilderBid[SBBB]) @@ -1037,7 +1029,7 @@ proc collectBids( # Effectively the same case, but without the log message Opt.none(BuilderBid[SBBB]) else: - info "Payload builder bid future failed", + notice "Payload builder bid request failed", slot, head = shortLog(head), validator = shortLog(validator_pubkey), err = payloadBuilderBidFut.error.msg Opt.none(BuilderBid[SBBB]) @@ -1047,12 +1039,12 @@ proc collectBids( if engineBlockFut.value.isOk: Opt.some(engineBlockFut.value().value()) else: - info "Engine block building error", + notice "Engine block building error", slot, head = shortLog(head), validator = shortLog(validator_pubkey), err = engineBlockFut.value.error() Opt.none(EngineBid) else: - info "Engine block building failed", + notice "Engine block building failed", slot, head = shortLog(head), validator = shortLog(validator_pubkey), err = engineBlockFut.error.msg Opt.none(EngineBid)