From 61cb7fafdfa2c357394512134c331725df50ce5a Mon Sep 17 00:00:00 2001 From: Etan Kissling Date: Thu, 25 Jan 2024 11:52:25 +0100 Subject: [PATCH] clear `BrokenClock` status if Nimbus extensions no longer supported (#5827) When BN clock is out of sync, VC sets BN status to `BrokenClock`. It is only reset to `Offline` after restoring time sync. However, if VC encounters an error while checking time, Nimbus extensions are assumed to be unavailable and the BN is no longer checked for having a synced clock. This means it is never reset back to `Offline` if errors start occurring _after_ BN is already set to `BrokenClock`. This could be because BN is changed from Nimbus to an alternative implementation, or due to intermittent connection issues. Reset BN status back to `Offline` when Nimbus extensions are disabled to ensure eventual connection recovery. --- .../validator_client/fallback_service.nim | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/beacon_chain/validator_client/fallback_service.nim b/beacon_chain/validator_client/fallback_service.nim index 5c6bce4e7..5d39fe9b6 100644 --- a/beacon_chain/validator_client/fallback_service.nim +++ b/beacon_chain/validator_client/fallback_service.nim @@ -374,6 +374,14 @@ proc checkOffsetStatus(node: BeaconNodeServerRef, offset: TimeOffset) = "Beacon node has acceptable time offset") node.updateStatus(RestBeaconNodeStatus.Offline, failure) +proc disableNimbusExtensions(node: BeaconNodeServerRef) = + node.features.incl(RestBeaconNodeFeature.NoNimbusExtensions) + if node.status == RestBeaconNodeStatus.BrokenClock: + let failure = ApiNodeFailure.init(ApiFailure.NoError, + "disableNimbusExtensions()", node, 200, + "Nimbus extensions no longer available") + node.updateStatus(RestBeaconNodeStatus.Offline, failure) + proc runTimeMonitor(service: FallbackServiceRef, node: BeaconNodeServerRef) {.async.} = const NimbusExtensionsLog = "Beacon node does not support Nimbus 
extensions" @@ -398,10 +406,8 @@ proc runTimeMonitor(service: FallbackServiceRef, let tres = try: - let - delay = vc.processingDelay.valueOr: ZeroDuration - res = await node.client.getTimeOffset(delay) - Opt.some(res) + let delay = vc.processingDelay.valueOr: ZeroDuration + await node.client.getTimeOffset(delay) except RestResponseError as exc: case exc.status of 400: @@ -412,12 +418,12 @@ proc runTimeMonitor(service: FallbackServiceRef, notice NimbusExtensionsLog, status = $exc.status, reason = $exc.msg, error_message = $exc.message # Exiting loop - node.features.incl(RestBeaconNodeFeature.NoNimbusExtensions) + node.disableNimbusExtensions() return except RestError as exc: debug "Unable to obtain beacon node's time offset", reason = $exc.msg notice NimbusExtensionsLog - node.features.incl(RestBeaconNodeFeature.NoNimbusExtensions) + node.disableNimbusExtensions() return except CancelledError as exc: raise exc @@ -425,13 +431,10 @@ proc runTimeMonitor(service: FallbackServiceRef, warn "An unexpected error occurred while asking for time offset", reason = $exc.msg, error = $exc.name notice NimbusExtensionsLog - node.features.incl(RestBeaconNodeFeature.NoNimbusExtensions) + node.disableNimbusExtensions() return - if tres.isSome(): - checkOffsetStatus(node, TimeOffset.init(tres.get())) - else: - debug "Beacon node's time offset was not updated" + checkOffsetStatus(node, TimeOffset.init(tres)) await service.waitForNextSlot()