clear BrokenClock status if Nimbus extensions no longer supported (#5827)

When BN clock is out of sync, VC sets BN status to `BrokenClock`. It is
only reset to `Offline` after restoring time sync. However, if VC
encounters an error while checking time, Nimbus extensions are assumed
to be unavailable and the BN is no longer checked for having a synced
clock. This means it is never reset back to `Offline` if errors start
occurring _after_ BN is already set to `BrokenClock`. This could be
because BN is changed from Nimbus to an alternative implementation,
or due to intermittent connection issues.

Reset BN status back to `Offline` when Nimbus extensions are disabled,
so that the connection can eventually recover.
This commit is contained in:
Etan Kissling 2024-01-25 11:52:25 +01:00 committed by GitHub
parent 128834a8eb
commit 61cb7fafdf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -374,6 +374,14 @@ proc checkOffsetStatus(node: BeaconNodeServerRef, offset: TimeOffset) =
"Beacon node has acceptable time offset") "Beacon node has acceptable time offset")
node.updateStatus(RestBeaconNodeStatus.Offline, failure) node.updateStatus(RestBeaconNodeStatus.Offline, failure)
proc disableNimbusExtensions(node: BeaconNodeServerRef) =
  ## Flags `node` as not supporting Nimbus extensions by adding
  ## `NoNimbusExtensions` to its feature set.
  ##
  ## If the node is currently in the `BrokenClock` state it is moved back
  ## to `Offline`: once extensions are disabled the time offset is no
  ## longer checked, so nothing else would clear `BrokenClock`.
  node.features.incl(RestBeaconNodeFeature.NoNimbusExtensions)

  # Any status other than `BrokenClock` is left untouched.
  if node.status != RestBeaconNodeStatus.BrokenClock:
    return

  # `NoError` / 200: this records a status transition, not an actual
  # request failure.
  let resetReason = ApiNodeFailure.init(
    ApiFailure.NoError, "disableNimbusExtensions()", node, 200,
    "Nimbus extensions no longer available")
  node.updateStatus(RestBeaconNodeStatus.Offline, resetReason)
proc runTimeMonitor(service: FallbackServiceRef, proc runTimeMonitor(service: FallbackServiceRef,
node: BeaconNodeServerRef) {.async.} = node: BeaconNodeServerRef) {.async.} =
const NimbusExtensionsLog = "Beacon node does not support Nimbus extensions" const NimbusExtensionsLog = "Beacon node does not support Nimbus extensions"
@ -398,10 +406,8 @@ proc runTimeMonitor(service: FallbackServiceRef,
let tres = let tres =
try: try:
let let delay = vc.processingDelay.valueOr: ZeroDuration
delay = vc.processingDelay.valueOr: ZeroDuration await node.client.getTimeOffset(delay)
res = await node.client.getTimeOffset(delay)
Opt.some(res)
except RestResponseError as exc: except RestResponseError as exc:
case exc.status case exc.status
of 400: of 400:
@ -412,12 +418,12 @@ proc runTimeMonitor(service: FallbackServiceRef,
notice NimbusExtensionsLog, status = $exc.status, notice NimbusExtensionsLog, status = $exc.status,
reason = $exc.msg, error_message = $exc.message reason = $exc.msg, error_message = $exc.message
# Exiting loop # Exiting loop
node.features.incl(RestBeaconNodeFeature.NoNimbusExtensions) node.disableNimbusExtensions()
return return
except RestError as exc: except RestError as exc:
debug "Unable to obtain beacon node's time offset", reason = $exc.msg debug "Unable to obtain beacon node's time offset", reason = $exc.msg
notice NimbusExtensionsLog notice NimbusExtensionsLog
node.features.incl(RestBeaconNodeFeature.NoNimbusExtensions) node.disableNimbusExtensions()
return return
except CancelledError as exc: except CancelledError as exc:
raise exc raise exc
@ -425,13 +431,10 @@ proc runTimeMonitor(service: FallbackServiceRef,
warn "An unexpected error occurred while asking for time offset", warn "An unexpected error occurred while asking for time offset",
reason = $exc.msg, error = $exc.name reason = $exc.msg, error = $exc.name
notice NimbusExtensionsLog notice NimbusExtensionsLog
node.features.incl(RestBeaconNodeFeature.NoNimbusExtensions) node.disableNimbusExtensions()
return return
if tres.isSome(): checkOffsetStatus(node, TimeOffset.init(tres))
checkOffsetStatus(node, TimeOffset.init(tres.get()))
else:
debug "Beacon node's time offset was not updated"
await service.waitForNextSlot() await service.waitForNextSlot()