VC: Use not-synced/opt-synced BNs. (#4635)
* Initial commit. * Address review comments and recommendations. * Fix too often `Execution client not in sync` messages in logs. * Add failure reason for duties requests. * Add more reasons to every place of ValidatorApiError. * Address race condition issue. * Remove `vc` argument for getFailureReason().
This commit is contained in:
parent
1dd07d5def
commit
08b6bb7a6b
|
@ -161,11 +161,33 @@ proc onSlotStart(vc: ValidatorClientRef, wallTime: BeaconTime,
|
|||
if checkIfShouldStopAtEpoch(wallSlot.slot, vc.config.stopAtEpoch):
|
||||
return true
|
||||
|
||||
if len(vc.beaconNodes) > 1:
|
||||
let
|
||||
counts = vc.getNodeCounts()
|
||||
# Good nodes are nodes which can be used for ALL the requests.
|
||||
goodNodes = counts.data[int(RestBeaconNodeStatus.Synced)]
|
||||
# Viable nodes are nodes which can be used only SOME of the requests.
|
||||
viableNodes = counts.data[int(RestBeaconNodeStatus.OptSynced)] +
|
||||
counts.data[int(RestBeaconNodeStatus.NotSynced)] +
|
||||
counts.data[int(RestBeaconNodeStatus.Compatible)]
|
||||
# Bad nodes are nodes which can't be used at all.
|
||||
badNodes = counts.data[int(RestBeaconNodeStatus.Offline)] +
|
||||
counts.data[int(RestBeaconNodeStatus.Online)] +
|
||||
counts.data[int(RestBeaconNodeStatus.Incompatible)]
|
||||
info "Slot start",
|
||||
slot = shortLog(wallSlot.slot),
|
||||
attestationIn = vc.getDurationToNextAttestation(wallSlot.slot),
|
||||
blockIn = vc.getDurationToNextBlock(wallSlot.slot),
|
||||
validators = vc.attachedValidators[].count(),
|
||||
good_nodes = goodNodes, viable_nodes = viableNodes, bad_nodes = badNodes,
|
||||
delay = shortLog(delay)
|
||||
else:
|
||||
info "Slot start",
|
||||
slot = shortLog(wallSlot.slot),
|
||||
attestationIn = vc.getDurationToNextAttestation(wallSlot.slot),
|
||||
blockIn = vc.getDurationToNextBlock(wallSlot.slot),
|
||||
validators = vc.attachedValidators[].count(),
|
||||
node_status = $vc.beaconNodes[0].status,
|
||||
delay = shortLog(delay)
|
||||
|
||||
return false
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -81,11 +81,12 @@ proc serveAttestation(service: AttestationServiceRef, adata: AttestationData,
|
|||
let res =
|
||||
try:
|
||||
await vc.submitPoolAttestations(@[attestation], ApiStrategyKind.First)
|
||||
except ValidatorApiError:
|
||||
except ValidatorApiError as exc:
|
||||
error "Unable to publish attestation",
|
||||
attestation = shortLog(attestation),
|
||||
validator = shortLog(validator),
|
||||
validator_index = vindex
|
||||
validator_index = vindex,
|
||||
reason = exc.getFailureReason()
|
||||
return false
|
||||
except CancelledError as exc:
|
||||
debug "Attestation publishing process was interrupted"
|
||||
|
@ -160,11 +161,12 @@ proc serveAggregateAndProof*(service: AttestationServiceRef,
|
|||
let res =
|
||||
try:
|
||||
await vc.publishAggregateAndProofs(@[signedProof], ApiStrategyKind.First)
|
||||
except ValidatorApiError:
|
||||
except ValidatorApiError as exc:
|
||||
error "Unable to publish aggregated attestation",
|
||||
attestation = shortLog(signedProof.message.aggregate),
|
||||
validator = shortLog(validator),
|
||||
validator_index = vindex
|
||||
validator_index = vindex,
|
||||
reason = exc.getFailureReason()
|
||||
return false
|
||||
except CancelledError as exc:
|
||||
debug "Publish aggregate and proofs request was interrupted"
|
||||
|
@ -287,9 +289,10 @@ proc produceAndPublishAggregates(service: AttestationServiceRef,
|
|||
try:
|
||||
await vc.getAggregatedAttestation(slot, attestationRoot,
|
||||
ApiStrategyKind.Best)
|
||||
except ValidatorApiError:
|
||||
except ValidatorApiError as exc:
|
||||
error "Unable to get aggregated attestation data", slot = slot,
|
||||
attestation_root = shortLog(attestationRoot)
|
||||
attestation_root = shortLog(attestationRoot),
|
||||
reason = exc.getFailureReason()
|
||||
return
|
||||
except CancelledError as exc:
|
||||
debug "Aggregated attestation request was interrupted"
|
||||
|
@ -360,9 +363,10 @@ proc publishAttestationsAndAggregates(service: AttestationServiceRef,
|
|||
let ad =
|
||||
try:
|
||||
await service.produceAndPublishAttestations(slot, committee_index, duties)
|
||||
except ValidatorApiError:
|
||||
except ValidatorApiError as exc:
|
||||
error "Unable to proceed attestations", slot = slot,
|
||||
committee_index = committee_index, duties_count = len(duties)
|
||||
committee_index = committee_index, duties_count = len(duties),
|
||||
reason = exc.getFailureReason()
|
||||
return
|
||||
except CancelledError as exc:
|
||||
debug "Publish attestation request was interrupted"
|
||||
|
|
|
@ -38,8 +38,8 @@ proc produceBlock(
|
|||
try:
|
||||
await vc.produceBlockV2(slot, randao_reveal, graffiti,
|
||||
ApiStrategyKind.Best)
|
||||
except ValidatorApiError:
|
||||
error "Unable to retrieve block data"
|
||||
except ValidatorApiError as exc:
|
||||
error "Unable to retrieve block data", reason = exc.getFailureReason()
|
||||
return Opt.none(PreparedBeaconBlock)
|
||||
except CancelledError as exc:
|
||||
error "Block data production has been interrupted"
|
||||
|
@ -69,7 +69,8 @@ proc produceBlindedBlock(
|
|||
await vc.produceBlindedBlock(slot, randao_reveal, graffiti,
|
||||
ApiStrategyKind.Best)
|
||||
except ValidatorApiError as exc:
|
||||
error "Unable to retrieve blinded block data", error_msg = exc.msg
|
||||
error "Unable to retrieve blinded block data", error_msg = exc.msg,
|
||||
reason = exc.getFailureReason()
|
||||
return Opt.none(PreparedBlindedBeaconBlock)
|
||||
except CancelledError as exc:
|
||||
error "Blinded block data production has been interrupted"
|
||||
|
@ -214,8 +215,9 @@ proc publishBlock(vc: ValidatorClientRef, currentSlot, slot: Slot,
|
|||
try:
|
||||
debug "Sending blinded block"
|
||||
await vc.publishBlindedBlock(signedBlock, ApiStrategyKind.First)
|
||||
except ValidatorApiError:
|
||||
error "Unable to publish blinded block"
|
||||
except ValidatorApiError as exc:
|
||||
error "Unable to publish blinded block",
|
||||
reason = exc.getFailureReason()
|
||||
return
|
||||
except CancelledError as exc:
|
||||
debug "Blinded block publication has been interrupted"
|
||||
|
@ -275,8 +277,8 @@ proc publishBlock(vc: ValidatorClientRef, currentSlot, slot: Slot,
|
|||
try:
|
||||
debug "Sending block"
|
||||
await vc.publishBlock(signedBlock, ApiStrategyKind.First)
|
||||
except ValidatorApiError:
|
||||
error "Unable to publish block"
|
||||
except ValidatorApiError as exc:
|
||||
error "Unable to publish block", reason = exc.getFailureReason()
|
||||
return
|
||||
except CancelledError as exc:
|
||||
debug "Block publication has been interrupted"
|
||||
|
|
|
@ -64,7 +64,7 @@ type
|
|||
DutiesServiceRef* = ref object of ClientServiceRef
|
||||
|
||||
FallbackServiceRef* = ref object of ClientServiceRef
|
||||
onlineEvent*: AsyncEvent
|
||||
changesEvent*: AsyncEvent
|
||||
|
||||
ForkServiceRef* = ref object of ClientServiceRef
|
||||
|
||||
|
@ -127,7 +127,16 @@ type
|
|||
duties*: Table[Epoch, SyncCommitteeDuty]
|
||||
|
||||
RestBeaconNodeStatus* {.pure.} = enum
|
||||
Uninitalized, Offline, Incompatible, NotSynced, Online
|
||||
Offline, ## BN is offline.
|
||||
Online, ## BN is online, passed checkOnline() check.
|
||||
Incompatible, ## BN configuration is NOT compatible with VC configuration.
|
||||
Compatible, ## BN configuration is compatible with VC configuration.
|
||||
NotSynced, ## BN is not in sync.
|
||||
OptSynced, ## BN is optimistically synced (EL is not in sync).
|
||||
Synced ## BN and EL are synced.
|
||||
|
||||
BeaconNodesCounters* = object
|
||||
data*: array[int(high(RestBeaconNodeStatus)) + 1, int]
|
||||
|
||||
BeaconNodeServerRef* = ref BeaconNodeServer
|
||||
|
||||
|
@ -176,10 +185,18 @@ type
|
|||
validatorsRegCache*: Table[ValidatorPubKey, SignedValidatorRegistrationV1]
|
||||
rng*: ref HmacDrbgContext
|
||||
|
||||
ApiFailure* {.pure.} = enum
|
||||
Communication, Invalid, NotFound, NotSynced, Internal, Unexpected
|
||||
|
||||
ApiNodeFailure* = object
|
||||
node*: BeaconNodeServerRef
|
||||
failure*: ApiFailure
|
||||
|
||||
ValidatorClientRef* = ref ValidatorClient
|
||||
|
||||
ValidatorClientError* = object of CatchableError
|
||||
ValidatorApiError* = object of ValidatorClientError
|
||||
data*: seq[ApiNodeFailure]
|
||||
|
||||
const
|
||||
DefaultDutyAndProof* = DutyAndProof(epoch: Epoch(0xFFFF_FFFF_FFFF_FFFF'u64))
|
||||
|
@ -225,6 +242,49 @@ proc `$`*(roles: set[BeaconNodeRole]): string =
|
|||
else:
|
||||
"{}"
|
||||
|
||||
proc `$`*(status: RestBeaconNodeStatus): string =
|
||||
case status
|
||||
of RestBeaconNodeStatus.Offline: "offline"
|
||||
of RestBeaconNodeStatus.Online: "online"
|
||||
of RestBeaconNodeStatus.Incompatible: "incompatible"
|
||||
of RestBeaconNodeStatus.Compatible: "compatible"
|
||||
of RestBeaconNodeStatus.NotSynced: "bn-unsynced"
|
||||
of RestBeaconNodeStatus.OptSynced: "el-unsynced"
|
||||
of RestBeaconNodeStatus.Synced: "synced"
|
||||
|
||||
proc `$`*(failure: ApiFailure): string =
|
||||
case failure
|
||||
of ApiFailure.Communication: "Connection with beacon node has been lost"
|
||||
of ApiFailure.Invalid: "Invalid response received from beacon node"
|
||||
of ApiFailure.NotFound: "Beacon node did not found requested entity"
|
||||
of ApiFailure.NotSynced: "Beacon node not in sync with network"
|
||||
of ApiFailure.Internal: "Beacon node reports internal failure"
|
||||
of ApiFailure.Unexpected: "Beacon node reports unexpected status"
|
||||
|
||||
proc getNodeCounts*(vc: ValidatorClientRef): BeaconNodesCounters =
|
||||
var res = BeaconNodesCounters()
|
||||
for node in vc.beaconNodes: inc(res.data[int(node.status)])
|
||||
res
|
||||
|
||||
proc getFailureReason*(exc: ref ValidatorApiError): string =
|
||||
var counts: array[int(high(ApiFailure)) + 1, int]
|
||||
let errors = exc[].data
|
||||
|
||||
if len(errors) > 1:
|
||||
var maxFailure =
|
||||
block:
|
||||
var maxCount = -1
|
||||
var res = ApiFailure.Unexpected
|
||||
for item in errors:
|
||||
inc(counts[int(item.failure)])
|
||||
if counts[int(item.failure)] > maxCount:
|
||||
maxCount = counts[int(item.failure)]
|
||||
res = item.failure
|
||||
res
|
||||
$maxFailure
|
||||
else:
|
||||
$errors[0].failure
|
||||
|
||||
proc shortLog*(roles: set[BeaconNodeRole]): string =
|
||||
var r = "AGBSD"
|
||||
if BeaconNodeRole.AttestationData in roles:
|
||||
|
@ -362,7 +422,8 @@ proc init*(t: typedesc[BeaconNodeServerRef], remote: Uri,
|
|||
let server = BeaconNodeServerRef(
|
||||
client: client, endpoint: $remote, index: index, roles: roles,
|
||||
logIdent: client.address.hostname & ":" &
|
||||
Base10.toString(client.address.port)
|
||||
Base10.toString(client.address.port),
|
||||
status: RestBeaconNodeStatus.Offline
|
||||
)
|
||||
ok(server)
|
||||
|
||||
|
@ -731,3 +792,7 @@ proc prepareRegistrationList*(
|
|||
incorrect_time = timed
|
||||
|
||||
return registrations
|
||||
|
||||
proc init*(t: typedesc[ApiNodeFailure], node: BeaconNodeServerRef,
|
||||
failure: ApiFailure): ApiNodeFailure =
|
||||
ApiNodeFailure(node: node, failure: failure)
|
||||
|
|
|
@ -65,8 +65,9 @@ proc pollForValidatorIndices*(vc: ValidatorClientRef) {.async.} =
|
|||
let res =
|
||||
try:
|
||||
await vc.getValidators(idents, ApiStrategyKind.First)
|
||||
except ValidatorApiError:
|
||||
error "Unable to get head state's validator information"
|
||||
except ValidatorApiError as exc:
|
||||
error "Unable to get head state's validator information",
|
||||
reason = exc.getFailureReason()
|
||||
return
|
||||
except CancelledError as exc:
|
||||
debug "Validator's indices processing was interrupted"
|
||||
|
@ -138,8 +139,9 @@ proc pollForAttesterDuties*(vc: ValidatorClientRef,
|
|||
let res =
|
||||
try:
|
||||
await vc.getAttesterDuties(epoch, indices, ApiStrategyKind.First)
|
||||
except ValidatorApiError:
|
||||
error "Unable to get attester duties", epoch = epoch
|
||||
except ValidatorApiError as exc:
|
||||
notice "Unable to get attester duties", epoch = epoch,
|
||||
reason = exc.getFailureReason()
|
||||
return 0
|
||||
except CancelledError as exc:
|
||||
debug "Attester duties processing was interrupted"
|
||||
|
@ -271,8 +273,9 @@ proc pollForSyncCommitteeDuties*(vc: ValidatorClientRef,
|
|||
res =
|
||||
try:
|
||||
await vc.getSyncCommitteeDuties(epoch, indices, ApiStrategyKind.First)
|
||||
except ValidatorApiError:
|
||||
error "Unable to get sync committee duties", epoch = epoch
|
||||
except ValidatorApiError as exc:
|
||||
notice "Unable to get sync committee duties", epoch = epoch,
|
||||
reason = exc.getFailureReason()
|
||||
return 0
|
||||
except CancelledError as exc:
|
||||
debug "Sync committee duties processing was interrupted"
|
||||
|
@ -502,9 +505,9 @@ proc pollForBeaconProposers*(vc: ValidatorClientRef) {.async.} =
|
|||
else:
|
||||
debug "No relevant proposer duties received", slot = currentSlot,
|
||||
duties_count = len(duties)
|
||||
except ValidatorApiError:
|
||||
debug "Unable to get proposer duties", slot = currentSlot,
|
||||
epoch = currentEpoch
|
||||
except ValidatorApiError as exc:
|
||||
notice "Unable to get proposer duties", slot = currentSlot,
|
||||
epoch = currentEpoch, reason = exc.getFailureReason()
|
||||
except CancelledError as exc:
|
||||
debug "Proposer duties processing was interrupted"
|
||||
raise exc
|
||||
|
@ -531,7 +534,7 @@ proc prepareBeaconProposers*(service: DutiesServiceRef) {.async.} =
|
|||
except ValidatorApiError as exc:
|
||||
warn "Unable to prepare beacon proposers", slot = currentSlot,
|
||||
epoch = currentEpoch, err_name = exc.name,
|
||||
err_msg = exc.msg
|
||||
err_msg = exc.msg, reason = exc.getFailureReason()
|
||||
0
|
||||
except CancelledError as exc:
|
||||
debug "Beacon proposer preparation processing was interrupted"
|
||||
|
@ -575,7 +578,7 @@ proc registerValidators*(service: DutiesServiceRef) {.async.} =
|
|||
except ValidatorApiError as exc:
|
||||
warn "Unable to register validators", slot = currentSlot,
|
||||
fork = genesisFork, err_name = exc.name,
|
||||
err_msg = exc.msg
|
||||
err_msg = exc.msg, reason = exc.getFailureReason()
|
||||
0
|
||||
except CancelledError as exc:
|
||||
debug "Validator registration was interrupted", slot = currentSlot,
|
||||
|
|
|
@ -12,71 +12,47 @@ const
|
|||
|
||||
logScope: service = ServiceName
|
||||
|
||||
type
|
||||
BeaconNodesCounters* = object
|
||||
online*: int
|
||||
offline*: int
|
||||
uninitalized*: int
|
||||
incompatible*: int
|
||||
nosync*: int
|
||||
|
||||
proc onlineNodes*(vc: ValidatorClientRef,
|
||||
roles: set[BeaconNodeRole] = {}): seq[BeaconNodeServerRef] =
|
||||
if len(roles) == 0:
|
||||
vc.beaconNodes.filterIt(it.status == RestBeaconNodeStatus.Online)
|
||||
else:
|
||||
vc.beaconNodes.filterIt((it.roles * roles != {}) and
|
||||
(it.status == RestBeaconNodeStatus.Online))
|
||||
|
||||
proc onlineNodesCount*(vc: ValidatorClientRef,
|
||||
proc nodesCount*(vc: ValidatorClientRef,
|
||||
statuses: set[RestBeaconNodeStatus],
|
||||
roles: set[BeaconNodeRole] = {}): int =
|
||||
if len(roles) == 0:
|
||||
vc.beaconNodes.countIt(it.status == RestBeaconNodeStatus.Online)
|
||||
vc.beaconNodes.countIt(it.status in statuses)
|
||||
else:
|
||||
vc.beaconNodes.countIt((it.roles * roles != {}) and
|
||||
(it.status == RestBeaconNodeStatus.Online))
|
||||
vc.beaconNodes.countIt((it.roles * roles != {}) and (it.status in statuses))
|
||||
|
||||
proc unusableNodes*(vc: ValidatorClientRef): seq[BeaconNodeServerRef] =
|
||||
vc.beaconNodes.filterIt(it.status != RestBeaconNodeStatus.Online)
|
||||
proc filterNodes*(vc: ValidatorClientRef, statuses: set[RestBeaconNodeStatus],
|
||||
roles: set[BeaconNodeRole] = {}): seq[BeaconNodeServerRef] =
|
||||
if len(roles) == 0:
|
||||
vc.beaconNodes.filterIt(it.status in statuses)
|
||||
else:
|
||||
vc.beaconNodes.filterIt((it.roles * roles != {}) and
|
||||
(it.status in statuses))
|
||||
|
||||
proc unusableNodesCount*(vc: ValidatorClientRef): int =
|
||||
vc.beaconNodes.countIt(it.status != RestBeaconNodeStatus.Online)
|
||||
proc otherNodes*(vc: ValidatorClientRef): seq[BeaconNodeServerRef] =
|
||||
vc.beaconNodes.filterIt(it.status != RestBeaconNodeStatus.Synced)
|
||||
|
||||
proc getNodeCounts*(vc: ValidatorClientRef): BeaconNodesCounters =
|
||||
var res = BeaconNodesCounters()
|
||||
for node in vc.beaconNodes:
|
||||
case node.status
|
||||
of RestBeaconNodeStatus.Uninitalized:
|
||||
inc(res.uninitalized)
|
||||
of RestBeaconNodeStatus.Offline:
|
||||
inc(res.offline)
|
||||
of RestBeaconNodeStatus.Incompatible:
|
||||
inc(res.incompatible)
|
||||
of RestBeaconNodeStatus.NotSynced:
|
||||
inc(res.nosync)
|
||||
of RestBeaconNodeStatus.Online:
|
||||
inc(res.online)
|
||||
res
|
||||
proc otherNodesCount*(vc: ValidatorClientRef): int =
|
||||
vc.beaconNodes.countIt(it.status != RestBeaconNodeStatus.Synced)
|
||||
|
||||
proc waitOnlineNodes*(vc: ValidatorClientRef, timeoutFut: Future[void] = nil,
|
||||
roles: set[BeaconNodeRole] = {}) {.async.} =
|
||||
proc waitNodes*(vc: ValidatorClientRef, timeoutFut: Future[void],
|
||||
statuses: set[RestBeaconNodeStatus],
|
||||
roles: set[BeaconNodeRole], waitChanges: bool) {.async.} =
|
||||
doAssert(not(isNil(vc.fallbackService)))
|
||||
var iterations = 0
|
||||
while true:
|
||||
if vc.onlineNodesCount(roles) != 0:
|
||||
if not(waitChanges) or (iterations != 0):
|
||||
if vc.nodesCount(statuses, roles) != 0:
|
||||
break
|
||||
else:
|
||||
if vc.fallbackService.onlineEvent.isSet():
|
||||
vc.fallbackService.onlineEvent.clear()
|
||||
warn "Connection with beacon node(s) has been lost",
|
||||
online_nodes = vc.onlineNodesCount(),
|
||||
unusable_nodes = vc.unusableNodesCount(),
|
||||
total_nodes = len(vc.beaconNodes)
|
||||
|
||||
if vc.fallbackService.changesEvent.isSet():
|
||||
vc.fallbackService.changesEvent.clear()
|
||||
|
||||
if isNil(timeoutFut):
|
||||
await vc.fallbackService.onlineEvent.wait()
|
||||
await vc.fallbackService.changesEvent.wait()
|
||||
else:
|
||||
let breakLoop =
|
||||
block:
|
||||
let waitFut = vc.fallbackService.onlineEvent.wait()
|
||||
let waitFut = vc.fallbackService.changesEvent.wait()
|
||||
try:
|
||||
discard await race(waitFut, timeoutFut)
|
||||
except CancelledError as exc:
|
||||
|
@ -92,8 +68,12 @@ proc waitOnlineNodes*(vc: ValidatorClientRef, timeoutFut: Future[void] = nil,
|
|||
if breakLoop:
|
||||
break
|
||||
|
||||
proc checkCompatible(vc: ValidatorClientRef,
|
||||
node: BeaconNodeServerRef) {.async.} =
|
||||
inc(iterations)
|
||||
|
||||
proc checkCompatible(
|
||||
vc: ValidatorClientRef,
|
||||
node: BeaconNodeServerRef
|
||||
): Future[RestBeaconNodeStatus] {.async.} =
|
||||
logScope: endpoint = node
|
||||
let info =
|
||||
try:
|
||||
|
@ -102,18 +82,17 @@ proc checkCompatible(vc: ValidatorClientRef,
|
|||
res.data.data
|
||||
except CancelledError as exc:
|
||||
debug "Configuration request was interrupted"
|
||||
node.status = RestBeaconNodeStatus.Offline
|
||||
raise exc
|
||||
except RestError as exc:
|
||||
if node.status != RestBeaconNodeStatus.Offline:
|
||||
debug "Unable to obtain beacon node's configuration",
|
||||
error_name = exc.name, error_message = exc.msg
|
||||
node.status = RestBeaconNodeStatus.Offline
|
||||
return
|
||||
return RestBeaconNodeStatus.Offline
|
||||
except CatchableError as exc:
|
||||
if node.status != RestBeaconNodeStatus.Offline:
|
||||
error "Unexpected exception", error_name = exc.name,
|
||||
error_message = exc.msg
|
||||
node.status = RestBeaconNodeStatus.Offline
|
||||
return
|
||||
return RestBeaconNodeStatus.Offline
|
||||
|
||||
let genesis =
|
||||
try:
|
||||
|
@ -122,18 +101,17 @@ proc checkCompatible(vc: ValidatorClientRef,
|
|||
res.data.data
|
||||
except CancelledError as exc:
|
||||
debug "Genesis request was interrupted"
|
||||
node.status = RestBeaconNodeStatus.Offline
|
||||
raise exc
|
||||
except RestError as exc:
|
||||
if node.status != RestBeaconNodeStatus.Offline:
|
||||
debug "Unable to obtain beacon node's genesis",
|
||||
error_name = exc.name, error_message = exc.msg
|
||||
node.status = RestBeaconNodeStatus.Offline
|
||||
return
|
||||
return RestBeaconNodeStatus.Offline
|
||||
except CatchableError as exc:
|
||||
if node.status != RestBeaconNodeStatus.Offline:
|
||||
error "Unexpected exception", error_name = exc.name,
|
||||
error_message = exc.msg
|
||||
node.status = RestBeaconNodeStatus.Offline
|
||||
return
|
||||
return RestBeaconNodeStatus.Offline
|
||||
|
||||
let genesisFlag = (genesis != vc.beaconGenesis)
|
||||
let configFlag =
|
||||
|
@ -160,18 +138,24 @@ proc checkCompatible(vc: ValidatorClientRef,
|
|||
info.DOMAIN_SELECTION_PROOF != DOMAIN_SELECTION_PROOF or
|
||||
info.DOMAIN_AGGREGATE_AND_PROOF != DOMAIN_AGGREGATE_AND_PROOF
|
||||
|
||||
let res =
|
||||
if configFlag or genesisFlag:
|
||||
node.status = RestBeaconNodeStatus.Incompatible
|
||||
if node.status != RestBeaconNodeStatus.Incompatible:
|
||||
warn "Beacon node has incompatible configuration",
|
||||
genesis_flag = genesisFlag, config_flag = configFlag
|
||||
RestBeaconNodeStatus.Incompatible
|
||||
else:
|
||||
info "Beacon node has compatible configuration"
|
||||
if node.status != RestBeaconNodeStatus.Compatible:
|
||||
debug "Beacon node has compatible configuration"
|
||||
node.config = some(info)
|
||||
node.genesis = some(genesis)
|
||||
node.status = RestBeaconNodeStatus.Online
|
||||
RestBeaconNodeStatus.Compatible
|
||||
return res
|
||||
|
||||
proc checkSync(vc: ValidatorClientRef,
|
||||
node: BeaconNodeServerRef) {.async.} =
|
||||
proc checkSync(
|
||||
vc: ValidatorClientRef,
|
||||
node: BeaconNodeServerRef
|
||||
): Future[RestBeaconNodeStatus] {.async.} =
|
||||
logScope: endpoint = node
|
||||
let syncInfo =
|
||||
try:
|
||||
|
@ -180,20 +164,19 @@ proc checkSync(vc: ValidatorClientRef,
|
|||
res.data.data
|
||||
except CancelledError as exc:
|
||||
debug "Sync status request was interrupted"
|
||||
node.status = RestBeaconNodeStatus.Offline
|
||||
raise exc
|
||||
except RestError as exc:
|
||||
if node.status != RestBeaconNodeStatus.Offline:
|
||||
debug "Unable to obtain beacon node's sync status",
|
||||
error_name = exc.name, error_message = exc.msg
|
||||
node.status = RestBeaconNodeStatus.Offline
|
||||
return
|
||||
return RestBeaconNodeStatus.Offline
|
||||
except CatchableError as exc:
|
||||
if node.status != RestBeaconNodeStatus.Offline:
|
||||
error "Unexpected exception", error_name = exc.name,
|
||||
error_message = exc.msg
|
||||
node.status = RestBeaconNodeStatus.Offline
|
||||
return
|
||||
return RestBeaconNodeStatus.Offline
|
||||
node.syncInfo = some(syncInfo)
|
||||
node.status =
|
||||
let res =
|
||||
block:
|
||||
let optimistic =
|
||||
if syncInfo.is_optimistic.isNone():
|
||||
|
@ -203,20 +186,29 @@ proc checkSync(vc: ValidatorClientRef,
|
|||
|
||||
if not(syncInfo.is_syncing) or (syncInfo.sync_distance < SYNC_TOLERANCE):
|
||||
if not(syncInfo.is_optimistic.get(false)):
|
||||
info "Beacon node is in sync", sync_distance = syncInfo.sync_distance,
|
||||
if node.status != RestBeaconNodeStatus.Synced:
|
||||
info "Beacon node is in sync",
|
||||
sync_distance = syncInfo.sync_distance,
|
||||
head_slot = syncInfo.head_slot, is_optimistic = optimistic
|
||||
RestBeaconNodeStatus.Online
|
||||
RestBeaconNodeStatus.Synced
|
||||
else:
|
||||
warn "Execution client not in sync (beacon node optimistically synced)",
|
||||
if node.status != RestBeaconNodeStatus.OptSynced:
|
||||
info "Execution client not in sync " &
|
||||
"(beacon node optimistically synced)",
|
||||
sync_distance = syncInfo.sync_distance,
|
||||
head_slot = syncInfo.head_slot, is_optimistic = optimistic
|
||||
RestBeaconNodeStatus.OptSynced
|
||||
else:
|
||||
if node.status != RestBeaconNodeStatus.NotSynced:
|
||||
warn "Beacon node not in sync",
|
||||
sync_distance = syncInfo.sync_distance,
|
||||
head_slot = syncInfo.head_slot, is_optimistic = optimistic
|
||||
RestBeaconNodeStatus.NotSynced
|
||||
else:
|
||||
warn "Beacon node not in sync", sync_distance = syncInfo.sync_distance,
|
||||
head_slot = syncInfo.head_slot, is_optimistic = optimistic
|
||||
RestBeaconNodeStatus.NotSynced
|
||||
return res
|
||||
|
||||
proc checkOnline(node: BeaconNodeServerRef) {.async.} =
|
||||
proc checkOnline(
|
||||
node: BeaconNodeServerRef
|
||||
): Future[RestBeaconNodeStatus] {.async.} =
|
||||
logScope: endpoint = node
|
||||
debug "Checking beacon node status"
|
||||
let agent =
|
||||
|
@ -225,40 +217,60 @@ proc checkOnline(node: BeaconNodeServerRef) {.async.} =
|
|||
res.data.data
|
||||
except CancelledError as exc:
|
||||
debug "Status request was interrupted"
|
||||
node.status = RestBeaconNodeStatus.Offline
|
||||
raise exc
|
||||
except RestError as exc:
|
||||
if node.status != RestBeaconNodeStatus.Offline:
|
||||
debug "Unable to check beacon node's status",
|
||||
error_name = exc.name, error_message = exc.msg
|
||||
node.status = RestBeaconNodeStatus.Offline
|
||||
return
|
||||
return RestBeaconNodeStatus.Offline
|
||||
except CatchableError as exc:
|
||||
if node.status != RestBeaconNodeStatus.Offline:
|
||||
error "Unexpected exception", error_name = exc.name,
|
||||
error_message = exc.msg
|
||||
node.status = RestBeaconNodeStatus.Offline
|
||||
return
|
||||
info "Beacon node has been identified", agent = agent.version
|
||||
node.ident = some(agent.version)
|
||||
node.status = RestBeaconNodeStatus.Online
|
||||
return RestBeaconNodeStatus.Offline
|
||||
if node.status != RestBeaconNodeStatus.Online:
|
||||
debug "Beacon node has been identified", agent = agent.version
|
||||
return RestBeaconNodeStatus.Online
|
||||
|
||||
proc checkNode(vc: ValidatorClientRef,
|
||||
node: BeaconNodeServerRef) {.async.} =
|
||||
debug "Checking beacon node", endpoint = node
|
||||
await node.checkOnline()
|
||||
if node.status != RestBeaconNodeStatus.Online:
|
||||
return
|
||||
await vc.checkCompatible(node)
|
||||
if node.status != RestBeaconNodeStatus.Online:
|
||||
return
|
||||
await vc.checkSync(node)
|
||||
node: BeaconNodeServerRef): Future[bool] {.async.} =
|
||||
let nstatus = node.status
|
||||
debug "Checking beacon node", endpoint = node, status = node.status
|
||||
|
||||
proc checkNodes*(service: FallbackServiceRef) {.async.} =
|
||||
if nstatus in {RestBeaconNodeStatus.Offline}:
|
||||
let status = await node.checkOnline()
|
||||
node.status = status
|
||||
if status != RestBeaconNodeStatus.Online:
|
||||
return nstatus != status
|
||||
|
||||
if nstatus in {RestBeaconNodeStatus.Offline,
|
||||
RestBeaconNodeStatus.Online,
|
||||
RestBeaconNodeStatus.Incompatible}:
|
||||
let status = await vc.checkCompatible(node)
|
||||
node.status = status
|
||||
if status != RestBeaconNodeStatus.Compatible:
|
||||
return nstatus != status
|
||||
|
||||
if nstatus in {RestBeaconNodeStatus.Offline,
|
||||
RestBeaconNodeStatus.Online,
|
||||
RestBeaconNodeStatus.Incompatible,
|
||||
RestBeaconNodeStatus.Compatible,
|
||||
RestBeaconNodeStatus.OptSynced,
|
||||
RestBeaconNodeStatus.NotSynced}:
|
||||
let status = await vc.checkSync(node)
|
||||
node.status = status
|
||||
return nstatus != status
|
||||
|
||||
proc checkNodes*(service: FallbackServiceRef): Future[bool] {.async.} =
|
||||
let
|
||||
nodesToCheck = service.client.unusableNodes()
|
||||
nodesToCheck = service.client.otherNodes()
|
||||
pendingChecks = nodesToCheck.mapIt(service.client.checkNode(it))
|
||||
|
||||
var res = false
|
||||
try:
|
||||
await allFutures(pendingChecks)
|
||||
for fut in pendingChecks:
|
||||
if fut.completed() and fut.read():
|
||||
res = true
|
||||
except CancelledError as exc:
|
||||
var pending: seq[Future[void]]
|
||||
for future in pendingChecks:
|
||||
|
@ -266,6 +278,7 @@ proc checkNodes*(service: FallbackServiceRef) {.async.} =
|
|||
pending.add(future.cancelAndWait())
|
||||
await allFutures(pending)
|
||||
raise exc
|
||||
return res
|
||||
|
||||
proc mainLoop(service: FallbackServiceRef) {.async.} =
|
||||
let vc = service.client
|
||||
|
@ -278,19 +291,8 @@ proc mainLoop(service: FallbackServiceRef) {.async.} =
|
|||
# become safe to combine loops, breaks and exception handlers.
|
||||
let breakLoop =
|
||||
try:
|
||||
await service.checkNodes()
|
||||
if await service.checkNodes(): service.changesEvent.fire()
|
||||
await sleepAsync(2.seconds)
|
||||
if service.client.onlineNodesCount() != 0:
|
||||
service.onlineEvent.fire()
|
||||
else:
|
||||
let counter = vc.getNodeCounts()
|
||||
warn "No suitable beacon nodes available",
|
||||
online_nodes = counter.online,
|
||||
offline_nodes = counter.offline,
|
||||
uninitalized_nodes = counter.uninitalized,
|
||||
incompatible_nodes = counter.incompatible,
|
||||
nonsynced_nodes = counter.nosync,
|
||||
total_nodes = len(vc.beaconNodes)
|
||||
false
|
||||
except CancelledError as exc:
|
||||
debug "Service interrupted"
|
||||
|
@ -308,10 +310,10 @@ proc init*(t: typedesc[FallbackServiceRef],
|
|||
logScope: service = ServiceName
|
||||
var res = FallbackServiceRef(name: ServiceName, client: vc,
|
||||
state: ServiceState.Initialized,
|
||||
onlineEvent: newAsyncEvent())
|
||||
changesEvent: newAsyncEvent())
|
||||
debug "Initializing service"
|
||||
# Perform initial nodes check.
|
||||
await res.checkNodes()
|
||||
if await res.checkNodes(): res.changesEvent.fire()
|
||||
return res
|
||||
|
||||
proc start*(service: FallbackServiceRef) =
|
||||
|
|
|
@ -53,7 +53,8 @@ proc pollForFork(vc: ValidatorClientRef) {.async.} =
|
|||
try:
|
||||
await vc.getForkSchedule(ApiStrategyKind.Best)
|
||||
except ValidatorApiError as exc:
|
||||
error "Unable to retrieve fork schedule", reason = exc.msg
|
||||
error "Unable to retrieve fork schedule",
|
||||
reason = exc.getFailureReason(), err_msg = exc.msg
|
||||
return
|
||||
except CancelledError as exc:
|
||||
debug "Fork retrieval process was interrupted"
|
||||
|
|
|
@ -57,11 +57,12 @@ proc serveSyncCommitteeMessage*(service: SyncCommitteeServiceRef,
|
|||
let res =
|
||||
try:
|
||||
await vc.submitPoolSyncCommitteeSignature(message, ApiStrategyKind.First)
|
||||
except ValidatorApiError:
|
||||
except ValidatorApiError as exc:
|
||||
error "Unable to publish sync committee message",
|
||||
message = shortLog(message),
|
||||
validator = shortLog(validator),
|
||||
validator_index = vindex
|
||||
validator_index = vindex,
|
||||
reason = exc.getFailureReason()
|
||||
return false
|
||||
except CancelledError:
|
||||
debug "Publish sync committee message request was interrupted"
|
||||
|
@ -171,12 +172,13 @@ proc serveContributionAndProof*(service: SyncCommitteeServiceRef,
|
|||
try:
|
||||
await vc.publishContributionAndProofs(@[restSignedProof],
|
||||
ApiStrategyKind.First)
|
||||
except ValidatorApiError as err:
|
||||
except ValidatorApiError as exc:
|
||||
error "Unable to publish sync contribution",
|
||||
contribution = shortLog(proof.contribution),
|
||||
validator = shortLog(validator),
|
||||
validator_index = validatorIdx,
|
||||
err_msg = err.msg
|
||||
err_msg = exc.msg,
|
||||
reason = exc.getFailureReason()
|
||||
false
|
||||
except CancelledError:
|
||||
debug "Publish sync contribution request was interrupted"
|
||||
|
@ -278,9 +280,10 @@ proc produceAndPublishContributions(service: SyncCommitteeServiceRef,
|
|||
let aggContribution =
|
||||
try:
|
||||
await contributionsFuts[item.subcommitteeIdx]
|
||||
except ValidatorApiError:
|
||||
except ValidatorApiError as exc:
|
||||
error "Unable to get sync message contribution data", slot = slot,
|
||||
beaconBlockRoot = shortLog(beaconBlockRoot)
|
||||
beaconBlockRoot = shortLog(beaconBlockRoot),
|
||||
reason = exc.getFailureReason()
|
||||
return
|
||||
except CancelledError:
|
||||
debug "Request for sync message contribution was interrupted"
|
||||
|
@ -357,12 +360,13 @@ proc publishSyncMessagesAndContributions(service: SyncCommitteeServiceRef,
|
|||
res.data.root
|
||||
else:
|
||||
if res.execution_optimistic.get():
|
||||
error "Could not obtain head block's root because beacon node " &
|
||||
"only optimistically synced", slot = slot
|
||||
notice "Execution client not in sync; skipping validator duties " &
|
||||
"for now", slot = slot
|
||||
return
|
||||
res.data.root
|
||||
except ValidatorApiError as exc:
|
||||
error "Unable to retrieve head block's root to sign", reason = exc.msg
|
||||
error "Unable to retrieve head block's root to sign", reason = exc.msg,
|
||||
reason = exc.getFailureReason()
|
||||
return
|
||||
except CancelledError:
|
||||
debug "Block root request was interrupted"
|
||||
|
@ -376,9 +380,9 @@ proc publishSyncMessagesAndContributions(service: SyncCommitteeServiceRef,
|
|||
await service.produceAndPublishSyncCommitteeMessages(slot,
|
||||
beaconBlockRoot,
|
||||
duties)
|
||||
except ValidatorApiError:
|
||||
except ValidatorApiError as exc:
|
||||
error "Unable to proceed sync committee messages", slot = slot,
|
||||
duties_count = len(duties)
|
||||
duties_count = len(duties), reason = exc.getFailureReason()
|
||||
return
|
||||
except CancelledError:
|
||||
debug "Sync committee producing process was interrupted"
|
||||
|
|
Loading…
Reference in New Issue