VC: Use not-synced/opt-synced BNs. (#4635)

* Initial commit.

* Address review comments and recommendations.

* Fix overly frequent `Execution client not in sync` messages in logs.

* Add failure reason for duties requests.

* Add failure reasons everywhere ValidatorApiError is handled.

* Address race condition issue.

* Remove `vc` argument for getFailureReason().
This commit is contained in:
Eugene Kabanov 2023-02-23 02:11:00 +02:00 committed by GitHub
parent 1dd07d5def
commit 08b6bb7a6b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 1099 additions and 502 deletions

View File

@ -161,11 +161,33 @@ proc onSlotStart(vc: ValidatorClientRef, wallTime: BeaconTime,
if checkIfShouldStopAtEpoch(wallSlot.slot, vc.config.stopAtEpoch): if checkIfShouldStopAtEpoch(wallSlot.slot, vc.config.stopAtEpoch):
return true return true
if len(vc.beaconNodes) > 1:
let
counts = vc.getNodeCounts()
# Good nodes are nodes which can be used for ALL the requests.
goodNodes = counts.data[int(RestBeaconNodeStatus.Synced)]
# Viable nodes are nodes which can be used only for SOME of the requests.
viableNodes = counts.data[int(RestBeaconNodeStatus.OptSynced)] +
counts.data[int(RestBeaconNodeStatus.NotSynced)] +
counts.data[int(RestBeaconNodeStatus.Compatible)]
# Bad nodes are nodes which can't be used at all.
badNodes = counts.data[int(RestBeaconNodeStatus.Offline)] +
counts.data[int(RestBeaconNodeStatus.Online)] +
counts.data[int(RestBeaconNodeStatus.Incompatible)]
info "Slot start", info "Slot start",
slot = shortLog(wallSlot.slot), slot = shortLog(wallSlot.slot),
attestationIn = vc.getDurationToNextAttestation(wallSlot.slot), attestationIn = vc.getDurationToNextAttestation(wallSlot.slot),
blockIn = vc.getDurationToNextBlock(wallSlot.slot), blockIn = vc.getDurationToNextBlock(wallSlot.slot),
validators = vc.attachedValidators[].count(), validators = vc.attachedValidators[].count(),
good_nodes = goodNodes, viable_nodes = viableNodes, bad_nodes = badNodes,
delay = shortLog(delay)
else:
info "Slot start",
slot = shortLog(wallSlot.slot),
attestationIn = vc.getDurationToNextAttestation(wallSlot.slot),
blockIn = vc.getDurationToNextBlock(wallSlot.slot),
validators = vc.attachedValidators[].count(),
node_status = $vc.beaconNodes[0].status,
delay = shortLog(delay) delay = shortLog(delay)
return false return false

File diff suppressed because it is too large Load Diff

View File

@ -81,11 +81,12 @@ proc serveAttestation(service: AttestationServiceRef, adata: AttestationData,
let res = let res =
try: try:
await vc.submitPoolAttestations(@[attestation], ApiStrategyKind.First) await vc.submitPoolAttestations(@[attestation], ApiStrategyKind.First)
except ValidatorApiError: except ValidatorApiError as exc:
error "Unable to publish attestation", error "Unable to publish attestation",
attestation = shortLog(attestation), attestation = shortLog(attestation),
validator = shortLog(validator), validator = shortLog(validator),
validator_index = vindex validator_index = vindex,
reason = exc.getFailureReason()
return false return false
except CancelledError as exc: except CancelledError as exc:
debug "Attestation publishing process was interrupted" debug "Attestation publishing process was interrupted"
@ -160,11 +161,12 @@ proc serveAggregateAndProof*(service: AttestationServiceRef,
let res = let res =
try: try:
await vc.publishAggregateAndProofs(@[signedProof], ApiStrategyKind.First) await vc.publishAggregateAndProofs(@[signedProof], ApiStrategyKind.First)
except ValidatorApiError: except ValidatorApiError as exc:
error "Unable to publish aggregated attestation", error "Unable to publish aggregated attestation",
attestation = shortLog(signedProof.message.aggregate), attestation = shortLog(signedProof.message.aggregate),
validator = shortLog(validator), validator = shortLog(validator),
validator_index = vindex validator_index = vindex,
reason = exc.getFailureReason()
return false return false
except CancelledError as exc: except CancelledError as exc:
debug "Publish aggregate and proofs request was interrupted" debug "Publish aggregate and proofs request was interrupted"
@ -287,9 +289,10 @@ proc produceAndPublishAggregates(service: AttestationServiceRef,
try: try:
await vc.getAggregatedAttestation(slot, attestationRoot, await vc.getAggregatedAttestation(slot, attestationRoot,
ApiStrategyKind.Best) ApiStrategyKind.Best)
except ValidatorApiError: except ValidatorApiError as exc:
error "Unable to get aggregated attestation data", slot = slot, error "Unable to get aggregated attestation data", slot = slot,
attestation_root = shortLog(attestationRoot) attestation_root = shortLog(attestationRoot),
reason = exc.getFailureReason()
return return
except CancelledError as exc: except CancelledError as exc:
debug "Aggregated attestation request was interrupted" debug "Aggregated attestation request was interrupted"
@ -360,9 +363,10 @@ proc publishAttestationsAndAggregates(service: AttestationServiceRef,
let ad = let ad =
try: try:
await service.produceAndPublishAttestations(slot, committee_index, duties) await service.produceAndPublishAttestations(slot, committee_index, duties)
except ValidatorApiError: except ValidatorApiError as exc:
error "Unable to proceed attestations", slot = slot, error "Unable to proceed attestations", slot = slot,
committee_index = committee_index, duties_count = len(duties) committee_index = committee_index, duties_count = len(duties),
reason = exc.getFailureReason()
return return
except CancelledError as exc: except CancelledError as exc:
debug "Publish attestation request was interrupted" debug "Publish attestation request was interrupted"

View File

@ -38,8 +38,8 @@ proc produceBlock(
try: try:
await vc.produceBlockV2(slot, randao_reveal, graffiti, await vc.produceBlockV2(slot, randao_reveal, graffiti,
ApiStrategyKind.Best) ApiStrategyKind.Best)
except ValidatorApiError: except ValidatorApiError as exc:
error "Unable to retrieve block data" error "Unable to retrieve block data", reason = exc.getFailureReason()
return Opt.none(PreparedBeaconBlock) return Opt.none(PreparedBeaconBlock)
except CancelledError as exc: except CancelledError as exc:
error "Block data production has been interrupted" error "Block data production has been interrupted"
@ -69,7 +69,8 @@ proc produceBlindedBlock(
await vc.produceBlindedBlock(slot, randao_reveal, graffiti, await vc.produceBlindedBlock(slot, randao_reveal, graffiti,
ApiStrategyKind.Best) ApiStrategyKind.Best)
except ValidatorApiError as exc: except ValidatorApiError as exc:
error "Unable to retrieve blinded block data", error_msg = exc.msg error "Unable to retrieve blinded block data", error_msg = exc.msg,
reason = exc.getFailureReason()
return Opt.none(PreparedBlindedBeaconBlock) return Opt.none(PreparedBlindedBeaconBlock)
except CancelledError as exc: except CancelledError as exc:
error "Blinded block data production has been interrupted" error "Blinded block data production has been interrupted"
@ -214,8 +215,9 @@ proc publishBlock(vc: ValidatorClientRef, currentSlot, slot: Slot,
try: try:
debug "Sending blinded block" debug "Sending blinded block"
await vc.publishBlindedBlock(signedBlock, ApiStrategyKind.First) await vc.publishBlindedBlock(signedBlock, ApiStrategyKind.First)
except ValidatorApiError: except ValidatorApiError as exc:
error "Unable to publish blinded block" error "Unable to publish blinded block",
reason = exc.getFailureReason()
return return
except CancelledError as exc: except CancelledError as exc:
debug "Blinded block publication has been interrupted" debug "Blinded block publication has been interrupted"
@ -275,8 +277,8 @@ proc publishBlock(vc: ValidatorClientRef, currentSlot, slot: Slot,
try: try:
debug "Sending block" debug "Sending block"
await vc.publishBlock(signedBlock, ApiStrategyKind.First) await vc.publishBlock(signedBlock, ApiStrategyKind.First)
except ValidatorApiError: except ValidatorApiError as exc:
error "Unable to publish block" error "Unable to publish block", reason = exc.getFailureReason()
return return
except CancelledError as exc: except CancelledError as exc:
debug "Block publication has been interrupted" debug "Block publication has been interrupted"

View File

@ -64,7 +64,7 @@ type
DutiesServiceRef* = ref object of ClientServiceRef DutiesServiceRef* = ref object of ClientServiceRef
FallbackServiceRef* = ref object of ClientServiceRef FallbackServiceRef* = ref object of ClientServiceRef
onlineEvent*: AsyncEvent changesEvent*: AsyncEvent
ForkServiceRef* = ref object of ClientServiceRef ForkServiceRef* = ref object of ClientServiceRef
@ -127,7 +127,16 @@ type
duties*: Table[Epoch, SyncCommitteeDuty] duties*: Table[Epoch, SyncCommitteeDuty]
RestBeaconNodeStatus* {.pure.} = enum RestBeaconNodeStatus* {.pure.} = enum
Uninitalized, Offline, Incompatible, NotSynced, Online Offline, ## BN is offline.
Online, ## BN is online, passed checkOnline() check.
Incompatible, ## BN configuration is NOT compatible with VC configuration.
Compatible, ## BN configuration is compatible with VC configuration.
NotSynced, ## BN is not in sync.
OptSynced, ## BN is optimistically synced (EL is not in sync).
Synced ## BN and EL are synced.
BeaconNodesCounters* = object
data*: array[int(high(RestBeaconNodeStatus)) + 1, int]
BeaconNodeServerRef* = ref BeaconNodeServer BeaconNodeServerRef* = ref BeaconNodeServer
@ -176,10 +185,18 @@ type
validatorsRegCache*: Table[ValidatorPubKey, SignedValidatorRegistrationV1] validatorsRegCache*: Table[ValidatorPubKey, SignedValidatorRegistrationV1]
rng*: ref HmacDrbgContext rng*: ref HmacDrbgContext
ApiFailure* {.pure.} = enum
Communication, Invalid, NotFound, NotSynced, Internal, Unexpected
ApiNodeFailure* = object
node*: BeaconNodeServerRef
failure*: ApiFailure
ValidatorClientRef* = ref ValidatorClient ValidatorClientRef* = ref ValidatorClient
ValidatorClientError* = object of CatchableError ValidatorClientError* = object of CatchableError
ValidatorApiError* = object of ValidatorClientError ValidatorApiError* = object of ValidatorClientError
data*: seq[ApiNodeFailure]
const const
DefaultDutyAndProof* = DutyAndProof(epoch: Epoch(0xFFFF_FFFF_FFFF_FFFF'u64)) DefaultDutyAndProof* = DutyAndProof(epoch: Epoch(0xFFFF_FFFF_FFFF_FFFF'u64))
@ -225,6 +242,49 @@ proc `$`*(roles: set[BeaconNodeRole]): string =
else: else:
"{}" "{}"
proc `$`*(status: RestBeaconNodeStatus): string =
  ## Returns the short lowercase label of a beacon node status.
  ##
  ## The `case` has no `else` branch on purpose: every enum member is listed,
  ## so adding a new status without a label becomes a compile-time error.
  case status
  of RestBeaconNodeStatus.Synced:
    "synced"
  of RestBeaconNodeStatus.OptSynced:
    # Optimistically synced: the execution layer is the part that lags.
    "el-unsynced"
  of RestBeaconNodeStatus.NotSynced:
    "bn-unsynced"
  of RestBeaconNodeStatus.Compatible:
    "compatible"
  of RestBeaconNodeStatus.Incompatible:
    "incompatible"
  of RestBeaconNodeStatus.Online:
    "online"
  of RestBeaconNodeStatus.Offline:
    "offline"
proc `$`*(failure: ApiFailure): string =
  ## Returns a human-readable sentence describing an API failure kind.
  ##
  ## The `case` lists every `ApiFailure` member without an `else` branch, so
  ## a newly added failure kind must get its own message to compile.
  case failure
  of ApiFailure.Communication:
    "Connection with beacon node has been lost"
  of ApiFailure.Invalid:
    "Invalid response received from beacon node"
  of ApiFailure.NotFound:
    # Grammar fix: the message previously read "did not found".
    "Beacon node did not find requested entity"
  of ApiFailure.NotSynced:
    "Beacon node not in sync with network"
  of ApiFailure.Internal:
    "Beacon node reports internal failure"
  of ApiFailure.Unexpected:
    "Beacon node reports unexpected status"
proc getNodeCounts*(vc: ValidatorClientRef): BeaconNodesCounters =
  ## Counts the validator client's beacon nodes per status.
  ##
  ## The returned `data` array is indexed by the ordinal of
  ## `RestBeaconNodeStatus`; each slot holds the number of nodes in
  ## `vc.beaconNodes` currently reporting that status.
  # `result` starts zero-initialized, so every counter begins at 0.
  for server in vc.beaconNodes:
    let slot = int(server.status)
    result.data[slot] = result.data[slot] + 1
proc getFailureReason*(exc: ref ValidatorApiError): string =
  ## Returns a human-readable reason for a failed validator API request.
  ##
  ## The reason is the description of the `ApiFailure` kind that occurs most
  ## often among the per-node failures stored in `exc.data`. On a tie, the
  ## kind whose counter first reached the highest value while scanning
  ## `exc.data` in order wins.
  ##
  ## When `exc.data` is empty there is no failure kind to report, so the
  ## exception's own message (`exc.msg`) is returned instead.
  let errors = exc[].data
  if len(errors) == 0:
    # Guard: indexing `errors[0]` on an empty seq would raise IndexDefect
    # while the caller is merely trying to log the failure.
    exc.msg
  else:
    var
      counts: array[ApiFailure, int]
      maxCount = 0
      maxFailure = errors[0].failure
    for item in errors:
      inc(counts[item.failure])
      # Strict comparison keeps the earliest kind that reached the maximum.
      if counts[item.failure] > maxCount:
        maxCount = counts[item.failure]
        maxFailure = item.failure
    $maxFailure
proc shortLog*(roles: set[BeaconNodeRole]): string = proc shortLog*(roles: set[BeaconNodeRole]): string =
var r = "AGBSD" var r = "AGBSD"
if BeaconNodeRole.AttestationData in roles: if BeaconNodeRole.AttestationData in roles:
@ -362,7 +422,8 @@ proc init*(t: typedesc[BeaconNodeServerRef], remote: Uri,
let server = BeaconNodeServerRef( let server = BeaconNodeServerRef(
client: client, endpoint: $remote, index: index, roles: roles, client: client, endpoint: $remote, index: index, roles: roles,
logIdent: client.address.hostname & ":" & logIdent: client.address.hostname & ":" &
Base10.toString(client.address.port) Base10.toString(client.address.port),
status: RestBeaconNodeStatus.Offline
) )
ok(server) ok(server)
@ -731,3 +792,7 @@ proc prepareRegistrationList*(
incorrect_time = timed incorrect_time = timed
return registrations return registrations
proc init*(t: typedesc[ApiNodeFailure], node: BeaconNodeServerRef,
           failure: ApiFailure): ApiNodeFailure =
  ## Builds an `ApiNodeFailure` that pairs a beacon node with the failure
  ## kind observed while talking to it.
  result.node = node
  result.failure = failure

View File

@ -65,8 +65,9 @@ proc pollForValidatorIndices*(vc: ValidatorClientRef) {.async.} =
let res = let res =
try: try:
await vc.getValidators(idents, ApiStrategyKind.First) await vc.getValidators(idents, ApiStrategyKind.First)
except ValidatorApiError: except ValidatorApiError as exc:
error "Unable to get head state's validator information" error "Unable to get head state's validator information",
reason = exc.getFailureReason()
return return
except CancelledError as exc: except CancelledError as exc:
debug "Validator's indices processing was interrupted" debug "Validator's indices processing was interrupted"
@ -138,8 +139,9 @@ proc pollForAttesterDuties*(vc: ValidatorClientRef,
let res = let res =
try: try:
await vc.getAttesterDuties(epoch, indices, ApiStrategyKind.First) await vc.getAttesterDuties(epoch, indices, ApiStrategyKind.First)
except ValidatorApiError: except ValidatorApiError as exc:
error "Unable to get attester duties", epoch = epoch notice "Unable to get attester duties", epoch = epoch,
reason = exc.getFailureReason()
return 0 return 0
except CancelledError as exc: except CancelledError as exc:
debug "Attester duties processing was interrupted" debug "Attester duties processing was interrupted"
@ -271,8 +273,9 @@ proc pollForSyncCommitteeDuties*(vc: ValidatorClientRef,
res = res =
try: try:
await vc.getSyncCommitteeDuties(epoch, indices, ApiStrategyKind.First) await vc.getSyncCommitteeDuties(epoch, indices, ApiStrategyKind.First)
except ValidatorApiError: except ValidatorApiError as exc:
error "Unable to get sync committee duties", epoch = epoch notice "Unable to get sync committee duties", epoch = epoch,
reason = exc.getFailureReason()
return 0 return 0
except CancelledError as exc: except CancelledError as exc:
debug "Sync committee duties processing was interrupted" debug "Sync committee duties processing was interrupted"
@ -502,9 +505,9 @@ proc pollForBeaconProposers*(vc: ValidatorClientRef) {.async.} =
else: else:
debug "No relevant proposer duties received", slot = currentSlot, debug "No relevant proposer duties received", slot = currentSlot,
duties_count = len(duties) duties_count = len(duties)
except ValidatorApiError: except ValidatorApiError as exc:
debug "Unable to get proposer duties", slot = currentSlot, notice "Unable to get proposer duties", slot = currentSlot,
epoch = currentEpoch epoch = currentEpoch, reason = exc.getFailureReason()
except CancelledError as exc: except CancelledError as exc:
debug "Proposer duties processing was interrupted" debug "Proposer duties processing was interrupted"
raise exc raise exc
@ -531,7 +534,7 @@ proc prepareBeaconProposers*(service: DutiesServiceRef) {.async.} =
except ValidatorApiError as exc: except ValidatorApiError as exc:
warn "Unable to prepare beacon proposers", slot = currentSlot, warn "Unable to prepare beacon proposers", slot = currentSlot,
epoch = currentEpoch, err_name = exc.name, epoch = currentEpoch, err_name = exc.name,
err_msg = exc.msg err_msg = exc.msg, reason = exc.getFailureReason()
0 0
except CancelledError as exc: except CancelledError as exc:
debug "Beacon proposer preparation processing was interrupted" debug "Beacon proposer preparation processing was interrupted"
@ -575,7 +578,7 @@ proc registerValidators*(service: DutiesServiceRef) {.async.} =
except ValidatorApiError as exc: except ValidatorApiError as exc:
warn "Unable to register validators", slot = currentSlot, warn "Unable to register validators", slot = currentSlot,
fork = genesisFork, err_name = exc.name, fork = genesisFork, err_name = exc.name,
err_msg = exc.msg err_msg = exc.msg, reason = exc.getFailureReason()
0 0
except CancelledError as exc: except CancelledError as exc:
debug "Validator registration was interrupted", slot = currentSlot, debug "Validator registration was interrupted", slot = currentSlot,

View File

@ -12,71 +12,47 @@ const
logScope: service = ServiceName logScope: service = ServiceName
type proc nodesCount*(vc: ValidatorClientRef,
BeaconNodesCounters* = object statuses: set[RestBeaconNodeStatus],
online*: int
offline*: int
uninitalized*: int
incompatible*: int
nosync*: int
proc onlineNodes*(vc: ValidatorClientRef,
roles: set[BeaconNodeRole] = {}): seq[BeaconNodeServerRef] =
if len(roles) == 0:
vc.beaconNodes.filterIt(it.status == RestBeaconNodeStatus.Online)
else:
vc.beaconNodes.filterIt((it.roles * roles != {}) and
(it.status == RestBeaconNodeStatus.Online))
proc onlineNodesCount*(vc: ValidatorClientRef,
roles: set[BeaconNodeRole] = {}): int = roles: set[BeaconNodeRole] = {}): int =
if len(roles) == 0: if len(roles) == 0:
vc.beaconNodes.countIt(it.status == RestBeaconNodeStatus.Online) vc.beaconNodes.countIt(it.status in statuses)
else: else:
vc.beaconNodes.countIt((it.roles * roles != {}) and vc.beaconNodes.countIt((it.roles * roles != {}) and (it.status in statuses))
(it.status == RestBeaconNodeStatus.Online))
proc unusableNodes*(vc: ValidatorClientRef): seq[BeaconNodeServerRef] = proc filterNodes*(vc: ValidatorClientRef, statuses: set[RestBeaconNodeStatus],
vc.beaconNodes.filterIt(it.status != RestBeaconNodeStatus.Online) roles: set[BeaconNodeRole] = {}): seq[BeaconNodeServerRef] =
if len(roles) == 0:
vc.beaconNodes.filterIt(it.status in statuses)
else:
vc.beaconNodes.filterIt((it.roles * roles != {}) and
(it.status in statuses))
proc unusableNodesCount*(vc: ValidatorClientRef): int = proc otherNodes*(vc: ValidatorClientRef): seq[BeaconNodeServerRef] =
vc.beaconNodes.countIt(it.status != RestBeaconNodeStatus.Online) vc.beaconNodes.filterIt(it.status != RestBeaconNodeStatus.Synced)
proc getNodeCounts*(vc: ValidatorClientRef): BeaconNodesCounters = proc otherNodesCount*(vc: ValidatorClientRef): int =
var res = BeaconNodesCounters() vc.beaconNodes.countIt(it.status != RestBeaconNodeStatus.Synced)
for node in vc.beaconNodes:
case node.status
of RestBeaconNodeStatus.Uninitalized:
inc(res.uninitalized)
of RestBeaconNodeStatus.Offline:
inc(res.offline)
of RestBeaconNodeStatus.Incompatible:
inc(res.incompatible)
of RestBeaconNodeStatus.NotSynced:
inc(res.nosync)
of RestBeaconNodeStatus.Online:
inc(res.online)
res
proc waitOnlineNodes*(vc: ValidatorClientRef, timeoutFut: Future[void] = nil, proc waitNodes*(vc: ValidatorClientRef, timeoutFut: Future[void],
roles: set[BeaconNodeRole] = {}) {.async.} = statuses: set[RestBeaconNodeStatus],
roles: set[BeaconNodeRole], waitChanges: bool) {.async.} =
doAssert(not(isNil(vc.fallbackService))) doAssert(not(isNil(vc.fallbackService)))
var iterations = 0
while true: while true:
if vc.onlineNodesCount(roles) != 0: if not(waitChanges) or (iterations != 0):
if vc.nodesCount(statuses, roles) != 0:
break break
else:
if vc.fallbackService.onlineEvent.isSet(): if vc.fallbackService.changesEvent.isSet():
vc.fallbackService.onlineEvent.clear() vc.fallbackService.changesEvent.clear()
warn "Connection with beacon node(s) has been lost",
online_nodes = vc.onlineNodesCount(),
unusable_nodes = vc.unusableNodesCount(),
total_nodes = len(vc.beaconNodes)
if isNil(timeoutFut): if isNil(timeoutFut):
await vc.fallbackService.onlineEvent.wait() await vc.fallbackService.changesEvent.wait()
else: else:
let breakLoop = let breakLoop =
block: block:
let waitFut = vc.fallbackService.onlineEvent.wait() let waitFut = vc.fallbackService.changesEvent.wait()
try: try:
discard await race(waitFut, timeoutFut) discard await race(waitFut, timeoutFut)
except CancelledError as exc: except CancelledError as exc:
@ -92,8 +68,12 @@ proc waitOnlineNodes*(vc: ValidatorClientRef, timeoutFut: Future[void] = nil,
if breakLoop: if breakLoop:
break break
proc checkCompatible(vc: ValidatorClientRef, inc(iterations)
node: BeaconNodeServerRef) {.async.} =
proc checkCompatible(
vc: ValidatorClientRef,
node: BeaconNodeServerRef
): Future[RestBeaconNodeStatus] {.async.} =
logScope: endpoint = node logScope: endpoint = node
let info = let info =
try: try:
@ -102,18 +82,17 @@ proc checkCompatible(vc: ValidatorClientRef,
res.data.data res.data.data
except CancelledError as exc: except CancelledError as exc:
debug "Configuration request was interrupted" debug "Configuration request was interrupted"
node.status = RestBeaconNodeStatus.Offline
raise exc raise exc
except RestError as exc: except RestError as exc:
if node.status != RestBeaconNodeStatus.Offline:
debug "Unable to obtain beacon node's configuration", debug "Unable to obtain beacon node's configuration",
error_name = exc.name, error_message = exc.msg error_name = exc.name, error_message = exc.msg
node.status = RestBeaconNodeStatus.Offline return RestBeaconNodeStatus.Offline
return
except CatchableError as exc: except CatchableError as exc:
if node.status != RestBeaconNodeStatus.Offline:
error "Unexpected exception", error_name = exc.name, error "Unexpected exception", error_name = exc.name,
error_message = exc.msg error_message = exc.msg
node.status = RestBeaconNodeStatus.Offline return RestBeaconNodeStatus.Offline
return
let genesis = let genesis =
try: try:
@ -122,18 +101,17 @@ proc checkCompatible(vc: ValidatorClientRef,
res.data.data res.data.data
except CancelledError as exc: except CancelledError as exc:
debug "Genesis request was interrupted" debug "Genesis request was interrupted"
node.status = RestBeaconNodeStatus.Offline
raise exc raise exc
except RestError as exc: except RestError as exc:
if node.status != RestBeaconNodeStatus.Offline:
debug "Unable to obtain beacon node's genesis", debug "Unable to obtain beacon node's genesis",
error_name = exc.name, error_message = exc.msg error_name = exc.name, error_message = exc.msg
node.status = RestBeaconNodeStatus.Offline return RestBeaconNodeStatus.Offline
return
except CatchableError as exc: except CatchableError as exc:
if node.status != RestBeaconNodeStatus.Offline:
error "Unexpected exception", error_name = exc.name, error "Unexpected exception", error_name = exc.name,
error_message = exc.msg error_message = exc.msg
node.status = RestBeaconNodeStatus.Offline return RestBeaconNodeStatus.Offline
return
let genesisFlag = (genesis != vc.beaconGenesis) let genesisFlag = (genesis != vc.beaconGenesis)
let configFlag = let configFlag =
@ -160,18 +138,24 @@ proc checkCompatible(vc: ValidatorClientRef,
info.DOMAIN_SELECTION_PROOF != DOMAIN_SELECTION_PROOF or info.DOMAIN_SELECTION_PROOF != DOMAIN_SELECTION_PROOF or
info.DOMAIN_AGGREGATE_AND_PROOF != DOMAIN_AGGREGATE_AND_PROOF info.DOMAIN_AGGREGATE_AND_PROOF != DOMAIN_AGGREGATE_AND_PROOF
let res =
if configFlag or genesisFlag: if configFlag or genesisFlag:
node.status = RestBeaconNodeStatus.Incompatible if node.status != RestBeaconNodeStatus.Incompatible:
warn "Beacon node has incompatible configuration", warn "Beacon node has incompatible configuration",
genesis_flag = genesisFlag, config_flag = configFlag genesis_flag = genesisFlag, config_flag = configFlag
RestBeaconNodeStatus.Incompatible
else: else:
info "Beacon node has compatible configuration" if node.status != RestBeaconNodeStatus.Compatible:
debug "Beacon node has compatible configuration"
node.config = some(info) node.config = some(info)
node.genesis = some(genesis) node.genesis = some(genesis)
node.status = RestBeaconNodeStatus.Online RestBeaconNodeStatus.Compatible
return res
proc checkSync(vc: ValidatorClientRef, proc checkSync(
node: BeaconNodeServerRef) {.async.} = vc: ValidatorClientRef,
node: BeaconNodeServerRef
): Future[RestBeaconNodeStatus] {.async.} =
logScope: endpoint = node logScope: endpoint = node
let syncInfo = let syncInfo =
try: try:
@ -180,20 +164,19 @@ proc checkSync(vc: ValidatorClientRef,
res.data.data res.data.data
except CancelledError as exc: except CancelledError as exc:
debug "Sync status request was interrupted" debug "Sync status request was interrupted"
node.status = RestBeaconNodeStatus.Offline
raise exc raise exc
except RestError as exc: except RestError as exc:
if node.status != RestBeaconNodeStatus.Offline:
debug "Unable to obtain beacon node's sync status", debug "Unable to obtain beacon node's sync status",
error_name = exc.name, error_message = exc.msg error_name = exc.name, error_message = exc.msg
node.status = RestBeaconNodeStatus.Offline return RestBeaconNodeStatus.Offline
return
except CatchableError as exc: except CatchableError as exc:
if node.status != RestBeaconNodeStatus.Offline:
error "Unexpected exception", error_name = exc.name, error "Unexpected exception", error_name = exc.name,
error_message = exc.msg error_message = exc.msg
node.status = RestBeaconNodeStatus.Offline return RestBeaconNodeStatus.Offline
return
node.syncInfo = some(syncInfo) node.syncInfo = some(syncInfo)
node.status = let res =
block: block:
let optimistic = let optimistic =
if syncInfo.is_optimistic.isNone(): if syncInfo.is_optimistic.isNone():
@ -203,20 +186,29 @@ proc checkSync(vc: ValidatorClientRef,
if not(syncInfo.is_syncing) or (syncInfo.sync_distance < SYNC_TOLERANCE): if not(syncInfo.is_syncing) or (syncInfo.sync_distance < SYNC_TOLERANCE):
if not(syncInfo.is_optimistic.get(false)): if not(syncInfo.is_optimistic.get(false)):
info "Beacon node is in sync", sync_distance = syncInfo.sync_distance, if node.status != RestBeaconNodeStatus.Synced:
info "Beacon node is in sync",
sync_distance = syncInfo.sync_distance,
head_slot = syncInfo.head_slot, is_optimistic = optimistic head_slot = syncInfo.head_slot, is_optimistic = optimistic
RestBeaconNodeStatus.Online RestBeaconNodeStatus.Synced
else: else:
warn "Execution client not in sync (beacon node optimistically synced)", if node.status != RestBeaconNodeStatus.OptSynced:
info "Execution client not in sync " &
"(beacon node optimistically synced)",
sync_distance = syncInfo.sync_distance,
head_slot = syncInfo.head_slot, is_optimistic = optimistic
RestBeaconNodeStatus.OptSynced
else:
if node.status != RestBeaconNodeStatus.NotSynced:
warn "Beacon node not in sync",
sync_distance = syncInfo.sync_distance, sync_distance = syncInfo.sync_distance,
head_slot = syncInfo.head_slot, is_optimistic = optimistic head_slot = syncInfo.head_slot, is_optimistic = optimistic
RestBeaconNodeStatus.NotSynced RestBeaconNodeStatus.NotSynced
else: return res
warn "Beacon node not in sync", sync_distance = syncInfo.sync_distance,
head_slot = syncInfo.head_slot, is_optimistic = optimistic
RestBeaconNodeStatus.NotSynced
proc checkOnline(node: BeaconNodeServerRef) {.async.} = proc checkOnline(
node: BeaconNodeServerRef
): Future[RestBeaconNodeStatus] {.async.} =
logScope: endpoint = node logScope: endpoint = node
debug "Checking beacon node status" debug "Checking beacon node status"
let agent = let agent =
@ -225,40 +217,60 @@ proc checkOnline(node: BeaconNodeServerRef) {.async.} =
res.data.data res.data.data
except CancelledError as exc: except CancelledError as exc:
debug "Status request was interrupted" debug "Status request was interrupted"
node.status = RestBeaconNodeStatus.Offline
raise exc raise exc
except RestError as exc: except RestError as exc:
if node.status != RestBeaconNodeStatus.Offline:
debug "Unable to check beacon node's status", debug "Unable to check beacon node's status",
error_name = exc.name, error_message = exc.msg error_name = exc.name, error_message = exc.msg
node.status = RestBeaconNodeStatus.Offline return RestBeaconNodeStatus.Offline
return
except CatchableError as exc: except CatchableError as exc:
if node.status != RestBeaconNodeStatus.Offline:
error "Unexpected exception", error_name = exc.name, error "Unexpected exception", error_name = exc.name,
error_message = exc.msg error_message = exc.msg
node.status = RestBeaconNodeStatus.Offline return RestBeaconNodeStatus.Offline
return if node.status != RestBeaconNodeStatus.Online:
info "Beacon node has been identified", agent = agent.version debug "Beacon node has been identified", agent = agent.version
node.ident = some(agent.version) return RestBeaconNodeStatus.Online
node.status = RestBeaconNodeStatus.Online
proc checkNode(vc: ValidatorClientRef, proc checkNode(vc: ValidatorClientRef,
node: BeaconNodeServerRef) {.async.} = node: BeaconNodeServerRef): Future[bool] {.async.} =
debug "Checking beacon node", endpoint = node let nstatus = node.status
await node.checkOnline() debug "Checking beacon node", endpoint = node, status = node.status
if node.status != RestBeaconNodeStatus.Online:
return
await vc.checkCompatible(node)
if node.status != RestBeaconNodeStatus.Online:
return
await vc.checkSync(node)
proc checkNodes*(service: FallbackServiceRef) {.async.} = if nstatus in {RestBeaconNodeStatus.Offline}:
let status = await node.checkOnline()
node.status = status
if status != RestBeaconNodeStatus.Online:
return nstatus != status
if nstatus in {RestBeaconNodeStatus.Offline,
RestBeaconNodeStatus.Online,
RestBeaconNodeStatus.Incompatible}:
let status = await vc.checkCompatible(node)
node.status = status
if status != RestBeaconNodeStatus.Compatible:
return nstatus != status
if nstatus in {RestBeaconNodeStatus.Offline,
RestBeaconNodeStatus.Online,
RestBeaconNodeStatus.Incompatible,
RestBeaconNodeStatus.Compatible,
RestBeaconNodeStatus.OptSynced,
RestBeaconNodeStatus.NotSynced}:
let status = await vc.checkSync(node)
node.status = status
return nstatus != status
proc checkNodes*(service: FallbackServiceRef): Future[bool] {.async.} =
let let
nodesToCheck = service.client.unusableNodes() nodesToCheck = service.client.otherNodes()
pendingChecks = nodesToCheck.mapIt(service.client.checkNode(it)) pendingChecks = nodesToCheck.mapIt(service.client.checkNode(it))
var res = false
try: try:
await allFutures(pendingChecks) await allFutures(pendingChecks)
for fut in pendingChecks:
if fut.completed() and fut.read():
res = true
except CancelledError as exc: except CancelledError as exc:
var pending: seq[Future[void]] var pending: seq[Future[void]]
for future in pendingChecks: for future in pendingChecks:
@ -266,6 +278,7 @@ proc checkNodes*(service: FallbackServiceRef) {.async.} =
pending.add(future.cancelAndWait()) pending.add(future.cancelAndWait())
await allFutures(pending) await allFutures(pending)
raise exc raise exc
return res
proc mainLoop(service: FallbackServiceRef) {.async.} = proc mainLoop(service: FallbackServiceRef) {.async.} =
let vc = service.client let vc = service.client
@ -278,19 +291,8 @@ proc mainLoop(service: FallbackServiceRef) {.async.} =
# become safe to combine loops, breaks and exception handlers. # become safe to combine loops, breaks and exception handlers.
let breakLoop = let breakLoop =
try: try:
await service.checkNodes() if await service.checkNodes(): service.changesEvent.fire()
await sleepAsync(2.seconds) await sleepAsync(2.seconds)
if service.client.onlineNodesCount() != 0:
service.onlineEvent.fire()
else:
let counter = vc.getNodeCounts()
warn "No suitable beacon nodes available",
online_nodes = counter.online,
offline_nodes = counter.offline,
uninitalized_nodes = counter.uninitalized,
incompatible_nodes = counter.incompatible,
nonsynced_nodes = counter.nosync,
total_nodes = len(vc.beaconNodes)
false false
except CancelledError as exc: except CancelledError as exc:
debug "Service interrupted" debug "Service interrupted"
@ -308,10 +310,10 @@ proc init*(t: typedesc[FallbackServiceRef],
logScope: service = ServiceName logScope: service = ServiceName
var res = FallbackServiceRef(name: ServiceName, client: vc, var res = FallbackServiceRef(name: ServiceName, client: vc,
state: ServiceState.Initialized, state: ServiceState.Initialized,
onlineEvent: newAsyncEvent()) changesEvent: newAsyncEvent())
debug "Initializing service" debug "Initializing service"
# Perform initial nodes check. # Perform initial nodes check.
await res.checkNodes() if await res.checkNodes(): res.changesEvent.fire()
return res return res
proc start*(service: FallbackServiceRef) = proc start*(service: FallbackServiceRef) =

View File

@ -53,7 +53,8 @@ proc pollForFork(vc: ValidatorClientRef) {.async.} =
try: try:
await vc.getForkSchedule(ApiStrategyKind.Best) await vc.getForkSchedule(ApiStrategyKind.Best)
except ValidatorApiError as exc: except ValidatorApiError as exc:
error "Unable to retrieve fork schedule", reason = exc.msg error "Unable to retrieve fork schedule",
reason = exc.getFailureReason(), err_msg = exc.msg
return return
except CancelledError as exc: except CancelledError as exc:
debug "Fork retrieval process was interrupted" debug "Fork retrieval process was interrupted"

View File

@ -57,11 +57,12 @@ proc serveSyncCommitteeMessage*(service: SyncCommitteeServiceRef,
let res = let res =
try: try:
await vc.submitPoolSyncCommitteeSignature(message, ApiStrategyKind.First) await vc.submitPoolSyncCommitteeSignature(message, ApiStrategyKind.First)
except ValidatorApiError: except ValidatorApiError as exc:
error "Unable to publish sync committee message", error "Unable to publish sync committee message",
message = shortLog(message), message = shortLog(message),
validator = shortLog(validator), validator = shortLog(validator),
validator_index = vindex validator_index = vindex,
reason = exc.getFailureReason()
return false return false
except CancelledError: except CancelledError:
debug "Publish sync committee message request was interrupted" debug "Publish sync committee message request was interrupted"
@ -171,12 +172,13 @@ proc serveContributionAndProof*(service: SyncCommitteeServiceRef,
try: try:
await vc.publishContributionAndProofs(@[restSignedProof], await vc.publishContributionAndProofs(@[restSignedProof],
ApiStrategyKind.First) ApiStrategyKind.First)
except ValidatorApiError as err: except ValidatorApiError as exc:
error "Unable to publish sync contribution", error "Unable to publish sync contribution",
contribution = shortLog(proof.contribution), contribution = shortLog(proof.contribution),
validator = shortLog(validator), validator = shortLog(validator),
validator_index = validatorIdx, validator_index = validatorIdx,
err_msg = err.msg err_msg = exc.msg,
reason = exc.getFailureReason()
false false
except CancelledError: except CancelledError:
debug "Publish sync contribution request was interrupted" debug "Publish sync contribution request was interrupted"
@ -278,9 +280,10 @@ proc produceAndPublishContributions(service: SyncCommitteeServiceRef,
let aggContribution = let aggContribution =
try: try:
await contributionsFuts[item.subcommitteeIdx] await contributionsFuts[item.subcommitteeIdx]
except ValidatorApiError: except ValidatorApiError as exc:
error "Unable to get sync message contribution data", slot = slot, error "Unable to get sync message contribution data", slot = slot,
beaconBlockRoot = shortLog(beaconBlockRoot) beaconBlockRoot = shortLog(beaconBlockRoot),
reason = exc.getFailureReason()
return return
except CancelledError: except CancelledError:
debug "Request for sync message contribution was interrupted" debug "Request for sync message contribution was interrupted"
@ -357,12 +360,13 @@ proc publishSyncMessagesAndContributions(service: SyncCommitteeServiceRef,
res.data.root res.data.root
else: else:
if res.execution_optimistic.get(): if res.execution_optimistic.get():
error "Could not obtain head block's root because beacon node " & notice "Execution client not in sync; skipping validator duties " &
"only optimistically synced", slot = slot "for now", slot = slot
return return
res.data.root res.data.root
except ValidatorApiError as exc: except ValidatorApiError as exc:
error "Unable to retrieve head block's root to sign", reason = exc.msg error "Unable to retrieve head block's root to sign", err_msg = exc.msg,
reason = exc.getFailureReason()
return return
except CancelledError: except CancelledError:
debug "Block root request was interrupted" debug "Block root request was interrupted"
@ -376,9 +380,9 @@ proc publishSyncMessagesAndContributions(service: SyncCommitteeServiceRef,
await service.produceAndPublishSyncCommitteeMessages(slot, await service.produceAndPublishSyncCommitteeMessages(slot,
beaconBlockRoot, beaconBlockRoot,
duties) duties)
except ValidatorApiError: except ValidatorApiError as exc:
error "Unable to proceed sync committee messages", slot = slot, error "Unable to proceed sync committee messages", slot = slot,
duties_count = len(duties) duties_count = len(duties), reason = exc.getFailureReason()
return return
except CancelledError: except CancelledError:
debug "Sync committee producing process was interrupted" debug "Sync committee producing process was interrupted"