VC: Hardening and optimizing time handling. (#4743)

* Fix durationToNextSlot() and durationToNextEpoch() to work not only after Genesis, but also before Genesis.
Change VC pre-genesis behavior, add runPreGenesisWaitingLoop() and runGenesisWaitingLoop().
Add checkedWaitForSlot() and checkedWaitForNextSlot() to strictly check current time and print warnings.
Fix VC main loop to use checkedWaitForNextSlot().
Fix attestation_service to run attestation processing only until the end of the duty slot.
Change attestation_service main loop to use checkedWaitForNextSlot().
Change block_service to properly cancel all the pending proposer tasks.
Use checkedWaitForSlot to wait for block proposal.
Fix block_service waitForBlockPublished() to be compatible with BN.
Fix sync_committee_service to avoid asyncSpawn.
Fix sync_committee_service to run only until the end of the duty slot.
Fix sync_committee_service to use checkedWaitForNextSlot().

* Refactor validator logging.
Fix missing delay in aggregated attestation publishing.

* Fix doppelganger detection so that it does not start at pre-genesis time.
Fix fallback service sync status spam.
Fix false `sync committee subnets subscription error`.

* Address review comments part 1.

* Address review comments.

* Fix condition issue for near genesis waiting loop.

* Address review comments.

* Address review comments 2.
This commit is contained in:
Eugene Kabanov 2023-04-18 00:31:54 +03:00 committed by GitHub
parent 228e10f1d9
commit b51152153a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 862 additions and 517 deletions

View File

@ -70,18 +70,39 @@ proc fromNow*(c: BeaconClock, slot: Slot): tuple[inFuture: bool, offset: Duratio
c.fromNow(slot.start_beacon_time()) c.fromNow(slot.start_beacon_time())
proc durationToNextSlot*(c: BeaconClock): Duration = proc durationToNextSlot*(c: BeaconClock): Duration =
let (afterGenesis, slot) = c.now().toSlot() let
if afterGenesis: currentTime = c.now()
c.fromNow(slot + 1'u64).offset currentSlot = currentTime.toSlot()
if currentSlot.afterGenesis:
let nextSlot = currentSlot.slot + 1
chronos.nanoseconds(
(nextSlot.start_beacon_time() - currentTime).nanoseconds)
else: else:
c.fromNow(Slot(0)).offset # absoluteTime = BeaconTime(-currentTime.ns_since_genesis).
let
absoluteTime = Slot(0).start_beacon_time() +
(Slot(0).start_beacon_time() - currentTime)
timeToNextSlot = absoluteTime - currentSlot.slot.start_beacon_time()
chronos.nanoseconds(timeToNextSlot.nanoseconds)
proc durationToNextEpoch*(c: BeaconClock): Duration = proc durationToNextEpoch*(c: BeaconClock): Duration =
let (afterGenesis, slot) = c.now().toSlot() let
if afterGenesis: currentTime = c.now()
c.fromNow((slot.epoch + 1).start_slot()).offset currentSlot = currentTime.toSlot()
if currentSlot.afterGenesis:
let nextEpochSlot = (currentSlot.slot.epoch() + 1).start_slot()
chronos.nanoseconds(
(nextEpochSlot.start_beacon_time() - currentTime).nanoseconds)
else: else:
c.fromNow(Epoch(0).start_slot()).offset # absoluteTime = BeaconTime(-currentTime.ns_since_genesis).
let
absoluteTime = Slot(0).start_beacon_time() +
(Slot(0).start_beacon_time() - currentTime)
timeToNextEpoch = absoluteTime -
currentSlot.slot.epoch().start_slot().start_beacon_time()
chronos.nanoseconds(timeToNextEpoch.nanoseconds)
func saturate*(d: tuple[inFuture: bool, offset: Duration]): Duration = func saturate*(d: tuple[inFuture: bool, offset: Duration]): Duration =
if d.inFuture: d.offset else: seconds(0) if d.inFuture: d.offset else: seconds(0)

View File

@ -9,9 +9,12 @@ import
libp2p/crypto/crypto, libp2p/crypto/crypto,
./rpc/rest_key_management_api, ./rpc/rest_key_management_api,
./validator_client/[ ./validator_client/[
common, fallback_service, duties_service, fork_service, common, fallback_service, duties_service, fork_service, block_service,
doppelganger_service, attestation_service, sync_committee_service] doppelganger_service, attestation_service, sync_committee_service]
const
PREGENESIS_EPOCHS_COUNT = 1
proc initGenesis(vc: ValidatorClientRef): Future[RestGenesis] {.async.} = proc initGenesis(vc: ValidatorClientRef): Future[RestGenesis] {.async.} =
info "Initializing genesis", nodes_count = len(vc.beaconNodes) info "Initializing genesis", nodes_count = len(vc.beaconNodes)
var nodes = vc.beaconNodes var nodes = vc.beaconNodes
@ -93,16 +96,22 @@ proc initValidators(vc: ValidatorClientRef): Future[bool] {.async.} =
proc initClock(vc: ValidatorClientRef): Future[BeaconClock] {.async.} = proc initClock(vc: ValidatorClientRef): Future[BeaconClock] {.async.} =
# This procedure performs initialization of BeaconClock using current genesis # This procedure performs initialization of BeaconClock using current genesis
# information. It also performs waiting for genesis. # information. It also performs waiting for genesis.
let res = BeaconClock.init(vc.beaconGenesis.genesis_time) let
let currentSlot = res.now().slotOrZero() res = BeaconClock.init(vc.beaconGenesis.genesis_time)
let currentEpoch = currentSlot.epoch() currentTime = res.now()
currentSlot = currentTime.slotOrZero()
currentEpoch = currentSlot.epoch()
genesisTime = res.fromNow(Slot(0))
if genesisTime.inFuture:
info "Initializing beacon clock",
genesis_time = vc.beaconGenesis.genesis_time,
current_slot = "<n/a>", current_epoch = "<n/a>",
time_to_genesis = genesisTime.offset
else:
info "Initializing beacon clock", info "Initializing beacon clock",
genesis_time = vc.beaconGenesis.genesis_time, genesis_time = vc.beaconGenesis.genesis_time,
current_slot = currentSlot, current_epoch = currentEpoch current_slot = currentSlot, current_epoch = currentEpoch
let genesisTime = res.fromNow(start_beacon_time(Slot(0)))
if genesisTime.inFuture:
notice "Waiting for genesis", genesisIn = genesisTime.offset
await sleepAsync(genesisTime.offset)
return res return res
proc initMetrics(vc: ValidatorClientRef): Future[bool] {.async.} = proc initMetrics(vc: ValidatorClientRef): Future[bool] {.async.} =
@ -139,27 +148,35 @@ proc shutdownSlashingProtection(vc: ValidatorClientRef) =
info "Closing slashing protection", path = vc.config.validatorsDir() info "Closing slashing protection", path = vc.config.validatorsDir()
vc.attachedValidators[].slashingProtection.close() vc.attachedValidators[].slashingProtection.close()
proc onSlotStart(vc: ValidatorClientRef, wallTime: BeaconTime, proc runVCSlotLoop(vc: ValidatorClientRef) {.async.} =
lastSlot: Slot): Future[bool] {.async.} = var
## Called at the beginning of a slot - usually every slot, but sometimes might startTime = vc.beaconClock.now()
## skip a few in case we're running late. curSlot = startTime.slotOrZero()
## wallTime: current system time - we will strive to perform all duties up nextSlot = curSlot + 1 # No earlier than GENESIS_SLOT + 1
## to this point in time timeToNextSlot = nextSlot.start_beacon_time() - startTime
## lastSlot: the last slot that we successfully processed, so we know where to
## start work from - there might be jumps if processing is delayed info "Scheduling first slot action",
start_time = shortLog(startTime),
current_slot = shortLog(curSlot),
next_slot = shortLog(nextSlot),
time_to_next_slot = shortLog(timeToNextSlot)
var currentSlot = Opt.some(curSlot)
while true:
currentSlot = await vc.checkedWaitForNextSlot(currentSlot, ZeroTimeDiff,
true)
if currentSlot.isNone():
## Fatal log line should be printed by checkedWaitForNextSlot().
return
let let
# The slot we should be at, according to the clock wallTime = vc.beaconClock.now()
beaconTime = wallTime wallSlot = currentSlot.get()
wallSlot = wallTime.toSlot() delay = wallTime - wallSlot.start_beacon_time()
let if checkIfShouldStopAtEpoch(wallSlot, vc.config.stopAtEpoch):
# If everything was working perfectly, the slot that we should be processing return
expectedSlot = lastSlot + 1
delay = wallTime - expectedSlot.start_beacon_time()
if checkIfShouldStopAtEpoch(wallSlot.slot, vc.config.stopAtEpoch):
return true
if len(vc.beaconNodes) > 1: if len(vc.beaconNodes) > 1:
let let
@ -175,23 +192,23 @@ proc onSlotStart(vc: ValidatorClientRef, wallTime: BeaconTime,
counts.data[int(RestBeaconNodeStatus.Online)] + counts.data[int(RestBeaconNodeStatus.Online)] +
counts.data[int(RestBeaconNodeStatus.Incompatible)] counts.data[int(RestBeaconNodeStatus.Incompatible)]
info "Slot start", info "Slot start",
slot = shortLog(wallSlot.slot), slot = shortLog(wallSlot),
attestationIn = vc.getDurationToNextAttestation(wallSlot.slot), epoch = shortLog(wallSlot.epoch()),
blockIn = vc.getDurationToNextBlock(wallSlot.slot), attestationIn = vc.getDurationToNextAttestation(wallSlot),
blockIn = vc.getDurationToNextBlock(wallSlot),
validators = vc.attachedValidators[].count(), validators = vc.attachedValidators[].count(),
good_nodes = goodNodes, viable_nodes = viableNodes, bad_nodes = badNodes, good_nodes = goodNodes, viable_nodes = viableNodes,
delay = shortLog(delay) bad_nodes = badNodes, delay = shortLog(delay)
else: else:
info "Slot start", info "Slot start",
slot = shortLog(wallSlot.slot), slot = shortLog(wallSlot),
attestationIn = vc.getDurationToNextAttestation(wallSlot.slot), epoch = shortLog(wallSlot.epoch()),
blockIn = vc.getDurationToNextBlock(wallSlot.slot), attestationIn = vc.getDurationToNextAttestation(wallSlot),
blockIn = vc.getDurationToNextBlock(wallSlot),
validators = vc.attachedValidators[].count(), validators = vc.attachedValidators[].count(),
node_status = $vc.beaconNodes[0].status, node_status = $vc.beaconNodes[0].status,
delay = shortLog(delay) delay = shortLog(delay)
return false
proc new*(T: type ValidatorClientRef, proc new*(T: type ValidatorClientRef,
config: ValidatorClientConf, config: ValidatorClientConf,
rng: ref HmacDrbgContext): ValidatorClientRef = rng: ref HmacDrbgContext): ValidatorClientRef =
@ -224,6 +241,8 @@ proc new*(T: type ValidatorClientRef,
config: config, config: config,
beaconNodes: beaconNodes, beaconNodes: beaconNodes,
graffitiBytes: config.graffiti.get(defaultGraffitiBytes()), graffitiBytes: config.graffiti.get(defaultGraffitiBytes()),
preGenesisEvent: newAsyncEvent(),
genesisEvent: newAsyncEvent(),
nodesAvailable: newAsyncEvent(), nodesAvailable: newAsyncEvent(),
forksAvailable: newAsyncEvent(), forksAvailable: newAsyncEvent(),
doppelExit: newAsyncEvent(), doppelExit: newAsyncEvent(),
@ -239,6 +258,8 @@ proc new*(T: type ValidatorClientRef,
config: config, config: config,
beaconNodes: beaconNodes, beaconNodes: beaconNodes,
graffitiBytes: config.graffiti.get(defaultGraffitiBytes()), graffitiBytes: config.graffiti.get(defaultGraffitiBytes()),
preGenesisEvent: newAsyncEvent(),
genesisEvent: newAsyncEvent(),
nodesAvailable: newAsyncEvent(), nodesAvailable: newAsyncEvent(),
forksAvailable: newAsyncEvent(), forksAvailable: newAsyncEvent(),
indicesAvailable: newAsyncEvent(), indicesAvailable: newAsyncEvent(),
@ -295,6 +316,7 @@ proc asyncInit(vc: ValidatorClientRef): Future[ValidatorClientRef] {.async.} =
vc.dutiesService = await DutiesServiceRef.init(vc) vc.dutiesService = await DutiesServiceRef.init(vc)
vc.doppelgangerService = await DoppelgangerServiceRef.init(vc) vc.doppelgangerService = await DoppelgangerServiceRef.init(vc)
vc.attestationService = await AttestationServiceRef.init(vc) vc.attestationService = await AttestationServiceRef.init(vc)
vc.blockService = await BlockServiceRef.init(vc)
vc.syncCommitteeService = await SyncCommitteeServiceRef.init(vc) vc.syncCommitteeService = await SyncCommitteeServiceRef.init(vc)
vc.keymanagerServer = keymanagerInitResult.server vc.keymanagerServer = keymanagerInitResult.server
if vc.keymanagerServer != nil: if vc.keymanagerServer != nil:
@ -322,12 +344,65 @@ proc asyncInit(vc: ValidatorClientRef): Future[ValidatorClientRef] {.async.} =
return vc return vc
proc runPreGenesisWaitingLoop(vc: ValidatorClientRef) {.async.} =
## Waits, one `durationToNextSlot()` sleep at a time, for as long as genesis
## is still in the future and the epoch derived from the current time is not
## below `PREGENESIS_EPOCHS_COUNT`; `vc.preGenesisEvent` is fired at the end.
## Cancellation and unexpected errors are logged and end the wait; they are
## not re-raised to the caller.
## NOTE(review): this listing has lost its indentation. `fire()` being the
## last statement suggests it runs once after the loop - confirm in the file.
var breakLoop = false
while not(breakLoop):
let
genesisTime = vc.beaconClock.fromNow(Slot(0))
# NOTE(review): presumably, while genesis is ahead, `toSlot().slot` is the
# distance to genesis and not an absolute slot - confirm against `toSlot()`.
currentEpoch = vc.beaconClock.now().toSlot().slot.epoch()
# Stop waiting once genesis has passed or the epoch value drops below the
# pre-genesis threshold.
if not(genesisTime.inFuture) or currentEpoch < PREGENESIS_EPOCHS_COUNT:
break
notice "Waiting for genesis",
genesis_time = vc.beaconGenesis.genesis_time,
time_to_genesis = genesisTime.offset
# The `try` expression yields the new loop flag: `false` after a normal
# sleep, `true` when the sleep was cancelled or failed.
breakLoop =
try:
await sleepAsync(vc.beaconClock.durationToNextSlot())
false
except CancelledError:
debug "Pre-genesis waiting loop was interrupted"
true
except CatchableError as exc:
error "Pre-genesis waiting loop failed with unexpected error",
err_name = $exc.name, err_msg = $exc.msg
true
vc.preGenesisEvent.fire()
proc runGenesisWaitingLoop(vc: ValidatorClientRef) {.async.} =
## Waits, one `durationToNextSlot()` sleep at a time, for as long as
## `fromNow(Slot(0))` reports genesis to be in the future; `vc.genesisEvent`
## is fired at the end.
## Cancellation and unexpected errors are logged and end the wait; they are
## not re-raised to the caller.
## NOTE(review): this listing has lost its indentation. `fire()` being the
## last statement suggests it runs once after the loop - confirm in the file.
var breakLoop = false
while not(breakLoop):
let genesisTime = vc.beaconClock.fromNow(Slot(0))
# Genesis has been reached (or already passed): nothing left to wait for.
if not(genesisTime.inFuture):
break
notice "Waiting for genesis",
genesis_time = vc.beaconGenesis.genesis_time,
time_to_genesis = genesisTime.offset
# The `try` expression yields the new loop flag: `false` after a normal
# sleep, `true` when the sleep was cancelled or failed.
breakLoop =
try:
await sleepAsync(vc.beaconClock.durationToNextSlot())
false
except CancelledError:
debug "Genesis waiting loop was interrupted"
true
except CatchableError as exc:
error "Genesis waiting loop failed with unexpected error",
err_name = $exc.name, err_msg = $exc.msg
true
vc.genesisEvent.fire()
proc asyncRun*(vc: ValidatorClientRef) {.async.} = proc asyncRun*(vc: ValidatorClientRef) {.async.} =
vc.fallbackService.start() vc.fallbackService.start()
vc.forkService.start() vc.forkService.start()
vc.dutiesService.start() vc.dutiesService.start()
vc.doppelgangerService.start() vc.doppelgangerService.start()
vc.attestationService.start() vc.attestationService.start()
vc.blockService.start()
vc.syncCommitteeService.start() vc.syncCommitteeService.start()
if not isNil(vc.keymanagerServer): if not isNil(vc.keymanagerServer):
@ -337,7 +412,12 @@ proc asyncRun*(vc: ValidatorClientRef) {.async.} =
let doppelEventFut = vc.doppelExit.wait() let doppelEventFut = vc.doppelExit.wait()
try: try:
vc.runSlotLoopFut = runSlotLoop(vc, vc.beaconClock.now(), onSlotStart) # Waiting for `GENESIS - PREGENESIS_EPOCHS_COUNT` loop.
await vc.runPreGenesisWaitingLoop()
# Waiting for `GENESIS` loop.
await vc.runGenesisWaitingLoop()
# Main processing loop.
vc.runSlotLoopFut = vc.runVCSlotLoop()
vc.runKeystoreCachePruningLoopFut = vc.runKeystoreCachePruningLoopFut =
runKeystorecachePruningLoop(vc.keystoreCache) runKeystorecachePruningLoop(vc.keystoreCache)
discard await race(vc.runSlotLoopFut, doppelEventFut) discard await race(vc.runSlotLoopFut, doppelEventFut)
@ -355,8 +435,6 @@ proc asyncRun*(vc: ValidatorClientRef) {.async.} =
if doppelEventFut.completed(): if doppelEventFut.completed():
# Critically, database has been shut down - the rest doesn't matter, we need # Critically, database has been shut down - the rest doesn't matter, we need
# to stop as soon as possible # to stop as soon as possible
# TODO we need to actually quit _before_ any other async tasks have had the
# chance to happen
quitDoppelganger() quitDoppelganger()
debug "Stopping main processing loop" debug "Stopping main processing loop"
@ -373,10 +451,10 @@ proc asyncRun*(vc: ValidatorClientRef) {.async.} =
pending.add(vc.dutiesService.stop()) pending.add(vc.dutiesService.stop())
pending.add(vc.doppelgangerService.stop()) pending.add(vc.doppelgangerService.stop())
pending.add(vc.attestationService.stop()) pending.add(vc.attestationService.stop())
pending.add(vc.blockService.stop())
pending.add(vc.syncCommitteeService.stop()) pending.add(vc.syncCommitteeService.stop())
if not isNil(vc.keymanagerServer): if not isNil(vc.keymanagerServer):
pending.add(vc.keymanagerServer.stop()) pending.add(vc.keymanagerServer.stop())
await allFutures(pending) await allFutures(pending)
template runWithSignals(vc: ValidatorClientRef, body: untyped): bool = template runWithSignals(vc: ValidatorClientRef, body: untyped): bool =

View File

@ -33,6 +33,9 @@ proc serveAttestation(service: AttestationServiceRef, adata: AttestationData,
doAssert(validator.index.isSome()) doAssert(validator.index.isSome())
let vindex = validator.index.get() let vindex = validator.index.get()
logScope:
validator = validatorLog(validator)
# TODO: signing_root is recomputed in getAttestationSignature just after, # TODO: signing_root is recomputed in getAttestationSignature just after,
# but not for locally attached validators. # but not for locally attached validators.
let signingRoot = let signingRoot =
@ -47,8 +50,7 @@ proc serveAttestation(service: AttestationServiceRef, adata: AttestationData,
warn "Slashing protection activated for attestation", warn "Slashing protection activated for attestation",
attestationData = shortLog(adata), attestationData = shortLog(adata),
signingRoot = shortLog(signingRoot), signingRoot = shortLog(signingRoot),
validator = shortLog(validator), badVoteDetails = $notSlashable.error
validator_index = vindex, badVoteDetails = $notSlashable.error
return false return false
let attestation = block: let attestation = block:
@ -57,8 +59,7 @@ proc serveAttestation(service: AttestationServiceRef, adata: AttestationData,
let res = await validator.getAttestationSignature( let res = await validator.getAttestationSignature(
fork, vc.beaconGenesis.genesis_validators_root, adata) fork, vc.beaconGenesis.genesis_validators_root, adata)
if res.isErr(): if res.isErr():
warn "Unable to sign attestation", validator = shortLog(validator), warn "Unable to sign attestation", reason = res.error()
error_msg = res.error()
return false return false
res.get() res.get()
except CancelledError as exc: except CancelledError as exc:
@ -74,46 +75,35 @@ proc serveAttestation(service: AttestationServiceRef, adata: AttestationData,
int(duty.data.committee_length), adata, signature).expect( int(duty.data.committee_length), adata, signature).expect(
"data validity checked earlier") "data validity checked earlier")
debug "Sending attestation", attestation = shortLog(attestation), logScope:
validator = shortLog(validator), validator_index = vindex, attestation = shortLog(attestation)
delay = vc.getDelay(adata.slot.attestation_deadline()) delay = vc.getDelay(adata.slot.attestation_deadline())
debug "Sending attestation"
validator.doppelgangerActivity(attestation.data.slot.epoch) validator.doppelgangerActivity(attestation.data.slot.epoch)
let res = let res =
try: try:
await vc.submitPoolAttestations(@[attestation], ApiStrategyKind.First) await vc.submitPoolAttestations(@[attestation], ApiStrategyKind.First)
except ValidatorApiError as exc: except ValidatorApiError as exc:
warn "Unable to publish attestation", warn "Unable to publish attestation", reason = exc.getFailureReason()
attestation = shortLog(attestation),
validator = shortLog(validator),
validator_index = vindex,
reason = exc.getFailureReason()
return false return false
except CancelledError as exc: except CancelledError as exc:
debug "Attestation publishing process was interrupted" debug "Attestation publishing process was interrupted"
raise exc raise exc
except CatchableError as exc: except CatchableError as exc:
error "Unexpected error occured while publishing attestation", error "Unexpected error occured while publishing attestation",
attestation = shortLog(attestation),
validator = shortLog(validator),
validator_index = vindex,
err_name = exc.name, err_msg = exc.msg err_name = exc.name, err_msg = exc.msg
return false return false
let delay = vc.getDelay(adata.slot.attestation_deadline())
if res: if res:
let delay = vc.getDelay(adata.slot.attestation_deadline())
beacon_attestations_sent.inc() beacon_attestations_sent.inc()
beacon_attestation_sent_delay.observe(delay.toFloatSeconds()) beacon_attestation_sent_delay.observe(delay.toFloatSeconds())
notice "Attestation published", attestation = shortLog(attestation), notice "Attestation published"
validator = shortLog(validator),
validator_index = vindex,
delay = delay
else: else:
warn "Attestation was not accepted by beacon node", warn "Attestation was not accepted by beacon node"
attestation = shortLog(attestation),
validator = shortLog(validator),
validator_index = vindex, delay = delay
return res return res
proc serveAggregateAndProof*(service: AttestationServiceRef, proc serveAggregateAndProof*(service: AttestationServiceRef,
@ -124,21 +114,21 @@ proc serveAggregateAndProof*(service: AttestationServiceRef,
vc = service.client vc = service.client
genesisRoot = vc.beaconGenesis.genesis_validators_root genesisRoot = vc.beaconGenesis.genesis_validators_root
slot = proof.aggregate.data.slot slot = proof.aggregate.data.slot
vindex = validator.index.get()
fork = vc.forkAtEpoch(slot.epoch) fork = vc.forkAtEpoch(slot.epoch)
debug "Signing aggregate", validator = shortLog(validator), logScope:
attestation = shortLog(proof.aggregate), fork = fork validator = validatorLog(validator)
attestation = shortLog(proof.aggregate)
debug "Signing aggregate", fork = fork
let signature = let signature =
try: try:
let res = await validator.getAggregateAndProofSignature( let res =
fork, genesisRoot, proof) await validator.getAggregateAndProofSignature(fork, genesisRoot, proof)
if res.isErr(): if res.isErr():
warn "Unable to sign aggregate and proof using remote signer", warn "Unable to sign aggregate and proof using remote signer",
validator = shortLog(validator), reason = res.error()
attestation = shortLog(proof.aggregate),
error_msg = res.error()
return false return false
res.get() res.get()
except CancelledError as exc: except CancelledError as exc:
@ -146,20 +136,16 @@ proc serveAggregateAndProof*(service: AttestationServiceRef,
raise exc raise exc
except CatchableError as exc: except CatchableError as exc:
error "Unexpected error occured while signing aggregated attestation", error "Unexpected error occured while signing aggregated attestation",
validator = shortLog(validator),
attestation = shortLog(proof.aggregate),
validator_index = vindex,
err_name = exc.name, err_msg = exc.msg err_name = exc.name, err_msg = exc.msg
return false return false
let signedProof = SignedAggregateAndProof(message: proof, let signedProof = SignedAggregateAndProof(message: proof,
signature: signature) signature: signature)
logScope:
debug "Sending aggregated attestation", fork = fork,
attestation = shortLog(signedProof.message.aggregate),
validator = shortLog(validator), validator_index = vindex,
delay = vc.getDelay(slot.aggregate_deadline()) delay = vc.getDelay(slot.aggregate_deadline())
debug "Sending aggregated attestation", fork = fork
validator.doppelgangerActivity(proof.aggregate.data.slot.epoch) validator.doppelgangerActivity(proof.aggregate.data.slot.epoch)
let res = let res =
@ -167,9 +153,6 @@ proc serveAggregateAndProof*(service: AttestationServiceRef,
await vc.publishAggregateAndProofs(@[signedProof], ApiStrategyKind.First) await vc.publishAggregateAndProofs(@[signedProof], ApiStrategyKind.First)
except ValidatorApiError as exc: except ValidatorApiError as exc:
warn "Unable to publish aggregated attestation", warn "Unable to publish aggregated attestation",
attestation = shortLog(signedProof.message.aggregate),
validator = shortLog(validator),
validator_index = vindex,
reason = exc.getFailureReason() reason = exc.getFailureReason()
return false return false
except CancelledError as exc: except CancelledError as exc:
@ -177,22 +160,14 @@ proc serveAggregateAndProof*(service: AttestationServiceRef,
raise exc raise exc
except CatchableError as exc: except CatchableError as exc:
error "Unexpected error occured while publishing aggregated attestation", error "Unexpected error occured while publishing aggregated attestation",
attestation = shortLog(signedProof.message.aggregate),
validator = shortLog(validator),
err_name = exc.name, err_msg = exc.msg err_name = exc.name, err_msg = exc.msg
return false return false
if res: if res:
beacon_aggregates_sent.inc() beacon_aggregates_sent.inc()
notice "Aggregated attestation published", notice "Aggregated attestation published"
attestation = shortLog(signedProof.message.aggregate),
validator = shortLog(validator),
validator_index = vindex
else: else:
warn "Aggregated attestation was not accepted by beacon node", warn "Aggregated attestation was not accepted by beacon node"
attestation = shortLog(signedProof.message.aggregate),
validator = shortLog(validator),
validator_index = vindex
return res return res
proc produceAndPublishAttestations*(service: AttestationServiceRef, proc produceAndPublishAttestations*(service: AttestationServiceRef,
@ -394,7 +369,7 @@ proc publishAttestationsAndAggregates(service: AttestationServiceRef,
await service.produceAndPublishAggregates(ad, duties) await service.produceAndPublishAggregates(ad, duties)
proc spawnAttestationTasks(service: AttestationServiceRef, proc spawnAttestationTasks(service: AttestationServiceRef,
slot: Slot) = slot: Slot) {.async.} =
let vc = service.client let vc = service.client
let dutiesByCommittee = let dutiesByCommittee =
block: block:
@ -405,33 +380,66 @@ proc spawnAttestationTasks(service: AttestationServiceRef,
res.mgetOrPut(item.data.committee_index, default).add(item) res.mgetOrPut(item.data.committee_index, default).add(item)
res res
var dutiesSkipped: seq[string] var tasks: seq[Future[void]]
try:
for index, duties in dutiesByCommittee: for index, duties in dutiesByCommittee:
asyncSpawn service.publishAttestationsAndAggregates(slot, index, duties) tasks.add(service.publishAttestationsAndAggregates(slot, index, duties))
if len(dutiesSkipped) > 0: let timeout = vc.beaconClock.durationToNextSlot()
info "Doppelganger protection disabled validator duties", await allFutures(tasks).wait(timeout)
validators = len(dutiesSkipped) except AsyncTimeoutError:
trace "Doppelganger protection disabled validator duties dump", # Cancelling all the pending tasks.
validators = dutiesSkipped let pending = tasks.filterIt(not(it.finished())).mapIt(it.cancelAndWait())
await allFutures(pending)
except CancelledError as exc:
# Cancelling all the pending tasks.
let pending = tasks.filterIt(not(it.finished())).mapIt(it.cancelAndWait())
await allFutures(pending)
raise exc
except CatchableError as exc:
error "Unexpected error while processing attestation duties",
error_name = exc.name, error_message = exc.msg
proc mainLoop(service: AttestationServiceRef) {.async.} = proc mainLoop(service: AttestationServiceRef) {.async.} =
let vc = service.client let vc = service.client
service.state = ServiceState.Running service.state = ServiceState.Running
debug "Service started" debug "Service started"
debug "Attester loop is waiting for initialization"
try:
await allFutures(
vc.preGenesisEvent.wait(),
vc.genesisEvent.wait(),
vc.indicesAvailable.wait(),
vc.forksAvailable.wait()
)
except CancelledError:
debug "Service interrupted"
return
except CatchableError as exc:
warn "Service crashed with unexpected error", err_name = exc.name,
err_msg = exc.msg
return
doAssert(len(vc.forks) > 0, "Fork schedule must not be empty at this point")
var currentSlot: Opt[Slot]
while true: while true:
# This loop could look much more nicer/better, when # This loop could look much more nicer/better, when
# https://github.com/nim-lang/Nim/issues/19911 will be fixed, so it could # https://github.com/nim-lang/Nim/issues/19911 will be fixed, so it could
# become safe to combine loops, breaks and exception handlers. # become safe to combine loops, breaks and exception handlers.
let breakLoop = let breakLoop =
try: try:
let sleepTime = let
attestationSlotOffset + vc.beaconClock.durationToNextSlot() # We use zero offset here, because we do waiting in
let sres = vc.getCurrentSlot() # waitForBlockPublished(attestationSlotOffset).
if sres.isSome(): slot = await vc.checkedWaitForNextSlot(currentSlot,
let currentSlot = sres.get() ZeroTimeDiff, false)
service.spawnAttestationTasks(currentSlot) if slot.isNone():
await sleepAsync(sleepTime) debug "System time adjusted backwards significantly, exiting"
true
else:
currentSlot = slot
await service.spawnAttestationTasks(currentSlot.get())
false false
except CancelledError: except CancelledError:
debug "Service interrupted" debug "Service interrupted"

View File

@ -11,7 +11,10 @@ import
".."/spec/forks, ".."/spec/forks,
common, api common, api
logScope: service = "block_service" const
ServiceName = "block_service"
logScope: service = ServiceName
type type
PreparedBeaconBlock = object PreparedBeaconBlock = object
@ -300,26 +303,28 @@ proc publishBlock(vc: ValidatorClientRef, currentSlot, slot: Slot,
proc proposeBlock(vc: ValidatorClientRef, slot: Slot, proc proposeBlock(vc: ValidatorClientRef, slot: Slot,
proposerKey: ValidatorPubKey) {.async.} = proposerKey: ValidatorPubKey) {.async.} =
let (inFuture, timeToSleep) = vc.beaconClock.fromNow(slot)
try:
if inFuture:
debug "Proposing block", timeIn = timeToSleep,
validator = shortLog(proposerKey)
await sleepAsync(timeToSleep)
else:
debug "Proposing block", timeIn = 0.seconds,
validator = shortLog(proposerKey)
let sres = vc.getCurrentSlot()
if sres.isSome():
let let
currentSlot = sres.get() currentSlot = (await vc.checkedWaitForSlot(slot, ZeroTimeDiff,
validator = vc.getValidatorForDuties(proposerKey, slot).valueOr: return false)).valueOr:
error "Unable to perform block production because of system time"
return
if currentSlot > slot:
warn "Skip block production for expired slot",
current_slot = currentSlot, duties_slot = slot
return
let validator = vc.getValidatorForDuties(proposerKey, slot).valueOr: return
try:
await vc.publishBlock(currentSlot, slot, validator) await vc.publishBlock(currentSlot, slot, validator)
except CancelledError as exc: except CancelledError as exc:
debug "Block proposing was interrupted", slot = slot, debug "Block proposing process was interrupted",
validator = shortLog(proposerKey) slot = slot, validator = shortLog(proposerKey)
raise exc raise exc
except CatchableError as exc:
error "Unexpected error encountered while proposing block",
slot = slot, validator = shortLog(validator)
proc spawnProposalTask(vc: ValidatorClientRef, proc spawnProposalTask(vc: ValidatorClientRef,
duty: RestProposerDuty): ProposerTask = duty: RestProposerDuty): ProposerTask =
@ -356,18 +361,17 @@ proc checkDuty(duty: RestProposerDuty, epoch: Epoch, slot: Slot): bool =
proc addOrReplaceProposers*(vc: ValidatorClientRef, epoch: Epoch, proc addOrReplaceProposers*(vc: ValidatorClientRef, epoch: Epoch,
dependentRoot: Eth2Digest, dependentRoot: Eth2Digest,
duties: openArray[RestProposerDuty]) = duties: openArray[RestProposerDuty]) =
let default = ProposedData(epoch: Epoch(0xFFFF_FFFF_FFFF_FFFF'u64))
let sres = vc.getCurrentSlot()
if sres.isSome():
let let
currentSlot = sres.get() default = ProposedData(epoch: FAR_FUTURE_EPOCH)
currentSlot = vc.getCurrentSlot().get(Slot(0))
epochDuties = vc.proposers.getOrDefault(epoch, default) epochDuties = vc.proposers.getOrDefault(epoch, default)
if not(epochDuties.isDefault()): if not(epochDuties.isDefault()):
if epochDuties.dependentRoot != dependentRoot: if epochDuties.dependentRoot != dependentRoot:
warn "Proposer duties re-organization", duties_count = len(duties), warn "Proposer duties re-organization", duties_count = len(duties),
wall_slot = currentSlot, epoch = epoch, wall_slot = currentSlot, epoch = epoch,
prior_dependent_root = epochDuties.dependentRoot, prior_dependent_root = epochDuties.dependentRoot,
dependent_root = dependentRoot, wall_slot = currentSlot dependent_root = dependentRoot
let tasks = let tasks =
block: block:
var res: seq[ProposerTask] var res: seq[ProposerTask]
@ -394,7 +398,7 @@ proc addOrReplaceProposers*(vc: ValidatorClientRef, epoch: Epoch,
if checkDuty(duty, epoch, currentSlot): if checkDuty(duty, epoch, currentSlot):
let task = vc.spawnProposalTask(duty) let task = vc.spawnProposalTask(duty)
if duty.slot in hashset: if duty.slot in hashset:
warn "Multiple block proposers for this slot, " & error "Multiple block proposers for this slot, " &
"producing blocks for all proposers", slot = duty.slot "producing blocks for all proposers", slot = duty.slot
else: else:
hashset.incl(duty.slot) hashset.incl(duty.slot)
@ -416,7 +420,7 @@ proc addOrReplaceProposers*(vc: ValidatorClientRef, epoch: Epoch,
if checkDuty(duty, epoch, currentSlot): if checkDuty(duty, epoch, currentSlot):
let task = vc.spawnProposalTask(duty) let task = vc.spawnProposalTask(duty)
if duty.slot in hashset: if duty.slot in hashset:
warn "Multiple block proposers for this slot, " & error "Multiple block proposers for this slot, " &
"producing blocks for all proposers", slot = duty.slot "producing blocks for all proposers", slot = duty.slot
else: else:
hashset.incl(duty.slot) hashset.incl(duty.slot)
@ -439,20 +443,49 @@ proc waitForBlockPublished*(vc: ValidatorClientRef,
if not(task.future.finished()): if not(task.future.finished()):
res.add(task.future) res.add(task.future)
res res
waitTime = (start_beacon_time(slot) + timediff) - vc.beaconClock.now()
logScope: logScope:
start_time = startTime start_time = startTime
pending_tasks = len(pendingTasks) pending_tasks = len(pendingTasks)
slot = slot slot = slot
timediff = timediff timediff = timediff
if len(pendingTasks) > 0: # TODO (cheatfate): This algorithm should be tuned, when we will have ability
let waitTime = (start_beacon_time(slot) + timediff) - vc.beaconClock.now() # to monitor block proposals which are not created by validators bundled with
logScope: # VC.
wait_time = waitTime logScope: wait_time = waitTime
if waitTime.nanoseconds > 0'i64: if waitTime.nanoseconds > 0'i64:
if len(pendingTasks) > 0:
# Block proposal pending
try: try:
await allFutures(pendingTasks).wait(nanoseconds(waitTime.nanoseconds)) await allFutures(pendingTasks).wait(nanoseconds(waitTime.nanoseconds))
trace "Block proposal awaited" trace "Block proposal awaited"
# The expected block arrived - in our async loop however, we might
# have been doing other processing that caused delays here so we'll
# cap the waiting to the time when we would have sent out attestations
# had the block not arrived. An opposite case is that we received
# (or produced) a block that has not yet reached our neighbours. To
# protect against our attestations being dropped (because the others
# have not yet seen the block), we'll impose a minimum delay of
# 2000ms. The delay is enforced only when we're not hitting the
# "normal" cutoff time for sending out attestations. An earlier delay
# of 250ms has proven to be not enough, increasing the risk of losing
# attestations, and with growing block sizes, 1000ms started to be
# risky as well. Regardless, because we "just" received the block,
# we'll impose the delay.
# Take into consideration chains with a different slot time
const afterBlockDelay = nanos(attestationSlotOffset.nanoseconds div 2)
let
afterBlockTime = vc.beaconClock.now() + afterBlockDelay
afterBlockCutoff = vc.beaconClock.fromNow(
min(afterBlockTime,
slot.attestation_deadline() + afterBlockDelay))
if afterBlockCutoff.inFuture:
debug "Got block, waiting to send attestations",
after_block_cutoff = shortLog(afterBlockCutoff.offset)
await sleepAsync(afterBlockCutoff.offset)
except CancelledError as exc: except CancelledError as exc:
let dur = Moment.now() - startTime let dur = Moment.now() - startTime
debug "Waiting for block publication interrupted", duration = dur debug "Waiting for block publication interrupted", duration = dur
@ -460,3 +493,50 @@ proc waitForBlockPublished*(vc: ValidatorClientRef,
except AsyncTimeoutError: except AsyncTimeoutError:
let dur = Moment.now() - startTime let dur = Moment.now() - startTime
debug "Block was not published in time", duration = dur debug "Block was not published in time", duration = dur
else:
# No pending block proposals.
try:
await sleepAsync(nanoseconds(waitTime.nanoseconds))
except CancelledError as exc:
let dur = Moment.now() - startTime
debug "Waiting for block publication interrupted", duration = dur
raise exc
except CatchableError as exc:
let dur = Moment.now() - startTime
error "Unexpected error occured while waiting for block publication",
err_name = exc.name, err_msg = exc.msg, duration = dur
return
proc mainLoop(service: BlockServiceRef) {.async.} =
  ## Main loop of the block service.
  ##
  ## Marks the service as running and then parks on a future that is never
  ## completed, so the loop only ends when it is cancelled (or an unexpected
  ## error is raised). Before returning, every proposer task that has not
  ## finished yet is cancelled and awaited.
  let vc = service.client
  service.state = ServiceState.Running
  debug "Service started"

  # Nothing ever completes this future; cancellation is the only way out.
  let parked = newFuture[void]()
  try:
    await parked
  except CancelledError:
    debug "Service interrupted"
  except CatchableError as exc:
    error "Service crashed with unexpected error", err_name = exc.name,
          err_msg = exc.msg

  # Clean up: cancel all proposer tasks that are still pending and wait until
  # every cancellation has been processed.
  var cancellations: seq[Future[void]]
  for data in vc.proposers.values():
    for task in data.duties:
      if task.future.finished():
        continue
      cancellations.add(task.future.cancelAndWait())
  await allFutures(cancellations)
proc init*(t: typedesc[BlockServiceRef],
           vc: ValidatorClientRef): Future[BlockServiceRef] {.async.} =
  ## Create a block service bound to the validator client `vc`.
  ##
  ## The returned service is in the `Initialized` state; nothing is started
  ## here.
  logScope: service = ServiceName
  let blockService = BlockServiceRef(name: ServiceName, client: vc,
                                     state: ServiceState.Initialized)
  debug "Initializing service"
  return blockService
proc start*(service: BlockServiceRef) =
  ## Launch the service main loop and keep its future in `service.lifeFut`.
  service.lifeFut = service.mainLoop()

View File

@ -39,14 +39,12 @@ const
DelayBuckets* = [-Inf, -4.0, -2.0, -1.0, -0.5, -0.1, -0.05, DelayBuckets* = [-Inf, -4.0, -2.0, -1.0, -0.5, -0.1, -0.05,
0.05, 0.1, 0.5, 1.0, 2.0, 4.0, 8.0, Inf] 0.05, 0.1, 0.5, 1.0, 2.0, 4.0, 8.0, Inf]
ZeroTimeDiff* = TimeDiff(nanoseconds: 0'i64)
type type
ServiceState* {.pure.} = enum ServiceState* {.pure.} = enum
Initialized, Running, Error, Closing, Closed Initialized, Running, Error, Closing, Closed
BlockServiceEventRef* = ref object of RootObj
slot*: Slot
proposers*: seq[ValidatorPubKey]
RegistrationKind* {.pure.} = enum RegistrationKind* {.pure.} = enum
Cached, IncorrectTime, MissingIndex, MissingFee, MissingGasLimit Cached, IncorrectTime, MissingIndex, MissingFee, MissingGasLimit
ErrorSignature, NoSignature ErrorSignature, NoSignature
@ -174,6 +172,8 @@ type
beaconClock*: BeaconClock beaconClock*: BeaconClock
attachedValidators*: ref ValidatorPool attachedValidators*: ref ValidatorPool
forks*: seq[Fork] forks*: seq[Fork]
preGenesisEvent*: AsyncEvent
genesisEvent*: AsyncEvent
forksAvailable*: AsyncEvent forksAvailable*: AsyncEvent
nodesAvailable*: AsyncEvent nodesAvailable*: AsyncEvent
indicesAvailable*: AsyncEvent indicesAvailable*: AsyncEvent
@ -201,7 +201,7 @@ type
data*: seq[ApiNodeFailure] data*: seq[ApiNodeFailure]
const const
DefaultDutyAndProof* = DutyAndProof(epoch: Epoch(0xFFFF_FFFF_FFFF_FFFF'u64)) DefaultDutyAndProof* = DutyAndProof(epoch: FAR_FUTURE_EPOCH)
SlotDuration* = int64(SECONDS_PER_SLOT).seconds SlotDuration* = int64(SECONDS_PER_SLOT).seconds
OneThirdDuration* = int64(SECONDS_PER_SLOT).seconds div INTERVALS_PER_SLOT OneThirdDuration* = int64(SECONDS_PER_SLOT).seconds div INTERVALS_PER_SLOT
AllBeaconNodeRoles* = { AllBeaconNodeRoles* = {
@ -329,6 +329,15 @@ proc validatorLog*(key: ValidatorPubKey,
res.add(Base10.toString(uint64(index))) res.add(Base10.toString(uint64(index)))
res res
proc validatorLog*(validator: AttachedValidator): string =
  ## Build a log label of the form `<shortLog(validator)>@<index>`.
  ##
  ## When the validator has no index yet, the literal `<missing>` is used in
  ## place of the decimal index.
  result = shortLog(validator)
  result.add('@')
  result.add(
    if validator.index.isSome():
      Base10.toString(uint64(validator.index.get()))
    else:
      "<missing>"
  )
chronicles.expandIt(BeaconNodeServerRef): chronicles.expandIt(BeaconNodeServerRef):
node = $it node = $it
node_index = it.index node_index = it.index
@ -564,18 +573,12 @@ proc init*(t: typedesc[ProposedData], epoch: Epoch, dependentRoot: Eth2Digest,
data: openArray[ProposerTask]): ProposedData = data: openArray[ProposerTask]): ProposedData =
ProposedData(epoch: epoch, dependentRoot: dependentRoot, duties: @data) ProposedData(epoch: epoch, dependentRoot: dependentRoot, duties: @data)
proc getCurrentSlot*(vc: ValidatorClientRef): Option[Slot] = proc getCurrentSlot*(vc: ValidatorClientRef): Opt[Slot] =
let let res = vc.beaconClock.now().toSlot()
wallTime = vc.beaconClock.now() if res.afterGenesis:
wallSlot = wallTime.toSlot() Opt.some(res.slot)
if not(wallSlot.afterGenesis):
let checkGenesisTime = vc.beaconClock.fromNow(start_beacon_time(Slot(0)))
warn "Jump in time detected, something wrong with wallclock",
wall_time = wallTime, genesisIn = checkGenesisTime.offset
none[Slot]()
else: else:
some(wallSlot.slot) Opt.none(Slot)
proc getAttesterDutiesForSlot*(vc: ValidatorClientRef, proc getAttesterDutiesForSlot*(vc: ValidatorClientRef,
slot: Slot): seq[DutyAndProof] = slot: Slot): seq[DutyAndProof] =
@ -915,3 +918,75 @@ proc prepareRegistrationList*(
proc init*(t: typedesc[ApiNodeFailure], node: BeaconNodeServerRef, proc init*(t: typedesc[ApiNodeFailure], node: BeaconNodeServerRef,
failure: ApiFailure): ApiNodeFailure = failure: ApiFailure): ApiNodeFailure =
ApiNodeFailure(node: node, failure: failure) ApiNodeFailure(node: node, failure: failure)
proc checkedWaitForSlot*(vc: ValidatorClientRef, destinationSlot: Slot,
                         offset: TimeDiff,
                         showLogs: bool): Future[Opt[Slot]] {.async.} =
  ## Sleep until `offset` after the start of `destinationSlot`, then re-read
  ## the beacon clock to verify where the wall clock actually is.
  ##
  ## A negative `offset` is treated as zero. Log messages are emitted only
  ## when `showLogs` is `true`.
  ##
  ## Returns:
  ## * `Opt.some(destinationSlot)` when we woke up in the expected slot;
  ## * `Opt.some(wallSlot)` when we woke up in a later slot;
  ## * `Opt.none(Slot)` when the wall clock is more than one slot behind
  ##   `destinationSlot` after sleeping - reacting to that is left to the
  ##   caller.
  ##
  ## When the wall clock is exactly one slot behind, the sleep is rescheduled
  ## and the check is repeated.
  let
    startTime = vc.beaconClock.now()
    startSlot = startTime.slotOrZero()
    # Negative offsets are clamped to zero.
    extraDelay = chronos.nanoseconds(max(offset.nanoseconds, 0'i64))

  var sleepTime = (destinationSlot.start_beacon_time() - startTime) +
                  extraDelay

  logScope:
    start_time = shortLog(startTime)
    start_slot = shortLog(startSlot)
    dest_slot = shortLog(destinationSlot)
    time_to_slot = shortLog(sleepTime)

  while true:
    await sleepAsync(sleepTime)

    let
      wallTime = vc.beaconClock.now()
      wallSlot = wallTime.slotOrZero()

    logScope:
      wall_time = shortLog(wallTime)
      wall_slot = shortLog(wallSlot)

    if not(wallSlot < destinationSlot):
      # Destination slot reached (or already passed): report where we are.
      if showLogs:
        if wallSlot > destinationSlot + SLOTS_PER_EPOCH:
          warn "Time moved forwards by more than an epoch, skipping ahead"
        elif wallSlot > destinationSlot:
          notice "Missed expected slot start, catching up"
      return Opt.some(
        if wallSlot > destinationSlot: wallSlot else: destinationSlot)

    # Still before the destination slot: while we were sleeping the system
    # clock was changed and time moved backwards.
    if wallSlot + 1 < destinationSlot:
      # More than a single slot behind - it is hard to reason about what to
      # do next, so signal the condition to the caller.
      if showLogs:
        fatal "System time adjusted backwards significantly - " &
              "clock may be inaccurate - shutting down"
      return Opt.none(Slot)

    # Exactly one slot behind - this could be a minor adjustment, for example
    # NTP correcting the clock after not working for a while. Sleep again for
    # the remaining time and re-check.
    sleepTime = destinationSlot.start_beacon_time() - wallTime +
                extraDelay
    if showLogs:
      warn "System time adjusted backwards, rescheduling slot actions"
proc checkedWaitForNextSlot*(vc: ValidatorClientRef, curSlot: Opt[Slot],
                             offset: TimeDiff,
                             showLogs: bool): Future[Opt[Slot]] =
  ## Wait for the slot that follows `curSlot` using `checkedWaitForSlot`.
  ##
  ## When `curSlot` is empty, the slot derived from the current beacon clock
  ## time (`slotOrZero()`) is used as the base instead. `offset` and
  ## `showLogs` are forwarded unchanged.
  let
    wallTime = vc.beaconClock.now()
    baseSlot =
      if curSlot.isSome():
        curSlot.get()
      else:
        wallTime.slotOrZero()

  vc.checkedWaitForSlot(baseSlot + 1, offset, showLogs)

View File

@ -58,6 +58,21 @@ proc mainLoop(service: DoppelgangerServiceRef) {.async.} =
debug "Service disabled because of configuration settings" debug "Service disabled because of configuration settings"
return return
debug "Doppelganger detection loop is waiting for initialization"
try:
await allFutures(
vc.preGenesisEvent.wait(),
vc.genesisEvent.wait(),
vc.indicesAvailable.wait()
)
except CancelledError:
debug "Service interrupted"
return
except CatchableError as exc:
warn "Service crashed with unexpected error", err_name = exc.name,
err_msg = exc.msg
return
# On (re)start, we skip the remainder of the epoch before we start monitoring # On (re)start, we skip the remainder of the epoch before we start monitoring
# for doppelgangers so we don't trigger on the attestations we produced before # for doppelgangers so we don't trigger on the attestations we produced before
# the epoch - there's no activity in the genesis slot, so if we start at or # the epoch - there's no activity in the genesis slot, so if we start at or

View File

@ -38,7 +38,9 @@ proc checkDuty(duty: RestAttesterDuty): bool =
proc checkSyncDuty(duty: RestSyncCommitteeDuty): bool = proc checkSyncDuty(duty: RestSyncCommitteeDuty): bool =
uint64(duty.validator_index) <= VALIDATOR_REGISTRY_LIMIT uint64(duty.validator_index) <= VALIDATOR_REGISTRY_LIMIT
proc pollForValidatorIndices*(vc: ValidatorClientRef) {.async.} = proc pollForValidatorIndices*(service: DutiesServiceRef) {.async.} =
let vc = service.client
let validatorIdents = let validatorIdents =
block: block:
var res: seq[ValidatorIdent] var res: seq[ValidatorIdent]
@ -107,16 +109,12 @@ proc pollForValidatorIndices*(vc: ValidatorClientRef) {.async.} =
updated_validators = updated updated_validators = updated
vc.indicesAvailable.fire() vc.indicesAvailable.fire()
proc pollForAttesterDuties*(vc: ValidatorClientRef, proc pollForAttesterDuties*(service: DutiesServiceRef,
epoch: Epoch): Future[int] {.async.} = epoch: Epoch): Future[int] {.async.} =
let validatorIndices = let vc = service.client
block: let validatorIndices = toSeq(vc.attachedValidators[].indices())
var res: seq[ValidatorIndex]
for index in vc.attachedValidators[].indices():
res.add(index)
res
if validatorIndices.len == 0: if len(validatorIndices) == 0:
return 0 return 0
var duties: seq[RestAttesterDuty] var duties: seq[RestAttesterDuty]
@ -243,7 +241,8 @@ proc pollForAttesterDuties*(vc: ValidatorClientRef,
return len(addOrReplaceItems) return len(addOrReplaceItems)
proc pruneSyncCommitteeDuties*(vc: ValidatorClientRef, slot: Slot) = proc pruneSyncCommitteeDuties*(service: DutiesServiceRef, slot: Slot) =
let vc = service.client
if slot.is_sync_committee_period(): if slot.is_sync_committee_period():
var newSyncCommitteeDuties: SyncCommitteeDutiesMap var newSyncCommitteeDuties: SyncCommitteeDutiesMap
let epoch = slot.epoch() let epoch = slot.epoch()
@ -255,8 +254,9 @@ proc pruneSyncCommitteeDuties*(vc: ValidatorClientRef, slot: Slot) =
newSyncCommitteeDuties[key] = currentPeriodDuties newSyncCommitteeDuties[key] = currentPeriodDuties
vc.syncCommitteeDuties = newSyncCommitteeDuties vc.syncCommitteeDuties = newSyncCommitteeDuties
proc pollForSyncCommitteeDuties*(vc: ValidatorClientRef, proc pollForSyncCommitteeDuties*(service: DutiesServiceRef,
epoch: Epoch): Future[int] {.async.} = epoch: Epoch): Future[int] {.async.} =
let vc = service.client
let validatorIndices = toSeq(vc.attachedValidators[].indices()) let validatorIndices = toSeq(vc.attachedValidators[].indices())
var var
filteredDuties: seq[RestSyncCommitteeDuty] filteredDuties: seq[RestSyncCommitteeDuty]
@ -335,7 +335,8 @@ proc pollForSyncCommitteeDuties*(vc: ValidatorClientRef,
return len(addOrReplaceItems) return len(addOrReplaceItems)
proc pruneAttesterDuties(vc: ValidatorClientRef, epoch: Epoch) = proc pruneAttesterDuties(service: DutiesServiceRef, epoch: Epoch) =
let vc = service.client
var attesters: AttesterMap var attesters: AttesterMap
for key, item in vc.attesters: for key, item in vc.attesters:
var v = EpochDuties() var v = EpochDuties()
@ -348,7 +349,7 @@ proc pruneAttesterDuties(vc: ValidatorClientRef, epoch: Epoch) =
attesters[key] = v attesters[key] = v
vc.attesters = attesters vc.attesters = attesters
proc pollForAttesterDuties*(vc: ValidatorClientRef) {.async.} = proc pollForAttesterDuties*(service: DutiesServiceRef) {.async.} =
## Query the beacon node for attestation duties for all known validators. ## Query the beacon node for attestation duties for all known validators.
## ##
## This function will perform (in the following order): ## This function will perform (in the following order):
@ -356,17 +357,18 @@ proc pollForAttesterDuties*(vc: ValidatorClientRef) {.async.} =
## 1. Poll for current-epoch duties and update the local `attesters` map. ## 1. Poll for current-epoch duties and update the local `attesters` map.
## 2. Poll for next-epoch duties and update the local `attesters` map. ## 2. Poll for next-epoch duties and update the local `attesters` map.
## 3. Push out any attestation subnet subscriptions to the BN. ## 3. Push out any attestation subnet subscriptions to the BN.
let sres = vc.getCurrentSlot() let vc = service.client
if sres.isSome():
let let
currentSlot = sres.get() currentSlot = vc.getCurrentSlot().get(Slot(0))
currentEpoch = currentSlot.epoch() currentEpoch = currentSlot.epoch()
nextEpoch = currentEpoch + 1'u64 nextEpoch = currentEpoch + 1'u64
if vc.attachedValidators[].count() != 0: if vc.attachedValidators[].count() != 0:
var counts: array[2, tuple[epoch: Epoch, count: int]] var counts: array[2, tuple[epoch: Epoch, count: int]]
counts[0] = (currentEpoch, await vc.pollForAttesterDuties(currentEpoch)) counts[0] = (currentEpoch,
counts[1] = (nextEpoch, await vc.pollForAttesterDuties(nextEpoch)) await service.pollForAttesterDuties(currentEpoch))
counts[1] = (nextEpoch,
await service.pollForAttesterDuties(nextEpoch))
if (counts[0].count == 0) and (counts[1].count == 0): if (counts[0].count == 0) and (counts[1].count == 0):
debug "No new attester's duties received", slot = currentSlot debug "No new attester's duties received", slot = currentSlot
@ -401,13 +403,12 @@ proc pollForAttesterDuties*(vc: ValidatorClientRef) {.async.} =
slot = currentSlot, epoch = currentEpoch, slot = currentSlot, epoch = currentEpoch,
subscriptions_count = len(subscriptions) subscriptions_count = len(subscriptions)
vc.pruneAttesterDuties(currentEpoch) service.pruneAttesterDuties(currentEpoch)
proc pollForSyncCommitteeDuties* (vc: ValidatorClientRef) {.async.} = proc pollForSyncCommitteeDuties*(service: DutiesServiceRef) {.async.} =
let sres = vc.getCurrentSlot() let vc = service.client
if sres.isSome():
let let
currentSlot = sres.get() currentSlot = vc.getCurrentSlot().get(Slot(0))
currentEpoch = currentSlot.epoch() currentEpoch = currentSlot.epoch()
if vc.attachedValidators[].count() != 0: if vc.attachedValidators[].count() != 0:
@ -438,7 +439,7 @@ proc pollForSyncCommitteeDuties* (vc: ValidatorClientRef) {.async.} =
var total = 0 var total = 0
if len(dutyPeriods) > 0: if len(dutyPeriods) > 0:
for (epoch, period) in dutyPeriods: for (epoch, period) in dutyPeriods:
let count = await vc.pollForSyncCommitteeDuties(epoch) let count = await service.pollForSyncCommitteeDuties(epoch)
res.add((epoch: epoch, period: period, count: count)) res.add((epoch: epoch, period: period, count: count))
total += count total += count
(res, total) (res, total)
@ -467,14 +468,16 @@ proc pollForSyncCommitteeDuties* (vc: ValidatorClientRef) {.async.} =
if len(subscriptions) > 0: if len(subscriptions) > 0:
let res = await vc.prepareSyncCommitteeSubnets(subscriptions) let res = await vc.prepareSyncCommitteeSubnets(subscriptions)
if res != 0: if res == 0:
warn "Failed to subscribe validators to sync committee subnets", warn "Failed to subscribe validators to sync committee subnets",
slot = currentSlot, epoch = currentEpoch, slot = currentSlot, epoch = currentEpoch,
subscriptions_count = len(subscriptions) subscriptions_count = len(subscriptions)
vc.pruneSyncCommitteeDuties(currentSlot) service.pruneSyncCommitteeDuties(currentSlot)
proc pruneBeaconProposers(service: DutiesServiceRef, epoch: Epoch) =
let vc = service.client
proc pruneBeaconProposers(vc: ValidatorClientRef, epoch: Epoch) =
var proposers: ProposerMap var proposers: ProposerMap
for epochKey, data in vc.proposers: for epochKey, data in vc.proposers:
if (epochKey + HISTORICAL_DUTIES_EPOCHS) >= epoch: if (epochKey + HISTORICAL_DUTIES_EPOCHS) >= epoch:
@ -484,11 +487,10 @@ proc pruneBeaconProposers(vc: ValidatorClientRef, epoch: Epoch) =
loop = ProposerLoop loop = ProposerLoop
vc.proposers = proposers vc.proposers = proposers
proc pollForBeaconProposers*(vc: ValidatorClientRef) {.async.} = proc pollForBeaconProposers*(service: DutiesServiceRef) {.async.} =
let sres = vc.getCurrentSlot() let vc = service.client
if sres.isSome():
let let
currentSlot = sres.get() currentSlot = vc.getCurrentSlot().get(Slot(0))
currentEpoch = currentSlot.epoch() currentEpoch = currentSlot.epoch()
if vc.attachedValidators[].count() != 0: if vc.attachedValidators[].count() != 0:
@ -506,7 +508,7 @@ proc pollForBeaconProposers*(vc: ValidatorClientRef) {.async.} =
debug "No relevant proposer duties received", slot = currentSlot, debug "No relevant proposer duties received", slot = currentSlot,
duties_count = len(duties) duties_count = len(duties)
except ValidatorApiError as exc: except ValidatorApiError as exc:
warn "Unable to get proposer duties", slot = currentSlot, notice "Unable to get proposer duties", slot = currentSlot,
epoch = currentEpoch, reason = exc.getFailureReason() epoch = currentEpoch, reason = exc.getFailureReason()
except CancelledError as exc: except CancelledError as exc:
debug "Proposer duties processing was interrupted" debug "Proposer duties processing was interrupted"
@ -516,14 +518,12 @@ proc pollForBeaconProposers*(vc: ValidatorClientRef) {.async.} =
slot = currentSlot, epoch = currentEpoch, err_name = exc.name, slot = currentSlot, epoch = currentEpoch, err_name = exc.name,
err_msg = exc.msg err_msg = exc.msg
vc.pruneBeaconProposers(currentEpoch) service.pruneBeaconProposers(currentEpoch)
proc prepareBeaconProposers*(service: DutiesServiceRef) {.async.} = proc prepareBeaconProposers*(service: DutiesServiceRef) {.async.} =
let vc = service.client let vc = service.client
let sres = vc.getCurrentSlot()
if sres.isSome():
let let
currentSlot = sres.get() currentSlot = vc.getCurrentSlot().get(Slot(0))
currentEpoch = currentSlot.epoch() currentEpoch = currentSlot.epoch()
proposers = vc.prepareProposersList(currentEpoch) proposers = vc.prepareProposersList(currentEpoch)
@ -551,13 +551,9 @@ proc prepareBeaconProposers*(service: DutiesServiceRef) {.async.} =
proc registerValidators*(service: DutiesServiceRef) {.async.} = proc registerValidators*(service: DutiesServiceRef) {.async.} =
let vc = service.client let vc = service.client
let sres = vc.getCurrentSlot()
var default: seq[SignedValidatorRegistrationV1]
if sres.isSome():
let let
currentSlot = vc.getCurrentSlot().get(Slot(0))
genesisFork = vc.forks[0] genesisFork = vc.forks[0]
currentSlot = sres.get()
registrations = registrations =
try: try:
await vc.prepareRegistrationList(getTime(), genesisFork) await vc.prepareRegistrationList(getTime(), genesisFork)
@ -566,12 +562,13 @@ proc registerValidators*(service: DutiesServiceRef) {.async.} =
slot = currentSlot, fork = genesisFork slot = currentSlot, fork = genesisFork
raise exc raise exc
except CatchableError as exc: except CatchableError as exc:
var default: seq[SignedValidatorRegistrationV1]
error "Unexpected error occured while preparing validators " & error "Unexpected error occured while preparing validators " &
"registration data", slot = currentSlot, fork = genesisFork, "registration data", slot = currentSlot, fork = genesisFork,
err_name = exc.name, err_msg = exc.msg err_name = exc.name, err_msg = exc.msg
default default
let count = count =
if len(registrations) > 0: if len(registrations) > 0:
try: try:
await registerValidator(vc, registrations) await registerValidator(vc, registrations)
@ -605,35 +602,45 @@ proc waitForNextSlot(service: DutiesServiceRef,
proc attesterDutiesLoop(service: DutiesServiceRef) {.async.} = proc attesterDutiesLoop(service: DutiesServiceRef) {.async.} =
let vc = service.client let vc = service.client
debug "Attester duties loop is waiting for initialization"
debug "Attester duties loop waiting for fork schedule update" await allFutures(
await vc.forksAvailable.wait() vc.preGenesisEvent.wait(),
vc.indicesAvailable.wait(),
vc.forksAvailable.wait()
)
doAssert(len(vc.forks) > 0, "Fork schedule must not be empty at this point") doAssert(len(vc.forks) > 0, "Fork schedule must not be empty at this point")
while true: while true:
await vc.pollForAttesterDuties() await service.pollForAttesterDuties()
await service.waitForNextSlot(AttesterLoop) await service.waitForNextSlot(AttesterLoop)
proc proposerDutiesLoop(service: DutiesServiceRef) {.async.} = proc proposerDutiesLoop(service: DutiesServiceRef) {.async.} =
let vc = service.client let vc = service.client
debug "Proposer duties loop is waiting for initialization"
debug "Proposer duties loop waiting for fork schedule update" await allFutures(
await vc.forksAvailable.wait() vc.preGenesisEvent.wait(),
vc.indicesAvailable.wait(),
vc.forksAvailable.wait()
)
doAssert(len(vc.forks) > 0, "Fork schedule must not be empty at this point") doAssert(len(vc.forks) > 0, "Fork schedule must not be empty at this point")
while true: while true:
await vc.pollForBeaconProposers() await service.pollForBeaconProposers()
await service.waitForNextSlot(ProposerLoop) await service.waitForNextSlot(ProposerLoop)
proc validatorIndexLoop(service: DutiesServiceRef) {.async.} = proc validatorIndexLoop(service: DutiesServiceRef) {.async.} =
let vc = service.client let vc = service.client
debug "Validator indices loop is waiting for initialization"
await vc.preGenesisEvent.wait()
while true: while true:
await vc.pollForValidatorIndices() await service.pollForValidatorIndices()
await service.waitForNextSlot(IndicesLoop) await service.waitForNextSlot(IndicesLoop)
proc proposerPreparationsLoop(service: DutiesServiceRef) {.async.} = proc proposerPreparationsLoop(service: DutiesServiceRef) {.async.} =
let vc = service.client let vc = service.client
debug "Beacon proposer preparation loop is waiting for initialization"
debug "Beacon proposer preparation loop waiting for validator indices update" await allFutures(
await vc.indicesAvailable.wait() vc.preGenesisEvent.wait(),
vc.indicesAvailable.wait()
)
while true: while true:
await service.prepareBeaconProposers() await service.prepareBeaconProposers()
await service.waitForNextSlot(ProposerPreparationLoop) await service.waitForNextSlot(ProposerPreparationLoop)
@ -641,21 +648,28 @@ proc proposerPreparationsLoop(service: DutiesServiceRef) {.async.} =
proc validatorRegisterLoop(service: DutiesServiceRef) {.async.} = proc validatorRegisterLoop(service: DutiesServiceRef) {.async.} =
let vc = service.client let vc = service.client
doAssert(vc.config.payloadBuilderEnable) doAssert(vc.config.payloadBuilderEnable)
debug "Validator registration loop is waiting for initialization" debug "Validator registration loop is waiting for initialization"
await allFutures(vc.indicesAvailable.wait(), vc.forksAvailable.wait()) await allFutures(
vc.preGenesisEvent.wait(),
vc.indicesAvailable.wait(),
vc.forksAvailable.wait()
)
doAssert(len(vc.forks) > 0, "Fork schedule must not be empty at this point")
while true: while true:
await service.registerValidators() await service.registerValidators()
await service.waitForNextSlot(ValidatorRegisterLoop) await service.waitForNextSlot(ValidatorRegisterLoop)
proc syncCommitteeDutiesLoop(service: DutiesServiceRef) {.async.} = proc syncCommitteeDutiesLoop(service: DutiesServiceRef) {.async.} =
let vc = service.client let vc = service.client
debug "Sync committee duties loop is waiting for initialization"
debug "Sync committee duties loop waiting for fork schedule update" await allFutures(
await vc.forksAvailable.wait() vc.preGenesisEvent.wait(),
vc.indicesAvailable.wait(),
vc.forksAvailable.wait()
)
doAssert(len(vc.forks) > 0, "Fork schedule must not be empty at this point") doAssert(len(vc.forks) > 0, "Fork schedule must not be empty at this point")
while true: while true:
await vc.pollForSyncCommitteeDuties() await service.pollForSyncCommitteeDuties()
await service.waitForNextSlot(SyncCommitteeLoop) await service.waitForNextSlot(SyncCommitteeLoop)
template checkAndRestart(serviceLoop: DutiesServiceLoop, template checkAndRestart(serviceLoop: DutiesServiceLoop,
@ -696,9 +710,13 @@ proc mainLoop(service: DutiesServiceRef) {.async.} =
# become safe to combine loops, breaks and exception handlers. # become safe to combine loops, breaks and exception handlers.
let breakLoop = let breakLoop =
try: try:
var futures = @[FutureBase(attestFut), FutureBase(proposeFut), var futures = @[
FutureBase(indicesFut), FutureBase(syncFut), FutureBase(attestFut),
FutureBase(prepareFut)] FutureBase(proposeFut),
FutureBase(indicesFut),
FutureBase(syncFut),
FutureBase(prepareFut)
]
if not(isNil(registerFut)): futures.add(FutureBase(registerFut)) if not(isNil(registerFut)): futures.add(FutureBase(registerFut))
discard await race(futures) discard await race(futures)
checkAndRestart(AttesterLoop, attestFut, service.attesterDutiesLoop()) checkAndRestart(AttesterLoop, attestFut, service.attesterDutiesLoop())
@ -743,8 +761,6 @@ proc init*(t: typedesc[DutiesServiceRef],
let res = DutiesServiceRef(name: ServiceName, let res = DutiesServiceRef(name: ServiceName,
client: vc, state: ServiceState.Initialized) client: vc, state: ServiceState.Initialized)
debug "Initializing service" debug "Initializing service"
# We query for indices first, to avoid empty queries for duties.
await vc.pollForValidatorIndices()
return res return res
proc start*(service: DutiesServiceRef) = proc start*(service: DutiesServiceRef) =

View File

@ -34,6 +34,10 @@ proc otherNodes*(vc: ValidatorClientRef): seq[BeaconNodeServerRef] =
proc otherNodesCount*(vc: ValidatorClientRef): int = proc otherNodesCount*(vc: ValidatorClientRef): int =
vc.beaconNodes.countIt(it.status != RestBeaconNodeStatus.Synced) vc.beaconNodes.countIt(it.status != RestBeaconNodeStatus.Synced)
proc preGenesisNodes*(vc: ValidatorClientRef): seq[BeaconNodeServerRef] =
  ## Return the beacon nodes whose status is neither `Synced` nor
  ## `OptSynced`, in the same order as they appear in `vc.beaconNodes`.
  const syncedStatuses = {RestBeaconNodeStatus.Synced,
                          RestBeaconNodeStatus.OptSynced}
  for node in vc.beaconNodes:
    if node.status notin syncedStatuses:
      result.add(node)
proc waitNodes*(vc: ValidatorClientRef, timeoutFut: Future[void], proc waitNodes*(vc: ValidatorClientRef, timeoutFut: Future[void],
statuses: set[RestBeaconNodeStatus], statuses: set[RestBeaconNodeStatus],
roles: set[BeaconNodeRole], waitChanges: bool) {.async.} = roles: set[BeaconNodeRole], waitChanges: bool) {.async.} =
@ -230,7 +234,12 @@ proc checkNode(vc: ValidatorClientRef,
proc checkNodes*(service: FallbackServiceRef): Future[bool] {.async.} = proc checkNodes*(service: FallbackServiceRef): Future[bool] {.async.} =
let let
nodesToCheck = service.client.otherNodes() vc = service.client
nodesToCheck =
if vc.genesisEvent.isSet():
service.client.otherNodes()
else:
service.client.preGenesisNodes()
pendingChecks = nodesToCheck.mapIt(service.client.checkNode(it)) pendingChecks = nodesToCheck.mapIt(service.client.checkNode(it))
var res = false var res = false
try: try:
@ -252,6 +261,16 @@ proc mainLoop(service: FallbackServiceRef) {.async.} =
service.state = ServiceState.Running service.state = ServiceState.Running
debug "Service started" debug "Service started"
try:
await vc.preGenesisEvent.wait()
except CancelledError:
debug "Service interrupted"
return
except CatchableError as exc:
warn "Service crashed with unexpected error", err_name = exc.name,
err_msg = exc.msg
return
while true: while true:
# This loop could look much more nicer/better, when # This loop could look much more nicer/better, when
# https://github.com/nim-lang/Nim/issues/19911 will be fixed, so it could # https://github.com/nim-lang/Nim/issues/19911 will be fixed, so it could
@ -279,8 +298,6 @@ proc init*(t: typedesc[FallbackServiceRef],
state: ServiceState.Initialized, state: ServiceState.Initialized,
changesEvent: newAsyncEvent()) changesEvent: newAsyncEvent())
debug "Initializing service" debug "Initializing service"
# Perform initial nodes check.
if await res.checkNodes(): res.changesEvent.fire()
return res return res
proc start*(service: FallbackServiceRef) = proc start*(service: FallbackServiceRef) =

View File

@ -43,12 +43,6 @@ proc sortForks(forks: openArray[Fork]): Result[seq[Fork], cstring] {.
ok(sortedForks) ok(sortedForks)
proc pollForFork(vc: ValidatorClientRef) {.async.} = proc pollForFork(vc: ValidatorClientRef) {.async.} =
let sres = vc.getCurrentSlot()
if sres.isSome():
let
currentSlot = sres.get()
currentEpoch = currentSlot.epoch()
let forks = let forks =
try: try:
await vc.getForkSchedule(ApiStrategyKind.Best) await vc.getForkSchedule(ApiStrategyKind.Best)
@ -88,6 +82,16 @@ proc mainLoop(service: ForkServiceRef) {.async.} =
service.state = ServiceState.Running service.state = ServiceState.Running
debug "Service started" debug "Service started"
try:
await vc.preGenesisEvent.wait()
except CancelledError:
debug "Service interrupted"
return
except CatchableError as exc:
warn "Service crashed with unexpected error", err_name = exc.name,
err_msg = exc.msg
return
while true: while true:
# This loop could look much more nicer/better, when # This loop could look much more nicer/better, when
# https://github.com/nim-lang/Nim/issues/19911 will be fixed, so it could # https://github.com/nim-lang/Nim/issues/19911 will be fixed, so it could
@ -114,7 +118,6 @@ proc init*(t: typedesc[ForkServiceRef],
let res = ForkServiceRef(name: ServiceName, let res = ForkServiceRef(name: ServiceName,
client: vc, state: ServiceState.Initialized) client: vc, state: ServiceState.Initialized)
debug "Initializing service" debug "Initializing service"
await vc.pollForFork()
return res return res
proc start*(service: ForkServiceRef) = proc start*(service: ForkServiceRef) =

View File

@ -406,35 +406,67 @@ proc publishSyncMessagesAndContributions(service: SyncCommitteeServiceRef,
debug "Producing contribution and proofs", delay = delay debug "Producing contribution and proofs", delay = delay
await service.produceAndPublishContributions(slot, beaconBlockRoot, duties) await service.produceAndPublishContributions(slot, beaconBlockRoot, duties)
proc spawnSyncCommitteeTasks(service: SyncCommitteeServiceRef, slot: Slot) = proc processSyncCommitteeTasks(service: SyncCommitteeServiceRef,
slot: Slot) {.async.} =
let let
vc = service.client vc = service.client
duties = vc.getSyncCommitteeDutiesForSlot(slot + 1) duties = vc.getSyncCommitteeDutiesForSlot(slot + 1)
timeout = vc.beaconClock.durationToNextSlot()
asyncSpawn service.publishSyncMessagesAndContributions(slot, duties) try:
await service.publishSyncMessagesAndContributions(slot,
duties).wait(timeout)
except AsyncTimeoutError:
warn "Unable to publish sync committee messages and contributions in time",
slot = slot, timeout = timeout
except CancelledError as exc:
debug "Sync committee publish task has been interrupted"
raise exc
except CatchableError as exc:
error "Unexpected error encountered while processing sync committee tasks",
error_name = exc.name, error_message = exc.msg
proc mainLoop(service: SyncCommitteeServiceRef) {.async.} = proc mainLoop(service: SyncCommitteeServiceRef) {.async.} =
let vc = service.client let vc = service.client
service.state = ServiceState.Running service.state = ServiceState.Running
debug "Service started" debug "Service started"
debug "Sync committee duties loop waiting for fork schedule update" debug "Sync committee processing loop is waiting for initialization"
await vc.forksAvailable.wait() try:
await allFutures(
vc.preGenesisEvent.wait(),
vc.genesisEvent.wait(),
vc.indicesAvailable.wait(),
vc.forksAvailable.wait()
)
except CancelledError:
debug "Service interrupted"
return
except CatchableError as exc:
warn "Service crashed with unexpected error", err_name = exc.name,
err_msg = exc.msg
return
doAssert(len(vc.forks) > 0, "Fork schedule must not be empty at this point")
var currentSlot: Opt[Slot]
while true: while true:
# This loop could look much more nicer/better, when # This loop could look much more nicer/better, when
# https://github.com/nim-lang/Nim/issues/19911 will be fixed, so it could # https://github.com/nim-lang/Nim/issues/19911 will be fixed, so it could
# become safe to combine loops, breaks and exception handlers. # become safe to combine loops, breaks and exception handlers.
let breakLoop = let breakLoop =
try: try:
let sleepTime = let
syncCommitteeMessageSlotOffset + vc.beaconClock.durationToNextSlot() # We use zero offset here, because we do waiting in
# waitForBlockPublished(syncCommitteeMessageSlotOffset).
let sres = vc.getCurrentSlot() slot = await vc.checkedWaitForNextSlot(currentSlot, ZeroTimeDiff,
if sres.isSome(): false)
let currentSlot = sres.get() if slot.isNone():
service.spawnSyncCommitteeTasks(currentSlot) debug "System time adjusted backwards significantly, exiting"
await sleepAsync(sleepTime) true
else:
currentSlot = slot
await service.processSyncCommitteeTasks(currentSlot.get())
false false
except CancelledError: except CancelledError:
debug "Service interrupted" debug "Service interrupted"