diff --git a/beacon_chain/beacon_node.nim b/beacon_chain/beacon_node.nim index a7a4f0f76..d85e7113b 100644 --- a/beacon_chain/beacon_node.nim +++ b/beacon_chain/beacon_node.nim @@ -106,6 +106,7 @@ type ## Number of validators that we've checked for activation processingDelay*: Opt[Duration] lastValidAttestedBlock*: Opt[BlockSlot] + shutdownEvent*: AsyncEvent template findIt*(s: openArray, predicate: untyped): int = var res = -1 diff --git a/beacon_chain/conf.nim b/beacon_chain/conf.nim index 454878b3d..8ec54e6ea 100644 --- a/beacon_chain/conf.nim +++ b/beacon_chain/conf.nim @@ -131,6 +131,10 @@ type url*: Uri provenBlockProperties*: seq[string] # empty if this is not a verifying Web3Signer + LongRangeSyncMode* {.pure.} = enum + Light = "light", + Lenient = "lenient" + BeaconNodeConf* = object configFile* {. desc: "Loads the configuration from a TOML file" @@ -557,6 +561,11 @@ type desc: "Maximum number of sync committee periods to retain light client data" name: "light-client-data-max-periods" .}: Option[uint64] + longRangeSync* {. + desc: "Enable long-range syncing (genesis sync)", + defaultValue: LongRangeSyncMode.Light, + name: "long-range-sync".}: LongRangeSyncMode + inProcessValidators* {. desc: "Disable the push model (the beacon node tells a signing process with the private keys of the validators what to sign and when) and load the validators in the beacon node itself" defaultValue: true # the use of the nimbus_signing_process binary by default will be delayed until async I/O over stdin/stdout is developed for the child process. diff --git a/beacon_chain/nimbus_beacon_node.nim b/beacon_chain/nimbus_beacon_node.nim index e862c31ee..6fc2187e8 100644 --- a/beacon_chain/nimbus_beacon_node.nim +++ b/beacon_chain/nimbus_beacon_node.nim @@ -373,6 +373,21 @@ proc initFullNode( func getFrontfillSlot(): Slot = max(dag.frontfill.get(BlockId()).slot, dag.horizon) + proc isWithinWeakSubjectivityPeriod(): bool = + let + currentSlot = node.beaconClock.now().slotOrZero() + checkpoint = Checkpoint( + epoch: epoch(getStateField(node.dag.headState, slot)), + root: getStateField(node.dag.headState, latest_block_header).state_root) + is_within_weak_subjectivity_period(node.dag.cfg, currentSlot, + node.dag.headState, checkpoint) + + proc eventWaiter(): Future[void] {.async: (raises: [CancelledError]).} = + await node.shutdownEvent.wait() + bnStatus = BeaconNodeStatus.Stopping + + asyncSpawn eventWaiter() + let quarantine = newClone( Quarantine.init()) @@ -441,19 +456,29 @@ proc initFullNode( blockProcessor, node.validatorMonitor, dag, attestationPool, validatorChangePool, node.attachedValidators, syncCommitteeMsgPool, lightClientPool, quarantine, blobQuarantine, rng, getBeaconTime, taskpool) + syncManagerFlags = + if node.config.longRangeSync != LongRangeSyncMode.Lenient: + {SyncManagerFlag.NoGenesisSync} + else: + {} syncManager = newSyncManager[Peer, PeerId]( node.network.peerPool, dag.cfg.DENEB_FORK_EPOCH, dag.cfg.MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS, SyncQueueKind.Forward, getLocalHeadSlot, getLocalWallSlot, getFirstSlotAtFinalizedEpoch, getBackfillSlot, - getFrontfillSlot, dag.tail.slot, blockVerifier) + getFrontfillSlot, isWithinWeakSubjectivityPeriod, + dag.tail.slot, blockVerifier, + shutdownEvent = node.shutdownEvent, + flags = syncManagerFlags) backfiller = newSyncManager[Peer, PeerId]( node.network.peerPool, dag.cfg.DENEB_FORK_EPOCH, dag.cfg.MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS, SyncQueueKind.Backward, getLocalHeadSlot, getLocalWallSlot, getFirstSlotAtFinalizedEpoch, getBackfillSlot, - getFrontfillSlot, dag.backfill.slot, blockVerifier, - maxHeadAge = 0) + getFrontfillSlot, isWithinWeakSubjectivityPeriod, + dag.backfill.slot, blockVerifier, maxHeadAge = 0, + shutdownEvent = node.shutdownEvent, + flags = syncManagerFlags) router = (ref MessageRouter)( processor: processor, network: node.network) @@ -554,6 +579,27 @@ proc init*(T: type BeaconNode, template cfg: auto = metadata.cfg template eth1Network: auto = metadata.eth1Network + if not(isDir(config.databaseDir)): + # If database directory missing, we going to use genesis state to check + # for weak_subjectivity_period. + let + genesisState = + await fetchGenesisState( + metadata, config.genesisState, config.genesisStateUrl) + genesisTime = getStateField(genesisState[], genesis_time) + beaconClock = BeaconClock.init(genesisTime).valueOr: + fatal "Invalid genesis time in genesis state", genesisTime + quit 1 + currentSlot = beaconClock.now().slotOrZero() + checkpoint = Checkpoint( + epoch: epoch(getStateField(genesisState[], slot)), + root: getStateField(genesisState[], latest_block_header).state_root) + if config.longRangeSync == LongRangeSyncMode.Light: + if not is_within_weak_subjectivity_period(metadata.cfg, currentSlot, + genesisState[], checkpoint): + fatal WeakSubjectivityLogMessage, current_slot = currentSlot + quit 1 + try: if config.numThreads < 0: fatal "The number of threads --numThreads cannot be negative." @@ -885,6 +931,7 @@ proc init*(T: type BeaconNode, beaconClock: beaconClock, validatorMonitor: validatorMonitor, stateTtlCache: stateTtlCache, + shutdownEvent: newAsyncEvent(), dynamicFeeRecipientsStore: newClone(DynamicFeeRecipientsStore.init())) node.initLightClient( diff --git a/beacon_chain/sync/sync_manager.nim b/beacon_chain/sync/sync_manager.nim index 0e6fa9f3d..6e7c17764 100644 --- a/beacon_chain/sync/sync_manager.nim +++ b/beacon_chain/sync/sync_manager.nim @@ -8,7 +8,7 @@ {.push raises: [].} import std/[strutils, sequtils, algorithm] -import stew/base10, chronos, chronicles +import stew/base10, chronos, chronicles, results import ../spec/datatypes/[phase0, altair], ../spec/eth2_apis/rest_types, @@ -34,13 +34,20 @@ const StatusExpirationTime* = chronos.minutes(2) ## Time time it takes for the peer's status information to expire. + WeakSubjectivityLogMessage* = + "Database state missing or too old, cannot sync - resync the client " & + "using a trusted node or allow lenient long-range syncing with the " & + "`--long-range-sync=lenient` option. See " & + "https://nimbus.guide/faq.html#what-is-long-range-sync " & + "for more information" + type SyncWorkerStatus* {.pure.} = enum Sleeping, WaitingPeer, UpdatingStatus, Requesting, Downloading, Queueing, Processing SyncManagerFlag* {.pure.} = enum - NoMonitor + NoMonitor, NoGenesisSync SyncWorker*[A, B] = object future: Future[void].Raising([CancelledError]) @@ -52,6 +59,7 @@ type MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS: uint64 responseTimeout: chronos.Duration maxHeadAge: uint64 + isWithinWeakSubjectivityPeriod: GetBoolCallback getLocalHeadSlot: GetSlotCallback getLocalWallSlot: GetSlotCallback getSafeSlot: GetSlotCallback @@ -60,6 +68,7 @@ type progressPivot: Slot workers: array[SyncWorkersCount, SyncWorker[A, B]] notInSyncEvent: AsyncEvent + shutdownEvent: AsyncEvent rangeAge: uint64 chunkSize: uint64 queue: SyncQueue[A] @@ -124,8 +133,10 @@ proc newSyncManager*[A, B](pool: PeerPool[A, B], getFinalizedSlotCb: GetSlotCallback, getBackfillSlotCb: GetSlotCallback, getFrontfillSlotCb: GetSlotCallback, + weakSubjectivityPeriodCb: GetBoolCallback, progressPivot: Slot, blockVerifier: BlockVerifier, + shutdownEvent: AsyncEvent, maxHeadAge = uint64(SLOTS_PER_EPOCH * 1), chunkSize = uint64(SLOTS_PER_EPOCH), flags: set[SyncManagerFlag] = {}, @@ -143,6 +154,7 @@ proc newSyncManager*[A, B](pool: PeerPool[A, B], MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS: minEpochsForBlobSidecarsRequests, getLocalHeadSlot: getLocalHeadSlotCb, getLocalWallSlot: getLocalWallSlotCb, + isWithinWeakSubjectivityPeriod: weakSubjectivityPeriodCb, getSafeSlot: getSafeSlot, getFirstSlot: getFirstSlot, getLastSlot: getLastSlot, @@ -152,6 +164,7 @@ proc newSyncManager*[A, B](pool: PeerPool[A, B], blockVerifier: blockVerifier, notInSyncEvent: newAsyncEvent(), direction: direction, + shutdownEvent: shutdownEvent, ident: ident, flags: flags ) @@ -566,6 +579,11 @@ proc startWorkers[A, B](man: SyncManager[A, B]) = for i in 0 ..< len(man.workers): man.workers[i].future = syncWorker[A, B](man, i) +proc stopWorkers[A, B](man: SyncManager[A, B]) {.async: (raises: []).} = + # Cancelling all the synchronization workers. + let pending = man.workers.mapIt(it.future.cancelAndWait()) + await noCancel allFutures(pending) + proc toTimeLeftString*(d: Duration): string = if d == InfiniteDuration: "--h--m" @@ -711,6 +729,14 @@ proc syncLoop[A, B](man: SyncManager[A, B]) {.async.} = man.avgSyncSpeed.formatBiggestFloat(ffDecimal, 4) & "slots/s (" & map & ":" & currentSlot & ")" + if (man.queue.kind == SyncQueueKind.Forward) and + (SyncManagerFlag.NoGenesisSync in man.flags): + if not(man.isWithinWeakSubjectivityPeriod()): + fatal WeakSubjectivityLogMessage, current_slot = wallSlot + await man.stopWorkers() + man.shutdownEvent.fire() + return + if man.remainingSlots() <= man.maxHeadAge: man.notInSyncEvent.clear() # We are marking SyncManager as not working only when we are in sync and diff --git a/beacon_chain/sync/sync_queue.nim b/beacon_chain/sync/sync_queue.nim index 10f0a9fb2..85b932e88 100644 --- a/beacon_chain/sync/sync_queue.nim +++ b/beacon_chain/sync/sync_queue.nim @@ -8,7 +8,7 @@ {.push raises: [].} import std/[heapqueue, tables, strutils, sequtils, math] -import stew/base10, chronos, chronicles +import stew/base10, chronos, chronicles, results import ../spec/datatypes/[base, phase0, altair], ../spec/[helpers, forks], @@ -24,6 +24,7 @@ logScope: type GetSlotCallback* = proc(): Slot {.gcsafe, raises: [].} + GetBoolCallback* = proc(): bool {.gcsafe, raises: [].} ProcessingCallback* = proc() {.gcsafe, raises: [].} BlockVerifier* = proc(signedBlock: ForkedSignedBeaconBlock, blobs: Opt[BlobSidecars], maybeFinalized: bool): diff --git a/docs/the_nimbus_book/src/options.md b/docs/the_nimbus_book/src/options.md index 670bf7955..16242d407 100644 --- a/docs/the_nimbus_book/src/options.md +++ b/docs/the_nimbus_book/src/options.md @@ -112,6 +112,7 @@ The following options are available: --light-client-data-import-mode Which classes of light client data to import. Must be one of: none, only-new, full (slow startup), on-demand (may miss validator duties) [=only-new]. --light-client-data-max-periods Maximum number of sync committee periods to retain light client data. + --long-range-sync Enable long-range syncing (genesis sync) [=LongRangeSyncMode.Light]. --in-process-validators Disable the push model (the beacon node tells a signing process with the private keys of the validators what to sign and when) and load the validators in the beacon node itself [=true].