From d6cd1cd46c357fcbe7e44bdcdb94ebc6d265dcdc Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Mon, 1 Nov 2021 15:50:24 +0100 Subject: [PATCH] Remove web3-mode, always keep web3 monitor enabled when given (#3042) * Init some components fully before BeaconNode, per component dependency graph * remove `--web3-mode` option * fixes https://github.com/status-im/nimbus-eth2/issues/2685 * reshuffle some beacon node init code --- beacon_chain/conf.nim | 11 -- beacon_chain/eth1/eth1_monitor.nim | 2 +- beacon_chain/networking/eth2_network.nim | 28 ++++- beacon_chain/nimbus_beacon_node.nim | 146 +++++++++-------------- 4 files changed, 81 insertions(+), 106 deletions(-) diff --git a/beacon_chain/conf.nim b/beacon_chain/conf.nim index d3e8c5341..172e436fd 100644 --- a/beacon_chain/conf.nim +++ b/beacon_chain/conf.nim @@ -65,11 +65,6 @@ type Web3Cmd* {.pure.} = enum test = "Test a web3 provider" - Web3Mode* {.pure.} = enum - auto # Enabled only when validators are attached - enabled # Always enabled - disabled # Always disabled - SlashingDbKind* {.pure.} = enum v1 v2 @@ -119,12 +114,6 @@ type desc: "One or more Web3 provider URLs used for obtaining deposit contract data" name: "web3-url" }: seq[string] - web3Mode* {. - hidden - defaultValue: Web3Mode.auto - desc: "URL of the Web3 server to observe Eth1" - name: "web3-mode" }: Web3Mode - nonInteractive* {. desc: "Do not display interative prompts. 
Quit on missing configuration" name: "non-interactive" }: bool diff --git a/beacon_chain/eth1/eth1_monitor.nim b/beacon_chain/eth1/eth1_monitor.nim index 021164e3c..e14022f30 100644 --- a/beacon_chain/eth1/eth1_monitor.nim +++ b/beacon_chain/eth1/eth1_monitor.nim @@ -1151,7 +1151,7 @@ proc start(m: Eth1Monitor, delayBeforeStart: Duration) = if runFut.failed: if runFut.error[] of CatchableError: if runFut == m.runFut: - error "Eth1 chain monitoring failure, restarting", err = runFut.error.msg + warn "Eth1 chain monitoring failure, restarting", err = runFut.error.msg m.state = Failed else: fatal "Fatal exception reached", err = runFut.error.msg diff --git a/beacon_chain/networking/eth2_network.nim b/beacon_chain/networking/eth2_network.nim index 73852db15..9ce0fa617 100644 --- a/beacon_chain/networking/eth2_network.nim +++ b/beacon_chain/networking/eth2_network.nim @@ -217,13 +217,13 @@ const clientId* = "Nimbus beacon node " & fullVersionStr nodeMetadataFilename = "node-metadata.json" - NewPeerScore* = 200 + NewPeerScore = 200 ## Score which will be assigned to new connected Peer - PeerScoreLowLimit* = 0 + PeerScoreLowLimit = 0 ## Score after which peer will be kicked - PeerScoreHighLimit* = 1000 + PeerScoreHighLimit = 1000 ## Max value of peer's score - PeerScoreInvalidRequest* = -500 + PeerScoreInvalidRequest = -500 ## This peer is sending malformed or nonsensical data ConcurrentConnections = 20 @@ -1363,6 +1363,26 @@ proc new*(T: type Eth2Node, config: BeaconNodeConf, runtimeCfg: RuntimeConfig, switch.addConnEventHandler(peerHook, ConnEventKind.Connected) switch.addConnEventHandler(peerHook, ConnEventKind.Disconnected) + proc scoreCheck(peer: Peer): bool = + peer.score >= PeerScoreLowLimit + + proc onDeletePeer(peer: Peer) = + if peer.connectionState notin {ConnectionState.Disconnecting, + ConnectionState.Disconnected}: + if peer.score < PeerScoreLowLimit: + debug "Peer was removed from PeerPool due to low score", peer = peer, + peer_score = peer.score, 
score_low_limit = PeerScoreLowLimit, + score_high_limit = PeerScoreHighLimit + asyncSpawn(peer.disconnect(PeerScoreLow)) + else: + debug "Peer was removed from PeerPool", peer = peer, + peer_score = peer.score, score_low_limit = PeerScoreLowLimit, + score_high_limit = PeerScoreHighLimit + asyncSpawn(peer.disconnect(FaultOrError)) # Shouldn't actually happen! + + node.peerPool.setScoreCheck(scoreCheck) + node.peerPool.setOnDeletePeer(onDeletePeer) + node template publicKey*(node: Eth2Node): keys.PublicKey = diff --git a/beacon_chain/nimbus_beacon_node.nim b/beacon_chain/nimbus_beacon_node.nim index 9960b96a1..f233bcfd2 100644 --- a/beacon_chain/nimbus_beacon_node.nim +++ b/beacon_chain/nimbus_beacon_node.nim @@ -79,6 +79,9 @@ template init(T: type RestServerRef, ip: ValidIpAddress, port: Port): T = reason = res.error() nil else: + notice "Starting REST HTTP server", + url = "http://" & $ip & ":" & $port & "/" + res.get() # https://github.com/ethereum/eth2.0-metrics/blob/master/metrics.md#interop-metrics @@ -376,6 +379,15 @@ proc init*(T: type BeaconNode, info "Loading slashing protection database (v2)", path = config.validatorsDir() + func getLocalHeadSlot(): Slot = + dag.head.slot + + proc getLocalWallSlot(): Slot = + beaconClock.now.slotOrZero + + func getFirstSlotAtFinalizedEpoch(): Slot = + dag.finalizedHead.slot + let slashingProtectionDB = SlashingProtectionDB.init( @@ -393,6 +405,9 @@ proc init*(T: type BeaconNode, config.doppelgangerDetection, blockProcessor, dag, attestationPool, exitPool, validatorPool, syncCommitteeMsgPool, quarantine, rng, getBeaconTime, taskpool) + syncManager = newSyncManager[Peer, PeerID]( + network.peerPool, getLocalHeadSlot, getLocalWallSlot, + getFirstSlotAtFinalizedEpoch, blockProcessor, chunkSize = 32) var node = BeaconNode( nickname: nickname, @@ -402,22 +417,23 @@ proc init*(T: type BeaconNode, netKeys: netKeys, db: db, config: config, + attachedValidators: validatorPool, dag: dag, - gossipState: GossipState.Disconnected, 
quarantine: quarantine, attestationPool: attestationPool, syncCommitteeMsgPool: syncCommitteeMsgPool, - attachedValidators: validatorPool, exitPool: exitPool, eth1Monitor: eth1Monitor, rpcServer: rpcServer, restServer: restServer, eventBus: eventBus, + requestManager: RequestManager.init(network, blockProcessor), + syncManager: syncManager, actionTracker: ActionTracker.init(rng, config.subscribeAllSubnets), processor: processor, blockProcessor: blockProcessor, consensusManager: consensusManager, - requestManager: RequestManager.init(network, blockProcessor), + gossipState: GossipState.Disconnected, beaconClock: beaconClock, taskpool: taskpool, onAttestationSent: onAttestationSent, @@ -938,47 +954,6 @@ proc runOnSecondLoop(node: BeaconNode) {.async.} = ticks_delay.set(sleepTime.nanoseconds.float / nanosecondsIn1s) trace "onSecond task completed", sleepTime, processingTime -proc startSyncManager(node: BeaconNode) = - func getLocalHeadSlot(): Slot = - node.dag.head.slot - - proc getLocalWallSlot(): Slot = - node.beaconClock.now.slotOrZero - - func getFirstSlotAtFinalizedEpoch(): Slot = - node.dag.finalizedHead.slot - - proc scoreCheck(peer: Peer): bool = - if peer.score < PeerScoreLowLimit: - false - else: - true - - proc onDeletePeer(peer: Peer) = - if peer.connectionState notin {ConnectionState.Disconnecting, - ConnectionState.Disconnected}: - if peer.score < PeerScoreLowLimit: - debug "Peer was removed from PeerPool due to low score", peer = peer, - peer_score = peer.score, score_low_limit = PeerScoreLowLimit, - score_high_limit = PeerScoreHighLimit - asyncSpawn(try: peer.disconnect(PeerScoreLow) - except Exception as exc: raiseAssert exc.msg) # Shouldn't actually happen! - else: - debug "Peer was removed from PeerPool", peer = peer, - peer_score = peer.score, score_low_limit = PeerScoreLowLimit, - score_high_limit = PeerScoreHighLimit - asyncSpawn(try: peer.disconnect(FaultOrError) - except Exception as exc: raiseAssert exc.msg) # Shouldn't actually happen! 
- - node.network.peerPool.setScoreCheck(scoreCheck) - node.network.peerPool.setOnDeletePeer(onDeletePeer) - - node.syncManager = newSyncManager[Peer, PeerID]( - node.network.peerPool, getLocalHeadSlot, getLocalWallSlot, - getFirstSlotAtFinalizedEpoch, node.blockProcessor, chunkSize = 32 - ) - node.syncManager.start() - func connectedPeersCount(node: BeaconNode): int = len(node.network.peerPool) @@ -1110,33 +1085,28 @@ proc stop*(node: BeaconNode) = notice "Databases closed" proc run*(node: BeaconNode) {.raises: [Defect, CatchableError].} = - if bnStatus == BeaconNodeStatus.Starting: - # it might have been set to "Stopping" with Ctrl+C - bnStatus = BeaconNodeStatus.Running + bnStatus = BeaconNodeStatus.Running - if not(isNil(node.rpcServer)): - node.rpcServer.installRpcHandlers(node) - node.rpcServer.start() + if not(isNil(node.rpcServer)): + node.rpcServer.installRpcHandlers(node) + node.rpcServer.start() - if not(isNil(node.restServer)): - node.restServer.installRestHandlers(node) - node.restServer.start() + if not(isNil(node.restServer)): + node.restServer.installRestHandlers(node) + node.restServer.start() - node.installMessageValidators() + let + wallTime = node.beaconClock.now() + wallSlot = wallTime.slotOrZero() - let - wallTime = node.beaconClock.now() - wallSlot = wallTime.slotOrZero() + node.requestManager.start() + node.syncManager.start() - node.requestManager.start() - node.startSyncManager() - - waitFor node.updateGossipStatus(wallSlot) - - asyncSpawn runSlotLoop(node, wallTime, onSlotStart) - asyncSpawn runOnSecondLoop(node) - asyncSpawn runQueueProcessingLoop(node.blockProcessor) + waitFor node.updateGossipStatus(wallSlot) + asyncSpawn runSlotLoop(node, wallTime, onSlotStart) + asyncSpawn runOnSecondLoop(node) + asyncSpawn runQueueProcessingLoop(node.blockProcessor) ## Ctrl+C handling proc controlCHandler() {.noconv.} = @@ -1173,6 +1143,8 @@ proc createPidFile(filename: string) {.raises: [Defect, IOError].} = addQuitProc proc {.noconv.} = discard 
io2.removeFile(gPidFile) proc initializeNetworking(node: BeaconNode) {.async.} = + node.installMessageValidators() + info "Listening to incoming network requests" await node.network.startListening() @@ -1181,10 +1153,6 @@ proc initializeNetworking(node: BeaconNode) {.async.} = await node.network.start() -func shouldWeStartWeb3(node: BeaconNode): bool = - (node.config.web3Mode == Web3Mode.enabled) or - (node.config.web3Mode == Web3Mode.auto and node.attachedValidators[].count > 0) - proc start(node: BeaconNode) {.raises: [Defect, CatchableError].} = let head = node.dag.head @@ -1210,9 +1178,10 @@ proc start(node: BeaconNode) {.raises: [Defect, CatchableError].} = waitFor node.initializeNetworking() - # TODO this does not account for validators getting attached "later" - if node.eth1Monitor != nil and node.shouldWeStartWeb3: + if node.eth1Monitor != nil: node.eth1Monitor.start() + else: + notice "Running without execution chain monitor, block production partially disabled" node.run() @@ -1502,24 +1471,6 @@ proc loadEth2Network(config: BeaconNodeConf): Eth2NetworkMetadata {.raises: [Def echo "Must specify network on non-mainnet node" quit 1 -proc loadBeaconNode(config: var BeaconNodeConf, rng: ref BrHmacDrbgContext): BeaconNode {. 
- raises: [Defect, CatchableError].} = - let metadata = config.loadEth2Network() - - # Updating the config based on the metadata certainly is not beautiful but it - # works - for node in metadata.bootstrapNodes: - config.bootstrapNodes.add node - - BeaconNode.init( - metadata.cfg, - rng, - config, - metadata.depositContractDeployedAt, - metadata.eth1Network, - metadata.genesisData, - metadata.genesisDepositsSnapshot) - proc doRunBeaconNode(config: var BeaconNodeConf, rng: ref BrHmacDrbgContext) {.raises: [Defect, CatchableError].} = info "Launching beacon node", version = fullVersionStr, @@ -1543,7 +1494,22 @@ proc doRunBeaconNode(config: var BeaconNodeConf, rng: ref BrHmacDrbgContext) {.r # There are no managed event loops in here, to do a graceful shutdown, but # letting the default Ctrl+C handler exit is safe, since we only read from # the db. - let node = loadBeaconNode(config, rng) + + let metadata = config.loadEth2Network() + + # Updating the config based on the metadata certainly is not beautiful but it + # works + for node in metadata.bootstrapNodes: + config.bootstrapNodes.add node + + let node = BeaconNode.init( + metadata.cfg, + rng, + config, + metadata.depositContractDeployedAt, + metadata.eth1Network, + metadata.genesisData, + metadata.genesisDepositsSnapshot) if bnStatus == BeaconNodeStatus.Stopping: return