From a6b188bfd4be0135d72900f66c3be7d86e371764 Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Mon, 16 Nov 2020 20:15:43 +0100 Subject: [PATCH] misc fixes (#2027) * log when database is loading (to avoid confusion) * generate network keys later during startup * fix quarantine not scheduling chain of parents for download and increase size to one epoch * log validator count, enr and peerid more clearly on startup --- CHANGELOG.md | 2 ++ beacon_chain/attestation_pool.nim | 3 +-- beacon_chain/block_pools/clearance.nim | 19 +++----------- beacon_chain/block_pools/quarantine.nim | 25 +++++++++++++----- beacon_chain/eth2_network.nim | 9 +------ beacon_chain/nimbus_beacon_node.nim | 34 +++++++++++++------------ beacon_chain/validator_duties.nim | 2 -- vendor/nim-json-rpc | 2 +- vendor/nim-metrics | 2 +- 9 files changed, 46 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 552c2a1f9..17216177d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ Next ================= * Avoid pcre dependency +* Increase quarantine size to 32 and fix parents not being downloaded + consistently 2020-11-12 v0.6.2 ================= diff --git a/beacon_chain/attestation_pool.nim b/beacon_chain/attestation_pool.nim index e0201af74..d3489d2ef 100644 --- a/beacon_chain/attestation_pool.nim +++ b/beacon_chain/attestation_pool.nim @@ -52,8 +52,7 @@ proc init*(T: type AttestationPool, chainDag: ChainDAGRef, quarantine: Quarantin blocks.add cur cur = cur.parent - info "Initializing fork choice from block database", - unfinalized_blocks = blocks.len + info "Initializing fork choice", unfinalized_blocks = blocks.len var epochRef = finalizedEpochRef for i in 0.. dag.finalizedHead.slot + func removeOldBlocks(quarantine: var QuarantineRef, dag: ChainDAGRef) = var oldBlocks: seq[(Eth2Digest, ValidatorSig)] for k, v in quarantine.orphans.pairs(): - if v.message.slot <= dag.finalizedHead.slot: + if not isViableOrphan(dag, v): oldBlocks.add k for k in oldBlocks: @@ -97,16 +100,24 @@ func add*(quarantine: var QuarantineRef, dag: ChainDAGRef, # for future slots are rejected before reaching quarantine, this usually # will be a block for the last couple of slots for which the parent is a # likely imminent arrival. - const MAX_QUARANTINE_ORPHANS = 10 + + # Since we start forward sync when about one epoch is missing, that's as + # good a number as any. + const MAX_QUARANTINE_ORPHANS = SLOTS_PER_EPOCH + + if not isViableOrphan(dag, signedBlock): + return false quarantine.removeOldBlocks(dag) - if quarantine.orphans.len >= MAX_QUARANTINE_ORPHANS: + # Even if the quarantine is full, we need to schedule its parent for + # downloading or we'll never get to the bottom of things + quarantine.addMissing(signedBlock.message.parent_root) + + if quarantine.orphans.lenu64 >= MAX_QUARANTINE_ORPHANS: return false quarantine.orphans[(signedBlock.root, signedBlock.signature)] = signedBlock quarantine.missing.del(signedBlock.root) - quarantine.addMissing(signedBlock.message.parent_root) - true diff --git a/beacon_chain/eth2_network.nim b/beacon_chain/eth2_network.nim index 53a70c76a..993611227 100644 --- a/beacon_chain/eth2_network.nim +++ b/beacon_chain/eth2_network.nim @@ -470,13 +470,6 @@ template raisePeerDisconnected(msg: string, r: DisconnectionReason) = e.reason = r raise e -proc disconnectAndRaise(peer: Peer, - reason: DisconnectionReason, - msg: string) {.async.} = - let r = reason - await peer.disconnect(r) - raisePeerDisconnected(msg, r) - proc writeChunk*(conn: Connection, responseCode: Option[ResponseCode], payload: Bytes): Future[void] = @@ -1396,7 +1389,7 @@ proc createEth2Node*(rng: ref BrHmacDrbgContext, announcedAddresses = if extIp.isNone(): @[] else: @[tcpEndPoint(extIp.get(), extTcpPort)] - info "Initializing networking", hostAddress, + debug "Initializing networking", hostAddress, network_public_key = netKeys.pubkey, announcedAddresses diff --git a/beacon_chain/nimbus_beacon_node.nim b/beacon_chain/nimbus_beacon_node.nim index c93d6b681..6ad2b3c4e 100644 --- a/beacon_chain/nimbus_beacon_node.nim +++ b/beacon_chain/nimbus_beacon_node.nim @@ -7,7 +7,8 @@ import # Standard library - std/[os, tables, strutils, sequtils, times, math, terminal, osproc, random], + std/[os, tables, strutils, strformat, sequtils, times, math, terminal, osproc, + random], # Nimble packages stew/[objects, byteutils, endians2, io2], stew/shims/macros, @@ -72,13 +73,9 @@ proc init*(T: type BeaconNode, genesisStateContents: ref string, eth1Network: Option[Eth1Network]): Future[BeaconNode] {.async.} = let - netKeys = getPersistentNetKeys(rng[], conf) - nickname = if conf.nodeName == "auto": shortForm(netKeys) - else: conf.nodeName db = BeaconChainDB.init(conf.runtimePreset, conf.databaseDir) var - eth1Monitor: Eth1Monitor genesisState, checkpointState: ref BeaconState checkpointBlock: SignedBeaconBlock @@ -114,6 +111,7 @@ proc init*(T: type BeaconNode, fatal "--finalized-checkpoint-block cannot be specified without --finalized-checkpoint-state" quit 1 + var eth1Monitor: Eth1Monitor if not ChainDAGRef.isInitialized(db): var tailState: ref BeaconState @@ -181,7 +179,7 @@ proc init*(T: type BeaconNode, try: genesisState = newClone(SSZ.decode(genesisStateContents[], BeaconState)) except CatchableError as err: - raiseAssert "The baked-in state must be valid" + raiseAssert "Invalid baked-in state: " & err.msg if checkpointState != nil: tailState = checkpointState @@ -197,11 +195,13 @@ proc init*(T: type BeaconNode, error "Failed to initialize database", err = e.msg quit 1 + info "Loading block dag from database", path = conf.databaseDir + # TODO(zah) check that genesis given on command line (if any) matches database let chainDagFlags = if conf.verifyFinalization: {verifyFinalization} else: {} - chainDag = init(ChainDAGRef, conf.runtimePreset, db, chainDagFlags) + chainDag = ChainDAGRef.init(conf.runtimePreset, db, chainDagFlags) beaconClock = BeaconClock.init(chainDag.headState.data.data) quarantine = QuarantineRef() @@ -252,6 +252,9 @@ proc init*(T: type BeaconNode, nil let + netKeys = getPersistentNetKeys(rng[], conf) + nickname = if conf.nodeName == "auto": shortForm(netKeys) + else: conf.nodeName enrForkId = enrForkIdFromState(chainDag.headState.data.data) topicBeaconBlocks = getBeaconBlocksTopic(enrForkId.forkDigest) topicAggregateAndProofs = getAggregateAndProofsTopic(enrForkId.forkDigest) @@ -278,6 +281,7 @@ proc init*(T: type BeaconNode, topicAggregateAndProofs: topicAggregateAndProofs, ) + info "Loading slashing protection database", path = conf.validatorsDir() res.attachedValidators = ValidatorPool.init( SlashingProtectionDB.init( chainDag.headState.data.data.genesis_validators_root, @@ -801,6 +805,7 @@ proc createPidFile(filename: string) = addQuitProc proc {.noconv.} = discard io2.removeFile(gPidFile) proc initializeNetworking(node: BeaconNode) {.async.} = + info "Listening to incoming network requests" await node.network.startListening() let addressFile = node.config.dataDir / "beacon_node.enr" @@ -808,10 +813,6 @@ proc initializeNetworking(node: BeaconNode) {.async.} = await node.network.start() - notice "Networking initialized", - enr = node.network.announcedENR.toURI, - libp2p = shortLog(node.network.switch.peerInfo) - proc start(node: BeaconNode) = let head = node.chainDag.head @@ -820,16 +821,17 @@ proc start(node: BeaconNode) = notice "Starting beacon node", version = fullVersionStr, - nim = shortNimBanner(), + enr = node.network.announcedENR.toURI, + peerId = $node.network.switch.peerInfo.peerId, timeSinceFinalization = - finalizedHead.slot.toBeaconTime() - - node.beaconClock.now(), + node.beaconClock.now() - finalizedHead.slot.toBeaconTime(), head = shortLog(head), finalizedHead = shortLog(finalizedHead), SLOTS_PER_EPOCH, SECONDS_PER_SLOT, SPEC_VERSION, - dataDir = node.config.dataDir.string + dataDir = node.config.dataDir.string, + validators = node.attachedValidators.count if genesisTime.inFuture: notice "Waiting for genesis", genesisIn = genesisTime.offset @@ -1145,7 +1147,7 @@ programMain: if config.metricsEnabled: let metricsAddress = config.metricsAddress notice "Starting metrics HTTP server", - address = metricsAddress, port = config.metricsPort + url = "http://" & $metricsAddress & ":" & $config.metricsPort & "/metrics" metrics.startHttpServer($metricsAddress, config.metricsPort) # There are no managed event loops in here, to do a graceful shutdown, but diff --git a/beacon_chain/validator_duties.nim b/beacon_chain/validator_duties.nim index d743d4bd4..94855c536 100644 --- a/beacon_chain/validator_duties.nim +++ b/beacon_chain/validator_duties.nim @@ -59,7 +59,6 @@ proc addLocalValidator*(node: BeaconNode, proc addLocalValidators*(node: BeaconNode) = for validatorKey in node.config.validatorKeys: node.addLocalValidator node.chainDag.headState.data.data, validatorKey - notice "Local validators attached ", count = node.attachedValidators.count proc addRemoteValidators*(node: BeaconNode) = # load all the validators from the child process - loop until `end` @@ -76,7 +75,6 @@ proc addRemoteValidators*(node: BeaconNode) = outStream: node.vcProcess.outputStream, pubKeyStr: $key)) node.attachedValidators.addRemoteValidator(key, v) - notice "Remote validators attached ", count = node.attachedValidators.count proc getAttachedValidator*(node: BeaconNode, pubkey: ValidatorPubKey): AttachedValidator = diff --git a/vendor/nim-json-rpc b/vendor/nim-json-rpc index 99455437b..5b6312a18 160000 --- a/vendor/nim-json-rpc +++ b/vendor/nim-json-rpc @@ -1 +1 @@ -Subproject commit 99455437ba3d83d5af1c38007fedeeff295e959e +Subproject commit 5b6312a18be4001ef644411b579138a09d853e8c diff --git a/vendor/nim-metrics b/vendor/nim-metrics index 77305f3b3..22a386734 160000 --- a/vendor/nim-metrics +++ b/vendor/nim-metrics @@ -1 +1 @@ -Subproject commit 77305f3b3ddfef5aff65b685effc7e7a1d1cf59c +Subproject commit 22a3867341f7b0a9d55661b41d7ee5febe35c86b