misc fixes (#2027)

* log when database is loading (to avoid confusion)
* generate network keys later during startup
* fix quarantine not scheduling the chain of parents for download, and increase quarantine size to one epoch
* log validator count, enr and peerid more clearly on startup
Jacek Sieka 2020-11-16 20:15:43 +01:00 committed by GitHub
parent 7ba541a564
commit a6b188bfd4
9 changed files with 46 additions and 52 deletions

View File

@@ -2,6 +2,8 @@ Next
 =================
 * Avoid pcre dependency
+* Increase quarantine size to 32 and fix parents not being downloaded
+  consistently
 
 2020-11-12 v0.6.2
 =================

View File

@@ -52,8 +52,7 @@ proc init*(T: type AttestationPool, chainDag: ChainDAGRef, quarantine: Quarantin
     blocks.add cur
     cur = cur.parent
 
-  info "Initializing fork choice from block database",
-    unfinalized_blocks = blocks.len
+  info "Initializing fork choice", unfinalized_blocks = blocks.len
 
   var epochRef = finalizedEpochRef
   for i in 0..<blocks.len:

View File

@@ -33,7 +33,7 @@ func getOrResolve*(dag: ChainDAGRef, quarantine: var QuarantineRef, root: Eth2Di
   result = dag.getRef(root)
 
   if result.isNil:
-    quarantine.missing[root] = MissingBlock()
+    quarantine.addMissing(root)
 
 proc addRawBlock*(
       dag: var ChainDAGRef, quarantine: var QuarantineRef,
@@ -223,18 +223,11 @@ proc addRawBlock*(
     return ok dag.clearanceState.blck
 
-  # TODO already checked hash though? main reason to keep this is because
-  # the pending dag calls this function back later in a loop, so as long
-  # as dag.add(...) requires a SignedBeaconBlock, easier to keep them in
-  # pending too.
+  # This is an unresolved block - add it to the quarantine, which will cause its
+  # parent to be scheduled for downloading
   if not quarantine.add(dag, signedBlock):
     debug "Block quarantine full"
 
-  # TODO possibly, it makes sense to check the database - that would allow sync
-  #      to simply fill up the database with random blocks the other clients
-  #      think are useful - but, it would also risk filling the database with
-  #      junk that's not part of the block graph
-
   if blck.parent_root in quarantine.missing or
       containsOrphan(quarantine, signedBlock):
     debug "Unresolved block (parent missing or orphaned)",
@@ -243,7 +236,6 @@ proc addRawBlock*(
 
     return err((ValidationResult.Ignore, MissingParent))
 
-  # This is an unresolved block - put its parent on the missing list for now...
   # TODO if we receive spam blocks, one heurestic to implement might be to wait
   #      for a couple of attestations to appear before fetching parents - this
   #      would help prevent using up network resources for spam - this serves
@@ -253,9 +245,6 @@ proc addRawBlock*(
   #      them out without penalty - but signing invalid attestations carries
   #      a risk of being slashed, making attestations a more valuable spam
   #      filter.
-  # TODO when we receive the block, we don't know how many others we're missing
-  #      from that branch, so right now, we'll just do a blind guess
-
   debug "Unresolved block (parent missing)",
     orphans = quarantine.orphans.len,
     missing = quarantine.missing.len
@@ -348,7 +337,7 @@ proc isValidBeaconBlock*(
     debug "parent unknown, putting block in quarantine",
       current_slot = wallTime.toSlot()
     if not quarantine.add(dag, signed_beacon_block):
-      warn "Block quarantine full"
+      debug "Block quarantine full"
     return err((ValidationResult.Ignore, MissingParent))
 
   # [REJECT] The current finalized_checkpoint is an ancestor of block -- i.e.

View File

@@ -31,8 +31,6 @@ func checkMissing*(quarantine: var QuarantineRef): seq[FetchRecord] =
       inc v.tries
 
   for k in done:
-    # TODO Need to potentially remove from quarantine.pending - this is currently a
-    #      memory leak here!
     quarantine.missing.del(k)
 
   # simple (simplistic?) exponential backoff for retries..
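The backoff the comment above refers to can be as simple as refetching a root only when its try counter lands on a power of two. A minimal sketch of that idea, with stand-in types (`Digest` stands in for `Eth2Digest`; the power-of-two gate is an assumption about the policy, not necessarily the exact code here):

```nim
import std/[bitops, tables]

type
  Digest = string                # stand-in for Eth2Digest
  MissingBlock = object
    tries: int
  FetchRecord = object
    root: Digest

var missing: Table[Digest, MissingBlock]

proc retriesDue(): seq[FetchRecord] =
  # Retry a root only when its try counter is a power of two (1, 2, 4, 8...),
  # so successive refetch attempts are spaced exponentially apart.
  for k, v in missing.pairs():
    if countOnes(v.tries) == 1:
      result.add FetchRecord(root: k)

missing["a1b2"] = MissingBlock(tries: 4)  # hypothetical root; 4 = 2^2, retry due
echo retriesDue()
```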
@@ -66,11 +64,16 @@ func removeOrphan*(
     quarantine: var QuarantineRef, signedBlock: SignedBeaconBlock) =
   quarantine.orphans.del((signedBlock.root, signedBlock.signature))
 
+func isViableOrphan(dag: ChainDAGRef, signedBlock: SignedBeaconBlock): bool =
+  # The orphan must be newer than the finalization point so that its parent
+  # either is the finalized block or more recent
+  signedBlock.message.slot > dag.finalizedHead.slot
+
 func removeOldBlocks(quarantine: var QuarantineRef, dag: ChainDAGRef) =
   var oldBlocks: seq[(Eth2Digest, ValidatorSig)]
 
   for k, v in quarantine.orphans.pairs():
-    if v.message.slot <= dag.finalizedHead.slot:
+    if not isViableOrphan(dag, v):
       oldBlocks.add k
 
   for k in oldBlocks:
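A worked example of the viability rule, with hypothetical slot numbers: only an orphan strictly newer than the finalized slot can still attach to the canonical chain.

```nim
# Self-contained restatement of the rule above, with plain integers.
func isViable(blockSlot, finalizedSlot: uint64): bool =
  # The orphan must be newer than the finalization point so that its
  # parent is either the finalized block or more recent.
  blockSlot > finalizedSlot

doAssert isViable(65, 64)      # parent sits at slot <= 64, possibly finalized
doAssert not isViable(64, 64)  # can only extend a branch pruned by finality
```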
@@ -97,16 +100,24 @@ func add*(quarantine: var QuarantineRef, dag: ChainDAGRef,
   # for future slots are rejected before reaching quarantine, this usually
   # will be a block for the last couple of slots for which the parent is a
   # likely imminent arrival.
 
-  const MAX_QUARANTINE_ORPHANS = 10
+  # Since we start forward sync when about one epoch is missing, that's as
+  # good a number as any.
+  const MAX_QUARANTINE_ORPHANS = SLOTS_PER_EPOCH
 
+  if not isViableOrphan(dag, signedBlock):
+    return false
+
   quarantine.removeOldBlocks(dag)
 
-  if quarantine.orphans.len >= MAX_QUARANTINE_ORPHANS:
+  # Even if the quarantine is full, we need to schedule its parent for
+  # downloading or we'll never get to the bottom of things
+  quarantine.addMissing(signedBlock.message.parent_root)
+  if quarantine.orphans.lenu64 >= MAX_QUARANTINE_ORPHANS:
     return false
 
   quarantine.orphans[(signedBlock.root, signedBlock.signature)] = signedBlock
   quarantine.missing.del(signedBlock.root)
-  quarantine.addMissing(signedBlock.message.parent_root)
 
   true
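The key behavioural change in this hunk is that `addMissing` now runs before the capacity check: even when the orphan table is full, each incoming block still schedules its parent for download, so gossip eventually walks the missing chain back to a known ancestor. A simplified model of that flow, with hypothetical stand-in types (not the real `QuarantineRef`):

```nim
import std/[sets, tables]

type
  Digest = string               # stand-in for Eth2Digest
  Blck = object
    root, parent: Digest

const MaxOrphans = 32           # one epoch's worth of slots on mainnet

var
  orphans: Table[Digest, Blck]
  missing: HashSet[Digest]

proc add(b: Blck): bool =
  # The fix: record the parent *before* the capacity check, so a full
  # quarantine can no longer break the chain of parent requests.
  missing.incl b.parent
  if orphans.len >= MaxOrphans:
    return false
  orphans[b.root] = b
  missing.excl b.root           # no longer missing - we hold it now
  true

for i in 0 .. 40:               # 41 blocks, but the table holds only 32
  discard add(Blck(root: $i, parent: $(i + 1)))
echo missing.len                # parents of the overflow blocks still scheduled
```

Previously the parent was recorded only on the success path, so once the table filled up, blocks past the limit left no trace and the chain of parents was never requested.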

View File

@@ -470,13 +470,6 @@ template raisePeerDisconnected(msg: string, r: DisconnectionReason) =
   e.reason = r
   raise e
 
-proc disconnectAndRaise(peer: Peer,
-                        reason: DisconnectionReason,
-                        msg: string) {.async.} =
-  let r = reason
-  await peer.disconnect(r)
-  raisePeerDisconnected(msg, r)
-
 proc writeChunk*(conn: Connection,
                  responseCode: Option[ResponseCode],
                  payload: Bytes): Future[void] =
@@ -1396,7 +1389,7 @@ proc createEth2Node*(rng: ref BrHmacDrbgContext,
     announcedAddresses = if extIp.isNone(): @[]
                          else: @[tcpEndPoint(extIp.get(), extTcpPort)]
 
-  info "Initializing networking", hostAddress,
+  debug "Initializing networking", hostAddress,
     network_public_key = netKeys.pubkey,
     announcedAddresses

View File

@@ -7,7 +7,8 @@
 import
   # Standard library
-  std/[os, tables, strutils, sequtils, times, math, terminal, osproc, random],
+  std/[os, tables, strutils, strformat, sequtils, times, math, terminal, osproc,
+    random],
 
   # Nimble packages
   stew/[objects, byteutils, endians2, io2], stew/shims/macros,
@@ -72,13 +73,9 @@ proc init*(T: type BeaconNode,
            genesisStateContents: ref string,
            eth1Network: Option[Eth1Network]): Future[BeaconNode] {.async.} =
   let
-    netKeys = getPersistentNetKeys(rng[], conf)
-    nickname = if conf.nodeName == "auto": shortForm(netKeys)
-               else: conf.nodeName
     db = BeaconChainDB.init(conf.runtimePreset, conf.databaseDir)
 
   var
-    eth1Monitor: Eth1Monitor
     genesisState, checkpointState: ref BeaconState
     checkpointBlock: SignedBeaconBlock
@@ -114,6 +111,7 @@ proc init*(T: type BeaconNode,
     fatal "--finalized-checkpoint-block cannot be specified without --finalized-checkpoint-state"
     quit 1
 
+  var eth1Monitor: Eth1Monitor
   if not ChainDAGRef.isInitialized(db):
     var
       tailState: ref BeaconState
@@ -181,7 +179,7 @@ proc init*(T: type BeaconNode,
       try:
         genesisState = newClone(SSZ.decode(genesisStateContents[], BeaconState))
       except CatchableError as err:
-        raiseAssert "The baked-in state must be valid"
+        raiseAssert "Invalid baked-in state: " & err.msg
 
   if checkpointState != nil:
     tailState = checkpointState
@@ -197,11 +195,13 @@ proc init*(T: type BeaconNode,
       error "Failed to initialize database", err = e.msg
       quit 1
 
+  info "Loading block dag from database", path = conf.databaseDir
+
   # TODO(zah) check that genesis given on command line (if any) matches database
   let
     chainDagFlags = if conf.verifyFinalization: {verifyFinalization}
                     else: {}
-    chainDag = init(ChainDAGRef, conf.runtimePreset, db, chainDagFlags)
+    chainDag = ChainDAGRef.init(conf.runtimePreset, db, chainDagFlags)
     beaconClock = BeaconClock.init(chainDag.headState.data.data)
     quarantine = QuarantineRef()
@@ -252,6 +252,9 @@ proc init*(T: type BeaconNode,
       nil
 
   let
+    netKeys = getPersistentNetKeys(rng[], conf)
+    nickname = if conf.nodeName == "auto": shortForm(netKeys)
+               else: conf.nodeName
     enrForkId = enrForkIdFromState(chainDag.headState.data.data)
     topicBeaconBlocks = getBeaconBlocksTopic(enrForkId.forkDigest)
     topicAggregateAndProofs = getAggregateAndProofsTopic(enrForkId.forkDigest)
@@ -278,6 +281,7 @@ proc init*(T: type BeaconNode,
     topicAggregateAndProofs: topicAggregateAndProofs,
   )
 
+  info "Loading slashing protection database", path = conf.validatorsDir()
   res.attachedValidators = ValidatorPool.init(
     SlashingProtectionDB.init(
       chainDag.headState.data.data.genesis_validators_root,
@@ -801,6 +805,7 @@ proc createPidFile(filename: string) =
   addQuitProc proc {.noconv.} = discard io2.removeFile(gPidFile)
 
 proc initializeNetworking(node: BeaconNode) {.async.} =
+  info "Listening to incoming network requests"
   await node.network.startListening()
 
   let addressFile = node.config.dataDir / "beacon_node.enr"
@@ -808,10 +813,6 @@ proc initializeNetworking(node: BeaconNode) {.async.} =
   await node.network.start()
 
-  notice "Networking initialized",
-    enr = node.network.announcedENR.toURI,
-    libp2p = shortLog(node.network.switch.peerInfo)
-
 proc start(node: BeaconNode) =
   let
     head = node.chainDag.head
@@ -820,16 +821,17 @@ proc start(node: BeaconNode) =
   notice "Starting beacon node",
     version = fullVersionStr,
     nim = shortNimBanner(),
+    enr = node.network.announcedENR.toURI,
+    peerId = $node.network.switch.peerInfo.peerId,
     timeSinceFinalization =
-      finalizedHead.slot.toBeaconTime() -
-      node.beaconClock.now(),
+      node.beaconClock.now() - finalizedHead.slot.toBeaconTime(),
     head = shortLog(head),
     finalizedHead = shortLog(finalizedHead),
     SLOTS_PER_EPOCH,
     SECONDS_PER_SLOT,
     SPEC_VERSION,
-    dataDir = node.config.dataDir.string
+    dataDir = node.config.dataDir.string,
+    validators = node.attachedValidators.count
 
   if genesisTime.inFuture:
     notice "Waiting for genesis", genesisIn = genesisTime.offset
@@ -1145,7 +1147,7 @@ programMain:
   if config.metricsEnabled:
     let metricsAddress = config.metricsAddress
     notice "Starting metrics HTTP server",
-      address = metricsAddress, port = config.metricsPort
+      url = "http://" & $metricsAddress & ":" & $config.metricsPort & "/metrics"
     metrics.startHttpServer($metricsAddress, config.metricsPort)
 
   # There are no managed event loops in here, to do a graceful shutdown, but
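With the new field the log carries a ready-to-paste URL instead of separate address and port fields. For example, assuming the values 127.0.0.1 and 8008 (hypothetical defaults), the concatenation above yields:

```nim
let
  metricsAddress = "127.0.0.1"  # hypothetical config.metricsAddress
  metricsPort = 8008            # hypothetical config.metricsPort
echo "http://" & metricsAddress & ":" & $metricsPort & "/metrics"
# -> http://127.0.0.1:8008/metrics
```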

View File

@@ -59,7 +59,6 @@ proc addLocalValidator*(node: BeaconNode,
 proc addLocalValidators*(node: BeaconNode) =
   for validatorKey in node.config.validatorKeys:
     node.addLocalValidator node.chainDag.headState.data.data, validatorKey
-  notice "Local validators attached ", count = node.attachedValidators.count
 
 proc addRemoteValidators*(node: BeaconNode) =
   # load all the validators from the child process - loop until `end`
@@ -76,7 +75,6 @@ proc addRemoteValidators*(node: BeaconNode) =
       outStream: node.vcProcess.outputStream,
       pubKeyStr: $key))
     node.attachedValidators.addRemoteValidator(key, v)
-  notice "Remote validators attached ", count = node.attachedValidators.count
 
 proc getAttachedValidator*(node: BeaconNode,
                            pubkey: ValidatorPubKey): AttachedValidator =

vendor/nim-json-rpc vendored

@@ -1 +1 @@
-Subproject commit 99455437ba3d83d5af1c38007fedeeff295e959e
+Subproject commit 5b6312a18be4001ef644411b579138a09d853e8c

vendor/nim-metrics vendored

@@ -1 +1 @@
-Subproject commit 77305f3b3ddfef5aff65b685effc7e7a1d1cf59c
+Subproject commit 22a3867341f7b0a9d55661b41d7ee5febe35c86b