Mirror of https://github.com/status-im/nimbus-eth2.git (synced 2025-02-17 00:47:03 +00:00)
Assorted logging improvements (#3237)
* log doppelganger detection when it activates and when it causes missed duties
* less prominent eth1 sync progress
* log in-progress sync at notice only when actually missing duties
* better detail in replay log
* don't log finalization checkpoints - this is quite verbose when syncing and already included in "Slot start"
parent 3c63a78c01
commit c4ce59e55b
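nimbus-eth2 logs through the chronicles library, and most of the changes below simply move messages between its severity levels (trace, debug, info, notice) or attach more structured key=value fields. As a minimal, self-contained sketch of that pattern (the proc name and sample values are illustrative only, not taken from the codebase):

  import chronicles

  # Chronicles emits structured key=value records at a chosen severity level.
  # Bare identifiers are logged under their own name, mirroring the fields
  # used in the hunks below.
  proc reportEth1Progress(blockNumber: uint64, depositsProcessed: uint64) =
    # Routine progress stays at `info`; `notice` is reserved for actionable events.
    info "Eth1 sync progress", blockNumber, depositsProcessed

  when isMainModule:
    reportEth1Progress(14_000_000'u64, 123'u64)
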
@@ -853,7 +853,9 @@ proc updateStateData*(
   # an earlier state must be loaded since there's no way to undo the slot
   # transitions

-  let startTick = Moment.now()
+  let
+    startTick = Moment.now()
+    current {.used.} = state.blck.atSlot(getStateField(state.data, slot))

   var
     ancestors: seq[BlockRef]

@@ -931,7 +933,7 @@ proc updateStateData*(
   if not found:
     debug "UpdateStateData cache miss",
-      bs, stateBlock = state.blck, stateSlot = getStateField(state.data, slot)
+      current = shortLog(current), target = shortLog(bs)

   # Either the state is too new or was created by applying a different block.
   # We'll now resort to loading the state from the database then reapplying

@@ -967,8 +969,9 @@ proc updateStateData*(
   # Starting state has been assigned, either from memory or database
   let
     assignTick = Moment.now()
-    startSlot {.used.} = getStateField(state.data, slot) # used in logs below
-    startRoot {.used.} = getStateRoot(state.data)
+    ancestor {.used.} = state.blck.atSlot(getStateField(state.data, slot))
+    ancestorRoot {.used.} = getStateRoot(state.data)

   var info: ForkedEpochInfo
   # Time to replay all the blocks between then and now
   for i in countdown(ancestors.len - 1, 0):

@@ -990,27 +993,47 @@ proc updateStateData*(
     assignDur = assignTick - startTick
     replayDur = Moment.now() - assignTick

-  logScope:
-    blocks = ancestors.len
-    slots = getStateField(state.data, slot) - startSlot
-    stateRoot = shortLog(getStateRoot(state.data))
-    stateSlot = getStateField(state.data, slot)
-    startRoot = shortLog(startRoot)
-    startSlot
-    blck = shortLog(bs)
-    found
-    assignDur
-    replayDur
-
   # TODO https://github.com/status-im/nim-chronicles/issues/108
   if (assignDur + replayDur) >= 250.millis:
     # This might indicate there's a cache that's not in order or a disk that is
     # too slow - for now, it's here for investigative purposes and the cutoff
     # time might need tuning
-    info "State replayed"
+    info "State replayed",
+      blocks = ancestors.len,
+      slots = getStateField(state.data, slot) - ancestor.slot,
+      current = shortLog(current),
+      ancestor = shortLog(ancestor),
+      target = shortLog(bs),
+      ancestorStateRoot = shortLog(ancestorRoot),
+      targetStateRoot = shortLog(getStateRoot(state.data)),
+      found,
+      assignDur,
+      replayDur
   elif ancestors.len > 0:
-    debug "State replayed"
-  else:
-    trace "State advanced" # Normal case!
+    debug "State replayed",
+      blocks = ancestors.len,
+      slots = getStateField(state.data, slot) - ancestor.slot,
+      current = shortLog(current),
+      ancestor = shortLog(ancestor),
+      target = shortLog(bs),
+      ancestorStateRoot = shortLog(ancestorRoot),
+      targetStateRoot = shortLog(getStateRoot(state.data)),
+      found,
+      assignDur,
+      replayDur
+  else: # Normal case!
+    trace "State advanced",
+      blocks = ancestors.len,
+      slots = getStateField(state.data, slot) - ancestor.slot,
+      current = shortLog(current),
+      ancestor = shortLog(ancestor),
+      target = shortLog(bs),
+      ancestorStateRoot = shortLog(ancestorRoot),
+      targetStateRoot = shortLog(getStateRoot(state.data)),
+      found,
+      assignDur,
+      replayDur

 proc delState(dag: ChainDAGRef, bs: BlockSlot) =
   # Delete state state and mapping for a particular block+slot

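The hunk above picks the severity from how much work the replay actually did: 250 ms or more of assignment plus replay is promoted to info, a replay that applied any blocks stays at debug, and a plain slot advance is only trace. A standalone sketch of that tiering, assuming chronos for the Duration arithmetic and a hypothetical logReplay proc:

  import chronicles, chronos/timer

  proc logReplay(blocksApplied: int, assignDur, replayDur: Duration) =
    if (assignDur + replayDur) >= 250.millis:
      # Unusually slow - might point at a cold cache or a slow disk.
      info "State replayed", blocksApplied, assignDur, replayDur
    elif blocksApplied > 0:
      debug "State replayed", blocksApplied, assignDur, replayDur
    else:
      trace "State advanced", blocksApplied, assignDur, replayDur

  when isMainModule:
    logReplay(0, 1.millis, 2.millis)  # normal case: trace only
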
@@ -1340,7 +1363,7 @@ proc updateHead*(
     beacon_current_active_validators.set(number_of_active_validators)

   if finalizedHead != dag.finalizedHead:
-    info "Reached new finalization checkpoint",
+    debug "Reached new finalization checkpoint",
       head = shortLog(dag.headState.blck),
       stateRoot = shortLog(getStateRoot(dag.headState.data)),
       justified = shortLog(getStateField(

@@ -1054,7 +1054,7 @@ proc syncBlockRange(m: Eth1Monitor,
       else:
         discard

-    notice "Eth1 sync progress",
+    info "Eth1 sync progress",
       blockNumber = lastBlock.number,
       depositsProcessed = lastBlock.voteData.deposit_count

@@ -152,7 +152,8 @@ proc new*(T: type Eth2Processor,
   (ref Eth2Processor)(
     doppelGangerDetectionEnabled: doppelGangerDetectionEnabled,
     doppelgangerDetection: DoppelgangerProtection(
-      nodeLaunchSlot: getBeaconTime().slotOrZero),
+      nodeLaunchSlot: getBeaconTime().slotOrZero,
+      broadcastStartEpoch: FAR_FUTURE_EPOCH),
     blockProcessor: blockProcessor,
     validatorMonitor: validatorMonitor,
     dag: dag,

@@ -226,6 +227,25 @@ proc blockValidator*(

   v

+proc setupDoppelgangerDetection*(self: var Eth2Processor, slot: Slot) =
+  # When another client's already running, this is very likely to detect
+  # potential duplicate validators, which can trigger slashing.
+  #
+  # Every missed attestation costs approximately 3*get_base_reward(), which
+  # can be up to around 10,000 Wei. Thus, skipping attestations isn't cheap
+  # and one should gauge the likelihood of this simultaneous launch to tune
+  # the epoch delay to one's perceived risk.
+
+  if self.validatorPool[].count() > 0:
+    const duplicateValidatorEpochs = 2
+
+    self.doppelgangerDetection.broadcastStartEpoch =
+      slot.epoch + duplicateValidatorEpochs
+    notice "Setting up doppelganger protection",
+      epoch = slot.epoch,
+      broadcastStartEpoch =
+        self.doppelgangerDetection.broadcastStartEpoch
+
 proc checkForPotentialDoppelganger(
     self: var Eth2Processor, attestation: Attestation,
     attesterIndices: openArray[ValidatorIndex]) =

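The comment above spells out the trade-off: broadcastStartEpoch is pushed two epochs past launch so the node can watch the network for duplicate validators before it starts gossiping, and the FAR_FUTURE_EPOCH default added to the constructor keeps gossip off until detection has actually been armed. A hypothetical standalone sketch of that arithmetic, using plain uint64 in place of the Epoch type:

  const
    FAR_FUTURE_EPOCH = not 0'u64       # 2^64 - 1, sentinel: detection never armed
    duplicateValidatorEpochs = 2'u64   # observation window from the diff above

  func broadcastStartEpoch(launchEpoch: uint64): uint64 =
    launchEpoch + duplicateValidatorEpochs

  func mayGossip(currentEpoch, startEpoch: uint64): bool =
    # Gossip only once the observation window has elapsed.
    currentEpoch >= startEpoch

  when isMainModule:
    let start = broadcastStartEpoch(100'u64)           # armed: gossip from epoch 102
    doAssert not mayGossip(101'u64, start)              # still observing
    doAssert mayGossip(102'u64, start)                  # window elapsed
    doAssert not mayGossip(101'u64, FAR_FUTURE_EPOCH)   # never armed, never gossip
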
@@ -665,23 +665,6 @@ proc removeAltairMessageHandlers(node: BeaconNode, forkDigest: ForkDigest) =
   node.network.unsubscribe(
     getSyncCommitteeContributionAndProofTopic(forkDigest))

-proc setupDoppelgangerDetection(node: BeaconNode, slot: Slot) =
-  # When another client's already running, this is very likely to detect
-  # potential duplicate validators, which can trigger slashing.
-  #
-  # Every missed attestation costs approximately 3*get_base_reward(), which
-  # can be up to around 10,000 Wei. Thus, skipping attestations isn't cheap
-  # and one should gauge the likelihood of this simultaneous launch to tune
-  # the epoch delay to one's perceived risk.
-  const duplicateValidatorEpochs = 2
-
-  node.processor.doppelgangerDetection.broadcastStartEpoch =
-    slot.epoch + duplicateValidatorEpochs
-  debug "Setting up doppelganger protection",
-    epoch = slot.epoch,
-    broadcastStartEpoch =
-      node.processor.doppelgangerDetection.broadcastStartEpoch
-
 proc trackSyncCommitteeTopics*(node: BeaconNode) =
   # TODO
   discard

@@ -743,7 +726,7 @@ proc updateGossipStatus(node: BeaconNode, slot: Slot) {.async.} =
       headSlot = head.slot,
       headDistance, targetGossipState

-    node.setupDoppelgangerDetection(slot)
+    node.processor[].setupDoppelgangerDetection(slot)

   # Specially when waiting for genesis, we'll already be synced on startup -
   # it might also happen on a sufficiently fast restart

@@ -836,12 +819,8 @@ proc onSlotEnd(node: BeaconNode, slot: Slot) {.async.} =
   node.actionTracker.updateActions(epochRef)

   let
-    nextAttestationSlot = getNextValidatorAction(
-      node.actionTracker.attestingSlots,
-      node.actionTracker.lastCalculatedEpoch, slot)
-    nextProposalSlot = getNextValidatorAction(
-      node.actionTracker.proposingSlots,
-      node.actionTracker.lastCalculatedEpoch, slot)
+    nextAttestationSlot = node.actionTracker.getNextAttestationSlot(slot)
+    nextProposalSlot = node.actionTracker.getNextProposalSlot(slot)
     nextActionWaitTime = saturate(fromNow(
       node.beaconClock, min(nextAttestationSlot, nextProposalSlot)))

@@ -186,6 +186,16 @@ func getNextValidatorAction*(

   FAR_FUTURE_SLOT

+func getNextAttestationSlot*(tracker: ActionTracker, slot: Slot): Slot =
+  getNextValidatorAction(
+    tracker.attestingSlots,
+    tracker.lastCalculatedEpoch, slot)
+
+func getNextProposalSlot*(tracker: ActionTracker, slot: Slot): Slot =
+  getNextValidatorAction(
+    tracker.proposingSlots,
+    tracker.lastCalculatedEpoch, slot)
+
 proc updateActions*(tracker: var ActionTracker, epochRef: EpochRef) =
   # Updates the schedule for upcoming attestation and proposal work
   let

@@ -995,8 +995,15 @@ proc handleValidatorDuties*(node: BeaconNode, lastSlot, slot: Slot) {.async.} =
   # await calls, thus we use a local variable to keep the logic straight here
   var head = node.dag.head
   if not node.isSynced(head):
-    notice "Syncing in progress; skipping validator duties for now",
-      slot, headSlot = head.slot
+    let
+      nextAttestationSlot = node.actionTracker.getNextAttestationSlot(slot)
+      nextProposalSlot = node.actionTracker.getNextProposalSlot(slot)
+    if slot in [nextAttestationSlot, nextProposalSlot]:
+      notice "Syncing in progress; skipping validator duties for now",
+        slot, headSlot = head.slot
+    else:
+      debug "Syncing in progress; skipping validator duties for now",
+        slot, headSlot = head.slot

     # Rewards will be growing though, as we sync..
     updateValidatorMetrics(node)

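This hunk, and the doppelganger hunk that follows, apply the same rule from the commit message: escalate to notice only when the skipped slot actually carried a duty for this node, and keep the message at debug otherwise. A minimal sketch of that decision, with plain uint64 slots standing in for the Slot type and a hypothetical proc name:

  import chronicles

  proc logSkippedDuties(slot, nextAttestationSlot, nextProposalSlot: uint64) =
    if slot in [nextAttestationSlot, nextProposalSlot]:
      # A duty is actually being missed - make it visible by default.
      notice "Syncing in progress; skipping validator duties for now", slot
    else:
      debug "Syncing in progress; skipping validator duties for now", slot
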
@@ -1007,12 +1014,24 @@ proc handleValidatorDuties*(node: BeaconNode, lastSlot, slot: Slot) {.async.} =

   # If broadcastStartEpoch is 0, it hasn't had time to initialize yet, which
   # means that it'd be okay not to continue, but it won't gossip regardless.
-  if curSlot.epoch <
+  if curSlot.epoch <
       node.processor[].doppelgangerDetection.broadcastStartEpoch and
       node.config.doppelgangerDetection:
-    debug "Waiting to gossip out to detect potential duplicate validators",
-      broadcastStartEpoch =
-        node.processor[].doppelgangerDetection.broadcastStartEpoch
+    let
+      nextAttestationSlot = node.actionTracker.getNextAttestationSlot(slot)
+      nextProposalSlot = node.actionTracker.getNextProposalSlot(slot)
+
+    if slot in [nextAttestationSlot, nextProposalSlot]:
+      notice "Doppelganger detection active - skipping validator duties while observing activity on the network",
+        slot, epoch = slot.epoch, nextAttestationSlot, nextProposalSlot,
+        broadcastStartEpoch =
+          node.processor[].doppelgangerDetection.broadcastStartEpoch
+    else:
+      debug "Doppelganger detection active - skipping validator duties while observing activity on the network",
+        slot, epoch = slot.epoch, nextAttestationSlot, nextProposalSlot,
+        broadcastStartEpoch =
+          node.processor[].doppelgangerDetection.broadcastStartEpoch

     return

   # Start by checking if there's work we should have done in the past that we