mirror of
https://github.com/status-im/nimbus-eth2.git
synced 2025-02-20 02:08:12 +00:00
Assorted logging improvements (#3237)
* log doppelganger detection when it activates and when it causes missed duties * less prominent eth1 sync progress * log in-progress sync at notice only when actually missing duties * better detail in replay log * don't log finalization checkpoints - this is quite verbose when syncing and already included in "Slot start"
This commit is contained in:
parent
3c63a78c01
commit
c4ce59e55b
@ -853,7 +853,9 @@ proc updateStateData*(
|
|||||||
# an earlier state must be loaded since there's no way to undo the slot
|
# an earlier state must be loaded since there's no way to undo the slot
|
||||||
# transitions
|
# transitions
|
||||||
|
|
||||||
let startTick = Moment.now()
|
let
|
||||||
|
startTick = Moment.now()
|
||||||
|
current {.used.} = state.blck.atSlot(getStateField(state.data, slot))
|
||||||
|
|
||||||
var
|
var
|
||||||
ancestors: seq[BlockRef]
|
ancestors: seq[BlockRef]
|
||||||
@ -931,7 +933,7 @@ proc updateStateData*(
|
|||||||
|
|
||||||
if not found:
|
if not found:
|
||||||
debug "UpdateStateData cache miss",
|
debug "UpdateStateData cache miss",
|
||||||
bs, stateBlock = state.blck, stateSlot = getStateField(state.data, slot)
|
current = shortLog(current), target = shortLog(bs)
|
||||||
|
|
||||||
# Either the state is too new or was created by applying a different block.
|
# Either the state is too new or was created by applying a different block.
|
||||||
# We'll now resort to loading the state from the database then reapplying
|
# We'll now resort to loading the state from the database then reapplying
|
||||||
@ -967,8 +969,9 @@ proc updateStateData*(
|
|||||||
# Starting state has been assigned, either from memory or database
|
# Starting state has been assigned, either from memory or database
|
||||||
let
|
let
|
||||||
assignTick = Moment.now()
|
assignTick = Moment.now()
|
||||||
startSlot {.used.} = getStateField(state.data, slot) # used in logs below
|
ancestor {.used.} = state.blck.atSlot(getStateField(state.data, slot))
|
||||||
startRoot {.used.} = getStateRoot(state.data)
|
ancestorRoot {.used.} = getStateRoot(state.data)
|
||||||
|
|
||||||
var info: ForkedEpochInfo
|
var info: ForkedEpochInfo
|
||||||
# Time to replay all the blocks between then and now
|
# Time to replay all the blocks between then and now
|
||||||
for i in countdown(ancestors.len - 1, 0):
|
for i in countdown(ancestors.len - 1, 0):
|
||||||
@ -990,27 +993,47 @@ proc updateStateData*(
|
|||||||
assignDur = assignTick - startTick
|
assignDur = assignTick - startTick
|
||||||
replayDur = Moment.now() - assignTick
|
replayDur = Moment.now() - assignTick
|
||||||
|
|
||||||
logScope:
|
# TODO https://github.com/status-im/nim-chronicles/issues/108
|
||||||
blocks = ancestors.len
|
|
||||||
slots = getStateField(state.data, slot) - startSlot
|
|
||||||
stateRoot = shortLog(getStateRoot(state.data))
|
|
||||||
stateSlot = getStateField(state.data, slot)
|
|
||||||
startRoot = shortLog(startRoot)
|
|
||||||
startSlot
|
|
||||||
blck = shortLog(bs)
|
|
||||||
found
|
|
||||||
assignDur
|
|
||||||
replayDur
|
|
||||||
|
|
||||||
if (assignDur + replayDur) >= 250.millis:
|
if (assignDur + replayDur) >= 250.millis:
|
||||||
# This might indicate there's a cache that's not in order or a disk that is
|
# This might indicate there's a cache that's not in order or a disk that is
|
||||||
# too slow - for now, it's here for investigative purposes and the cutoff
|
# too slow - for now, it's here for investigative purposes and the cutoff
|
||||||
# time might need tuning
|
# time might need tuning
|
||||||
info "State replayed"
|
info "State replayed",
|
||||||
|
blocks = ancestors.len,
|
||||||
|
slots = getStateField(state.data, slot) - ancestor.slot,
|
||||||
|
current = shortLog(current),
|
||||||
|
ancestor = shortLog(ancestor),
|
||||||
|
target = shortLog(bs),
|
||||||
|
ancestorStateRoot = shortLog(ancestorRoot),
|
||||||
|
targetStateRoot = shortLog(getStateRoot(state.data)),
|
||||||
|
found,
|
||||||
|
assignDur,
|
||||||
|
replayDur
|
||||||
|
|
||||||
elif ancestors.len > 0:
|
elif ancestors.len > 0:
|
||||||
debug "State replayed"
|
debug "State replayed",
|
||||||
else:
|
blocks = ancestors.len,
|
||||||
trace "State advanced" # Normal case!
|
slots = getStateField(state.data, slot) - ancestor.slot,
|
||||||
|
current = shortLog(current),
|
||||||
|
ancestor = shortLog(ancestor),
|
||||||
|
target = shortLog(bs),
|
||||||
|
ancestorStateRoot = shortLog(ancestorRoot),
|
||||||
|
targetStateRoot = shortLog(getStateRoot(state.data)),
|
||||||
|
found,
|
||||||
|
assignDur,
|
||||||
|
replayDur
|
||||||
|
else: # Normal case!
|
||||||
|
trace "State advanced",
|
||||||
|
blocks = ancestors.len,
|
||||||
|
slots = getStateField(state.data, slot) - ancestor.slot,
|
||||||
|
current = shortLog(current),
|
||||||
|
ancestor = shortLog(ancestor),
|
||||||
|
target = shortLog(bs),
|
||||||
|
ancestorStateRoot = shortLog(ancestorRoot),
|
||||||
|
targetStateRoot = shortLog(getStateRoot(state.data)),
|
||||||
|
found,
|
||||||
|
assignDur,
|
||||||
|
replayDur
|
||||||
|
|
||||||
proc delState(dag: ChainDAGRef, bs: BlockSlot) =
|
proc delState(dag: ChainDAGRef, bs: BlockSlot) =
|
||||||
# Delete state state and mapping for a particular block+slot
|
# Delete state state and mapping for a particular block+slot
|
||||||
@ -1340,7 +1363,7 @@ proc updateHead*(
|
|||||||
beacon_current_active_validators.set(number_of_active_validators)
|
beacon_current_active_validators.set(number_of_active_validators)
|
||||||
|
|
||||||
if finalizedHead != dag.finalizedHead:
|
if finalizedHead != dag.finalizedHead:
|
||||||
info "Reached new finalization checkpoint",
|
debug "Reached new finalization checkpoint",
|
||||||
head = shortLog(dag.headState.blck),
|
head = shortLog(dag.headState.blck),
|
||||||
stateRoot = shortLog(getStateRoot(dag.headState.data)),
|
stateRoot = shortLog(getStateRoot(dag.headState.data)),
|
||||||
justified = shortLog(getStateField(
|
justified = shortLog(getStateField(
|
||||||
|
@ -1054,7 +1054,7 @@ proc syncBlockRange(m: Eth1Monitor,
|
|||||||
else:
|
else:
|
||||||
discard
|
discard
|
||||||
|
|
||||||
notice "Eth1 sync progress",
|
info "Eth1 sync progress",
|
||||||
blockNumber = lastBlock.number,
|
blockNumber = lastBlock.number,
|
||||||
depositsProcessed = lastBlock.voteData.deposit_count
|
depositsProcessed = lastBlock.voteData.deposit_count
|
||||||
|
|
||||||
|
@ -152,7 +152,8 @@ proc new*(T: type Eth2Processor,
|
|||||||
(ref Eth2Processor)(
|
(ref Eth2Processor)(
|
||||||
doppelGangerDetectionEnabled: doppelGangerDetectionEnabled,
|
doppelGangerDetectionEnabled: doppelGangerDetectionEnabled,
|
||||||
doppelgangerDetection: DoppelgangerProtection(
|
doppelgangerDetection: DoppelgangerProtection(
|
||||||
nodeLaunchSlot: getBeaconTime().slotOrZero),
|
nodeLaunchSlot: getBeaconTime().slotOrZero,
|
||||||
|
broadcastStartEpoch: FAR_FUTURE_EPOCH),
|
||||||
blockProcessor: blockProcessor,
|
blockProcessor: blockProcessor,
|
||||||
validatorMonitor: validatorMonitor,
|
validatorMonitor: validatorMonitor,
|
||||||
dag: dag,
|
dag: dag,
|
||||||
@ -226,6 +227,25 @@ proc blockValidator*(
|
|||||||
|
|
||||||
v
|
v
|
||||||
|
|
||||||
|
proc setupDoppelgangerDetection*(self: var Eth2Processor, slot: Slot) =
|
||||||
|
# When another client's already running, this is very likely to detect
|
||||||
|
# potential duplicate validators, which can trigger slashing.
|
||||||
|
#
|
||||||
|
# Every missed attestation costs approximately 3*get_base_reward(), which
|
||||||
|
# can be up to around 10,000 Wei. Thus, skipping attestations isn't cheap
|
||||||
|
# and one should gauge the likelihood of this simultaneous launch to tune
|
||||||
|
# the epoch delay to one's perceived risk.
|
||||||
|
|
||||||
|
if self.validatorPool[].count() > 0:
|
||||||
|
const duplicateValidatorEpochs = 2
|
||||||
|
|
||||||
|
self.doppelgangerDetection.broadcastStartEpoch =
|
||||||
|
slot.epoch + duplicateValidatorEpochs
|
||||||
|
notice "Setting up doppelganger protection",
|
||||||
|
epoch = slot.epoch,
|
||||||
|
broadcastStartEpoch =
|
||||||
|
self.doppelgangerDetection.broadcastStartEpoch
|
||||||
|
|
||||||
proc checkForPotentialDoppelganger(
|
proc checkForPotentialDoppelganger(
|
||||||
self: var Eth2Processor, attestation: Attestation,
|
self: var Eth2Processor, attestation: Attestation,
|
||||||
attesterIndices: openArray[ValidatorIndex]) =
|
attesterIndices: openArray[ValidatorIndex]) =
|
||||||
|
@ -665,23 +665,6 @@ proc removeAltairMessageHandlers(node: BeaconNode, forkDigest: ForkDigest) =
|
|||||||
node.network.unsubscribe(
|
node.network.unsubscribe(
|
||||||
getSyncCommitteeContributionAndProofTopic(forkDigest))
|
getSyncCommitteeContributionAndProofTopic(forkDigest))
|
||||||
|
|
||||||
proc setupDoppelgangerDetection(node: BeaconNode, slot: Slot) =
|
|
||||||
# When another client's already running, this is very likely to detect
|
|
||||||
# potential duplicate validators, which can trigger slashing.
|
|
||||||
#
|
|
||||||
# Every missed attestation costs approximately 3*get_base_reward(), which
|
|
||||||
# can be up to around 10,000 Wei. Thus, skipping attestations isn't cheap
|
|
||||||
# and one should gauge the likelihood of this simultaneous launch to tune
|
|
||||||
# the epoch delay to one's perceived risk.
|
|
||||||
const duplicateValidatorEpochs = 2
|
|
||||||
|
|
||||||
node.processor.doppelgangerDetection.broadcastStartEpoch =
|
|
||||||
slot.epoch + duplicateValidatorEpochs
|
|
||||||
debug "Setting up doppelganger protection",
|
|
||||||
epoch = slot.epoch,
|
|
||||||
broadcastStartEpoch =
|
|
||||||
node.processor.doppelgangerDetection.broadcastStartEpoch
|
|
||||||
|
|
||||||
proc trackSyncCommitteeTopics*(node: BeaconNode) =
|
proc trackSyncCommitteeTopics*(node: BeaconNode) =
|
||||||
# TODO
|
# TODO
|
||||||
discard
|
discard
|
||||||
@ -743,7 +726,7 @@ proc updateGossipStatus(node: BeaconNode, slot: Slot) {.async.} =
|
|||||||
headSlot = head.slot,
|
headSlot = head.slot,
|
||||||
headDistance, targetGossipState
|
headDistance, targetGossipState
|
||||||
|
|
||||||
node.setupDoppelgangerDetection(slot)
|
node.processor[].setupDoppelgangerDetection(slot)
|
||||||
|
|
||||||
# Specially when waiting for genesis, we'll already be synced on startup -
|
# Specially when waiting for genesis, we'll already be synced on startup -
|
||||||
# it might also happen on a sufficiently fast restart
|
# it might also happen on a sufficiently fast restart
|
||||||
@ -836,12 +819,8 @@ proc onSlotEnd(node: BeaconNode, slot: Slot) {.async.} =
|
|||||||
node.actionTracker.updateActions(epochRef)
|
node.actionTracker.updateActions(epochRef)
|
||||||
|
|
||||||
let
|
let
|
||||||
nextAttestationSlot = getNextValidatorAction(
|
nextAttestationSlot = node.actionTracker.getNextAttestationSlot(slot)
|
||||||
node.actionTracker.attestingSlots,
|
nextProposalSlot = node.actionTracker.getNextProposalSlot(slot)
|
||||||
node.actionTracker.lastCalculatedEpoch, slot)
|
|
||||||
nextProposalSlot = getNextValidatorAction(
|
|
||||||
node.actionTracker.proposingSlots,
|
|
||||||
node.actionTracker.lastCalculatedEpoch, slot)
|
|
||||||
nextActionWaitTime = saturate(fromNow(
|
nextActionWaitTime = saturate(fromNow(
|
||||||
node.beaconClock, min(nextAttestationSlot, nextProposalSlot)))
|
node.beaconClock, min(nextAttestationSlot, nextProposalSlot)))
|
||||||
|
|
||||||
|
@ -186,6 +186,16 @@ func getNextValidatorAction*(
|
|||||||
|
|
||||||
FAR_FUTURE_SLOT
|
FAR_FUTURE_SLOT
|
||||||
|
|
||||||
|
func getNextAttestationSlot*(tracker: ActionTracker, slot: Slot): Slot =
|
||||||
|
getNextValidatorAction(
|
||||||
|
tracker.attestingSlots,
|
||||||
|
tracker.lastCalculatedEpoch, slot)
|
||||||
|
|
||||||
|
func getNextProposalSlot*(tracker: ActionTracker, slot: Slot): Slot =
|
||||||
|
getNextValidatorAction(
|
||||||
|
tracker.proposingSlots,
|
||||||
|
tracker.lastCalculatedEpoch, slot)
|
||||||
|
|
||||||
proc updateActions*(tracker: var ActionTracker, epochRef: EpochRef) =
|
proc updateActions*(tracker: var ActionTracker, epochRef: EpochRef) =
|
||||||
# Updates the schedule for upcoming attestation and proposal work
|
# Updates the schedule for upcoming attestation and proposal work
|
||||||
let
|
let
|
||||||
|
@ -995,8 +995,15 @@ proc handleValidatorDuties*(node: BeaconNode, lastSlot, slot: Slot) {.async.} =
|
|||||||
# await calls, thus we use a local variable to keep the logic straight here
|
# await calls, thus we use a local variable to keep the logic straight here
|
||||||
var head = node.dag.head
|
var head = node.dag.head
|
||||||
if not node.isSynced(head):
|
if not node.isSynced(head):
|
||||||
notice "Syncing in progress; skipping validator duties for now",
|
let
|
||||||
slot, headSlot = head.slot
|
nextAttestationSlot = node.actionTracker.getNextAttestationSlot(slot)
|
||||||
|
nextProposalSlot = node.actionTracker.getNextProposalSlot(slot)
|
||||||
|
if slot in [nextAttestationSlot, nextProposalSlot]:
|
||||||
|
notice "Syncing in progress; skipping validator duties for now",
|
||||||
|
slot, headSlot = head.slot
|
||||||
|
else:
|
||||||
|
debug "Syncing in progress; skipping validator duties for now",
|
||||||
|
slot, headSlot = head.slot
|
||||||
|
|
||||||
# Rewards will be growing though, as we sync..
|
# Rewards will be growing though, as we sync..
|
||||||
updateValidatorMetrics(node)
|
updateValidatorMetrics(node)
|
||||||
@ -1007,12 +1014,24 @@ proc handleValidatorDuties*(node: BeaconNode, lastSlot, slot: Slot) {.async.} =
|
|||||||
|
|
||||||
# If broadcastStartEpoch is 0, it hasn't had time to initialize yet, which
|
# If broadcastStartEpoch is 0, it hasn't had time to initialize yet, which
|
||||||
# means that it'd be okay not to continue, but it won't gossip regardless.
|
# means that it'd be okay not to continue, but it won't gossip regardless.
|
||||||
if curSlot.epoch <
|
if curSlot.epoch <
|
||||||
node.processor[].doppelgangerDetection.broadcastStartEpoch and
|
node.processor[].doppelgangerDetection.broadcastStartEpoch and
|
||||||
node.config.doppelgangerDetection:
|
node.config.doppelgangerDetection:
|
||||||
debug "Waiting to gossip out to detect potential duplicate validators",
|
let
|
||||||
broadcastStartEpoch =
|
nextAttestationSlot = node.actionTracker.getNextAttestationSlot(slot)
|
||||||
node.processor[].doppelgangerDetection.broadcastStartEpoch
|
nextProposalSlot = node.actionTracker.getNextProposalSlot(slot)
|
||||||
|
|
||||||
|
if slot in [nextAttestationSlot, nextProposalSlot]:
|
||||||
|
notice "Doppelganger detection active - skipping validator duties while observing activity on the network",
|
||||||
|
slot, epoch = slot.epoch, nextAttestationSlot, nextProposalSlot,
|
||||||
|
broadcastStartEpoch =
|
||||||
|
node.processor[].doppelgangerDetection.broadcastStartEpoch
|
||||||
|
else:
|
||||||
|
debug "Doppelganger detection active - skipping validator duties while observing activity on the network",
|
||||||
|
slot, epoch = slot.epoch, nextAttestationSlot, nextProposalSlot,
|
||||||
|
broadcastStartEpoch =
|
||||||
|
node.processor[].doppelgangerDetection.broadcastStartEpoch
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
# Start by checking if there's work we should have done in the past that we
|
# Start by checking if there's work we should have done in the past that we
|
||||||
|
Loading…
x
Reference in New Issue
Block a user