mirror of
https://github.com/status-im/nimbus-eth2.git
synced 2025-02-03 18:25:09 +00:00
add metric for database checkpoint duration (#5897)
Database checkpointing can take seconds, e.g., while Geth is syncing. Add a debug log + metric for it, and also info log if it takes longer than 250ms, same as for the existing `State replayed` log. If the log shows up for a user while the system is not overloaded, it may point to slow disk speed or thermal issue.
This commit is contained in:
parent
5fdb06fcd1
commit
92197ce690
@ -128,3 +128,5 @@ func fromFloatSeconds*(T: type Duration, f: float): Duration =
|
|||||||
of fcInf: InfiniteDuration
|
of fcInf: InfiniteDuration
|
||||||
|
|
||||||
chronicles.formatIt Duration: $it
|
chronicles.formatIt Duration: $it
|
||||||
|
|
||||||
|
const MinSignificantProcessingDuration* = 250.millis
|
||||||
|
@ -15,7 +15,7 @@ import
|
|||||||
state_transition, validator],
|
state_transition, validator],
|
||||||
../spec/forks,
|
../spec/forks,
|
||||||
../spec/datatypes/[phase0, altair, bellatrix, capella],
|
../spec/datatypes/[phase0, altair, bellatrix, capella],
|
||||||
".."/[beacon_chain_db, era_db],
|
".."/[beacon_chain_db, beacon_clock, era_db],
|
||||||
"."/[block_pools_types, block_quarantine]
|
"."/[block_pools_types, block_quarantine]
|
||||||
|
|
||||||
from ../spec/datatypes/deneb import shortLog
|
from ../spec/datatypes/deneb import shortLog
|
||||||
@ -24,6 +24,8 @@ export
|
|||||||
eth2_merkleization, eth2_ssz_serialization,
|
eth2_merkleization, eth2_ssz_serialization,
|
||||||
block_pools_types, results, beacon_chain_db
|
block_pools_types, results, beacon_chain_db
|
||||||
|
|
||||||
|
logScope: topics = "chaindag"
|
||||||
|
|
||||||
# https://github.com/ethereum/beacon-metrics/blob/master/metrics.md#interop-metrics
|
# https://github.com/ethereum/beacon-metrics/blob/master/metrics.md#interop-metrics
|
||||||
declareGauge beacon_head_root, "Root of the head block of the beacon chain"
|
declareGauge beacon_head_root, "Root of the head block of the beacon chain"
|
||||||
declareGauge beacon_head_slot, "Slot of the head block of the beacon chain"
|
declareGauge beacon_head_slot, "Slot of the head block of the beacon chain"
|
||||||
@ -47,7 +49,7 @@ declareGauge beacon_current_active_validators, "Number of validators in the acti
|
|||||||
declareGauge beacon_pending_deposits, "Number of pending deposits (state.eth1_data.deposit_count - state.eth1_deposit_index)" # On block
|
declareGauge beacon_pending_deposits, "Number of pending deposits (state.eth1_data.deposit_count - state.eth1_deposit_index)" # On block
|
||||||
declareGauge beacon_processed_deposits_total, "Number of total deposits included on chain" # On block
|
declareGauge beacon_processed_deposits_total, "Number of total deposits included on chain" # On block
|
||||||
|
|
||||||
logScope: topics = "chaindag"
|
declareCounter total_state_replay_seconds, "Total time spent replaying states"
|
||||||
|
|
||||||
const
|
const
|
||||||
EPOCHS_PER_STATE_SNAPSHOT* = 32
|
EPOCHS_PER_STATE_SNAPSHOT* = 32
|
||||||
@ -1838,9 +1840,10 @@ proc updateState*(
|
|||||||
let
|
let
|
||||||
assignDur = assignTick - startTick
|
assignDur = assignTick - startTick
|
||||||
replayDur = Moment.now() - assignTick
|
replayDur = Moment.now() - assignTick
|
||||||
|
total_state_replay_seconds.inc(replayDur.toFloatSeconds)
|
||||||
|
|
||||||
# TODO https://github.com/status-im/nim-chronicles/issues/108
|
# TODO https://github.com/status-im/nim-chronicles/issues/108
|
||||||
if (assignDur + replayDur) >= 250.millis:
|
if (assignDur + replayDur) >= MinSignificantProcessingDuration:
|
||||||
# This might indicate there's a cache that's not in order or a disk that is
|
# This might indicate there's a cache that's not in order or a disk that is
|
||||||
# too slow - for now, it's here for investigative purposes and the cutoff
|
# too slow - for now, it's here for investigative purposes and the cutoff
|
||||||
# time might need tuning
|
# time might need tuning
|
||||||
|
@ -35,6 +35,8 @@ from
|
|||||||
import
|
import
|
||||||
TopicParams, validateParameters, init
|
TopicParams, validateParameters, init
|
||||||
|
|
||||||
|
logScope: topics = "beacnde"
|
||||||
|
|
||||||
# https://github.com/ethereum/eth2.0-metrics/blob/master/metrics.md#interop-metrics
|
# https://github.com/ethereum/eth2.0-metrics/blob/master/metrics.md#interop-metrics
|
||||||
declareGauge beacon_slot, "Latest slot of the beacon chain state"
|
declareGauge beacon_slot, "Latest slot of the beacon chain state"
|
||||||
declareGauge beacon_current_epoch, "Current epoch"
|
declareGauge beacon_current_epoch, "Current epoch"
|
||||||
@ -49,7 +51,8 @@ declareGauge ticks_delay,
|
|||||||
declareGauge next_action_wait,
|
declareGauge next_action_wait,
|
||||||
"Seconds until the next attestation will be sent"
|
"Seconds until the next attestation will be sent"
|
||||||
|
|
||||||
logScope: topics = "beacnde"
|
declareCounter total_db_checkpoint_seconds,
|
||||||
|
"Total time spent checkpointing the database to clear the WAL file"
|
||||||
|
|
||||||
proc doRunTrustedNodeSync(
|
proc doRunTrustedNodeSync(
|
||||||
db: BeaconChainDB,
|
db: BeaconChainDB,
|
||||||
@ -1381,10 +1384,19 @@ proc onSlotEnd(node: BeaconNode, slot: Slot) {.async.} =
|
|||||||
except Exception:
|
except Exception:
|
||||||
# TODO upstream
|
# TODO upstream
|
||||||
raiseAssert "Unexpected exception during GC collection"
|
raiseAssert "Unexpected exception during GC collection"
|
||||||
|
let gcCollectionTick = Moment.now()
|
||||||
|
|
||||||
# Checkpoint the database to clear the WAL file and make sure changes in
|
# Checkpoint the database to clear the WAL file and make sure changes in
|
||||||
# the database are synced with the filesystem.
|
# the database are synced with the filesystem.
|
||||||
node.db.checkpoint()
|
node.db.checkpoint()
|
||||||
|
let
|
||||||
|
dbCheckpointTick = Moment.now()
|
||||||
|
dbCheckpointDur = dbCheckpointTick - gcCollectionTick
|
||||||
|
total_db_checkpoint_seconds.inc(dbCheckpointDur.toFloatSeconds)
|
||||||
|
if dbCheckpointDur >= MinSignificantProcessingDuration:
|
||||||
|
info "Database checkpointed", dur = dbCheckpointDur
|
||||||
|
else:
|
||||||
|
debug "Database checkpointed", dur = dbCheckpointDur
|
||||||
|
|
||||||
node.syncCommitteeMsgPool[].pruneData(slot)
|
node.syncCommitteeMsgPool[].pruneData(slot)
|
||||||
if slot.is_epoch:
|
if slot.is_epoch:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user