val-mon: remove redundant `_total` suffix from counters
It turns out nim-metrics adds this suffix on its own - it also turns out some of the names are non-conventional and need follow-up.
This commit is contained in:
parent
c9aa1bee01
commit
3df9ffca9f
|
@ -666,6 +666,9 @@ proc init*(T: type ChainDAGRef, cfg: RuntimeConfig, db: BeaconChainDB,
|
|||
# Clearance most likely happens from head - assign it after rewinding head
|
||||
assign(dag.clearanceState, dag.headState)
|
||||
|
||||
withState(dag.headState.data):
|
||||
dag.validatorMonitor[].registerState(state.data)
|
||||
|
||||
updateBeaconMetrics(dag.headState, cache)
|
||||
|
||||
# The tail block is "implicitly" finalized as it was given either as a
|
||||
|
|
|
@ -10,6 +10,16 @@ logScope: topics = "val_mon"
|
|||
# Validator monitoring based on the same feature in Lighthouse - using the same
|
||||
# metrics allows users to more easily reuse monitoring setups
|
||||
|
||||
# Some issues to address before taking this feature out of beta:
|
||||
#
|
||||
# * some gauges are named `_total` which goes against prometheus conventions
|
||||
# * because nim-metrics adds a compulsory `_total` to counters, we can't
|
||||
# support some of the metric names (https://github.com/sigp/lighthouse/issues/2977)
|
||||
# * in v1.6.0, some of our counters got an extra `_total` suffix, for the same reason
|
||||
# * Per-epoch metrics are being updated while syncing, which makes them a bit
|
||||
# hard to use in time series / graphs which depend on the metrics changing at
|
||||
# a steady clock-based rate
|
||||
|
||||
declareGauge validator_monitor_balance_gwei,
|
||||
"The validator's balance in gwei.", labels = ["validator"]
|
||||
declareGauge validator_monitor_effective_balance_gwei,
|
||||
|
@ -88,43 +98,43 @@ declareGauge validator_monitor_validator_in_next_sync_committee,
|
|||
|
||||
declareGauge validator_monitor_validators_total,
|
||||
"Count of validators that are specifically monitored by this beacon node"
|
||||
declareCounter validator_monitor_unaggregated_attestation_total,
|
||||
declareCounter validator_monitor_unaggregated_attestation,
|
||||
"Number of unaggregated attestations seen", labels = ["src", "validator"]
|
||||
declareHistogram validator_monitor_unaggregated_attestation_delay_seconds,
|
||||
"The delay between when the validator should send the attestation and when it was received.", labels = ["src", "validator"]
|
||||
declareCounter validator_monitor_sync_committee_messages_total,
|
||||
declareCounter validator_monitor_sync_committee_messages,
|
||||
"Number of sync committee messages seen", labels = ["src", "validator"]
|
||||
declareHistogram validator_monitor_sync_committee_messages_delay_seconds,
|
||||
"The delay between when the validator should send the sync committee message and when it was received.", labels = ["src", "validator"]
|
||||
declareCounter validator_monitor_sync_contributions_total,
|
||||
declareCounter validator_monitor_sync_contributions,
|
||||
"Number of sync contributions seen", labels = ["src", "validator"]
|
||||
declareHistogram validator_monitor_sync_contributions_delay_seconds,
|
||||
"The delay between when the aggregator should send the sync contribution and when it was received.", labels = ["src", "validator"]
|
||||
declareCounter validator_monitor_aggregated_attestation_total,
|
||||
declareCounter validator_monitor_aggregated_attestation,
|
||||
"Number of aggregated attestations seen", labels = ["src", "validator"]
|
||||
declareHistogram validator_monitor_aggregated_attestation_delay_seconds,
|
||||
"The delay between then the validator should send the aggregate and when it was received.", labels = ["src", "validator"]
|
||||
declareCounter validator_monitor_attestation_in_aggregate_total,
|
||||
declareCounter validator_monitor_attestation_in_aggregate,
|
||||
"Number of times an attestation has been seen in an aggregate", labels = ["src", "validator"]
|
||||
declareCounter validator_monitor_sync_committee_message_in_contribution_total,
|
||||
declareCounter validator_monitor_sync_committee_message_in_contribution,
|
||||
"Number of times a sync committee message has been seen in a sync contribution", labels = ["src", "validator"]
|
||||
declareHistogram validator_monitor_attestation_in_aggregate_delay_seconds,
|
||||
"The delay between when the validator should send the aggregate and when it was received.", labels = ["src", "validator"]
|
||||
declareCounter validator_monitor_attestation_in_block_total,
|
||||
declareCounter validator_monitor_attestation_in_block,
|
||||
"Number of times an attestation has been seen in a block", labels = ["src", "validator"]
|
||||
declareCounter validator_monitor_sync_committee_message_in_block_total,
|
||||
declareCounter validator_monitor_sync_committee_message_in_block,
|
||||
"Number of times a validator's sync committee message has been seen in a sync aggregate", labels = ["src", "validator"]
|
||||
declareGauge validator_monitor_attestation_in_block_delay_slots,
|
||||
"The excess slots (beyond the minimum delay) between the attestation slot and the block slot.", labels = ["src", "validator"]
|
||||
declareCounter validator_monitor_beacon_block_total,
|
||||
declareCounter validator_monitor_beacon_block,
|
||||
"Number of beacon blocks seen", labels = ["src", "validator"]
|
||||
declareHistogram validator_monitor_beacon_block_delay_seconds,
|
||||
"The delay between when the validator should send the block and when it was received.", labels = ["src", "validator"]
|
||||
declareCounter validator_monitor_exit_total,
|
||||
declareCounter validator_monitor_exit,
|
||||
"Number of beacon exits seen", labels = ["src", "validator"]
|
||||
declareCounter validator_monitor_proposer_slashing_total,
|
||||
declareCounter validator_monitor_proposer_slashing,
|
||||
"Number of proposer slashings seen", labels = ["src", "validator"]
|
||||
declareCounter validator_monitor_attester_slashing_total,
|
||||
declareCounter validator_monitor_attester_slashing,
|
||||
"Number of attester slashings seen", labels = ["src", "validator"]
|
||||
|
||||
const
|
||||
|
@ -616,7 +626,7 @@ proc registerAttestation*(
|
|||
|
||||
self.withMonitor(idx):
|
||||
let id = monitor.id
|
||||
validator_monitor_unaggregated_attestation_total.inc(1, [$src, metricId])
|
||||
validator_monitor_unaggregated_attestation.inc(1, [$src, metricId])
|
||||
validator_monitor_unaggregated_attestation_delay_seconds.observe(
|
||||
delay.toGaugeValue(), [$src, metricId])
|
||||
|
||||
|
@ -641,7 +651,7 @@ proc registerAggregate*(
|
|||
|
||||
self.withMonitor(aggregator_index):
|
||||
let id = monitor.id
|
||||
validator_monitor_aggregated_attestation_total.inc(1, [$src, metricId])
|
||||
validator_monitor_aggregated_attestation.inc(1, [$src, metricId])
|
||||
validator_monitor_aggregated_attestation_delay_seconds.observe(
|
||||
delay.toGaugeValue(), [$src, metricId])
|
||||
|
||||
|
@ -656,7 +666,7 @@ proc registerAggregate*(
|
|||
for idx in attesting_indices:
|
||||
self.withMonitor(idx):
|
||||
let id = monitor.id
|
||||
validator_monitor_attestation_in_aggregate_total.inc(1, [$src, metricId])
|
||||
validator_monitor_attestation_in_aggregate.inc(1, [$src, metricId])
|
||||
validator_monitor_attestation_in_aggregate_delay_seconds.observe(
|
||||
delay.toGaugeValue(), [$src, metricId])
|
||||
|
||||
|
@ -678,7 +688,7 @@ proc registerAttestationInBlock*(
|
|||
inclusion_lag = (blck.slot - data.slot) - MIN_ATTESTATION_INCLUSION_DELAY
|
||||
epoch = data.slot.epoch
|
||||
|
||||
validator_monitor_attestation_in_block_total.inc(1, ["block", metricId])
|
||||
validator_monitor_attestation_in_block.inc(1, ["block", metricId])
|
||||
|
||||
if not self.totals:
|
||||
validator_monitor_attestation_in_block_delay_slots.set(
|
||||
|
@ -706,7 +716,7 @@ proc registerBeaconBlock*(
|
|||
slot = blck.slot
|
||||
delay = seen_timestamp - slot.block_deadline()
|
||||
|
||||
validator_monitor_beacon_block_total.inc(1, [$src, metricId])
|
||||
validator_monitor_beacon_block.inc(1, [$src, metricId])
|
||||
validator_monitor_beacon_block_delay_seconds.observe(
|
||||
delay.toGaugeValue(), [$src, metricId])
|
||||
|
||||
|
@ -724,7 +734,7 @@ proc registerSyncCommitteeMessage*(
|
|||
slot = sync_committee_message.slot
|
||||
delay = seen_timestamp - slot.sync_committee_message_deadline()
|
||||
|
||||
validator_monitor_sync_committee_messages_total.inc(1, [$src, metricId])
|
||||
validator_monitor_sync_committee_messages.inc(1, [$src, metricId])
|
||||
validator_monitor_sync_committee_messages_delay_seconds.observe(
|
||||
delay.toGaugeValue(), [$src, metricId])
|
||||
|
||||
|
@ -750,7 +760,7 @@ proc registerSyncContribution*(
|
|||
let aggregator_index = sync_contribution.message.aggregator_index
|
||||
self.withMonitor(aggregator_index):
|
||||
let id = monitor.id
|
||||
validator_monitor_sync_contributions_total.inc(1, [$src, metricId])
|
||||
validator_monitor_sync_contributions.inc(1, [$src, metricId])
|
||||
validator_monitor_sync_contributions_delay_seconds.observe(
|
||||
delay.toGaugeValue(), [$src, metricId])
|
||||
|
||||
|
@ -765,7 +775,7 @@ proc registerSyncContribution*(
|
|||
for participant in participants:
|
||||
self.withMonitor(participant):
|
||||
let id = monitor.id
|
||||
validator_monitor_sync_committee_message_in_contribution_total.inc(1, [$src, metricId])
|
||||
validator_monitor_sync_committee_message_in_contribution.inc(1, [$src, metricId])
|
||||
|
||||
info "Sync signature included in contribution",
|
||||
contribution = shortLog(sync_contribution.message.contribution),
|
||||
|
@ -779,7 +789,7 @@ proc registerSyncAggregateInBlock*(
|
|||
pubkey: ValidatorPubKey) =
|
||||
self.withMonitor(pubkey):
|
||||
let id = monitor.id
|
||||
validator_monitor_sync_committee_message_in_block_total.inc(1, ["block", metricId])
|
||||
validator_monitor_sync_committee_message_in_block.inc(1, ["block", metricId])
|
||||
|
||||
info "Sync signature included in block",
|
||||
head = beacon_block_root, slot = slot, validator = id
|
||||
|
@ -794,7 +804,7 @@ proc registerVoluntaryExit*(
|
|||
id = monitor.id
|
||||
epoch = exit.epoch
|
||||
|
||||
validator_monitor_exit_total.inc(1, [$src, metricId])
|
||||
validator_monitor_exit.inc(1, [$src, metricId])
|
||||
|
||||
notice "Voluntary exit seen",
|
||||
epoch = epoch, validator = id, src = src
|
||||
|
@ -813,7 +823,7 @@ proc registerProposerSlashing*(
|
|||
root_1 = hash_tree_root(slashing.signed_header_1.message)
|
||||
root_2 = hash_tree_root(slashing.signed_header_2.message)
|
||||
|
||||
validator_monitor_proposer_slashing_total.inc(1, [$src, metricId])
|
||||
validator_monitor_proposer_slashing.inc(1, [$src, metricId])
|
||||
|
||||
warn "Proposer slashing seen",
|
||||
root_2 = root_2, root_1 = root_1, slot = slot, validator = id, src = src
|
||||
|
@ -834,7 +844,7 @@ proc registerAttesterSlashing*(
|
|||
id = monitor.id
|
||||
slot = data.slot
|
||||
|
||||
validator_monitor_attester_slashing_total.inc(1, [$src, metricId])
|
||||
validator_monitor_attester_slashing.inc(1, [$src, metricId])
|
||||
|
||||
warn "Attester slashing seen",
|
||||
slot = slot, validator = id, src = src
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# Validator monitoring
|
||||
|
||||
> ⚠️ This feature is currently in BETA - implementation details may change in response to community feedback.
|
||||
> ⚠️ This feature is currently in BETA - implementation details such as metric names and counters may change in response to community feedback.
|
||||
|
||||
The validator monitoring feature allows for tracking the life-cycle and performance of one or more validators in detail.
|
||||
|
||||
|
@ -11,7 +11,9 @@ Every time the validator performs a duty, the duty is recorded and the monitor k
|
|||
* When attesting, the attestation is added to an aggregate, then a block, before a reward is applied to the state
|
||||
* When performing sync committee duties, likewise
|
||||
|
||||
Validator actions can be traced either through logging, or comprehensive metrics that allow for creating alerts in monitoring tools. The metrics are based on the same feature in [Lighthouse](https://lighthouse-book.sigmaprime.io/validator-monitoring.html), thus dashboards and alerts can be used with either client.
|
||||
Validator actions can be traced either through logging, or comprehensive metrics that allow for creating alerts in monitoring tools.
|
||||
|
||||
The metrics are broadly compatible with [Lighthouse](https://lighthouse-book.sigmaprime.io/validator-monitoring.html), thus dashboards and alerts can be used with either client.
|
||||
|
||||
## Enabling validator monitoring
|
||||
|
||||
|
|
Loading…
Reference in New Issue