From ce9e50e275285631b7c5b02405ba07d4616ffb01 Mon Sep 17 00:00:00 2001
From: Eugene Kabanov
Date: Fri, 29 Jul 2022 11:36:20 +0300
Subject: [PATCH] VC: metrics (#3915)

* Initial commit.
Enable MetricsHttpServerRef and configuration.

* Add metrics.

* Add headers.
Add compilation issue fixes.
---
Review notes (text between "---" and the diffstat is ignored by `git am`):

  * initMetrics returns true when metrics are disabled or the server was
    started, and false when the server could not be created or started.
  * shutdownMetrics closes the server only when metrics are enabled and a
    server instance was stored in vc.metricsServer;
    shutdownSlashingProtection closes vc.attachedValidators.slashingProtection.
  * In asyncInit the `except CancelledError` branch precedes
    `except CatchableError`: chronos derives CancelledError from
    CatchableError and Nim tries except branches in order, so with the
    opposite order a cancellation is reported through the "Unexpected error"
    warning and the CancelledError branch is never reached.
  * DelayBuckets is the bucket list shared by the three *_delay histograms.

 beacon_chain/conf.nim                         | 16 +++++
 beacon_chain/nimbus_validator_client.nim      | 67 +++++++++++++++++--
 beacon_chain/validator_client/api.nim         |  7 ++
 .../validator_client/attestation_service.nim  | 15 ++++-
 .../validator_client/block_service.nim        | 19 +++++-
 beacon_chain/validator_client/common.nim      |  5 ++
 .../validator_client/duties_service.nim       |  7 ++
 .../validator_client/fallback_service.nim     |  7 ++
 .../validator_client/fork_service.nim         |  7 ++
 .../sync_committee_service.nim                | 15 ++++-
 10 files changed, 156 insertions(+), 9 deletions(-)

diff --git a/beacon_chain/conf.nim b/beacon_chain/conf.nim
index e60d4f608..4d5a2d83e 100644
--- a/beacon_chain/conf.nim
+++ b/beacon_chain/conf.nim
@@ -811,6 +811,22 @@ type
       desc: "A file specifying the authorizition token required for accessing the keymanager API"
       name: "keymanager-token-file" .}: Option[InputFile]
 
+    metricsEnabled* {.
+      desc: "Enable the metrics server"
+      defaultValue: false
+      name: "metrics" .}: bool
+
+    metricsAddress* {.
+      desc: "Listening address of the metrics server"
+      defaultValue: defaultAdminListenAddress
+      defaultValueDesc: $defaultAdminListenAddressDesc
+      name: "metrics-address" .}: ValidIpAddress
+
+    metricsPort* {.
+      desc: "Listening HTTP port of the metrics server"
+      defaultValue: 8008
+      name: "metrics-port" .}: Port
+
     graffiti* {.
       desc: "The graffiti value that will appear in proposed blocks. " &
             "You can use a 0x-prefixed hex encoded string to specify " &
diff --git a/beacon_chain/nimbus_validator_client.nim b/beacon_chain/nimbus_validator_client.nim
index 01657ce12..856be30b6 100644
--- a/beacon_chain/nimbus_validator_client.nim
+++ b/beacon_chain/nimbus_validator_client.nim
@@ -4,10 +4,14 @@
 # * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
 # * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
+import metrics, metrics/chronos_httpserver
 import validator_client/[common, fallback_service, duties_service,
                          attestation_service, fork_service,
                          sync_committee_service, doppelganger_service]
 
+type
+  ValidatorClientError* = object of CatchableError
+
 proc initGenesis(vc: ValidatorClientRef): Future[RestGenesis] {.async.} =
   info "Initializing genesis", nodes_count = len(vc.beaconNodes)
   var nodes = vc.beaconNodes
@@ -110,6 +114,40 @@ proc initClock(vc: ValidatorClientRef): Future[BeaconClock] {.async.} =
     await sleepAsync(genesisTime.offset)
   return res
 
+proc initMetrics(vc: ValidatorClientRef): Future[bool] {.async.} =
+  if vc.config.metricsEnabled:
+    let
+      metricsAddress = vc.config.metricsAddress
+      metricsPort = vc.config.metricsPort
+      url = "http://" & $metricsAddress & ":" & $metricsPort & "/metrics"
+    info "Starting metrics HTTP server", url = url
+    let server =
+      block:
+        let res = MetricsHttpServerRef.new($metricsAddress, metricsPort)
+        if res.isErr():
+          error "Could not start metrics HTTP server", url = url,
+                error_msg = res.error()
+          return false
+        res.get()
+    vc.metricsServer = some(server)
+    try:
+      await server.start()
+    except MetricsError as exc:
+      error "Could not start metrics HTTP server", url = url,
+            error_msg = exc.msg, error_name = exc.name
+      return false
+  return true
+
+proc shutdownMetrics(vc: ValidatorClientRef) {.async.} =
+  if vc.config.metricsEnabled:
+    if vc.metricsServer.isSome():
+      info "Shutting down metrics HTTP server"
+      await vc.metricsServer.get().close()
+
+proc shutdownSlashingProtection(vc: ValidatorClientRef) =
+  info "Closing slashing protection", path = vc.config.validatorsDir()
+  vc.attachedValidators.slashingProtection.close()
+
 proc onSlotStart(vc: ValidatorClientRef, wallTime: BeaconTime,
                  lastSlot: Slot): Future[bool] {.async.} =
   ## Called at the beginning of a slot - usually every slot, but sometimes might
@@ -148,8 +186,19 @@ proc asyncInit(vc: ValidatorClientRef) {.async.} =
 
   vc.beaconClock = await vc.initClock()
 
-  if not(await initValidators(vc)):
-    fatal "Could not initialize local validators"
+  if not(await initMetrics(vc)):
+    raise newException(ValidatorClientError,
+                       "Could not initialize metrics server")
+
+  try:
+    if not(await initValidators(vc)):
+      await vc.shutdownMetrics()
+      raise newException(ValidatorClientError,
+                         "Could not initialize local validators")
+  except CancelledError:
+    debug "Initialization process interrupted"
+    await vc.shutdownMetrics()
+    return
 
   info "Initializing slashing protection", path = vc.config.validatorsDir()
   vc.attachedValidators.slashingProtection =
@@ -165,10 +214,16 @@ proc asyncInit(vc: ValidatorClientRef) {.async.} =
     vc.doppelgangerService = await DoppelgangerServiceRef.init(vc)
     vc.attestationService = await AttestationServiceRef.init(vc)
     vc.syncCommitteeService = await SyncCommitteeServiceRef.init(vc)
   except CancelledError:
     debug "Initialization process interrupted"
-    info "Closing slashing protection", path = vc.config.validatorsDir()
-    vc.attachedValidators.slashingProtection.close()
+    await vc.shutdownMetrics()
+    vc.shutdownSlashingProtection()
+    return
+  except CatchableError as exc:
+    warn "Unexpected error encountered while initializing",
+         error_name = exc.name, error_msg = exc.msg
+    await vc.shutdownMetrics()
+    vc.shutdownSlashingProtection()
 
 proc asyncRun(vc: ValidatorClientRef) {.async.} =
   vc.fallbackService.start()
@@ -190,8 +245,8 @@ proc asyncRun(vc: ValidatorClientRef) {.async.} =
     debug "Main loop failed with an error", err_name = $exc.name,
           err_msg = $exc.msg
 
-  info "Closing slashing protection", path = vc.config.validatorsDir()
-  vc.attachedValidators.slashingProtection.close()
+  await vc.shutdownMetrics()
+  vc.shutdownSlashingProtection()
   debug "Stopping main processing loop"
   var pending: seq[Future[void]]
   if not(vc.runSlotLoopFut.finished()):
diff --git a/beacon_chain/validator_client/api.nim b/beacon_chain/validator_client/api.nim
index 426f961d2..867454e08 100644
--- a/beacon_chain/validator_client/api.nim
+++ b/beacon_chain/validator_client/api.nim
@@ -1,3 +1,10 @@
+# beacon_chain
+# Copyright (c) 2021-2022 Status Research & Development GmbH
+# Licensed and distributed under either of
+# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
+# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed except according to those terms.
+
 import chronicles
 import ../spec/eth2_apis/eth2_rest_serialization,
        ../spec/datatypes/[phase0, altair]
diff --git a/beacon_chain/validator_client/attestation_service.nim b/beacon_chain/validator_client/attestation_service.nim
index d2de911e1..58bee37cf 100644
--- a/beacon_chain/validator_client/attestation_service.nim
+++ b/beacon_chain/validator_client/attestation_service.nim
@@ -6,7 +6,7 @@
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 
 import std/sets
-import chronicles
+import metrics, chronicles
 import "."/[common, api, block_service]
 
 const
@@ -14,6 +14,16 @@ const
 
 logScope: service = ServiceName
 
+declareCounter beacon_attestations_sent,
+  "Number of attestations sent by the node"
+
+declareCounter beacon_aggregates_sent,
+  "Number of beacon chain attestations sent by the node"
+
+declareHistogram beacon_attestation_sent_delay,
+  "Time(s) between expected and actual attestation send moment",
+  buckets = DelayBuckets
+
 type
   AggregateItem* = object
     aggregator_index: uint64
@@ -108,6 +118,8 @@ proc serveAttestation(service: AttestationServiceRef, adata: AttestationData,
 
   let delay = vc.getDelay(adata.slot.attestation_deadline())
   if res:
+    beacon_attestations_sent.inc()
+    beacon_attestation_sent_delay.observe(delay.toFloatSeconds())
     notice "Attestation published", attestation = shortLog(attestation),
            validator = shortLog(validator),
            validator_index = vindex,
@@ -191,6 +203,7 @@ proc serveAggregateAndProof*(service: AttestationServiceRef,
       return false
 
   if res:
+    beacon_aggregates_sent.inc()
     notice "Aggregated attestation published",
            attestation = shortLog(signedProof.message.aggregate),
            validator = shortLog(validator),
diff --git a/beacon_chain/validator_client/block_service.nim b/beacon_chain/validator_client/block_service.nim
index 473e361a0..0bf5bd628 100644
--- a/beacon_chain/validator_client/block_service.nim
+++ b/beacon_chain/validator_client/block_service.nim
@@ -1,9 +1,23 @@
+# beacon_chain
+# Copyright (c) 2021-2022 Status Research & Development GmbH
+# Licensed and distributed under either of
+# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
+# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed except according to those terms.
+
 import ".."/spec/forks
 import common, api
-import chronicles
+import chronicles, metrics
 
 logScope: service = "block_service"
 
+declareCounter beacon_blocks_sent,
+  "Number of beacon blocks sent by this node"
+
+declareHistogram beacon_blocks_sent_delay,
+  "Time(s) between expected and actual block send moment",
+  buckets = DelayBuckets
+
 proc publishBlock(vc: ValidatorClientRef, currentSlot, slot: Slot,
                   validator: AttachedValidator) {.async.} =
   let
@@ -113,6 +127,9 @@ proc publishBlock(vc: ValidatorClientRef, currentSlot, slot: Slot,
             err_name = exc.name, err_msg = exc.msg
       return
   if res:
+    let delay = vc.getDelay(slot.block_deadline())
+    beacon_blocks_sent.inc()
+    beacon_blocks_sent_delay.observe(delay.toFloatSeconds())
     notice "Block published", blockRoot = shortLog(blockRoot),
            blck = shortLog(beaconBlock), signature = shortLog(signature),
            validator = shortLog(validator)
diff --git a/beacon_chain/validator_client/common.nim b/beacon_chain/validator_client/common.nim
index 89aa99289..ca7fde0aa 100644
--- a/beacon_chain/validator_client/common.nim
+++ b/beacon_chain/validator_client/common.nim
@@ -9,6 +9,7 @@ import std/[tables, os, sets, sequtils]
 import chronos, presto, presto/client as presto_client, chronicles, confutils,
        json_serialization/std/[options, net],
        stew/[base10, results, byteutils]
+import metrics, metrics/chronos_httpserver
 
 # Local modules
 import
@@ -36,6 +37,9 @@ const
   TIME_DELAY_FROM_SLOT* = 79.milliseconds
   SUBSCRIPTION_BUFFER_SLOTS* = 2'u64
 
+  DelayBuckets* = [-Inf, -4.0, -2.0, -1.0, -0.5, -0.1, -0.05,
+                   0.05, 0.1, 0.5, 1.0, 2.0, 4.0, 8.0, Inf]
+
 type
   ServiceState* {.pure.} = enum
     Initialized, Running, Error, Closing, Closed
@@ -137,6 +141,7 @@ type
 
   ValidatorClient* = object
     config*: ValidatorClientConf
+    metricsServer*: Option[MetricsHttpServerRef]
     graffitiBytes*: GraffitiBytes
     beaconNodes*: seq[BeaconNodeServerRef]
     fallbackService*: FallbackServiceRef
diff --git a/beacon_chain/validator_client/duties_service.nim b/beacon_chain/validator_client/duties_service.nim
index 19e0d2ab9..f67addffb 100644
--- a/beacon_chain/validator_client/duties_service.nim
+++ b/beacon_chain/validator_client/duties_service.nim
@@ -1,3 +1,10 @@
+# beacon_chain
+# Copyright (c) 2021-2022 Status Research & Development GmbH
+# Licensed and distributed under either of
+# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
+# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed except according to those terms.
+
 import std/[sets, sequtils]
 import chronicles
 import common, api, block_service
diff --git a/beacon_chain/validator_client/fallback_service.nim b/beacon_chain/validator_client/fallback_service.nim
index 60eec6fe9..8a7832eac 100644
--- a/beacon_chain/validator_client/fallback_service.nim
+++ b/beacon_chain/validator_client/fallback_service.nim
@@ -1,3 +1,10 @@
+# beacon_chain
+# Copyright (c) 2021-2022 Status Research & Development GmbH
+# Licensed and distributed under either of
+# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
+# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed except according to those terms.
+
 import common
 
 const
diff --git a/beacon_chain/validator_client/fork_service.nim b/beacon_chain/validator_client/fork_service.nim
index 6b4f029b9..9b8c6f08d 100644
--- a/beacon_chain/validator_client/fork_service.nim
+++ b/beacon_chain/validator_client/fork_service.nim
@@ -1,3 +1,10 @@
+# beacon_chain
+# Copyright (c) 2021-2022 Status Research & Development GmbH
+# Licensed and distributed under either of
+# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
+# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed except according to those terms.
+
 import std/algorithm
 import chronicles
 import common, api
diff --git a/beacon_chain/validator_client/sync_committee_service.nim b/beacon_chain/validator_client/sync_committee_service.nim
index b81648613..6eeee1405 100644
--- a/beacon_chain/validator_client/sync_committee_service.nim
+++ b/beacon_chain/validator_client/sync_committee_service.nim
@@ -7,7 +7,7 @@
 
 import
   std/sets,
-  chronicles,
+  metrics, chronicles,
   "."/[common, api, block_service],
   ../spec/datatypes/[phase0, altair, bellatrix],
   ../spec/eth2_apis/rest_types
@@ -17,6 +17,16 @@ const
 
 logScope: service = ServiceName
 
+declareCounter beacon_sync_committee_messages_sent,
+  "Number of sync committee messages sent by the node"
+
+declareHistogram beacon_sync_committee_message_sent_delay,
+  "Time(s) between expected and actual sync committee message send moment",
+  buckets = DelayBuckets
+
+declareCounter beacon_sync_committee_contributions_sent,
+  "Number of sync committee contributions sent by the node"
+
 type
   ContributionItem* = object
     aggregator_index: uint64
@@ -82,6 +92,8 @@ proc serveSyncCommitteeMessage*(service: SyncCommitteeServiceRef,
 
   let delay = vc.getDelay(message.slot.sync_committee_message_deadline())
   if res:
+    beacon_sync_committee_messages_sent.inc()
+    beacon_sync_committee_message_sent_delay.observe(delay.toFloatSeconds())
     notice "Sync committee message published",
            message = shortLog(message),
            validator = shortLog(validator),
@@ -193,6 +205,7 @@ proc serveContributionAndProof*(service: SyncCommitteeServiceRef,
         false
 
   if res:
+    beacon_sync_committee_contributions_sent.inc()
     notice "Sync contribution published",
            validator = shortLog(validator),
            validator_index = validatorIdx