VC: metrics (#3915)

* Initial commit.
Enable MetricsHttpServerRef and configuration.

* Add metrics.

* Add headers.
Add compilation issue fixes.
This commit is contained in:
Eugene Kabanov 2022-07-29 11:36:20 +03:00 committed by GitHub
parent 9b081e524c
commit ce9e50e275
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 156 additions and 9 deletions

View File

@ -811,6 +811,22 @@ type
desc: "A file specifying the authorizition token required for accessing the keymanager API"
name: "keymanager-token-file" .}: Option[InputFile]
metricsEnabled* {.
desc: "Enable the metrics server"
defaultValue: false
name: "metrics" .}: bool
metricsAddress* {.
desc: "Listening address of the metrics server"
defaultValue: defaultAdminListenAddress
defaultValueDesc: $defaultAdminListenAddressDesc
name: "metrics-address" .}: ValidIpAddress
metricsPort* {.
desc: "Listening HTTP port of the metrics server"
defaultValue: 8008
name: "metrics-port" .}: Port
graffiti* {.
desc: "The graffiti value that will appear in proposed blocks. " &
"You can use a 0x-prefixed hex encoded string to specify " &

View File

@ -4,10 +4,14 @@
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import metrics, metrics/chronos_httpserver
import validator_client/[common, fallback_service, duties_service,
attestation_service, fork_service,
sync_committee_service, doppelganger_service]
type
  ValidatorClientError* = object of CatchableError
    ## Raised while initializing the validator client, e.g. when the metrics
    ## server or the local validators could not be initialized.
proc initGenesis(vc: ValidatorClientRef): Future[RestGenesis] {.async.} =
info "Initializing genesis", nodes_count = len(vc.beaconNodes)
var nodes = vc.beaconNodes
@ -110,6 +114,40 @@ proc initClock(vc: ValidatorClientRef): Future[BeaconClock] {.async.} =
await sleepAsync(genesisTime.offset)
return res
proc initMetrics(vc: ValidatorClientRef): Future[bool] {.async.} =
  ## Creates and starts the metrics HTTP server when
  ## `vc.config.metricsEnabled` is set.
  ##
  ## Returns `true` when metrics are disabled or the server has been started,
  ## and `false` when the server could not be created or started (the failure
  ## is logged in both cases).
  if not(vc.config.metricsEnabled):
    return true

  let
    address = vc.config.metricsAddress
    port = vc.config.metricsPort
    url = "http://" & $address & ":" & $port & "/metrics"

  info "Starting metrics HTTP server", url = url

  let created = MetricsHttpServerRef.new($address, port)
  if created.isErr():
    error "Could not start metrics HTTP server", url = url,
          error_msg = created.error()
    return false

  let server = created.get()
  # The server instance is stored on `vc` before it is started, so it stays
  # recorded there even when `start()` fails below.
  vc.metricsServer = some(server)
  try:
    await server.start()
  except MetricsError as exc:
    error "Could not start metrics HTTP server", url = url,
          error_msg = exc.msg, error_name = exc.name
    return false
  return true
proc shutdownMetrics(vc: ValidatorClientRef) {.async.} =
  ## Closes the metrics HTTP server stored in `vc.metricsServer`.
  ##
  ## Does nothing when metrics are disabled in the configuration or when no
  ## server instance has been stored.
  if vc.config.metricsEnabled and vc.metricsServer.isSome():
    info "Shutting down metrics HTTP server"
    let server = vc.metricsServer.get()
    await server.close()
proc shutdownSlashingProtection(vc: ValidatorClientRef) =
  ## Logs the validators directory and closes
  ## `vc.attachedValidators.slashingProtection`.
  info "Closing slashing protection", path = validatorsDir(vc.config)
  close(vc.attachedValidators.slashingProtection)
proc onSlotStart(vc: ValidatorClientRef, wallTime: BeaconTime,
lastSlot: Slot): Future[bool] {.async.} =
## Called at the beginning of a slot - usually every slot, but sometimes might
@ -148,8 +186,19 @@ proc asyncInit(vc: ValidatorClientRef) {.async.} =
vc.beaconClock = await vc.initClock()
if not(await initMetrics(vc)):
raise newException(ValidatorClientError,
"Could not initialize metrics server")
try:
if not(await initValidators(vc)):
fatal "Could not initialize local validators"
await vc.shutdownMetrics()
raise newException(ValidatorClientError,
"Could not initialize local validators")
except CancelledError:
debug "Initialization process interrupted"
await vc.shutdownMetrics()
return
info "Initializing slashing protection", path = vc.config.validatorsDir()
vc.attachedValidators.slashingProtection =
@ -165,10 +214,16 @@ proc asyncInit(vc: ValidatorClientRef) {.async.} =
vc.doppelgangerService = await DoppelgangerServiceRef.init(vc)
vc.attestationService = await AttestationServiceRef.init(vc)
vc.syncCommitteeService = await SyncCommitteeServiceRef.init(vc)
except CatchableError as exc:
warn "Unexpected error encountered while initializing",
error_name = exc.name, error_msg = exc.msg
await vc.shutdownMetrics()
vc.shutdownSlashingProtection()
except CancelledError:
debug "Initialization process interrupted"
info "Closing slashing protection", path = vc.config.validatorsDir()
vc.attachedValidators.slashingProtection.close()
await vc.shutdownMetrics()
vc.shutdownSlashingProtection()
return
proc asyncRun(vc: ValidatorClientRef) {.async.} =
vc.fallbackService.start()
@ -190,8 +245,8 @@ proc asyncRun(vc: ValidatorClientRef) {.async.} =
debug "Main loop failed with an error", err_name = $exc.name,
err_msg = $exc.msg
info "Closing slashing protection", path = vc.config.validatorsDir()
vc.attachedValidators.slashingProtection.close()
await vc.shutdownMetrics()
vc.shutdownSlashingProtection()
debug "Stopping main processing loop"
var pending: seq[Future[void]]
if not(vc.runSlotLoopFut.finished()):

View File

@ -1,3 +1,10 @@
# beacon_chain
# Copyright (c) 2021-2022 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import chronicles
import ../spec/eth2_apis/eth2_rest_serialization,
../spec/datatypes/[phase0, altair]

View File

@ -6,7 +6,7 @@
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import std/sets
import chronicles
import metrics, chronicles
import "."/[common, api, block_service]
const
@ -14,6 +14,16 @@ const
logScope: service = ServiceName
# Incremented each time an attestation has been published successfully.
declareCounter beacon_attestations_sent,
  "Number of attestations sent by the node"

# Incremented each time an aggregated attestation has been published
# successfully. The help text names aggregated attestations explicitly so it
# is not confused with `beacon_attestations_sent`.
declareCounter beacon_aggregates_sent,
  "Number of aggregated attestations sent by the node"

# Delay relative to the attestation deadline of the slot, observed in seconds
# for every successfully published attestation.
declareHistogram beacon_attestation_sent_delay,
  "Time(s) between expected and actual attestation send moment",
  buckets = DelayBuckets
type
AggregateItem* = object
aggregator_index: uint64
@ -108,6 +118,8 @@ proc serveAttestation(service: AttestationServiceRef, adata: AttestationData,
let delay = vc.getDelay(adata.slot.attestation_deadline())
if res:
beacon_attestations_sent.inc()
beacon_attestation_sent_delay.observe(delay.toFloatSeconds())
notice "Attestation published", attestation = shortLog(attestation),
validator = shortLog(validator),
validator_index = vindex,
@ -191,6 +203,7 @@ proc serveAggregateAndProof*(service: AttestationServiceRef,
return false
if res:
beacon_aggregates_sent.inc()
notice "Aggregated attestation published",
attestation = shortLog(signedProof.message.aggregate),
validator = shortLog(validator),

View File

@ -1,9 +1,23 @@
# beacon_chain
# Copyright (c) 2021-2022 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import ".."/spec/forks
import common, api
import chronicles
import chronicles, metrics
logScope: service = "block_service"
# Incremented each time a block has been published successfully.
declareCounter(beacon_blocks_sent,
               "Number of beacon blocks sent by this node")

# Delay relative to the block deadline of the slot, observed in seconds for
# every successfully published block.
declareHistogram(beacon_blocks_sent_delay,
                 "Time(s) between expected and actual block send moment",
                 buckets = DelayBuckets)
proc publishBlock(vc: ValidatorClientRef, currentSlot, slot: Slot,
validator: AttachedValidator) {.async.} =
let
@ -113,6 +127,9 @@ proc publishBlock(vc: ValidatorClientRef, currentSlot, slot: Slot,
err_name = exc.name, err_msg = exc.msg
return
if res:
let delay = vc.getDelay(slot.block_deadline())
beacon_blocks_sent.inc()
beacon_blocks_sent_delay.observe(delay.toFloatSeconds())
notice "Block published", blockRoot = shortLog(blockRoot),
blck = shortLog(beaconBlock), signature = shortLog(signature),
validator = shortLog(validator)

View File

@ -9,6 +9,7 @@ import std/[tables, os, sets, sequtils]
import chronos, presto, presto/client as presto_client, chronicles, confutils,
json_serialization/std/[options, net],
stew/[base10, results, byteutils]
import metrics, metrics/chronos_httpserver
# Local modules
import
@ -36,6 +37,9 @@ const
TIME_DELAY_FROM_SLOT* = 79.milliseconds
SUBSCRIPTION_BUFFER_SLOTS* = 2'u64
# Bucket boundaries shared by the `*_delay` histograms declared in the
# service modules. Those histograms observe delays converted with
# `toFloatSeconds()`, so the boundaries are in seconds; negative values
# presumably mean the message went out before its deadline (confirm against
# `getDelay`).
DelayBuckets* = [-Inf, -4.0, -2.0, -1.0, -0.5, -0.1, -0.05,
0.05, 0.1, 0.5, 1.0, 2.0, 4.0, 8.0, Inf]
type
ServiceState* {.pure.} = enum
Initialized, Running, Error, Closing, Closed
@ -137,6 +141,7 @@ type
ValidatorClient* = object
config*: ValidatorClientConf
metricsServer*: Option[MetricsHttpServerRef]
graffitiBytes*: GraffitiBytes
beaconNodes*: seq[BeaconNodeServerRef]
fallbackService*: FallbackServiceRef

View File

@ -1,3 +1,10 @@
# beacon_chain
# Copyright (c) 2021-2022 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import std/[sets, sequtils]
import chronicles
import common, api, block_service

View File

@ -1,3 +1,10 @@
# beacon_chain
# Copyright (c) 2021-2022 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import common
const

View File

@ -1,3 +1,10 @@
# beacon_chain
# Copyright (c) 2021-2022 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import std/algorithm
import chronicles
import common, api

View File

@ -7,7 +7,7 @@
import
std/sets,
chronicles,
metrics, chronicles,
"."/[common, api, block_service],
../spec/datatypes/[phase0, altair, bellatrix],
../spec/eth2_apis/rest_types
@ -17,6 +17,16 @@ const
logScope: service = ServiceName
# Incremented each time a sync committee message has been published
# successfully.
declareCounter(beacon_sync_committee_messages_sent,
               "Number of sync committee messages sent by the node")

# Delay relative to the sync committee message deadline of the slot, observed
# in seconds for every successfully published message.
declareHistogram(
  beacon_sync_committee_message_sent_delay,
  "Time(s) between expected and actual sync committee message send moment",
  buckets = DelayBuckets)

# Incremented each time a sync committee contribution has been published
# successfully.
declareCounter(beacon_sync_committee_contributions_sent,
               "Number of sync committee contributions sent by the node")
type
ContributionItem* = object
aggregator_index: uint64
@ -82,6 +92,8 @@ proc serveSyncCommitteeMessage*(service: SyncCommitteeServiceRef,
let delay = vc.getDelay(message.slot.sync_committee_message_deadline())
if res:
beacon_sync_committee_messages_sent.inc()
beacon_sync_committee_message_sent_delay.observe(delay.toFloatSeconds())
notice "Sync committee message published",
message = shortLog(message),
validator = shortLog(validator),
@ -193,6 +205,7 @@ proc serveContributionAndProof*(service: SyncCommitteeServiceRef,
false
if res:
beacon_sync_committee_contributions_sent.inc()
notice "Sync contribution published",
validator = shortLog(validator),
validator_index = validatorIdx