mirror of
https://github.com/status-im/nimbus-eth2.git
synced 2025-01-22 04:24:05 +00:00
5404178a40
* Make some startup procedures async. Add more handful makeBannerAndConfig(). * Dissect windows service code from `nimbus_beacon_node.nim`. * Add report service startup errors using windows error codes. Add plug able exitService(). Co-authored-by: Zahary Karadjov <zahary@status.im> Co-authored-by: Jacek Sieka <jacek@status.im>
453 lines
16 KiB
Nim
453 lines
16 KiB
Nim
# beacon_chain
|
|
# Copyright (c) 2018-2024 Status Research & Development GmbH
|
|
# Licensed and distributed under either of
|
|
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
|
|
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
|
|
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
|
|
|
{.push raises: [].}
|
|
|
|
# Common routines for a BeaconNode and a ValidatorClient
|
|
|
|
import
|
|
# Standard library
|
|
std/[tables, strutils, terminal, typetraits],
|
|
|
|
# Nimble packages
|
|
chronos, confutils, presto, toml_serialization, metrics,
|
|
chronicles, chronicles/helpers as chroniclesHelpers, chronicles/topics_registry,
|
|
stew/io2,
|
|
|
|
# Local modules
|
|
./spec/[helpers, keystore],
|
|
./spec/datatypes/base,
|
|
"."/[beacon_clock, beacon_node_status, conf, conf_common, version]
|
|
|
|
when defined(posix):
|
|
import termios
|
|
|
|
declareGauge versionGauge, "Nimbus version info (as metric labels)", ["version", "commit"], name = "version"
|
|
versionGauge.set(1, labelValues=[fullVersionStr, gitRevision])
|
|
|
|
declareGauge nimVersionGauge, "Nim version info", ["version", "nim_commit"], name = "nim_version"
|
|
nimVersionGauge.set(1, labelValues=[NimVersion, getNimGitHash()])
|
|
|
|
export
|
|
confutils, toml_serialization, beacon_clock, beacon_node_status, conf,
|
|
conf_common
|
|
|
|
type
|
|
SlotStartProc*[T] = proc(node: T, wallTime: BeaconTime,
|
|
lastSlot: Slot): Future[bool] {.gcsafe,
|
|
raises: [].}
|
|
|
|
# silly chronicles, colors is a compile-time property
|
|
when defaultChroniclesStream.outputs.type.arity == 2:
|
|
func stripAnsi(v: string): string =
|
|
var
|
|
res = newStringOfCap(v.len)
|
|
i: int
|
|
|
|
while i < v.len:
|
|
let c = v[i]
|
|
if c == '\x1b':
|
|
var
|
|
x = i + 1
|
|
found = false
|
|
|
|
while x < v.len: # look for [..m
|
|
let c2 = v[x]
|
|
if x == i + 1:
|
|
if c2 != '[':
|
|
break
|
|
else:
|
|
if c2 in {'0'..'9'} + {';'}:
|
|
discard # keep looking
|
|
elif c2 == 'm':
|
|
i = x + 1
|
|
found = true
|
|
break
|
|
else:
|
|
break
|
|
inc x
|
|
|
|
if found: # skip adding c
|
|
continue
|
|
res.add c
|
|
inc i
|
|
|
|
res
|
|
|
|
proc updateLogLevel*(logLevel: string) {.raises: [ValueError].} =
|
|
# Updates log levels (without clearing old ones)
|
|
let directives = logLevel.split(";")
|
|
try:
|
|
setLogLevel(parseEnum[LogLevel](directives[0].capitalizeAscii()))
|
|
except ValueError:
|
|
raise (ref ValueError)(msg: "Please specify one of TRACE, DEBUG, INFO, NOTICE, WARN, ERROR or FATAL")
|
|
|
|
if directives.len > 1:
|
|
for topicName, settings in parseTopicDirectives(directives[1..^1]):
|
|
if not setTopicState(topicName, settings.state, settings.logLevel):
|
|
warn "Unrecognized logging topic", topic = topicName
|
|
|
|
proc detectTTY*(stdoutKind: StdoutLogKind): StdoutLogKind =
|
|
if stdoutKind == StdoutLogKind.Auto:
|
|
if isatty(stdout):
|
|
# On a TTY, let's be fancy
|
|
StdoutLogKind.Colors
|
|
else:
|
|
# When there's no TTY, we output no colors because this matches what
|
|
# released binaries were doing before auto-detection was around and
|
|
# looks decent in systemd-captured journals.
|
|
StdoutLogKind.NoColors
|
|
else:
|
|
stdoutKind
|
|
|
|
when defaultChroniclesStream.outputs.type.arity == 2:
|
|
from std/os import splitFile
|
|
from "."/filepath import secureCreatePath
|
|
|
|
proc setupFileLimits*() =
|
|
when not defined(windows):
|
|
# In addition to databases and sockets, we need a file descriptor for every
|
|
# validator - setting it to 16k should provide sufficient margin
|
|
let
|
|
limit = getMaxOpenFiles2().valueOr(16384)
|
|
|
|
if limit < 16384:
|
|
setMaxOpenFiles2(16384).isOkOr:
|
|
warn "Cannot increase open file limit", err = osErrorMsg(error)
|
|
|
|
proc setupLogging*(
|
|
logLevel: string, stdoutKind: StdoutLogKind, logFile: Option[OutFile]) =
|
|
# In the cfg file for nimbus, we create two formats: textlines and json.
|
|
# Here, we either write those logs to an output, or not, depending on the
|
|
# given configuration.
|
|
# Arguably, if we don't use a format, chronicles should not create it.
|
|
|
|
when defaultChroniclesStream.outputs.type.arity != 2:
|
|
warn "Logging configuration options not enabled in the current build"
|
|
else:
|
|
# Naive approach where chronicles will form a string and we will discard
|
|
# it, even if it could have skipped the formatting phase
|
|
|
|
proc noOutput(logLevel: LogLevel, msg: LogOutputStr) = discard
|
|
proc writeAndFlush(f: File, msg: LogOutputStr) =
|
|
try:
|
|
f.write(msg)
|
|
f.flushFile()
|
|
except IOError as err:
|
|
logLoggingFailure(cstring(msg), err)
|
|
|
|
proc stdoutFlush(logLevel: LogLevel, msg: LogOutputStr) =
|
|
writeAndFlush(stdout, msg)
|
|
|
|
proc noColorsFlush(logLevel: LogLevel, msg: LogOutputStr) =
|
|
writeAndFlush(stdout, stripAnsi(msg))
|
|
|
|
let fileWriter =
|
|
if logFile.isSome():
|
|
let
|
|
logFile = logFile.get.string
|
|
logFileDir = splitFile(logFile).dir
|
|
lres = secureCreatePath(logFileDir)
|
|
if lres.isOk():
|
|
try:
|
|
let
|
|
f = open(logFile, fmAppend)
|
|
x = proc(logLevel: LogLevel, msg: LogOutputStr) =
|
|
writeAndFlush(f, msg) # will close when program terminates
|
|
x
|
|
except CatchableError as exc:
|
|
error "Failed to create log file", logFile, msg = exc.msg
|
|
noOutput
|
|
else:
|
|
error "Failed to create directory for log file",
|
|
path = logFileDir, err = ioErrorMsg(lres.error)
|
|
noOutput
|
|
else:
|
|
noOutput
|
|
|
|
defaultChroniclesStream.outputs[1].writer = fileWriter
|
|
|
|
let tmp = detectTTY(stdoutKind)
|
|
|
|
case tmp
|
|
of StdoutLogKind.Auto: raiseAssert "checked above"
|
|
of StdoutLogKind.Colors:
|
|
defaultChroniclesStream.outputs[0].writer = stdoutFlush
|
|
of StdoutLogKind.NoColors:
|
|
defaultChroniclesStream.outputs[0].writer = noColorsFlush
|
|
of StdoutLogKind.Json:
|
|
defaultChroniclesStream.outputs[0].writer = noOutput
|
|
|
|
let prevWriter = defaultChroniclesStream.outputs[1].writer
|
|
defaultChroniclesStream.outputs[1].writer =
|
|
proc(logLevel: LogLevel, msg: LogOutputStr) =
|
|
stdoutFlush(logLevel, msg)
|
|
prevWriter(logLevel, msg)
|
|
of StdoutLogKind.None:
|
|
defaultChroniclesStream.outputs[0].writer = noOutput
|
|
|
|
if logFile.isSome():
|
|
warn "The --log-file option is deprecated. Consider redirecting the standard output to a file instead"
|
|
try:
|
|
updateLogLevel(logLevel)
|
|
except ValueError as err:
|
|
try:
|
|
stderr.write "Invalid value for --log-level. " & err.msg
|
|
except IOError:
|
|
echo "Invalid value for --log-level. " & err.msg
|
|
quit 1
|
|
|
|
template makeBannerAndConfig*(clientId: string, ConfType: type): untyped =
|
|
let
|
|
version = clientId & "\p" & copyrights & "\p\p" &
|
|
"eth2 specification v" & SPEC_VERSION & "\p\p" &
|
|
nimBanner
|
|
|
|
# TODO for some reason, copyrights are printed when doing `--help`
|
|
{.push warning[ProveInit]: off.}
|
|
let config = try:
|
|
ConfType.load(
|
|
version = version, # but a short version string makes more sense...
|
|
copyrightBanner = clientId,
|
|
secondarySources = proc (
|
|
config: ConfType, sources: ref SecondarySources
|
|
) {.raises: [ConfigurationError].} =
|
|
if config.configFile.isSome:
|
|
sources.addConfigFile(Toml, config.configFile.get)
|
|
)
|
|
except CatchableError as err:
|
|
# We need to log to stderr here, because logging hasn't been configured yet
|
|
try:
|
|
stderr.write "Failure while loading the configuration:\n"
|
|
stderr.write err.msg
|
|
stderr.write "\n"
|
|
|
|
if err[] of ConfigurationError and
|
|
err.parent != nil and
|
|
err.parent[] of TomlFieldReadingError:
|
|
let fieldName = ((ref TomlFieldReadingError)(err.parent)).field
|
|
if fieldName in ["web3-url", "bootstrap-node",
|
|
"direct-peer", "validator-monitor-pubkey"]:
|
|
stderr.write "Since the '" & fieldName & "' option is allowed to " &
|
|
"have more than one value, please make sure to supply " &
|
|
"a properly formatted TOML array\n"
|
|
except IOError:
|
|
discard
|
|
quit 1
|
|
{.pop.}
|
|
config
|
|
|
|
proc checkIfShouldStopAtEpoch*(scheduledSlot: Slot,
|
|
stopAtEpoch: uint64): bool =
|
|
# Offset backwards slightly to allow this epoch's finalization check to occur
|
|
if scheduledSlot > 3 and stopAtEpoch > 0'u64 and
|
|
(scheduledSlot - 3).epoch() >= stopAtEpoch:
|
|
info "Stopping at pre-chosen epoch",
|
|
chosenEpoch = stopAtEpoch,
|
|
epoch = scheduledSlot.epoch(),
|
|
slot = scheduledSlot
|
|
true
|
|
else:
|
|
false
|
|
|
|
proc resetStdin*() =
|
|
when defined(posix):
|
|
# restore echoing, in case it was disabled by a password prompt
|
|
let fd = stdin.getFileHandle()
|
|
var attrs: Termios
|
|
discard fd.tcGetAttr(attrs.addr)
|
|
attrs.c_lflag = attrs.c_lflag or Cflag(ECHO)
|
|
discard fd.tcSetAttr(TCSANOW, attrs.addr)
|
|
|
|
proc runKeystoreCachePruningLoop*(cache: KeystoreCacheRef) {.async.} =
|
|
while true:
|
|
let exitLoop =
|
|
try:
|
|
await sleepAsync(60.seconds)
|
|
false
|
|
except CatchableError:
|
|
cache.clear()
|
|
true
|
|
if exitLoop: break
|
|
cache.pruneExpiredKeys()
|
|
|
|
proc sleepAsync*(t: TimeDiff): Future[void] =
|
|
sleepAsync(nanoseconds(
|
|
if t.nanoseconds < 0: 0'i64 else: t.nanoseconds))
|
|
|
|
proc runSlotLoop*[T](node: T, startTime: BeaconTime,
|
|
slotProc: SlotStartProc[T]) {.async.} =
|
|
var
|
|
curSlot = startTime.slotOrZero()
|
|
nextSlot = curSlot + 1 # No earlier than GENESIS_SLOT + 1
|
|
timeToNextSlot = nextSlot.start_beacon_time() - startTime
|
|
|
|
info "Scheduling first slot action",
|
|
startTime = shortLog(startTime),
|
|
nextSlot = shortLog(nextSlot),
|
|
timeToNextSlot = shortLog(timeToNextSlot)
|
|
|
|
while true:
|
|
# Start by waiting for the time when the slot starts. Sleeping relinquishes
|
|
# control to other tasks which may or may not finish within the alotted
|
|
# time, so below, we need to be wary that the ship might have sailed
|
|
# already.
|
|
await sleepAsync(timeToNextSlot)
|
|
|
|
let
|
|
wallTime = node.beaconClock.now()
|
|
wallSlot = wallTime.slotOrZero() # Always > GENESIS!
|
|
|
|
if wallSlot < nextSlot:
|
|
# While we were sleeping, the system clock changed and time moved
|
|
# backwards!
|
|
if wallSlot + 1 < nextSlot:
|
|
# This is a critical condition where it's hard to reason about what
|
|
# to do next - we'll call the attention of the user here by shutting
|
|
# down.
|
|
fatal "System time adjusted backwards significantly - clock may be inaccurate - shutting down",
|
|
nextSlot = shortLog(nextSlot),
|
|
wallSlot = shortLog(wallSlot)
|
|
bnStatus = BeaconNodeStatus.Stopping
|
|
return
|
|
|
|
# Time moved back by a single slot - this could be a minor adjustment,
|
|
# for example when NTP does its thing after not working for a while
|
|
warn "System time adjusted backwards, rescheduling slot actions",
|
|
wallTime = shortLog(wallTime),
|
|
nextSlot = shortLog(nextSlot),
|
|
wallSlot = shortLog(wallSlot)
|
|
|
|
# cur & next slot remain the same
|
|
timeToNextSlot = nextSlot.start_beacon_time() - wallTime
|
|
continue
|
|
|
|
if wallSlot > nextSlot + SLOTS_PER_EPOCH:
|
|
# Time moved forwards by more than an epoch - either the clock was reset
|
|
# or we've been stuck in processing for a long time - either way, we will
|
|
# skip ahead so that we only process the events of the last
|
|
# SLOTS_PER_EPOCH slots
|
|
warn "Time moved forwards by more than an epoch, skipping ahead",
|
|
curSlot = shortLog(curSlot),
|
|
nextSlot = shortLog(nextSlot),
|
|
wallSlot = shortLog(wallSlot)
|
|
|
|
curSlot = wallSlot - SLOTS_PER_EPOCH
|
|
|
|
elif wallSlot > nextSlot:
|
|
notice "Missed expected slot start, catching up",
|
|
delay = shortLog(wallTime - nextSlot.start_beacon_time()),
|
|
curSlot = shortLog(curSlot),
|
|
nextSlot = shortLog(curSlot)
|
|
|
|
let breakLoop = await slotProc(node, wallTime, curSlot)
|
|
if breakLoop:
|
|
break
|
|
|
|
curSlot = wallSlot
|
|
nextSlot = wallSlot + 1
|
|
timeToNextSlot = nextSlot.start_beacon_time() - node.beaconClock.now()
|
|
|
|
proc init*(T: type RestServerRef,
|
|
ip: IpAddress,
|
|
port: Port,
|
|
allowedOrigin: Option[string],
|
|
validateFn: PatternCallback,
|
|
config: AnyConf): T =
|
|
let
|
|
address = initTAddress(ip, port)
|
|
serverFlags = {HttpServerFlags.QueryCommaSeparatedArray,
|
|
HttpServerFlags.NotifyDisconnect}
|
|
# We increase default timeout to help validator clients who poll our server
|
|
# at least once per slot (12.seconds).
|
|
let
|
|
headersTimeout =
|
|
if config.restRequestTimeout == 0:
|
|
chronos.InfiniteDuration
|
|
else:
|
|
seconds(int64(config.restRequestTimeout))
|
|
maxHeadersSize = config.restMaxRequestHeadersSize * 1024
|
|
maxRequestBodySize = config.restMaxRequestBodySize * 1024
|
|
|
|
let res = RestServerRef.new(RestRouter.init(validateFn, allowedOrigin),
|
|
address, serverFlags = serverFlags,
|
|
httpHeadersTimeout = headersTimeout,
|
|
maxHeadersSize = maxHeadersSize,
|
|
maxRequestBodySize = maxRequestBodySize,
|
|
errorType = string)
|
|
if res.isErr():
|
|
notice "REST HTTP server could not be started", address = $address,
|
|
reason = res.error()
|
|
nil
|
|
else:
|
|
let server = res.get()
|
|
notice "Starting REST HTTP server", url = "http://" & $server.localAddress()
|
|
server
|
|
|
|
type
|
|
KeymanagerInitResult* = object
|
|
server*: RestServerRef
|
|
token*: string
|
|
|
|
proc initKeymanagerServer*(
|
|
config: AnyConf,
|
|
existingRestServer: RestServerRef = nil): KeymanagerInitResult
|
|
{.raises: [].} =
|
|
|
|
var token: string
|
|
let keymanagerServer = if config.keymanagerEnabled:
|
|
if config.keymanagerTokenFile.isNone:
|
|
echo "To enable the Keymanager API, you must also specify " &
|
|
"the --keymanager-token-file option."
|
|
quit 1
|
|
|
|
let
|
|
tokenFilePath = config.keymanagerTokenFile.get.string
|
|
tokenFileReadRes = readAllChars(tokenFilePath)
|
|
|
|
if tokenFileReadRes.isErr:
|
|
fatal "Failed to read the keymanager token file",
|
|
error = $tokenFileReadRes.error
|
|
quit 1
|
|
|
|
token = tokenFileReadRes.value.strip
|
|
if token.len == 0:
|
|
fatal "The keymanager token should not be empty", tokenFilePath
|
|
quit 1
|
|
|
|
when config is BeaconNodeConf:
|
|
if existingRestServer != nil and
|
|
config.restAddress == config.keymanagerAddress and
|
|
config.restPort == config.keymanagerPort:
|
|
existingRestServer
|
|
else:
|
|
RestServerRef.init(config.keymanagerAddress, config.keymanagerPort,
|
|
config.keymanagerAllowedOrigin,
|
|
validateKeymanagerApiQueries,
|
|
config)
|
|
else:
|
|
RestServerRef.init(config.keymanagerAddress, config.keymanagerPort,
|
|
config.keymanagerAllowedOrigin,
|
|
validateKeymanagerApiQueries,
|
|
config)
|
|
else:
|
|
nil
|
|
|
|
KeymanagerInitResult(server: keymanagerServer, token: token)
|
|
|
|
proc quitDoppelganger*() =
|
|
# Avoid colliding with
|
|
# https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Process%20Exit%20Codes
|
|
# This error code is used to permanently shut down validators
|
|
fatal "Doppelganger detection triggered! It appears a validator loaded into " &
|
|
"this process is already live on the network - the validator is at high " &
|
|
"risk of being slashed due to the same keys being used in two setups. " &
|
|
"See https://nimbus.guide/doppelganger-detection.html for more information!"
|
|
|
|
const QuitDoppelganger = 129
|
|
quit QuitDoppelganger
|