Dissect Windows specific code from beacon node. (#5612)

* Make some startup procedures async.
Add a handier makeBannerAndConfig().

* Dissect windows service code from `nimbus_beacon_node.nim`.

* Report service startup errors using Windows error codes.
Add pluggable exitService().

Co-authored-by: Zahary Karadjov <zahary@status.im>
Co-authored-by: Jacek Sieka <jacek@status.im>
This commit is contained in:
Eugene Kabanov 2024-01-13 12:53:53 +02:00 committed by GitHub
parent 05fb7ffff0
commit 5404178a40
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 257 additions and 147 deletions

View File

@ -0,0 +1,53 @@
# beacon_chain
# Copyright (c) 2023 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
{.push raises: [].}
import std/os
import "."/[conf, conf_light_client]
import results, confutils, confutils/defs, confutils/std/net,
confutils/toml/defs as confTomlDefs,
confutils/toml/std/net as confTomlNet,
confutils/toml/std/uri as confTomlUri
proc makeBannerAndConfig*(clientId, copyright, banner, specVersion: string,
                          environment: openArray[string],
                          ConfType: type): Result[ConfType, string] =
  ## Assembles the version banner from `clientId`, `copyright`, `specVersion`
  ## and `banner`, then loads a `ConfType` configuration.
  ##
  ## Arguments are taken from `environment` when it is non-empty, otherwise
  ## from the process command line. When the partially loaded configuration
  ## names a config file, that file is added as a secondary TOML source.
  ##
  ## Returns the loaded configuration, or a human-readable error message when
  ## loading raises a `CatchableError`.
  const multiValueOptions = ["el", "web3-url", "bootstrap-node",
                             "direct-peer", "validator-monitor-pubkey"]
  let
    versionText =
      clientId & "\p" & copyright & "\p\p" &
      "eth2 specification v" & specVersion & "\p\p" &
      banner
    arguments =
      if len(environment) > 0:
        @environment
      else:
        commandLineParams()

  # TODO for some reason, copyrights are printed when doing `--help`
  {.push warning[ProveInit]: off.}
  let config =
    try:
      ConfType.load(
        version = versionText, # but a short version string makes more sense...
        copyrightBanner = clientId,
        cmdLine = arguments,
        secondarySources = proc (
            cfg: ConfType, sources: auto
        ) {.raises: [ConfigurationError], gcsafe.} =
          if cfg.configFile.isSome:
            sources.addConfigFile(Toml, cfg.configFile.get)
      )
    except CatchableError as exc:
      # The message is handed back to the caller instead of being logged,
      # because logging hasn't been configured yet at this point.
      var msg = "Failure while loading the configuration:\p" & exc.msg & "\p"
      let causedByTomlField =
        (exc[] of ConfigurationError) and not(isNil(exc.parent)) and
        (exc.parent[] of TomlFieldReadingError)
      if causedByTomlField:
        let fieldName = ((ref TomlFieldReadingError)(exc.parent)).field
        # Options that accept several values must be written as TOML arrays;
        # give the user an explicit hint when one of them failed to parse.
        if fieldName in multiValueOptions:
          msg &= "Since the '" & fieldName & "' option is allowed to " &
                 "have more than one value, please make sure to supply " &
                 "a properly formatted TOML array\p"
      return err(msg)
  {.pop.}
  ok(config)

View File

@ -23,7 +23,8 @@ import
./validators/[keystore_management, beacon_validators],
"."/[
beacon_node, beacon_node_light_client, deposits,
nimbus_binary_common, statusbar, trusted_node_sync, wallets]
nimbus_binary_common, statusbar, trusted_node_sync, wallets,
winservice]
when defined(posix):
import system/ansi_c
@ -35,64 +36,6 @@ from
import
TopicParams, validateParameters, init
when defined(windows):
import winlean
type
LPCSTR* = cstring
LPSTR* = cstring
SERVICE_STATUS* {.final, pure.} = object
dwServiceType*: DWORD
dwCurrentState*: DWORD
dwControlsAccepted*: DWORD
dwWin32ExitCode*: DWORD
dwServiceSpecificExitCode*: DWORD
dwCheckPoint*: DWORD
dwWaitHint*: DWORD
SERVICE_STATUS_HANDLE* = DWORD
LPSERVICE_STATUS* = ptr SERVICE_STATUS
LPSERVICE_MAIN_FUNCTION* = proc (para1: DWORD, para2: LPSTR) {.stdcall.}
SERVICE_TABLE_ENTRY* {.final, pure.} = object
lpServiceName*: LPSTR
lpServiceProc*: LPSERVICE_MAIN_FUNCTION
LPSERVICE_TABLE_ENTRY* = ptr SERVICE_TABLE_ENTRY
LPHANDLER_FUNCTION* = proc (para1: DWORD): WINBOOL{.stdcall.}
const
SERVICE_WIN32_OWN_PROCESS = 16
SERVICE_RUNNING = 4
SERVICE_STOPPED = 1
SERVICE_START_PENDING = 2
SERVICE_STOP_PENDING = 3
SERVICE_CONTROL_STOP = 1
SERVICE_CONTROL_PAUSE = 2
SERVICE_CONTROL_CONTINUE = 3
SERVICE_CONTROL_INTERROGATE = 4
SERVICE_ACCEPT_STOP = 1
NO_ERROR = 0
SERVICE_NAME = LPCSTR "NIMBUS_BEACON_NODE"
var
gSvcStatusHandle: SERVICE_STATUS_HANDLE
gSvcStatus: SERVICE_STATUS
proc reportServiceStatus*(dwCurrentState, dwWin32ExitCode, dwWaitHint: DWORD) {.gcsafe.}
proc StartServiceCtrlDispatcher*(lpServiceStartTable: LPSERVICE_TABLE_ENTRY): WINBOOL{.
stdcall, dynlib: "advapi32", importc: "StartServiceCtrlDispatcherA".}
proc SetServiceStatus*(hServiceStatus: SERVICE_STATUS_HANDLE,
lpServiceStatus: LPSERVICE_STATUS): WINBOOL{.stdcall,
dynlib: "advapi32", importc: "SetServiceStatus".}
proc RegisterServiceCtrlHandler*(lpServiceName: LPCSTR,
lpHandlerProc: LPHANDLER_FUNCTION): SERVICE_STATUS_HANDLE{.
stdcall, dynlib: "advapi32", importc: "RegisterServiceCtrlHandlerA".}
# https://github.com/ethereum/eth2.0-metrics/blob/master/metrics.md#interop-metrics
declareGauge beacon_slot, "Latest slot of the beacon chain state"
declareGauge beacon_current_epoch, "Current epoch"
@ -281,7 +224,7 @@ proc initFullNode(
rng: ref HmacDrbgContext,
dag: ChainDAGRef,
taskpool: TaskPoolPtr,
getBeaconTime: GetBeaconTimeFn) =
getBeaconTime: GetBeaconTimeFn) {.async.} =
template config(): auto = node.config
proc onAttestationReceived(data: Attestation) =
@ -490,7 +433,7 @@ proc initFullNode(
node.backfiller = backfiller
node.router = router
node.addValidators()
await node.addValidators()
block:
# Add in-process validators to the list of "known" validators such that
@ -866,8 +809,7 @@ proc init*(T: type BeaconNode,
node.initLightClient(
rng, cfg, dag.forkDigests, getBeaconTime, dag.genesis_validators_root)
node.initFullNode(
rng, dag, taskpool, getBeaconTime)
await node.initFullNode(rng, dag, taskpool, getBeaconTime)
node.updateLightClientFromDag()
@ -1601,7 +1543,7 @@ proc onSlotStart(node: BeaconNode, wallTime: BeaconTime,
when defined(windows):
if node.config.runAsService:
reportServiceStatus(SERVICE_RUNNING, NO_ERROR, 0)
reportServiceStatusSuccess()
beacon_slot.set wallSlot.toGaugeValue
beacon_current_epoch.set wallSlot.epoch.toGaugeValue
@ -2286,64 +2228,11 @@ proc handleStartUpCmd(config: var BeaconNodeConf) {.raises: [CatchableError].} =
{.pop.} # TODO moduletests exceptions
when defined(windows):
proc reportServiceStatus*(dwCurrentState, dwWin32ExitCode, dwWaitHint: DWORD) {.gcsafe.} =
gSvcStatus.dwCurrentState = dwCurrentState
gSvcStatus.dwWin32ExitCode = dwWin32ExitCode
gSvcStatus.dwWaitHint = dwWaitHint
if dwCurrentState == SERVICE_START_PENDING:
gSvcStatus.dwControlsAccepted = 0
else:
gSvcStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP
# TODO
# We can use non-zero values for the `dwCheckPoint` parameter to report
# progress during lengthy operations such as start-up and shut down.
gSvcStatus.dwCheckPoint = 0
# Report the status of the service to the SCM.
let status = SetServiceStatus(gSvcStatusHandle, addr gSvcStatus)
debug "Service status updated", status
proc serviceControlHandler(dwCtrl: DWORD): WINBOOL {.stdcall.} =
case dwCtrl
of SERVICE_CONTROL_STOP:
# We re reporting that we plan stop the service in 10 seconds
reportServiceStatus(SERVICE_STOP_PENDING, NO_ERROR, 10_000)
bnStatus = BeaconNodeStatus.Stopping
of SERVICE_CONTROL_PAUSE, SERVICE_CONTROL_CONTINUE:
warn "The Nimbus service cannot be paused and resimed"
of SERVICE_CONTROL_INTERROGATE:
# The default behavior is correct.
# The service control manager will report our last status.
discard
else:
debug "Service received an unexpected user-defined control message",
msg = dwCtrl
proc serviceMainFunction(dwArgc: DWORD, lpszArgv: LPSTR) {.stdcall.} =
# The service is launched in a fresh thread created by Windows, so
# we must initialize the Nim GC here
setupForeignThreadGc()
gSvcStatusHandle = RegisterServiceCtrlHandler(
SERVICE_NAME,
serviceControlHandler)
gSvcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS
gSvcStatus.dwServiceSpecificExitCode = 0
reportServiceStatus(SERVICE_RUNNING, NO_ERROR, 0)
info "Service thread started"
var config = makeBannerAndConfig(clientId, BeaconNodeConf)
handleStartUpCmd(config)
info "Service thread stopped"
reportServiceStatus(SERVICE_STOPPED, NO_ERROR, 0) # we have to report back when we stopped!
programMain:
var config = makeBannerAndConfig(clientId, BeaconNodeConf)
var config = makeBannerAndConfig(clientId, copyrights, nimBanner,
SPEC_VERSION, [], BeaconNodeConf).valueOr:
stderr.write error
quit QuitFailure
if not(checkAndCreateDataDir(string(config.dataDir))):
# We are unable to access/create data folder or data folder's
@ -2378,15 +2267,11 @@ programMain:
when defined(windows):
if config.runAsService:
var dispatchTable = [
SERVICE_TABLE_ENTRY(lpServiceName: SERVICE_NAME, lpServiceProc: serviceMainFunction),
SERVICE_TABLE_ENTRY(lpServiceName: nil, lpServiceProc: nil) # last entry must be nil
]
let status = StartServiceCtrlDispatcher(LPSERVICE_TABLE_ENTRY(addr dispatchTable[0]))
if status == 0:
fatal "Failed to start Windows service", errorCode = getLastError()
quit 1
proc exitService() =
bnStatus = BeaconNodeStatus.Stopping
establishWindowsService(clientId, copyrights, nimBanner, SPEC_VERSION,
"nimbus_beacon_node", BeaconNodeConf,
handleStartUpCmd, exitService)
else:
handleStartUpCmd(config)
else:

View File

@ -21,7 +21,7 @@ import
# Local modules
./spec/[helpers, keystore],
./spec/datatypes/base,
"."/[beacon_clock, beacon_node_status, conf, version]
"."/[beacon_clock, beacon_node_status, conf, conf_common, version]
when defined(posix):
import termios
@ -33,7 +33,8 @@ declareGauge nimVersionGauge, "Nim version info", ["version", "nim_commit"], nam
nimVersionGauge.set(1, labelValues=[NimVersion, getNimGitHash()])
export
confutils, toml_serialization, beacon_clock, beacon_node_status, conf
confutils, toml_serialization, beacon_clock, beacon_node_status, conf,
conf_common
type
SlotStartProc*[T] = proc(node: T, wallTime: BeaconTime,

View File

@ -136,7 +136,7 @@ proc addValidatorsFromWeb3Signer(node: BeaconNode, web3signerUrl: Web3SignerUrl,
gasLimit)
v.updateValidator(data)
proc addValidators*(node: BeaconNode) =
proc addValidators*(node: BeaconNode) {.async.} =
info "Loading validators", validatorsDir = node.config.validatorsDir(),
keystore_cache_available = not(isNil(node.keystoreCache))
let epoch = node.currentSlot().epoch
@ -158,20 +158,12 @@ proc addValidators*(node: BeaconNode) =
gasLimit)
v.updateValidator(data)
try:
# We use `allFutures` because all failures are already reported as
# user-visible warnings in `queryValidatorsSource`.
# We don't consider them fatal because the Web3Signer may be experiencing
# a temporary hiccup that will be resolved later.
waitFor allFutures(mapIt(node.config.web3SignerUrls,
await allFutures(mapIt(node.config.web3SignerUrls,
node.addValidatorsFromWeb3Signer(it, epoch)))
except CatchableError as err:
# This should never happen because all errors are handled within
# `addValidatorsFromWeb3Signer`. Furthermore, the code above is
# using `allFutures` which is guaranteed to not raise exceptions.
# Nevertheless, we need it to make the compiler's exception tracking happy.
debug "Unexpected error while fetching the list of validators from a remote signer",
err = err.msg
proc pollForDynamicValidators*(node: BeaconNode,
web3signerUrl: Web3SignerUrl,

179
beacon_chain/winservice.nim Normal file
View File

@ -0,0 +1,179 @@
# beacon_chain
# Copyright (c) 2023 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
# Everything needed to run beacon node as Windows service.
{.push raises: [].}
when defined(windows):
import results, chronicles
import chronos/[osdefs, osutils, oserrno]
import ./conf_common
type
  # Nim mirrors of the Win32 service control manager (SCM) declarations that
  # are used by the advapi32 imports in this module.
  SERVICE_STATUS* {.final, pure.} = object
    # Status record passed to `SetServiceStatus`.
    dwServiceType*: DWORD
    dwCurrentState*: DWORD
    dwControlsAccepted*: DWORD
    dwWin32ExitCode*: DWORD
    dwServiceSpecificExitCode*: DWORD
    dwCheckPoint*: DWORD
    dwWaitHint*: DWORD
  # Win32 declares SERVICE_STATUS_HANDLE with DECLARE_HANDLE, i.e. it is a
  # pointer-sized opaque handle. Aliasing it to the 32-bit DWORD truncates the
  # value returned by `RegisterServiceCtrlHandlerW` on 64-bit Windows, so a
  # pointer-sized unsigned integer is used instead (same width as DWORD on
  # 32-bit targets).
  SERVICE_STATUS_HANDLE* = uint
  LPSERVICE_STATUS* = ptr SERVICE_STATUS
  # Service entry point: `para1` is the argument count, `para2` points to the
  # first element of an array of wide-string arguments.
  LPSERVICE_MAIN_FUNCTIONW* = proc (para1: DWORD, para2: LPWSTR) {.stdcall.}
  SERVICE_TABLE_ENTRYW* {.final, pure.} = object
    # One row of the table given to `StartServiceCtrlDispatcherW`.
    lpServiceName*: LPWSTR
    lpServiceProc*: LPSERVICE_MAIN_FUNCTIONW
  LPSERVICE_TABLE_ENTRYW* = ptr SERVICE_TABLE_ENTRYW
  # Control handler callback; `para1` is the control code.
  # NOTE(review): the Win32 headers declare this callback as returning VOID;
  # the WINBOOL result is kept to stay compatible with existing handlers.
  LPHANDLER_FUNCTION* = proc (para1: DWORD): WINBOOL {.stdcall.}
const
  # Win32 system error codes reported through `dwWin32ExitCode`.
  NO_ERROR = 0
  ERROR_INVALID_PARAMETER = 87
  ERROR_BAD_CONFIGURATION = 1610

  # Service type (`dwServiceType`).
  SERVICE_WIN32_OWN_PROCESS = 16

  # Service states (`dwCurrentState`).
  SERVICE_STOPPED = 1
  SERVICE_START_PENDING = 2
  SERVICE_STOP_PENDING = 3
  SERVICE_RUNNING = 4

  # Control codes delivered to the service control handler.
  SERVICE_CONTROL_STOP = 1
  SERVICE_CONTROL_PAUSE = 2
  SERVICE_CONTROL_CONTINUE = 3
  SERVICE_CONTROL_INTERROGATE = 4

  # Controls the service declares it accepts (`dwControlsAccepted`).
  SERVICE_ACCEPT_STOP = 1
var
  # Last status record sent to the service control manager; updated in place
  # by `reportServiceStatus`.
  gSvcStatus: SERVICE_STATUS
  # Handle returned by `registerServiceCtrlHandler`; used for every
  # `setServiceStatus` call.
  gSvcStatusHandle: SERVICE_STATUS_HANDLE
# Binding to `StartServiceCtrlDispatcherW` (advapi32): takes a pointer to the
# first entry of a service table whose last entry is all-nil.
proc startServiceCtrlDispatcher(
    lpServiceStartTable: LPSERVICE_TABLE_ENTRYW): WINBOOL {.
    importc: "StartServiceCtrlDispatcherW", dynlib: "advapi32", stdcall.}

# Binding to `SetServiceStatus` (advapi32): reports `lpServiceStatus` for the
# service identified by `hServiceStatus`.
proc setServiceStatus(
    hServiceStatus: SERVICE_STATUS_HANDLE,
    lpServiceStatus: LPSERVICE_STATUS): WINBOOL {.
    importc: "SetServiceStatus", dynlib: "advapi32", stdcall.}

# Binding to `RegisterServiceCtrlHandlerW` (advapi32): registers
# `lpHandlerProc` as the control handler of the service `lpServiceName` and
# returns the status handle of that service.
proc registerServiceCtrlHandler(
    lpServiceName: LPWSTR,
    lpHandlerProc: LPHANDLER_FUNCTION): SERVICE_STATUS_HANDLE {.
    importc: "RegisterServiceCtrlHandlerW", dynlib: "advapi32", stdcall.}
proc getCommandLine(dwArgc: DWORD,
                    lpszArgv: LPWSTR): Result[seq[string], string] =
  ## Converts the argument vector received by the service main function into
  ## a sequence of Nim strings.
  ##
  ## `dwArgc` is the number of entries and `lpszArgv` points to the first
  ## element of an array of wide strings. Per the Win32 `ServiceMain`
  ## contract the first entry is the service name and the vector may be NULL
  ## when no arguments were supplied; therefore the first entry is skipped
  ## and a NULL vector yields an empty result. An empty result lets the
  ## caller fall back to the process command line.
  ##
  ## Returns an error message when the argument count does not fit into `int`
  ## or when a string cannot be converted.
  if uint64(dwArgc) > uint64(high(int)):
    return err("Unable to process incredible count of arguments")
  var res: seq[string]
  # No vector, or only the service name: there are no user arguments.
  if isNil(lpszArgv) or dwArgc <= 1:
    return ok(res)
  let arguments = cast[ptr UncheckedArray[LPWSTR]](lpszArgv)
  # Index 0 holds the service name, which is not a command line option.
  for i in 1 ..< int(dwArgc):
    let str = arguments[i].toString().valueOr:
      return err("Unable to process arguments, reason: " & osErrorMsg(error))
    res.add(str)
  ok(res)
proc reportServiceStatus(dwCurrentState, dwWin32ExitCode,
                         dwWaitHint: DWORD) {.gcsafe.} =
  ## Stores the given state, exit code and wait hint in the global status
  ## record and submits that record through `setServiceStatus`.
  ##
  ## No controls are accepted while the state is `SERVICE_START_PENDING`;
  ## in every other state only the stop control is accepted.
  # TODO
  # We can use non-zero values for the `dwCheckPoint` parameter to report
  # progress during lengthy operations such as start-up and shut down.
  gSvcStatus.dwCheckPoint = 0
  gSvcStatus.dwControlsAccepted =
    if dwCurrentState == SERVICE_START_PENDING:
      DWORD(0)
    else:
      DWORD(SERVICE_ACCEPT_STOP)
  gSvcStatus.dwWaitHint = dwWaitHint
  gSvcStatus.dwWin32ExitCode = dwWin32ExitCode
  gSvcStatus.dwCurrentState = dwCurrentState
  # Report the status of the service to the SCM.
  let status = setServiceStatus(gSvcStatusHandle, addr gSvcStatus)
  debug "Service status updated", status
proc reportServiceStatusSuccess*() =
  ## Reports the `SERVICE_RUNNING` state with `NO_ERROR` and no wait hint.
  reportServiceStatus(
    dwCurrentState = SERVICE_RUNNING,
    dwWin32ExitCode = NO_ERROR,
    dwWaitHint = 0)
template establishWindowsService*(argClientId,
                                  argCopyrights,
                                  argNimBanner,
                                  argSpecVersion,
                                  argServiceName: string,
                                  argConfigType: untyped,
                                  argEntryPoint: untyped,
                                  argExitPoint: untyped): untyped =
  ## Runs the calling program as a Windows service named `argServiceName`.
  ##
  ## The template declares the service control handler and the service main
  ## function, then hands a one-entry service table to
  ## `startServiceCtrlDispatcher`.
  ##
  ## * `argClientId`, `argCopyrights`, `argNimBanner`, `argSpecVersion` are
  ##   forwarded to `makeBannerAndConfig` together with the service arguments.
  ## * `argConfigType` is the configuration type to load.
  ## * `argEntryPoint` is called with the loaded configuration.
  ## * `argExitPoint` is called without arguments when a stop control arrives.
  ##
  ## Any start-up failure terminates the process with `QuitFailure`; argument
  ## and configuration failures are first reported to the SCM as
  ## `ERROR_INVALID_PARAMETER` and `ERROR_BAD_CONFIGURATION` respectively.
  proc serviceControlHandler(dwCtrl: DWORD): WINBOOL {.stdcall.} =
    case dwCtrl
    of SERVICE_CONTROL_STOP:
      # We're reporting that we plan to stop the service in 10 seconds
      reportServiceStatus(SERVICE_STOP_PENDING, NO_ERROR, 10_000)
      argExitPoint()
    of SERVICE_CONTROL_PAUSE, SERVICE_CONTROL_CONTINUE:
      warn "The Nimbus service cannot be paused and resumed"
    of SERVICE_CONTROL_INTERROGATE:
      # The default behavior is correct.
      # The service control manager will report our last status.
      discard
    else:
      debug "Service received an unexpected user-defined control message",
            msg = dwCtrl

  proc serviceMainFunction(dwArgc: DWORD, lpszArgv: LPWSTR) {.stdcall.} =
    # The service is launched in a fresh thread created by Windows, so
    # we must initialize the Nim GC here, before anything allocates
    # (`newWideCString` below allocates GC-managed memory).
    setupForeignThreadGc()
    let serviceName = newWideCString(argServiceName)
    gSvcStatusHandle = registerServiceCtrlHandler(
      cast[LPWSTR](serviceName),
      serviceControlHandler)
    # A zero handle means the registration failed; without a valid handle no
    # status can be reported to the SCM, so only log and terminate.
    if gSvcStatusHandle == 0:
      let regError = osLastError()
      fatal "Failed to register Windows service control handler",
            error_code = uint32(regError), reason = osErrorMsg(regError)
      quit QuitFailure
    gSvcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS
    gSvcStatus.dwServiceSpecificExitCode = 0
    reportServiceStatus(SERVICE_RUNNING, NO_ERROR, 0)
    let environment = getCommandLine(dwArgc, lpszArgv).valueOr:
      reportServiceStatus(SERVICE_STOPPED, ERROR_INVALID_PARAMETER, 0)
      quit QuitFailure
    var config = makeBannerAndConfig(argClientId, argCopyrights,
                                     argNimBanner, argSpecVersion,
                                     environment, argConfigType).valueOr:
      reportServiceStatus(SERVICE_STOPPED, ERROR_BAD_CONFIGURATION, 0)
      quit QuitFailure
    argEntryPoint(config)
    info "Service thread stopped"
    # we have to report back when we stopped!
    reportServiceStatus(SERVICE_STOPPED, NO_ERROR, 0)

  let serviceName = newWideCString(argServiceName)
  var dispatchTable = [
    SERVICE_TABLE_ENTRYW(lpServiceName: cast[LPWSTR](serviceName),
                         lpServiceProc: serviceMainFunction),
    # The last entry must be all-nil to terminate the table.
    SERVICE_TABLE_ENTRYW(lpServiceName: nil,
                         lpServiceProc: nil)
  ]
  let status =
    startServiceCtrlDispatcher(LPSERVICE_TABLE_ENTRYW(addr dispatchTable[0]))
  if status == 0:
    let errorCode = osLastError()
    fatal "Failed to start Windows service", error_code = uint32(errorCode),
          reason = osErrorMsg(errorCode)
    quit QuitFailure