Mirror of https://github.com/logos-messaging/logos-messaging-nim.git (synced 2026-01-05 23:43:07 +00:00)

chore: refactor to unify online and health monitors (#3456)

parent 3f3c594885
commit 2e40f2971f
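For orientation: the patch below replaces the standalone WakuNodeHealthMonitor and the ad-hoc network-connectivity loop with a unified NodeHealthMonitor that owns an OnlineMonitor and is created inside Waku.new. A minimal sketch of how a caller could wire the new API, using only names introduced in this diff; the WakuNode setup and exact import paths are assumptions, not part of the patch:

  # Sketch only - illustrates the unified monitor API added in this commit.
  import std/net                      # parseIpAddress / IpAddress
  import chronos
  import waku/node/health_monitor     # re-exports node_health_monitor, online_monitor, ...

  proc wireHealthMonitor(node: WakuNode): NodeHealthMonitor =
    # DNS servers feed the OnlineMonitor's internet-connectivity probe
    let hm = NodeHealthMonitor.new(@[parseIpAddress("1.1.1.1"), parseIpAddress("1.0.0.1")])
    hm.setNodeToHealthMonitor(node)
    hm.onlineMonitor.setPeerStoreToOnlineMonitor(node.switch.peerStore)
    hm.startHealthMonitor()           # starts the periodic online-state loop
    hm.setOverallHealth(HealthStatus.READY)
    return hm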
@@ -14,13 +14,11 @@ import
   waku/[
     common/enr,
     common/logging,
-    factory/waku,
+    factory/waku as waku_factory,
     factory/external_config,
     waku_node,
-    node/health_monitor,
     node/waku_metrics,
     node/peer_manager,
-    waku_api/rest/builder as rest_server_builder,
     waku_lightpush/common,
     waku_filter_v2,
     waku_peer_exchange/protocol,
@@ -49,7 +47,7 @@ when isMainModule:
   ## 5. Start monitoring tools and external interfaces
   ## 6. Setup graceful shutdown hooks

-  const versionString = "version / git commit hash: " & waku.git_version
+  const versionString = "version / git commit hash: " & waku_factory.git_version

   let confRes = LiteProtocolTesterConf.load(version = versionString)
   if confRes.isErr():
@@ -61,7 +59,7 @@ when isMainModule:
   ## Logging setup
   logging.setupLog(conf.logLevel, conf.logFormat)

-  info "Running Lite Protocol Tester node", version = waku.git_version
+  info "Running Lite Protocol Tester node", version = waku_factory.git_version
   logConfig(conf)

   ##Prepare Waku configuration
@@ -69,13 +67,13 @@ when isMainModule:
   ## - override according to tester functionality
   ##

-  var wConf: WakuNodeConf
+  var wakuNodeConf: WakuNodeConf

   if conf.configFile.isSome():
     try:
       var configFile {.threadvar.}: InputFile
       configFile = conf.configFile.get()
-      wConf = WakuNodeConf.load(
+      wakuNodeConf = WakuNodeConf.load(
         version = versionString,
         printUsage = false,
         secondarySources = proc(
@@ -88,101 +86,54 @@ when isMainModule:
       error "Loading Waku configuration failed", error = getCurrentExceptionMsg()
       quit(QuitFailure)

-  wConf.logLevel = conf.logLevel
-  wConf.logFormat = conf.logFormat
-  wConf.nat = conf.nat
-  wConf.maxConnections = 500
-  wConf.restAddress = conf.restAddress
-  wConf.restPort = conf.restPort
-  wConf.restAllowOrigin = conf.restAllowOrigin
+  wakuNodeConf.logLevel = conf.logLevel
+  wakuNodeConf.logFormat = conf.logFormat
+  wakuNodeConf.nat = conf.nat
+  wakuNodeConf.maxConnections = 500
+  wakuNodeConf.restAddress = conf.restAddress
+  wakuNodeConf.restPort = conf.restPort
+  wakuNodeConf.restAllowOrigin = conf.restAllowOrigin

-  wConf.dnsAddrsNameServers = @[parseIpAddress("8.8.8.8"), parseIpAddress("1.1.1.1")]
+  wakuNodeConf.dnsAddrsNameServers =
+    @[parseIpAddress("8.8.8.8"), parseIpAddress("1.1.1.1")]

-  wConf.shards = @[conf.shard]
-  wConf.contentTopics = conf.contentTopics
-  wConf.clusterId = conf.clusterId
+  wakuNodeConf.shards = @[conf.shard]
+  wakuNodeConf.contentTopics = conf.contentTopics
+  wakuNodeConf.clusterId = conf.clusterId
   ## TODO: Depending on the tester needs we might extend here with shards, clusterId, etc...

-  wConf.metricsServer = true
-  wConf.metricsServerAddress = parseIpAddress("0.0.0.0")
-  wConf.metricsServerPort = conf.metricsPort
+  wakuNodeConf.metricsServer = true
+  wakuNodeConf.metricsServerAddress = parseIpAddress("0.0.0.0")
+  wakuNodeConf.metricsServerPort = conf.metricsPort

   # If bootstrap option is chosen we expect our clients will not mounted
   # so we will mount PeerExchange manually to gather possible service peers,
   # if got some we will mount the client protocols afterward.
-  wConf.peerExchange = false
-  wConf.relay = false
-  wConf.filter = false
-  wConf.lightpush = false
-  wConf.store = false
+  wakuNodeConf.peerExchange = false
+  wakuNodeConf.relay = false
+  wakuNodeConf.filter = false
+  wakuNodeConf.lightpush = false
+  wakuNodeConf.store = false

-  wConf.rest = false
-  wConf.relayServiceRatio = "40:60"
+  wakuNodeConf.rest = false
+  wakuNodeConf.relayServiceRatio = "40:60"

-  # NOTE: {.threadvar.} is used to make the global variable GC safe for the closure uses it
-  # It will always be called from main thread anyway.
-  # Ref: https://nim-lang.org/docs/manual.html#threads-gc-safety
-  var nodeHealthMonitor {.threadvar.}: WakuNodeHealthMonitor
-  nodeHealthMonitor = WakuNodeHealthMonitor()
-  nodeHealthMonitor.setOverallHealth(HealthStatus.INITIALIZING)
-
-  let wakuConf = wConf.toWakuConf().valueOr:
-    error "Waku configuration failed", error = error
+  let wakuConf = wakuNodeConf.toWakuConf().valueOr:
+    error "Issue converting toWakuConf", error = $error
     quit(QuitFailure)

-  let restServer: WakuRestServerRef =
-    if wakuConf.restServerConf.isSome():
-      rest_server_builder.startRestServerEssentials(
-        nodeHealthMonitor, wakuConf.restServerConf.get(), wakuConf.portsShift
-      ).valueOr:
-        error "Starting essential REST server failed.", error = $error
-        quit(QuitFailure)
-    else:
-      nil
-
-  var wakuApp = Waku.new(wakuConf).valueOr:
+  var waku = Waku.new(wakuConf).valueOr:
     error "Waku initialization failed", error = error
     quit(QuitFailure)

-  wakuApp.restServer = restServer
-
-  nodeHealthMonitor.setNode(wakuApp.node)
-
-  (waitFor startWaku(addr wakuApp)).isOkOr:
+  (waitFor startWaku(addr waku)).isOkOr:
     error "Starting waku failed", error = error
     quit(QuitFailure)

-  if wakuConf.restServerConf.isSome():
-    rest_server_builder.startRestServerProtocolSupport(
-      restServer,
-      wakuApp.node,
-      wakuApp.wakuDiscv5,
-      wakuConf.restServerConf.get(),
-      wakuConf.relay,
-      wakuConf.lightPush,
-      wakuConf.clusterId,
-      wakuConf.shards,
-      wakuConf.contentTopics,
-    ).isOkOr:
-      error "Starting protocols support REST server failed.", error = $error
-      quit(QuitFailure)
-
-  if wakuConf.metricsServerConf.isSome():
-    wakuApp.metricsServer = waku_metrics.startMetricsServerAndLogging(
-      wakuConf.metricsServerConf.get(), wakuConf.portsShift
-    ).valueOr:
-      error "Starting monitoring and external interfaces failed", error = error
-      quit(QuitFailure)
-
-  nodeHealthMonitor.setOverallHealth(HealthStatus.READY)
-
   debug "Setting up shutdown hooks"
-  ## Setup shutdown hooks for this process.
-  ## Stop node gracefully on shutdown.
-
-  proc asyncStopper(wakuApp: Waku) {.async: (raises: [Exception]).} =
-    nodeHealthMonitor.setOverallHealth(HealthStatus.SHUTTING_DOWN)
-    await wakuApp.stop()
+  proc asyncStopper(waku: Waku) {.async: (raises: [Exception]).} =
+    await waku.stop()
     quit(QuitSuccess)

   # Handle Ctrl-C SIGINT
@@ -191,7 +142,7 @@ when isMainModule:
     # workaround for https://github.com/nim-lang/Nim/issues/4057
     setupForeignThreadGc()
     notice "Shutting down after receiving SIGINT"
-    asyncSpawn asyncStopper(wakuApp)
+    asyncSpawn asyncStopper(waku)

   setControlCHook(handleCtrlC)

@@ -199,7 +150,7 @@ when isMainModule:
   when defined(posix):
     proc handleSigterm(signal: cint) {.noconv.} =
       notice "Shutting down after receiving SIGTERM"
-      asyncSpawn asyncStopper(wakuApp)
+      asyncSpawn asyncStopper(waku)

     c_signal(ansi_c.SIGTERM, handleSigterm)

@@ -212,22 +163,26 @@ when isMainModule:
       # Not available in -d:release mode
       writeStackTrace()

-      waitFor wakuApp.stop()
+      waitFor waku.stop()
       quit(QuitFailure)

     c_signal(ansi_c.SIGSEGV, handleSigsegv)

   info "Node setup complete"

-  let codec = conf.getCodec()
+  var codec = WakuLightPushCodec
   # mounting relevant client, for PX filter client must be mounted ahead
+  if conf.testFunc == TesterFunctionality.SENDER:
+    codec = WakuLightPushCodec
+  else:
+    codec = WakuFilterSubscribeCodec

   var lookForServiceNode = false
   var serviceNodePeerInfo: RemotePeerInfo
   if conf.serviceNode.len == 0:
     if conf.bootstrapNode.len > 0:
       info "Bootstrapping with PeerExchange to gather random service node"
-      let futForServiceNode = pxLookupServiceNode(wakuApp.node, conf)
+      let futForServiceNode = pxLookupServiceNode(waku.node, conf)
       if not (waitFor futForServiceNode.withTimeout(20.minutes)):
         error "Service node not found in time via PX"
         quit(QuitFailure)
@@ -237,7 +192,7 @@ when isMainModule:
       quit(QuitFailure)

     serviceNodePeerInfo = selectRandomServicePeer(
-      wakuApp.node.peerManager, none(RemotePeerInfo), codec
+      waku.node.peerManager, none(RemotePeerInfo), codec
     ).valueOr:
       error "Service node selection failed"
       quit(QuitFailure)
@@ -252,11 +207,11 @@ when isMainModule:

   info "Service node to be used", serviceNode = $serviceNodePeerInfo

-  logSelfPeers(wakuApp.node.peerManager)
+  logSelfPeers(waku.node.peerManager)

   if conf.testFunc == TesterFunctionality.SENDER:
-    setupAndPublish(wakuApp.node, conf, serviceNodePeerInfo)
+    setupAndPublish(waku.node, conf, serviceNodePeerInfo)
   else:
-    setupAndListen(wakuApp.node, conf, serviceNodePeerInfo)
+    setupAndListen(waku.node, conf, serviceNodePeerInfo)

   runForever()
@@ -16,7 +16,6 @@ import
     factory/external_config,
     factory/waku,
     node/health_monitor,
-    node/waku_metrics,
     waku_api/rest/builder as rest_server_builder,
   ]

@@ -53,69 +52,21 @@ when isMainModule:
     let conf = wakuNodeConf.toInspectRlnDbConf()
     doInspectRlnDb(conf)
   of noCommand:
-    # NOTE: {.threadvar.} is used to make the global variable GC safe for the closure uses it
-    # It will always be called from main thread anyway.
-    # Ref: https://nim-lang.org/docs/manual.html#threads-gc-safety
-    var nodeHealthMonitor {.threadvar.}: WakuNodeHealthMonitor
-    nodeHealthMonitor = WakuNodeHealthMonitor()
-    nodeHealthMonitor.setOverallHealth(HealthStatus.INITIALIZING)
-
     let conf = wakuNodeConf.toWakuConf().valueOr:
       error "Waku configuration failed", error = error
       quit(QuitFailure)

-    var restServer: WakuRestServerRef = nil
-
-    if conf.restServerConf.isSome():
-      restServer = rest_server_builder.startRestServerEssentials(
-        nodeHealthMonitor, conf.restServerConf.get(), conf.portsShift
-      ).valueOr:
-        error "Starting essential REST server failed.", error = $error
-        quit(QuitFailure)
-
     var waku = Waku.new(conf).valueOr:
       error "Waku initialization failed", error = error
       quit(QuitFailure)

-    waku.restServer = restServer
-
-    nodeHealthMonitor.setNode(waku.node)
-
     (waitFor startWaku(addr waku)).isOkOr:
       error "Starting waku failed", error = error
       quit(QuitFailure)

-    if conf.restServerConf.isSome():
-      rest_server_builder.startRestServerProtocolSupport(
-        restServer,
-        waku.node,
-        waku.wakuDiscv5,
-        conf.restServerConf.get(),
-        conf.relay,
-        conf.lightPush,
-        conf.clusterId,
-        conf.shards,
-        conf.contentTopics,
-      ).isOkOr:
-        error "Starting protocols support REST server failed.", error = $error
-        quit(QuitFailure)
-
-    if conf.metricsServerConf.isSome():
-      waku.metricsServer = waku_metrics.startMetricsServerAndLogging(
-        conf.metricsServerConf.get(), conf.portsShift
-      ).valueOr:
-        error "Starting monitoring and external interfaces failed", error = error
-        quit(QuitFailure)
-
-    nodeHealthMonitor.setOverallHealth(HealthStatus.READY)
-
     debug "Setting up shutdown hooks"
-    ## Setup shutdown hooks for this process.
-    ## Stop node gracefully on shutdown.
-
-    proc asyncStopper(node: Waku) {.async: (raises: [Exception]).} =
-      nodeHealthMonitor.setOverallHealth(HealthStatus.SHUTTING_DOWN)
-      await node.stop()
+    proc asyncStopper(waku: Waku) {.async: (raises: [Exception]).} =
+      await waku.stop()
       quit(QuitSuccess)

     # Handle Ctrl-C SIGINT
@@ -878,8 +878,8 @@ proc waku_is_online(

   handleRequest(
     ctx,
-    RequestType.PEER_MANAGER,
-    PeerManagementRequest.createShared(PeerManagementMsgType.IS_ONLINE),
+    RequestType.DEBUG,
+    DebugNodeRequest.createShared(DebugNodeMsgType.RETRIEVE_ONLINE_STATE),
     callback,
     userData,
   )

@@ -7,13 +7,17 @@ import
   strutils,
   libp2p/peerid,
   metrics
-import ../../../../waku/factory/waku, ../../../../waku/node/waku_node
+import
+  ../../../../waku/factory/waku,
+  ../../../../waku/node/waku_node,
+  ../../../../waku/node/health_monitor

 type DebugNodeMsgType* = enum
   RETRIEVE_LISTENING_ADDRESSES
   RETRIEVE_MY_ENR
   RETRIEVE_MY_PEER_ID
   RETRIEVE_METRICS
+  RETRIEVE_ONLINE_STATE

 type DebugNodeRequest* = object
   operation: DebugNodeMsgType
@@ -49,6 +53,8 @@ proc process*(
     return ok($waku.node.peerId())
   of RETRIEVE_METRICS:
     return ok(getMetrics())
+  of RETRIEVE_ONLINE_STATE:
+    return ok($waku.healthMonitor.onlineMonitor.amIOnline())

   error "unsupported operation in DebugNodeRequest"
   return err("unsupported operation in DebugNodeRequest")

@@ -8,6 +8,7 @@ import
   ../../../../waku/factory/node_factory,
   ../../../../waku/factory/networks_config,
   ../../../../waku/factory/app_callbacks,
+  ../../../../waku/waku_api/rest/builder,
   ../../../alloc

 type NodeLifecycleMsgType* = enum
@@ -73,9 +74,11 @@ proc createWaku(
     appCallbacks.topicHealthChangeHandler = nil

   # TODO: Convert `confJson` directly to `WakuConf`
-  let wakuConf = conf.toWakuConf().valueOr:
+  var wakuConf = conf.toWakuConf().valueOr:
     return err("Configuration error: " & $error)

+  wakuConf.restServerConf = none(RestServerConf) ## don't want REST in libwaku
+
   let wakuRes = Waku.new(wakuConf, appCallbacks).valueOr:
     error "waku initialization failed", error = error
     return err("Failed setting up Waku: " & $error)

@@ -16,7 +16,6 @@ type PeerManagementMsgType* {.pure.} = enum
   DIAL_PEER
   DIAL_PEER_BY_ID
   GET_CONNECTED_PEERS
-  IS_ONLINE

 type PeerManagementRequest* = object
   operation: PeerManagementMsgType
@@ -156,7 +155,5 @@ proc process*(
       (inPeerIds, outPeerIds) = waku.node.peerManager.connectedPeers()
       connectedPeerids = concat(inPeerIds, outPeerIds)
     return ok(connectedPeerids.mapIt($it).join(","))
-  of IS_ONLINE:
-    return ok($waku.node.peerManager.isOnline())

   return ok("")

@@ -39,7 +39,7 @@ suite "Waku v2 REST API - health":
   asyncTest "Get node health info - GET /health":
     # Given
     let node = testWakuNode()
-    let healthMonitor = WakuNodeHealthMonitor()
+    let healthMonitor = NodeHealthMonitor()
     await node.start()
     (await node.mountRelay()).isOkOr:
       assert false, "Failed to mount relay"
@@ -78,7 +78,7 @@ suite "Waku v2 REST API - health":
     node.mountLightPushClient()
     await node.mountFilterClient()

-    healthMonitor.setNode(node)
+    healthMonitor.setNodeToHealthMonitor(node)
     healthMonitor.setOverallHealth(HealthStatus.READY)
     # When
     response = await client.healthCheck()

@@ -209,7 +209,6 @@ proc build*(builder: WakuNodeBuilder): Result[WakuNode, string] =
     maxServicePeers = some(builder.maxServicePeers),
     colocationLimit = builder.colocationLimit,
     shardedPeerManagement = builder.shardAware,
-    dnsNameServers = netConfig.dnsNameServers,
   )

   var node: WakuNode

@@ -426,7 +426,7 @@ proc startNode*(
   ## Connect to static nodes and start
   ## keep-alive, if configured.

-  # Start Waku v2 node
+  info "Running nwaku node", version = git_version
   try:
     await node.start()
   except CatchableError:

@@ -26,9 +26,11 @@ import
   ../waku_node,
   ../node/peer_manager,
   ../node/health_monitor,
+  ../node/waku_metrics,
   ../node/delivery_monitor/delivery_monitor,
   ../waku_api/message_cache,
   ../waku_api/rest/server,
+  ../waku_api/rest/builder as rest_server_builder,
   ../waku_archive,
   ../waku_relay/protocol,
   ../discovery/waku_dnsdisc,
@@ -66,6 +68,8 @@ type Waku* = ref object

   node*: WakuNode

+  healthMonitor*: NodeHealthMonitor
+
   deliveryMonitor: DeliveryMonitor

   restServer*: WakuRestServerRef
@@ -159,19 +163,33 @@ proc new*(
   logging.setupLog(wakuConf.logLevel, wakuConf.logFormat)

   ?wakuConf.validate()

   wakuConf.logConf()

-  info "Running nwaku node", version = git_version
+  let healthMonitor = NodeHealthMonitor.new(wakuConf.dnsAddrsNameServers)
+
+  let restServer: WakuRestServerRef =
+    if wakuConf.restServerConf.isSome():
+      let restServer = startRestServerEssentials(
+        healthMonitor, wakuConf.restServerConf.get(), wakuConf.portsShift
+      ).valueOr:
+        error "Starting essential REST server failed", error = $error
+        return err("Failed to start essential REST server in Waku.new: " & $error)
+
+      restServer
+    else:
+      nil

   var relay = newCircuitRelay(wakuConf.circuitRelayClient)

-  let nodeRes = setupNode(wakuConf, rng, relay)
-  if nodeRes.isErr():
-    error "Failed setting up node", error = nodeRes.error
-    return err("Failed setting up node: " & nodeRes.error)
+  let node = setupNode(wakuConf, rng, relay).valueOr:
+    error "Failed setting up node", error = $error
+    return err("Failed setting up node: " & $error)

-  let node = nodeRes.get()
+  healthMonitor.setNodeToHealthMonitor(node)
+  healthMonitor.onlineMonitor.setPeerStoreToOnlineMonitor(node.switch.peerStore)
+  healthMonitor.onlineMonitor.addOnlineStateObserver(
+    node.peerManager.getOnlineStateObserver()
+  )

   node.setupAppCallbacks(wakuConf, appCallbacks).isOkOr:
     error "Failed setting up app callbacks", error = error
@@ -197,8 +215,10 @@ proc new*(
     rng: rng,
     key: wakuConf.nodeKey,
     node: node,
+    healthMonitor: healthMonitor,
     deliveryMonitor: deliveryMonitor,
     appCallbacks: appCallbacks,
+    restServer: restServer,
   )

   waku.setupSwitchServices(wakuConf, relay, rng)
@@ -334,15 +354,6 @@ proc startDnsDiscoveryRetryLoop(waku: ptr Waku): Future[void] {.async.} =
     error "failed to connect to dynamic bootstrap nodes: " & getCurrentExceptionMsg()
     return

-# The network connectivity loop checks periodically whether the node is online or not
-# and triggers any change that depends on the network connectivity state
-proc startNetworkConnectivityLoop(waku: Waku): Future[void] {.async.} =
-  while true:
-    await sleepAsync(15.seconds)
-
-    # Update online state
-    await waku.node.peerManager.updateOnlineState()
-
 proc startWaku*(waku: ptr Waku): Future[Result[void, string]] {.async.} =
   debug "Retrieve dynamic bootstrap nodes"
   let conf = waku[].conf
@@ -369,7 +380,7 @@ proc startWaku*(waku: ptr Waku): Future[Result[void, string]] {.async.} =
     return err("Error in updateApp: " & $error)

   ## Discv5
-  if conf.discv5Conf.isSome:
+  if conf.discv5Conf.isSome():
     waku[].wakuDiscV5 = waku_discv5.setupDiscoveryV5(
       waku.node.enr,
       waku.node.peerManager,
@@ -389,23 +400,41 @@ proc startWaku*(waku: ptr Waku): Future[Result[void, string]] {.async.} =
   if not waku[].deliveryMonitor.isNil():
     waku[].deliveryMonitor.startDeliveryMonitor()

-  # Start network connectivity check loop
-  waku[].networkConnLoopHandle = waku[].startNetworkConnectivityLoop()
+  ## Health Monitor
+  waku[].healthMonitor.startHealthMonitor()
+
+  if conf.restServerConf.isSome():
+    rest_server_builder.startRestServerProtocolSupport(
+      waku[].restServer,
+      waku[].node,
+      waku[].wakuDiscv5,
+      conf.restServerConf.get(),
+      conf.relay,
+      conf.lightPush,
+      conf.clusterId,
+      conf.shards,
+      conf.contentTopics,
+    ).isOkOr:
+      return err ("Starting protocols support REST server failed: " & $error)
+
+  if conf.metricsServerConf.isSome():
+    waku[].metricsServer = waku_metrics.startMetricsServerAndLogging(
+      conf.metricsServerConf.get(), conf.portsShift
+    ).valueOr:
+      return err("Starting monitoring and external interfaces failed: " & error)
+
+  waku[].healthMonitor.setOverallHealth(HealthStatus.READY)
+
   return ok()

-# Waku shutdown
-
 proc stop*(waku: Waku): Future[void] {.async: (raises: [Exception]).} =
-  if not waku.restServer.isNil():
-    await waku.restServer.stop()
+  ## Waku shutdown
+
+  waku.healthMonitor.setOverallHealth(HealthStatus.SHUTTING_DOWN)

   if not waku.metricsServer.isNil():
     await waku.metricsServer.stop()

-  if not waku.networkConnLoopHandle.isNil():
-    await waku.networkConnLoopHandle.cancelAndWait()
-
   if not waku.wakuDiscv5.isNil():
     await waku.wakuDiscv5.stop()

@@ -414,3 +443,9 @@ proc stop*(waku: Waku): Future[void] {.async: (raises: [Exception]).} =

   if not waku.dnsRetryLoopHandle.isNil():
     await waku.dnsRetryLoopHandle.cancelAndWait()
+
+  if not waku.healthMonitor.isNil():
+    await waku.healthMonitor.stopHealthMonitor()
+
+  if not waku.restServer.isNil():
+    await waku.restServer.stop()

@@ -1,293 +1,4 @@
-{.push raises: [].}
-
-import std/[options, sets], chronos, libp2p/protocols/rendezvous
-
-import waku_node, ../waku_rln_relay, ../waku_relay, ./peer_manager
-
-type
-  HealthStatus* = enum
-    INITIALIZING
-    SYNCHRONIZING
-    READY
-    NOT_READY
-    NOT_MOUNTED
-    SHUTTING_DOWN
-
-  ProtocolHealth* = object
-    protocol*: string
-    health*: HealthStatus
-    desc*: Option[string] ## describes why a certain protocol is considered `NOT_READY`
-
-  HealthReport* = object
-    nodeHealth*: HealthStatus
-    protocolsHealth*: seq[ProtocolHealth]
-
-  WakuNodeHealthMonitor* = ref object
-    nodeHealth: HealthStatus
-    node: Option[WakuNode]
-
-proc `$`*(t: HealthStatus): string =
-  result =
-    case t
-    of INITIALIZING: "Initializing"
-    of SYNCHRONIZING: "Synchronizing"
-    of READY: "Ready"
-    of NOT_READY: "Not Ready"
-    of NOT_MOUNTED: "Not Mounted"
-    of SHUTTING_DOWN: "Shutting Down"
-
-proc init*(
-    t: typedesc[HealthStatus], strRep: string
-): HealthStatus {.raises: [ValueError].} =
-  case strRep
-  of "Initializing":
-    return HealthStatus.INITIALIZING
-  of "Synchronizing":
-    return HealthStatus.SYNCHRONIZING
-  of "Ready":
-    return HealthStatus.READY
-  of "Not Ready":
-    return HealthStatus.NOT_READY
-  of "Not Mounted":
-    return HealthStatus.NOT_MOUNTED
-  of "Shutting Down":
-    return HealthStatus.SHUTTING_DOWN
-  else:
-    raise newException(ValueError, "Invalid HealthStatus string representation")
-
-proc init*(p: typedesc[ProtocolHealth], protocol: string): ProtocolHealth =
-  let p = ProtocolHealth(
-    protocol: protocol, health: HealthStatus.NOT_MOUNTED, desc: none[string]()
-  )
-  return p
-
-proc notReady(p: var ProtocolHealth, desc: string): ProtocolHealth =
-  p.health = HealthStatus.NOT_READY
-  p.desc = some(desc)
-  return p
-
-proc ready(p: var ProtocolHealth): ProtocolHealth =
-  p.health = HealthStatus.READY
-  p.desc = none[string]()
-  return p
-
-proc notMounted(p: var ProtocolHealth): ProtocolHealth =
-  p.health = HealthStatus.NOT_MOUNTED
-  p.desc = none[string]()
-  return p
-
-proc synchronizing(p: var ProtocolHealth): ProtocolHealth =
-  p.health = HealthStatus.SYNCHRONIZING
-  p.desc = none[string]()
-  return p
-
-proc initializing(p: var ProtocolHealth): ProtocolHealth =
-  p.health = HealthStatus.INITIALIZING
-  p.desc = none[string]()
-  return p
-
-proc shuttingDown(p: var ProtocolHealth): ProtocolHealth =
-  p.health = HealthStatus.SHUTTING_DOWN
-  p.desc = none[string]()
-  return p
-
-const FutIsReadyTimout = 5.seconds
-
-proc getRelayHealth(hm: WakuNodeHealthMonitor): ProtocolHealth =
-  var p = ProtocolHealth.init("Relay")
-  if hm.node.get().wakuRelay == nil:
-    return p.notMounted()
-
-  let relayPeers = hm.node
-    .get().wakuRelay
-    .getConnectedPubSubPeers(pubsubTopic = "").valueOr:
-      return p.notMounted()
-
-  if relayPeers.len() == 0:
-    return p.notReady("No connected peers")
-
-  return p.ready()
-
-proc getRlnRelayHealth(hm: WakuNodeHealthMonitor): Future[ProtocolHealth] {.async.} =
-  var p = ProtocolHealth.init("Rln Relay")
-  if hm.node.get().wakuRlnRelay.isNil():
-    return p.notMounted()
-
-  let isReadyStateFut = hm.node.get().wakuRlnRelay.isReady()
-  if not await isReadyStateFut.withTimeout(FutIsReadyTimout):
-    return p.notReady("Ready state check timed out")
-
-  try:
-    if not isReadyStateFut.completed():
-      return p.notReady("Ready state check timed out")
-    elif isReadyStateFut.read():
-      return p.ready()
-
-    return p.synchronizing()
-  except:
-    error "exception reading state: " & getCurrentExceptionMsg()
-    return p.notReady("State cannot be determined")
-
-proc getLightpushHealth(
-    hm: WakuNodeHealthMonitor, relayHealth: HealthStatus
-): ProtocolHealth =
-  var p = ProtocolHealth.init("Lightpush")
-  if hm.node.get().wakuLightPush == nil:
-    return p.notMounted()
-
-  if relayHealth == HealthStatus.READY:
-    return p.ready()
-
-  return p.notReady("Node has no relay peers to fullfill push requests")
-
-proc getLightpushClientHealth(
-    hm: WakuNodeHealthMonitor, relayHealth: HealthStatus
-): ProtocolHealth =
-  var p = ProtocolHealth.init("Lightpush Client")
-  if hm.node.get().wakuLightpushClient == nil:
-    return p.notMounted()
-
-  let selfServiceAvailable =
-    hm.node.get().wakuLightPush != nil and relayHealth == HealthStatus.READY
-  let servicePeerAvailable =
-    hm.node.get().peerManager.selectPeer(WakuLightPushCodec).isSome()
-
-  if selfServiceAvailable or servicePeerAvailable:
-    return p.ready()
-
-  return p.notReady("No Lightpush service peer available yet")
-
-proc getLegacyLightpushHealth(
-    hm: WakuNodeHealthMonitor, relayHealth: HealthStatus
-): ProtocolHealth =
-  var p = ProtocolHealth.init("Legacy Lightpush")
-  if hm.node.get().wakuLegacyLightPush == nil:
-    return p.notMounted()
-
-  if relayHealth == HealthStatus.READY:
-    return p.ready()
-
-  return p.notReady("Node has no relay peers to fullfill push requests")
-
-proc getLegacyLightpushClientHealth(
-    hm: WakuNodeHealthMonitor, relayHealth: HealthStatus
-): ProtocolHealth =
-  var p = ProtocolHealth.init("Legacy Lightpush Client")
-  if hm.node.get().wakuLegacyLightpushClient == nil:
-    return p.notMounted()
-
-  if (hm.node.get().wakuLegacyLightPush != nil and relayHealth == HealthStatus.READY) or
-      hm.node.get().peerManager.selectPeer(WakuLegacyLightPushCodec).isSome():
-    return p.ready()
-
-  return p.notReady("No Lightpush service peer available yet")
-
-proc getFilterHealth(
-    hm: WakuNodeHealthMonitor, relayHealth: HealthStatus
-): ProtocolHealth =
-  var p = ProtocolHealth.init("Filter")
-  if hm.node.get().wakuFilter == nil:
-    return p.notMounted()
-
-  if relayHealth == HealthStatus.READY:
-    return p.ready()
-
-  return p.notReady("Relay is not ready, filter will not be able to sort out messages")
-
-proc getFilterClientHealth(
-    hm: WakuNodeHealthMonitor, relayHealth: HealthStatus
-): ProtocolHealth =
-  var p = ProtocolHealth.init("Filter Client")
-  if hm.node.get().wakuFilterClient == nil:
-    return p.notMounted()
-
-  if hm.node.get().peerManager.selectPeer(WakuFilterSubscribeCodec).isSome():
-    return p.ready()
-
-  return p.notReady("No Filter service peer available yet")
-
-proc getStoreHealth(hm: WakuNodeHealthMonitor): ProtocolHealth =
-  var p = ProtocolHealth.init("Store")
-  if hm.node.get().wakuStore == nil:
-    return p.notMounted()
-
-  return p.ready()
-
-proc getStoreClientHealth(hm: WakuNodeHealthMonitor): ProtocolHealth =
-  var p = ProtocolHealth.init("Store Client")
-  if hm.node.get().wakuStoreClient == nil:
-    return p.notMounted()
-
-  if hm.node.get().peerManager.selectPeer(WakuStoreCodec).isSome() or
-      hm.node.get().wakuStore != nil:
-    return p.ready()
-
-  return p.notReady(
-    "No Store service peer available yet, neither Store service set up for the node"
-  )
-
-proc getLegacyStoreHealth(hm: WakuNodeHealthMonitor): ProtocolHealth =
-  var p = ProtocolHealth.init("Legacy Store")
-  if hm.node.get().wakuLegacyStore == nil:
-    return p.notMounted()
-
-  return p.ready()
-
-proc getLegacyStoreClientHealth(hm: WakuNodeHealthMonitor): ProtocolHealth =
-  var p = ProtocolHealth.init("Legacy Store Client")
-  if hm.node.get().wakuLegacyStoreClient == nil:
-    return p.notMounted()
-
-  if hm.node.get().peerManager.selectPeer(WakuLegacyStoreCodec).isSome() or
-      hm.node.get().wakuLegacyStore != nil:
-    return p.ready()
-
-  return p.notReady(
-    "No Legacy Store service peers are available yet, neither Store service set up for the node"
-  )
-
-proc getPeerExchangeHealth(hm: WakuNodeHealthMonitor): ProtocolHealth =
-  var p = ProtocolHealth.init("Peer Exchange")
-  if hm.node.get().wakuPeerExchange == nil:
-    return p.notMounted()
-
-  return p.ready()
-
-proc getRendezvousHealth(hm: WakuNodeHealthMonitor): ProtocolHealth =
-  var p = ProtocolHealth.init("Rendezvous")
-  if hm.node.get().wakuRendezvous == nil:
-    return p.notMounted()
-
-  if hm.node.get().peerManager.switch.peerStore.peers(RendezVousCodec).len() == 0:
-    return p.notReady("No Rendezvous peers are available yet")
-
-  return p.ready()
-
-proc getNodeHealthReport*(hm: WakuNodeHealthMonitor): Future[HealthReport] {.async.} =
-  var report: HealthReport
-  report.nodeHealth = hm.nodeHealth
-
-  if hm.node.isSome():
-    let relayHealth = hm.getRelayHealth()
-    report.protocolsHealth.add(relayHealth)
-    report.protocolsHealth.add(await hm.getRlnRelayHealth())
-    report.protocolsHealth.add(hm.getLightpushHealth(relayHealth.health))
-    report.protocolsHealth.add(hm.getLegacyLightpushHealth(relayHealth.health))
-    report.protocolsHealth.add(hm.getFilterHealth(relayHealth.health))
-    report.protocolsHealth.add(hm.getStoreHealth())
-    report.protocolsHealth.add(hm.getLegacyStoreHealth())
-    report.protocolsHealth.add(hm.getPeerExchangeHealth())
-    report.protocolsHealth.add(hm.getRendezvousHealth())
-
-    report.protocolsHealth.add(hm.getLightpushClientHealth(relayHealth.health))
-    report.protocolsHealth.add(hm.getLegacyLightpushClientHealth(relayHealth.health))
-    report.protocolsHealth.add(hm.getStoreClientHealth())
-    report.protocolsHealth.add(hm.getLegacyStoreClientHealth())
-    report.protocolsHealth.add(hm.getFilterClientHealth(relayHealth.health))
-  return report
-
-proc setNode*(hm: WakuNodeHealthMonitor, node: WakuNode) =
-  hm.node = some(node)
-
-proc setOverallHealth*(hm: WakuNodeHealthMonitor, health: HealthStatus) =
-  hm.nodeHealth = health
+import
+  health_monitor/[node_health_monitor, protocol_health, online_monitor, health_status]
+
+export node_health_monitor, protocol_health, online_monitor, health_status
waku/node/health_monitor/health_status.nim (new file)
@@ -0,0 +1,16 @@
+import results, std/strutils
+
+type HealthStatus* {.pure.} = enum
+  INITIALIZING
+  SYNCHRONIZING
+  READY
+  NOT_READY
+  NOT_MOUNTED
+  SHUTTING_DOWN
+
+proc init*(t: typedesc[HealthStatus], strRep: string): Result[HealthStatus, string] =
+  try:
+    let status = parseEnum[HealthStatus](strRep)
+    return ok(status)
+  except ValueError:
+    return err("Invalid HealthStatus string representation: " & strRep)
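For reference, a tiny sketch of how the new Result-based parser can be consumed (illustrative only; parseEnum matches the enum identifiers such as "READY"):

  # Sketch: parsing a HealthStatus with the Result-returning init above.
  let parsed = HealthStatus.init("READY")
  if parsed.isOk():
    echo "status: ", parsed.get()
  else:
    echo "bad status string: ", parsed.error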
waku/node/health_monitor/node_health_monitor.nim (new file)
@@ -0,0 +1,270 @@
+{.push raises: [].}
+
+import std/[options, sets, strformat], chronos, chronicles, libp2p/protocols/rendezvous
+
+import
+  ../waku_node,
+  ../../waku_rln_relay,
+  ../../waku_relay,
+  ../peer_manager,
+  ./online_monitor,
+  ./health_status,
+  ./protocol_health
+
+## This module is aimed to check the state of the "self" Waku Node
+
+type
+  HealthReport* = object
+    nodeHealth*: HealthStatus
+    protocolsHealth*: seq[ProtocolHealth]
+
+  NodeHealthMonitor* = ref object
+    nodeHealth: HealthStatus
+    node: WakuNode
+    onlineMonitor*: OnlineMonitor
+
+template checkWakuNodeNotNil(node: WakuNode, p: ProtocolHealth): untyped =
+  if node.isNil():
+    warn "WakuNode is not set, cannot check health", protocol_health_instance = $p
+    return p.notMounted()
+
+proc getRelayHealth(hm: NodeHealthMonitor): ProtocolHealth =
+  var p = ProtocolHealth.init("Relay")
+  checkWakuNodeNotNil(hm.node, p)
+
+  if hm.node.wakuRelay == nil:
+    return p.notMounted()
+
+  let relayPeers = hm.node.wakuRelay.getConnectedPubSubPeers(pubsubTopic = "").valueOr:
+    return p.notMounted()
+
+  if relayPeers.len() == 0:
+    return p.notReady("No connected peers")
+
+  return p.ready()
+
+proc getRlnRelayHealth(hm: NodeHealthMonitor): Future[ProtocolHealth] {.async.} =
+  var p = ProtocolHealth.init("Rln Relay")
+  if hm.node.isNil():
+    warn "WakuNode is not set, cannot check health", protocol_health_instance = $p
+    return p.notMounted()
+
+  if hm.node.wakuRlnRelay.isNil():
+    return p.notMounted()
+
+  const FutIsReadyTimout = 5.seconds
+
+  let isReadyStateFut = hm.node.wakuRlnRelay.isReady()
+  if not await isReadyStateFut.withTimeout(FutIsReadyTimout):
+    return p.notReady("Ready state check timed out")
+
+  try:
+    if not isReadyStateFut.completed():
+      return p.notReady("Ready state check timed out")
+    elif isReadyStateFut.read():
+      return p.ready()
+
+    return p.synchronizing()
+  except:
+    error "exception reading state: " & getCurrentExceptionMsg()
+    return p.notReady("State cannot be determined")
+
+proc getLightpushHealth(
+    hm: NodeHealthMonitor, relayHealth: HealthStatus
+): ProtocolHealth =
+  var p = ProtocolHealth.init("Lightpush")
+  checkWakuNodeNotNil(hm.node, p)
+
+  if hm.node.wakuLightPush == nil:
+    return p.notMounted()
+
+  if relayHealth == HealthStatus.READY:
+    return p.ready()
+
+  return p.notReady("Node has no relay peers to fullfill push requests")
+
+proc getLightpushClientHealth(
+    hm: NodeHealthMonitor, relayHealth: HealthStatus
+): ProtocolHealth =
+  var p = ProtocolHealth.init("Lightpush Client")
+  checkWakuNodeNotNil(hm.node, p)
+
+  if hm.node.wakuLightpushClient == nil:
+    return p.notMounted()
+
+  let selfServiceAvailable =
+    hm.node.wakuLightPush != nil and relayHealth == HealthStatus.READY
+  let servicePeerAvailable = hm.node.peerManager.selectPeer(WakuLightPushCodec).isSome()
+
+  if selfServiceAvailable or servicePeerAvailable:
+    return p.ready()
+
+  return p.notReady("No Lightpush service peer available yet")
+
+proc getLegacyLightpushHealth(
+    hm: NodeHealthMonitor, relayHealth: HealthStatus
+): ProtocolHealth =
+  var p = ProtocolHealth.init("Legacy Lightpush")
+  checkWakuNodeNotNil(hm.node, p)
+
+  if hm.node.wakuLegacyLightPush == nil:
+    return p.notMounted()
+
+  if relayHealth == HealthStatus.READY:
+    return p.ready()
+
+  return p.notReady("Node has no relay peers to fullfill push requests")
+
+proc getLegacyLightpushClientHealth(
+    hm: NodeHealthMonitor, relayHealth: HealthStatus
+): ProtocolHealth =
+  var p = ProtocolHealth.init("Legacy Lightpush Client")
+  checkWakuNodeNotNil(hm.node, p)
+
+  if hm.node.wakuLegacyLightpushClient == nil:
+    return p.notMounted()
+
+  if (hm.node.wakuLegacyLightPush != nil and relayHealth == HealthStatus.READY) or
+      hm.node.peerManager.selectPeer(WakuLegacyLightPushCodec).isSome():
+    return p.ready()
+
+  return p.notReady("No Lightpush service peer available yet")
+
+proc getFilterHealth(hm: NodeHealthMonitor, relayHealth: HealthStatus): ProtocolHealth =
+  var p = ProtocolHealth.init("Filter")
+  checkWakuNodeNotNil(hm.node, p)
+
+  if hm.node.wakuFilter == nil:
+    return p.notMounted()
+
+  if relayHealth == HealthStatus.READY:
+    return p.ready()
+
+  return p.notReady("Relay is not ready, filter will not be able to sort out messages")
+
+proc getFilterClientHealth(
+    hm: NodeHealthMonitor, relayHealth: HealthStatus
+): ProtocolHealth =
+  var p = ProtocolHealth.init("Filter Client")
+  checkWakuNodeNotNil(hm.node, p)
+
+  if hm.node.wakuFilterClient == nil:
+    return p.notMounted()
+
+  if hm.node.peerManager.selectPeer(WakuFilterSubscribeCodec).isSome():
+    return p.ready()
+
+  return p.notReady("No Filter service peer available yet")
+
+proc getStoreHealth(hm: NodeHealthMonitor): ProtocolHealth =
+  var p = ProtocolHealth.init("Store")
+  checkWakuNodeNotNil(hm.node, p)
+
+  if hm.node.wakuStore == nil:
+    return p.notMounted()
+
+  return p.ready()
+
+proc getStoreClientHealth(hm: NodeHealthMonitor): ProtocolHealth =
+  var p = ProtocolHealth.init("Store Client")
+  checkWakuNodeNotNil(hm.node, p)
+
+  if hm.node.wakuStoreClient == nil:
+    return p.notMounted()
+
+  if hm.node.peerManager.selectPeer(WakuStoreCodec).isSome() or hm.node.wakuStore != nil:
+    return p.ready()
+
+  return p.notReady(
+    "No Store service peer available yet, neither Store service set up for the node"
+  )
+
+proc getLegacyStoreHealth(hm: NodeHealthMonitor): ProtocolHealth =
+  var p = ProtocolHealth.init("Legacy Store")
+  checkWakuNodeNotNil(hm.node, p)
+
+  if hm.node.wakuLegacyStore == nil:
+    return p.notMounted()
+
+  return p.ready()
+
+proc getLegacyStoreClientHealth(hm: NodeHealthMonitor): ProtocolHealth =
+  var p = ProtocolHealth.init("Legacy Store Client")
+  checkWakuNodeNotNil(hm.node, p)
+
+  if hm.node.wakuLegacyStoreClient == nil:
+    return p.notMounted()
+
+  if hm.node.peerManager.selectPeer(WakuLegacyStoreCodec).isSome() or
+      hm.node.wakuLegacyStore != nil:
+    return p.ready()
+
+  return p.notReady(
+    "No Legacy Store service peers are available yet, neither Store service set up for the node"
+  )
+
+proc getPeerExchangeHealth(hm: NodeHealthMonitor): ProtocolHealth =
+  var p = ProtocolHealth.init("Peer Exchange")
+  checkWakuNodeNotNil(hm.node, p)
+
+  if hm.node.wakuPeerExchange == nil:
+    return p.notMounted()
+
+  return p.ready()
+
+proc getRendezvousHealth(hm: NodeHealthMonitor): ProtocolHealth =
+  var p = ProtocolHealth.init("Rendezvous")
+  checkWakuNodeNotNil(hm.node, p)
+
+  if hm.node.wakuRendezvous == nil:
+    return p.notMounted()
+
+  if hm.node.peerManager.switch.peerStore.peers(RendezVousCodec).len() == 0:
+    return p.notReady("No Rendezvous peers are available yet")
+
+  return p.ready()
+
+proc getNodeHealthReport*(hm: NodeHealthMonitor): Future[HealthReport] {.async.} =
+  var report: HealthReport
+  report.nodeHealth = hm.nodeHealth
+
+  if not hm.node.isNil():
+    let relayHealth = hm.getRelayHealth()
+    report.protocolsHealth.add(relayHealth)
+    report.protocolsHealth.add(await hm.getRlnRelayHealth())
+    report.protocolsHealth.add(hm.getLightpushHealth(relayHealth.health))
+    report.protocolsHealth.add(hm.getLegacyLightpushHealth(relayHealth.health))
+    report.protocolsHealth.add(hm.getFilterHealth(relayHealth.health))
+    report.protocolsHealth.add(hm.getStoreHealth())
+    report.protocolsHealth.add(hm.getLegacyStoreHealth())
+    report.protocolsHealth.add(hm.getPeerExchangeHealth())
+    report.protocolsHealth.add(hm.getRendezvousHealth())
+
+    report.protocolsHealth.add(hm.getLightpushClientHealth(relayHealth.health))
+    report.protocolsHealth.add(hm.getLegacyLightpushClientHealth(relayHealth.health))
+    report.protocolsHealth.add(hm.getStoreClientHealth())
+    report.protocolsHealth.add(hm.getLegacyStoreClientHealth())
+    report.protocolsHealth.add(hm.getFilterClientHealth(relayHealth.health))
+  return report
+
+proc setNodeToHealthMonitor*(hm: NodeHealthMonitor, node: WakuNode) =
+  hm.node = node
+
+proc setOverallHealth*(hm: NodeHealthMonitor, health: HealthStatus) =
+  hm.nodeHealth = health
+
+proc startHealthMonitor*(hm: NodeHealthMonitor) =
+  hm.onlineMonitor.startOnlineMonitor()
+
+proc stopHealthMonitor*(hm: NodeHealthMonitor) {.async.} =
+  await hm.onlineMonitor.stopOnlineMonitor()
+
+proc new*(
+    T: type NodeHealthMonitor,
+    dnsNameServers = @[parseIpAddress("1.1.1.1"), parseIpAddress("1.0.0.1")],
+): T =
+  T(
+    nodeHealth: INITIALIZING,
+    node: nil,
+    onlineMonitor: OnlineMonitor.init(dnsNameServers),
+  )
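A short sketch of how a caller might consume the report produced by getNodeHealthReport above (illustrative only; waitFor comes from chronos and the monitor/node setup is assumed to be done as in the factory code earlier in this diff):

  # Sketch: printing the per-protocol health gathered by the monitor.
  let report = waitFor hm.getNodeHealthReport()
  echo "node health: ", report.nodeHealth
  for ph in report.protocolsHealth:
    echo $ph   # e.g. "protocol: Relay, health: READY, description: None"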
77
waku/node/health_monitor/online_monitor.nim
Normal file
77
waku/node/health_monitor/online_monitor.nim
Normal file
@ -0,0 +1,77 @@
import std/sequtils
import chronos, chronicles, libp2p/nameresolving/dnsresolver, libp2p/peerstore

import ../peer_manager/waku_peer_store, waku/waku_core/peers

type
  OnOnlineStateChange* = proc(online: bool) {.gcsafe, raises: [].}

  OnlineMonitor* = ref object
    onOnlineStateChange: OnOnlineStateChange
    dnsNameServers*: seq[IpAddress]
    onlineStateObservers: seq[OnOnlineStateChange]
    networkConnLoopHandle: Future[void] # node: WakuNode
    peerStore: PeerStore
    online: bool

proc checkInternetConnectivity(
    nameServerIps: seq[IpAddress], timeout = 2.seconds
): Future[bool] {.async.} =
  const DNSCheckDomain = "one.one.one.one"
  let nameServers = nameServerIps.mapIt(initTAddress(it, Port(53)))
  let dnsResolver = DnsResolver.new(nameServers)

  # Resolve domain IP
  let resolved = await dnsResolver.resolveIp(DNSCheckDomain, 0.Port, Domain.AF_UNSPEC)
  if resolved.len > 0:
    return true
  else:
    return false

proc updateOnlineState(self: OnlineMonitor) {.async.} =
  if self.onlineStateObservers.len == 0:
    trace "No online state observers registered, cannot notify about online state change"
    return

  let numConnectedPeers =
    if self.peerStore.isNil():
      0
    else:
      self.peerStore.peers().countIt(it.connectedness == Connected)

  self.online =
    if numConnectedPeers > 0:
      true
    else:
      await checkInternetConnectivity(self.dnsNameServers)

  for onlineStateObserver in self.onlineStateObservers:
    onlineStateObserver(self.online)

proc networkConnectivityLoop(self: OnlineMonitor): Future[void] {.async.} =
  ## Checks periodically whether the node is online or not
  ## and triggers any change that depends on the network connectivity state
  while true:
    await self.updateOnlineState()
    await sleepAsync(15.seconds)

proc startOnlineMonitor*(self: OnlineMonitor) =
  self.networkConnLoopHandle = self.networkConnectivityLoop()

proc stopOnlineMonitor*(self: OnlineMonitor) {.async.} =
  if not self.networkConnLoopHandle.isNil():
    await self.networkConnLoopHandle.cancelAndWait()

proc setPeerStoreToOnlineMonitor*(self: OnlineMonitor, peerStore: PeerStore) =
  self.peerStore = peerStore

proc addOnlineStateObserver*(self: OnlineMonitor, observer: OnOnlineStateChange) =
  ## Adds an observer that will be called when the online state changes
  if observer notin self.onlineStateObservers:
    self.onlineStateObservers.add(observer)

proc amIOnline*(self: OnlineMonitor): bool =
  return self.online

proc init*(T: type OnlineMonitor, dnsNameServers: seq[IpAddress]): OnlineMonitor =
  T(dnsNameServers: dnsNameServers, onlineStateObservers: @[])
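The monitor treats having at least one connected peer as proof of connectivity and only falls back to the DNS probe when the peer store is empty or unset; note that updateOnlineState returns early while no observers are registered, so observers should be added before the loop is expected to do useful work. A hedged usage sketch follows; the observer body and the `switch` variable are illustrative, not part of the diff.

# Illustrative only; assumes chronos, chronicles and std/net (parseIpAddress) are imported
# and that `switch` is an existing libp2p Switch.
let onlineMonitor = OnlineMonitor.init(
  @[parseIpAddress("1.1.1.1"), parseIpAddress("1.0.0.1")]
)
onlineMonitor.setPeerStoreToOnlineMonitor(switch.peerStore)
onlineMonitor.addOnlineStateObserver(
  proc(online: bool) {.gcsafe, raises: [].} =
    info "online state changed", online = online
)
onlineMonitor.startOnlineMonitor()            # re-evaluates every 15 seconds
# ...
waitFor onlineMonitor.stopOnlineMonitor()
echo onlineMonitor.amIOnline()                # last computed state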
waku/node/health_monitor/protocol_health.nim  (new file, 46 lines)
@ -0,0 +1,46 @@
import std/[options, strformat]
import ./health_status

type ProtocolHealth* = object
  protocol*: string
  health*: HealthStatus
  desc*: Option[string] ## describes why a certain protocol is considered `NOT_READY`

proc notReady*(p: var ProtocolHealth, desc: string): ProtocolHealth =
  p.health = HealthStatus.NOT_READY
  p.desc = some(desc)
  return p

proc ready*(p: var ProtocolHealth): ProtocolHealth =
  p.health = HealthStatus.READY
  p.desc = none[string]()
  return p

proc notMounted*(p: var ProtocolHealth): ProtocolHealth =
  p.health = HealthStatus.NOT_MOUNTED
  p.desc = none[string]()
  return p

proc synchronizing*(p: var ProtocolHealth): ProtocolHealth =
  p.health = HealthStatus.SYNCHRONIZING
  p.desc = none[string]()
  return p

proc initializing*(p: var ProtocolHealth): ProtocolHealth =
  p.health = HealthStatus.INITIALIZING
  p.desc = none[string]()
  return p

proc shuttingDown*(p: var ProtocolHealth): ProtocolHealth =
  p.health = HealthStatus.SHUTTING_DOWN
  p.desc = none[string]()
  return p

proc `$`*(p: ProtocolHealth): string =
  return fmt"protocol: {p.protocol}, health: {p.health}, description: {p.desc}"

proc init*(p: typedesc[ProtocolHealth], protocol: string): ProtocolHealth =
  let p = ProtocolHealth(
    protocol: protocol, health: HealthStatus.NOT_MOUNTED, desc: none[string]()
  )
  return p
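Each helper mutates the passed ProtocolHealth and also returns it, so a report entry can be updated and consumed in a single expression. A small hedged example, where the protocol name, the description and the `report` variable (assumed to be a HealthReport under construction) are made up:

# Illustrative use of the ProtocolHealth helpers; `report` is an assumed HealthReport.
var relayHealth = ProtocolHealth.init("relay")        # starts as NOT_MOUNTED
discard relayHealth.initializing()
echo relayHealth                                      # prints protocol, health and description
report.protocolsHealth.add(relayHealth.notReady("no peers on the configured shards"))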
@ -8,7 +8,6 @@ import
   libp2p/multistream,
   libp2p/muxers/muxer,
   libp2p/nameresolving/nameresolver,
-  libp2p/nameresolving/dnsresolver,
   libp2p/peerstore

 import
@ -21,6 +20,7 @@ import
   ../../waku_enr/sharding,
   ../../waku_enr/capabilities,
   ../../waku_metadata,
+  ../health_monitor/online_monitor,
   ./peer_store/peer_storage,
   ./waku_peer_store
@ -74,8 +74,6 @@
   # Max peers that we allow from the same IP
   DefaultColocationLimit* = 5

-  DNSCheckDomain = "one.one.one.one"
-
 type ConnectionChangeHandler* = proc(
   peerId: PeerId, peerEvent: PeerEventKind
 ): Future[void] {.gcsafe, raises: [Defect].}
@ -98,16 +96,12 @@ type PeerManager* = ref object of RootObj
   started: bool
   shardedPeerManagement: bool # temp feature flag
   onConnectionChange*: ConnectionChangeHandler
-  dnsNameServers*: seq[IpAddress]
-  online: bool
+  online: bool ## state managed by online_monitor module

 #~~~~~~~~~~~~~~~~~~~#
 # Helper Functions #
 #~~~~~~~~~~~~~~~~~~~#

-template isOnline*(self: PeerManager): bool =
-  self.online
-
 proc calculateBackoff(
     initialBackoffInSec: int, backoffFactor: int, failedAttempts: int
 ): timer.Duration =
@ -543,35 +537,9 @@ proc getStreamByPeerIdAndProtocol*(

   return ok(streamRes.get())

-proc checkInternetConnectivity(
-    nameServerIps: seq[IpAddress], timeout = 2.seconds
-): Future[bool] {.async.} =
-  var nameServers: seq[TransportAddress]
-  for ip in nameServerIps:
-    nameServers.add(initTAddress(ip, Port(53))) # Assume all servers use port 53
-
-  let dnsResolver = DnsResolver.new(nameServers)
-
-  # Resolve domain IP
-  let resolved = await dnsResolver.resolveIp(DNSCheckDomain, 0.Port, Domain.AF_UNSPEC)
-
-  if resolved.len > 0:
-    return true
-  else:
-    return false
-
-proc updateOnlineState*(pm: PeerManager) {.async.} =
-  let numConnectedPeers =
-    pm.switch.peerStore.peers().countIt(it.connectedness == Connected)
-
-  if numConnectedPeers > 0:
-    pm.online = true
-  else:
-    pm.online = await checkInternetConnectivity(pm.dnsNameServers)
-
 proc connectToRelayPeers*(pm: PeerManager) {.async.} =
   # only attempt if current node is online
-  if not pm.isOnline():
+  if not pm.online:
     error "connectToRelayPeers: won't attempt new connections - node is offline"
     return
@ -739,6 +707,7 @@ proc onPeerEvent(pm: PeerManager, peerId: PeerId, event: PeerEvent) {.async.} =
       debug "Pruning connection due to ip colocation", peerId = peerId, ip = ip
       asyncSpawn(pm.switch.disconnect(peerId))
       peerStore.delete(peerId)
+
     if not pm.onConnectionChange.isNil():
       # we don't want to await for the callback to finish
       asyncSpawn pm.onConnectionChange(peerId, Joined)
@ -753,6 +722,7 @@ proc onPeerEvent(pm: PeerManager, peerId: PeerId, event: PeerEvent) {.async.} =
         if pm.ipTable[ip].len == 0:
           pm.ipTable.del(ip)
           break
+
     if not pm.onConnectionChange.isNil():
       # we don't want to await for the callback to finish
       asyncSpawn pm.onConnectionChange(peerId, Left)
@ -809,6 +779,10 @@ proc logAndMetrics(pm: PeerManager) {.async.} =
         protoStreamsOut.float64, labelValues = [$Direction.Out, proto]
       )

+proc getOnlineStateObserver*(pm: PeerManager): OnOnlineStateChange =
+  return proc(online: bool) {.gcsafe, raises: [].} =
+    pm.online = online
+
 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
 # Pruning and Maintenance (Stale Peers Management)    #
 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
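With this change the PeerManager no longer probes DNS itself; it only exposes a closure through which the OnlineMonitor pushes the computed state into `pm.online`. The wiring is expected to happen in the node setup code, roughly as in the following hedged sketch; the variable names and the accessibility of the peer store are assumptions, not part of the diff.

# Sketch of the intended wiring; the exact location in the factory code may differ.
onlineMonitor.setPeerStoreToOnlineMonitor(peerManager.switch.peerStore)
onlineMonitor.addOnlineStateObserver(peerManager.getOnlineStateObserver())
# From here on, pm.online is refreshed by the monitor's 15-second loop.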
@ -817,7 +791,7 @@ proc manageRelayPeers*(pm: PeerManager) {.async.} =
   if pm.wakuMetadata.shards.len == 0:
     return

-  if not pm.isOnline():
+  if not pm.online:
     error "manageRelayPeers: won't attempt new connections - node is offline"
     return
@ -1048,7 +1022,6 @@ proc new*(
     maxFailedAttempts = MaxFailedAttempts,
     colocationLimit = DefaultColocationLimit,
     shardedPeerManagement = false,
-    dnsNameServers = @[parseIpAddress("1.1.1.1"), parseIpAddress("1.0.0.1")],
 ): PeerManager {.gcsafe.} =
   let capacity = switch.peerStore.capacity
   let maxConnections = switch.connManager.inSema.size
@ -1099,7 +1072,6 @@ proc new*(
     maxFailedAttempts: maxFailedAttempts,
     colocationLimit: colocationLimit,
     shardedPeerManagement: shardedPeerManagement,
-    dnsNameServers: dnsNameServers,
     online: true,
   )
@ -40,7 +40,7 @@ type RestServerConf* = object
   relayCacheCapacity*: uint32

 proc startRestServerEssentials*(
-    nodeHealthMonitor: WakuNodeHealthMonitor, conf: RestServerConf, portsShift: uint16
+    nodeHealthMonitor: NodeHealthMonitor, conf: RestServerConf, portsShift: uint16
 ): Result[WakuRestServerRef, string] =
   let requestErrorHandler: RestRequestErrorHandler = proc(
     error: RestRequestError, request: HttpRequestRef
@ -11,7 +11,7 @@ const ROUTE_HEALTH* = "/health"
 const FutHealthReportTimeout = 5.seconds

 proc installHealthApiHandler*(
-    router: var RestRouter, nodeHealthMonitor: WakuNodeHealthMonitor
+    router: var RestRouter, nodeHealthMonitor: NodeHealthMonitor
 ) =
   router.api(MethodGet, ROUTE_HEALTH) do() -> RestApiResponse:
     let healthReportFut = nodeHealthMonitor.getNodeHealthReport()
@ -1,5 +1,6 @@
 {.push raises: [].}

+import results
 import chronicles, json_serialization, json_serialization/std/options
 import ../../../waku_node, ../serdes
@ -31,13 +32,10 @@ proc readValue*(
       )

     let fieldValue = reader.readValue(string)
-    try:
-      health = some(HealthStatus.init(fieldValue))
-      protocol = some(fieldName)
-    except ValueError:
-      reader.raiseUnexpectedValue(
-        "Invalid `health` value: " & getCurrentExceptionMsg()
-      )
+    let h = HealthStatus.init(fieldValue).valueOr:
+      reader.raiseUnexpectedValue("Invalid `health` value: " & $error)
+    health = some(h)
+    protocol = some(fieldName)

   value = ProtocolHealth(protocol: protocol.get(), health: health.get(), desc: desc)
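Both readValue hunks replace exception-based parsing with the Result now returned by HealthStatus.init, using the `valueOr` template from nim-results, which runs its block (with `error` injected) only when the Result holds an error. A small self-contained illustration of the same pattern, with made-up names and unrelated to this codebase:

# Hedged illustration of the results `valueOr` pattern; not part of the diff.
import std/strutils
import results

proc parsePort(s: string): Result[int, string] =
  try:
    return ok(parseInt(s))
  except ValueError:
    return err("not a number: " & s)

let port = parsePort("8080").valueOr:
  quit("invalid port: " & error)   # `error` is the value held by the err branch
echo port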
@ -63,10 +61,11 @@ proc readValue*(
         reader.raiseUnexpectedField(
           "Multiple `nodeHealth` fields found", "HealthReport"
         )
-      try:
-        nodeHealth = some(HealthStatus.init(reader.readValue(string)))
-      except ValueError:
-        reader.raiseUnexpectedValue("Invalid `health` value")
+      let health = HealthStatus.init(reader.readValue(string)).valueOr:
+        reader.raiseUnexpectedValue("Invalid `health` value: " & $error)
+      nodeHealth = some(health)
     of "protocolsHealth":
       if protocolsHealth.isSome():
         reader.raiseUnexpectedField(