mirror of https://github.com/logos-messaging/logos-messaging-nim.git
synced 2026-01-07 16:33:08 +00:00
fix: network monitor improvements (#2939)
This commit is contained in:
parent d4e8a0dab6
commit 8058323760
@@ -18,13 +18,27 @@ networkmonitor [OPTIONS]...

 The following options are available:

--l, --log-level                     Sets the log level [=LogLevel.DEBUG].
+-l, --log-level                     Sets the log level [=LogLevel.INFO].
 -t, --timeout                       Timeout to consider that the connection failed [=chronos.seconds(10)].
 -b, --bootstrap-node                Bootstrap ENR node. Argument may be repeated. [=@[""]].
 --dns-discovery-url                 URL for DNS node list in format 'enrtree://<key>@<fqdn>'.
+--pubsub-topic                      Default pubsub topic to subscribe to. Argument may be repeated..
 -r, --refresh-interval              How often new peers are discovered and connected to (in seconds) [=5].
+--cluster-id                        Cluster id that the node is running in. Node in a different cluster id is
+                                    disconnected. [=1].
+--rln-relay                         Enable spam protection through rln-relay: true|false [=true].
+--rln-relay-dynamic                 Enable waku-rln-relay with on-chain dynamic group management: true|false
+                                    [=true].
+--rln-relay-tree-path               Path to the RLN merkle tree sled db (https://github.com/spacejam/sled).
+--rln-relay-eth-client-address      HTTP address of an Ethereum testnet client e.g., http://localhost:8540/
+                                    [=http://localhost:8540/].
+--rln-relay-eth-contract-address    Address of membership contract on an Ethereum testnet.
+--rln-relay-epoch-sec               Epoch size in seconds used to rate limit RLN memberships. Default is 1 second.
+                                    [=1].
+--rln-relay-user-message-limit      Set a user message limit for the rln membership registration. Must be a positive
+                                    integer. Default is 1. [=1].
 --metrics-server                    Enable the metrics server: true|false [=true].
---metrics-server-address            Listening address of the metrics server. [=ValidIpAddress.init("127.0.0.1")].
+--metrics-server-address            Listening address of the metrics server. [=parseIpAddress("127.0.0.1")].
 --metrics-server-port               Listening HTTP port of the metrics server. [=8008].
 --metrics-rest-address              Listening address of the metrics rest server. [=127.0.0.1].
 --metrics-rest-port                 Listening HTTP port of the metrics rest server. [=8009].
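Note on the two changed defaults above: the log level drops from DEBUG to INFO, and the default metrics listen address is now written with parseIpAddress instead of ValidIpAddress.init. A minimal sketch, assuming this is std/net's parseIpAddress (the variable name below is illustrative, not taken from the repo):

# Sketch only: std/net's parseIpAddress returns an IpAddress and raises
# ValueError on malformed input, instead of yielding a wrapped ValidIpAddress.
import std/net

let defaultMetricsAddr = parseIpAddress("127.0.0.1")
echo defaultMetricsAddr   # 127.0.0.1

try:
  discard parseIpAddress("not-an-ip")
except ValueError:
  echo "invalid address literal"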
apps/networkmonitor/docker-compose.yml (new file)
@@ -0,0 +1,34 @@
+version: '3.8'
+networks:
+  monitoring:
+    driver: bridge
+
+volumes:
+  prometheus-data:
+    driver: local
+  grafana-data:
+    driver: local
+
+# Services definitions
+services:
+
+  prometheus:
+    image: docker.io/prom/prometheus:latest
+    container_name: prometheus
+    ports:
+      - 9090:9090
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yaml'
+    volumes:
+      - ./prometheus.yaml:/etc/prometheus/prometheus.yaml:ro
+      - ./data:/prometheus
+    restart: unless-stopped
+
+  grafana:
+    image: grafana/grafana-oss:latest
+    container_name: grafana
+    ports:
+      - '3000:3000'
+    volumes:
+      - grafana-data:/var/lib/grafana
+    restart: unless-stopped
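With the compose file above in place, the monitoring stack can presumably be brought up from apps/networkmonitor/ with "docker compose up -d" (assuming Docker Compose v2): Prometheus is then exposed on port 9090 with its configuration mounted read-only from ./prometheus.yaml, and Grafana on port 3000 with its state persisted in the grafana-data volume.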
@@ -45,10 +45,10 @@ const MaxConnectedPeers = 150

 const git_version* {.strdefine.} = "n/a"

-proc setDiscoveredPeersCapabilities(routingTableNodes: seq[Node]) =
+proc setDiscoveredPeersCapabilities(routingTableNodes: seq[waku_enr.Record]) =
   for capability in @[Relay, Store, Filter, Lightpush]:
     let nOfNodesWithCapability =
-      routingTableNodes.countIt(it.record.supportsCapability(capability))
+      routingTableNodes.countIt(it.supportsCapability(capability))
     info "capabilities as per ENR waku flag",
       capability = capability, amount = nOfNodesWithCapability
     networkmonitor_peer_type_as_per_enr.set(
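The per-capability counting above uses std/sequtils' countIt, now applied directly to waku_enr.Record values instead of routing-table nodes. An illustrative, self-contained sketch with plain strings standing in for records and for the supportsCapability check:

# Illustrative only: the same countIt pattern, with strings in place of
# waku_enr.Record and a simple equality test in place of supportsCapability.
import std/sequtils

let advertised = @["Relay", "Store", "Relay", "Filter"]
for capability in @["Relay", "Store", "Filter", "Lightpush"]:
  let nOfNodesWithCapability = advertised.countIt(it == capability)
  echo capability, ": ", nOfNodesWithCapability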
@@ -116,7 +116,7 @@ proc shouldReconnect(customPeerInfo: CustomPeerInfoRef): bool =

 # TODO: Split in discover, connect
 proc setConnectedPeersMetrics(
-    discoveredNodes: seq[Node],
+    discoveredNodes: seq[waku_enr.Record],
     node: WakuNode,
     timeout: chronos.Duration,
     restClient: RestClientRef,
@@ -141,20 +141,10 @@ proc setConnectedPeersMetrics(

   # iterate all newly discovered nodes
   for discNode in discoveredNodes:
-    let typedRecord = discNode.record.toTypedRecord()
-    if not typedRecord.isOk():
-      warn "could not convert record to typed record", record = discNode.record
-      continue
-
-    let secp256k1 = typedRecord.get().secp256k1
-    if not secp256k1.isSome():
-      warn "could not get secp256k1 key", typedRecord = typedRecord.get()
-      continue
-
-    let peerRes = toRemotePeerInfo(discNode.record)
+    let peerRes = toRemotePeerInfo(discNode)

     let peerInfo = peerRes.valueOr:
-      warn "error converting record to remote peer info", record = discNode.record
+      warn "error converting record to remote peer info", record = discNode
       continue

     # create new entry if new peerId found
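Net effect of this hunk: the manual typed-record conversion and secp256k1 pre-checks are dropped, and the crawler relies on toRemotePeerInfo(discNode) alone, with any conversion failure handled by the existing valueOr branch.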
@@ -169,10 +159,17 @@ proc setConnectedPeersMetrics(
     let customPeerInfo = allPeers[peerId]

     customPeerInfo.lastTimeDiscovered = currentTime
-    customPeerInfo.enr = discNode.record.toURI()
-    customPeerInfo.enrCapabilities = discNode.record.getCapabilities().mapIt($it)
+    customPeerInfo.enr = discNode.toURI()
+    customPeerInfo.enrCapabilities = discNode.getCapabilities().mapIt($it)
     customPeerInfo.discovered += 1

+    for maddr in peerInfo.addrs:
+      if $maddr notin customPeerInfo.maddrs:
+        customPeerInfo.maddrs.add $maddr
+    let typedRecord = discNode.toTypedRecord()
+    if not typedRecord.isOk():
+      warn "could not convert record to typed record", record = discNode
+      continue
     if not typedRecord.get().ip.isSome():
       warn "ip field is not set", record = typedRecord.get()
       continue
@@ -301,13 +298,13 @@ proc crawlNetwork(
   while true:
     let startTime = Moment.now()
     # discover new random nodes
-    let discoveredNodes = await wakuDiscv5.protocol.queryRandom()
+    let discoveredNodes = await wakuDiscv5.findRandomPeers()

     # nodes are nested into bucket, flat it
-    let flatNodes = wakuDiscv5.protocol.routingTable.buckets.mapIt(it.nodes).flatten()
+    #let flatNodes = wakuDiscv5.protocol.routingTable.buckets.mapIt(it.nodes).flatten()

     # populate metrics related to capabilities as advertised by the ENR (see waku field)
-    setDiscoveredPeersCapabilities(flatNodes)
+    setDiscoveredPeersCapabilities(discoveredNodes)

     # tries to connect to all newly discovered nodes
     # and populates metrics related to peers we could connect
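Since the crawler now works from the records returned by findRandomPeers rather than from the discv5 routing table, the bucket-flattening step (and, in the next hunk, the 'seen' count derived from it) no longer applies and is left commented out.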
@@ -321,10 +318,10 @@ proc crawlNetwork(
     # populate info from ip addresses
     await populateInfoFromIp(allPeersRef, restClient)

-    let totalNodes = flatNodes.len
-    let seenNodes = flatNodes.countIt(it.seen)
+    let totalNodes = discoveredNodes.len
+    #let seenNodes = totalNodes

-    info "discovered nodes: ", total = totalNodes, seen = seenNodes
+    info "discovered nodes: ", total = totalNodes #, seen = seenNodes

     # Notes:
     # we dont run ipMajorityLoop
@@ -54,6 +54,7 @@ type
     enrCapabilities*: seq[string]
     country*: string
     city*: string
+    maddrs*: seq[string]

     # only after ok connection
     lastTimeConnected*: int64
apps/networkmonitor/prometheus.yaml (new file)
@@ -0,0 +1,9 @@
+global:
+  scrape_interval: 15s
+
+scrape_configs:
+  - job_name: 'prometheus'
+    scrape_interval: 5s
+    static_configs:
+      - targets: ['host.docker.internal:8008']
+    metrics_path: '/metrics'
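The scrape target host.docker.internal:8008 lines up with the networkmonitor's default --metrics-server-port of 8008 shown in the help text above. On Linux, reaching the host this way typically also needs an extra_hosts: "host.docker.internal:host-gateway" entry on the prometheus service; that is an assumption about the deployment, not part of this change.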
@@ -83,7 +83,8 @@ proc initProtocolHandler(m: WakuMetadata) =
       remoteClusterId = response.clusterId,
       remoteShards = response.shards,
       localClusterId = m.clusterId,
-      localShards = m.shards
+      localShards = m.shards,
+      peer = conn.peerId

     discard await m.respond(conn)
