nwaku/apps/networkmonitor/networkmonitor.nim
2023-09-26 07:33:52 -04:00

467 lines
17 KiB
Nim
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

when (NimMajor, NimMinor) < (1, 4):
{.push raises: [Defect].}
else:
{.push raises: [].}
import
std/[tables,strutils,times,sequtils],
stew/results,
stew/shims/net,
chronicles,
chronicles/topics_registry,
chronos,
chronos/timer as ctime,
confutils,
eth/keys,
eth/p2p/discoveryv5/enr,
libp2p/crypto/crypto,
libp2p/nameresolving/dnsresolver,
libp2p/protocols/ping,
metrics,
metrics/chronos_httpserver,
presto/[route, server, client]
import
../../waku/waku_core,
../../waku/node/peer_manager,
../../waku/waku_node,
../../waku/waku_enr,
../../waku/waku_discv5,
../../waku/waku_dnsdisc,
./networkmonitor_metrics,
./networkmonitor_config,
./networkmonitor_utils
logScope:
topics = "networkmonitor"
const ReconnectTime = 60
const MaxConnectionRetries = 10
const AvgPingWindow = 10.0
proc setDiscoveredPeersCapabilities(
routingTableNodes: seq[Node]) =
for capability in @[Relay, Store, Filter, Lightpush]:
let nOfNodesWithCapability = routingTableNodes.countIt(it.record.supportsCapability(capability))
info "capabilities as per ENR waku flag", capability=capability, amount=nOfNodesWithCapability
peer_type_as_per_enr.set(int64(nOfNodesWithCapability), labelValues = [$capability])
# TODO: Split in discover, connect
proc setConnectedPeersMetrics(discoveredNodes: seq[Node],
node: WakuNode,
timeout: chronos.Duration,
restClient: RestClientRef,
allPeers: CustomPeersTableRef) {.async.} =
let currentTime = getTime().toUnix()
# Protocols and agent string and its count
var allProtocols: Table[string, int]
var allAgentStrings: Table[string, int]
var newPeers = 0
# iterate all newly discovered nodes
for discNode in discoveredNodes:
let typedRecord = discNode.record.toTypedRecord()
if not typedRecord.isOk():
warn "could not convert record to typed record", record=discNode.record
continue
let secp256k1 = typedRecord.get().secp256k1
if not secp256k1.isSome():
warn "could not get secp256k1 key", typedRecord=typedRecord.get()
continue
let peerRes = toRemotePeerInfo(discNode.record)
let peerInfo = peerRes.valueOr():
warn "error converting record to remote peer info", record=discNode.record
continue
# create new entry if new peerId found
let peerId = $peerInfo.peerId
let customPeerInfo = CustomPeerInfo(peerId: peerId)
if not allPeers.hasKey(peerId):
allPeers[peerId] = customPeerInfo
newPeers += 1
else:
info "already seen", peerId=peerId
allPeers[peerId].lastTimeDiscovered = currentTime
allPeers[peerId].enr = discNode.record.toURI()
allPeers[peerId].enrCapabilities = discNode.record.getCapabilities().mapIt($it)
allPeers[peerId].discovered += 1
if not typedRecord.get().ip.isSome():
warn "ip field is not set", record=typedRecord.get()
continue
let ip = $typedRecord.get().ip.get().join(".")
allPeers[peerId].ip = ip
# try to ping the peer
if getTime().toUnix() >= allPeers[peerId].lastTimeConnected + ReconnectTime and allPeers[peerId].retries < MaxConnectionRetries:
if allPeers[peerId].retries > 0:
warn "trying to dial failed peer again", peerId=peerId, retry=allPeers[peerId].retries
var pingDelay:chronos.Duration
proc ping(): Future[Result[void, string]] {.async, gcsafe.} =
try:
let conn = await node.switch.dial(peerInfo.peerId, peerInfo.addrs, PingCodec)
pingDelay = await node.libp2pPing.ping(conn)
return ok()
except CatchableError:
var msg = getCurrentExceptionMsg()
if msg == "Future operation cancelled!":
msg = "timedout"
warn "failed to ping the peer", peer=peerInfo, err=msg
allPeers[peerId].connError = msg
return err("could not ping peer: " & msg)
let timedOut = not await ping().withTimeout(timeout)
# need this check for pingDelat == 0 because there may be a conn error before timeout
if timedOut or pingDelay == 0.millis:
allPeers[peerId].retries += 1
continue
info "successfully pinged peer", peer=peerInfo, duration=pingDelay.millis
peer_ping.observe(pingDelay.millis)
if allPeers[peerId].avgPingDuration == 0.millis:
allPeers[peerId].avgPingDuration = pingDelay
# TODO: check why the calculation ends up losing precision
allPeers[peerId].avgPingDuration = int64((float64(allPeers[peerId].avgPingDuration.millis) * (AvgPingWindow - 1.0) + float64(pingDelay.millis)) / AvgPingWindow).millis
allPeers[peerId].lastPingDuration = pingDelay
# after connection, get supported protocols
let lp2pPeerStore = node.switch.peerStore
let nodeProtocols = lp2pPeerStore[ProtoBook][peerInfo.peerId]
allPeers[peerId].supportedProtocols = nodeProtocols
allPeers[peerId].lastTimeConnected = currentTime
# after connection, get user-agent
let nodeUserAgent = lp2pPeerStore[AgentBook][peerInfo.peerId]
allPeers[peerId].userAgent = nodeUserAgent
# store avaiable protocols in the network
for protocol in nodeProtocols:
if not allProtocols.hasKey(protocol):
allProtocols[protocol] = 0
allProtocols[protocol] += 1
# store available user-agents in the network
if not allAgentStrings.hasKey(nodeUserAgent):
allAgentStrings[nodeUserAgent] = 0
allAgentStrings[nodeUserAgent] += 1
debug "connected to peer", peer=allPeers[customPeerInfo.peerId]
info "number of newly discovered peers", amount=newPeers
# inform the total connections that we did in this round
let nOfOkConnections = allProtocols.len()
info "number of successful connections", amount=nOfOkConnections
# update count on each protocol
for protocol in allProtocols.keys():
let countOfProtocols = allProtocols[protocol]
peer_type_as_per_protocol.set(int64(countOfProtocols), labelValues = [protocol])
info "supported protocols in the network", protocol=protocol, count=countOfProtocols
# update count on each user-agent
for userAgent in allAgentStrings.keys():
let countOfUserAgent = allAgentStrings[userAgent]
peer_user_agents.set(int64(countOfUserAgent), labelValues = [userAgent])
info "user agents participating in the network", userAgent=userAgent, count=countOfUserAgent
proc populateInfoFromIp(allPeersRef: CustomPeersTableRef,
restClient: RestClientRef) {.async.} =
for peer in allPeersRef.keys():
if allPeersRef[peer].country != "" and allPeersRef[peer].city != "":
continue
# TODO: Update also if last update > x
if allPeersRef[peer].ip == "":
continue
# get more info the peers from its ip address
var location: NodeLocation
try:
# IP-API endpoints are now limited to 45 HTTP requests per minute
await sleepAsync(1400.millis)
let response = await restClient.ipToLocation(allPeersRef[peer].ip)
location = response.data
except CatchableError:
warn "could not get location", ip=allPeersRef[peer].ip
continue
allPeersRef[peer].country = location.country
allPeersRef[peer].city = location.city
# TODO: Split in discovery, connections, and ip2location
# crawls the network discovering peers and trying to connect to them
# metrics are processed and exposed
proc crawlNetwork(node: WakuNode,
wakuDiscv5: WakuDiscoveryV5,
restClient: RestClientRef,
conf: NetworkMonitorConf,
allPeersRef: CustomPeersTableRef) {.async.} =
let crawlInterval = conf.refreshInterval * 1000
while true:
# discover new random nodes
let discoveredNodes = await wakuDiscv5.protocol.queryRandom()
# nodes are nested into bucket, flat it
let flatNodes = wakuDiscv5.protocol.routingTable.buckets.mapIt(it.nodes).flatten()
# populate metrics related to capabilities as advertised by the ENR (see waku field)
setDiscoveredPeersCapabilities(flatNodes)
# tries to connect to all newly discovered nodes
# and populates metrics related to peers we could connect
# note random discovered nodes can be already known
await setConnectedPeersMetrics(discoveredNodes, node, conf.timeout, restClient, allPeersRef)
# populate info from ip addresses
await populateInfoFromIp(allPeersRef, restClient)
let totalNodes = flatNodes.len
let seenNodes = flatNodes.countIt(it.seen)
info "discovered nodes: ", total=totalNodes, seen=seenNodes
# Notes:
# we dont run ipMajorityLoop
# we dont run revalidateLoop
await sleepAsync(crawlInterval.millis)
proc retrieveDynamicBootstrapNodes(dnsDiscovery: bool, dnsDiscoveryUrl: string, dnsDiscoveryNameServers: seq[ValidIpAddress]): Result[seq[RemotePeerInfo], string] =
if dnsDiscovery and dnsDiscoveryUrl != "":
# DNS discovery
debug "Discovering nodes using Waku DNS discovery", url=dnsDiscoveryUrl
var nameServers: seq[TransportAddress]
for ip in dnsDiscoveryNameServers:
nameServers.add(initTAddress(ip, Port(53))) # Assume all servers use port 53
let dnsResolver = DnsResolver.new(nameServers)
proc resolver(domain: string): Future[string] {.async, gcsafe.} =
trace "resolving", domain=domain
let resolved = await dnsResolver.resolveTxt(domain)
return resolved[0] # Use only first answer
var wakuDnsDiscovery = WakuDnsDiscovery.init(dnsDiscoveryUrl, resolver)
if wakuDnsDiscovery.isOk():
return wakuDnsDiscovery.get().findPeers()
.mapErr(proc (e: cstring): string = $e)
else:
warn "Failed to init Waku DNS discovery"
debug "No method for retrieving dynamic bootstrap nodes specified."
ok(newSeq[RemotePeerInfo]()) # Return an empty seq by default
proc getBootstrapFromDiscDns(conf: NetworkMonitorConf): Result[seq[enr.Record], string] =
try:
let dnsNameServers = @[ValidIpAddress.init("1.1.1.1"), ValidIpAddress.init("1.0.0.1")]
let dynamicBootstrapNodesRes = retrieveDynamicBootstrapNodes(true, conf.dnsDiscoveryUrl, dnsNameServers)
if not dynamicBootstrapNodesRes.isOk():
error("failed discovering peers from DNS")
let dynamicBootstrapNodes = dynamicBootstrapNodesRes.get()
# select dynamic bootstrap nodes that have an ENR containing a udp port.
# Discv5 only supports UDP https://github.com/ethereum/devp2p/blob/master/discv5/discv5-theory.md)
var discv5BootstrapEnrs: seq[enr.Record]
for n in dynamicBootstrapNodes:
if n.enr.isSome():
let
enr = n.enr.get()
tenrRes = enr.toTypedRecord()
if tenrRes.isOk() and (tenrRes.get().udp.isSome() or tenrRes.get().udp6.isSome()):
discv5BootstrapEnrs.add(enr)
return ok(discv5BootstrapEnrs)
except CatchableError:
error("failed discovering peers from DNS")
proc initAndStartApp(conf: NetworkMonitorConf): Result[(WakuNode, WakuDiscoveryV5), string] =
let bindIp = try:
ValidIpAddress.init("0.0.0.0")
except CatchableError:
return err("could not start node: " & getCurrentExceptionMsg())
let extIp = try:
ValidIpAddress.init("127.0.0.1")
except CatchableError:
return err("could not start node: " & getCurrentExceptionMsg())
let
# some hardcoded parameters
rng = keys.newRng()
key = crypto.PrivateKey.random(Secp256k1, rng[])[]
nodeTcpPort = Port(60000)
nodeUdpPort = Port(9000)
flags = CapabilitiesBitfield.init(lightpush = false, filter = false, store = false, relay = true)
var builder = EnrBuilder.init(key)
builder.withIpAddressAndPorts(
ipAddr = some(extIp),
tcpPort = some(nodeTcpPort),
udpPort = some(nodeUdpPort),
)
builder.withWakuCapabilities(flags)
let recordRes = builder.build()
let record =
if recordRes.isErr():
return err("cannot build record: " & $recordRes.error)
else: recordRes.get()
var nodeBuilder = WakuNodeBuilder.init()
nodeBuilder.withNodeKey(key)
nodeBuilder.withRecord(record)
let res = nodeBuilder.withNetworkConfigurationDetails(bindIp, nodeTcpPort)
if res.isErr():
return err("node building error" & $res.error)
let nodeRes = nodeBuilder.build()
let node =
if nodeRes.isErr():
return err("node building error" & $res.error)
else: nodeRes.get()
var discv5BootstrapEnrsRes = getBootstrapFromDiscDns(conf)
if discv5BootstrapEnrsRes.isErr():
error("failed discovering peers from DNS")
var discv5BootstrapEnrs = discv5BootstrapEnrsRes.get()
# parse enrURIs from the configuration and add the resulting ENRs to the discv5BootstrapEnrs seq
for enrUri in conf.bootstrapNodes:
addBootstrapNode(enrUri, discv5BootstrapEnrs)
# discv5
let discv5Conf = WakuDiscoveryV5Config(
discv5Config: none(DiscoveryConfig),
address: bindIp,
port: nodeUdpPort,
privateKey: keys.PrivateKey(key.skkey),
bootstrapRecords: discv5BootstrapEnrs,
autoupdateRecord: false
)
let wakuDiscv5 = WakuDiscoveryV5.new(node.rng, discv5Conf, some(record))
try:
wakuDiscv5.protocol.open()
except CatchableError:
return err("could not start node: " & getCurrentExceptionMsg())
ok((node, wakuDiscv5))
proc startRestApiServer(conf: NetworkMonitorConf,
allPeersInfo: CustomPeersTableRef,
numMessagesPerContentTopic: ContentTopicMessageTableRef
): Result[void, string] =
try:
let serverAddress = initTAddress(conf.metricsRestAddress & ":" & $conf.metricsRestPort)
proc validate(pattern: string, value: string): int =
if pattern.startsWith("{") and pattern.endsWith("}"): 0
else: 1
var router = RestRouter.init(validate)
router.installHandler(allPeersInfo, numMessagesPerContentTopic)
var sres = RestServerRef.new(router, serverAddress)
let restServer = sres.get()
restServer.start()
except CatchableError:
error("could not start rest api server")
ok()
# handles rx of messages over a topic (see subscribe)
# counts the number of messages per content topic
proc subscribeAndHandleMessages(node: WakuNode,
pubsubTopic: PubsubTopic,
msgPerContentTopic: ContentTopicMessageTableRef) =
# handle function
proc handler(pubsubTopic: PubsubTopic, msg: WakuMessage): Future[void] {.async, gcsafe.} =
trace "rx message", pubsubTopic=pubsubTopic, contentTopic=msg.contentTopic
# If we reach a table limit size, remove c topics with the least messages.
let tableSize = 100
if msgPerContentTopic.len > (tableSize - 1):
let minIndex = toSeq(msgPerContentTopic.values()).minIndex()
msgPerContentTopic.del(toSeq(msgPerContentTopic.keys())[minIndex])
# TODO: Will overflow at some point
# +1 if content topic existed, init to 1 otherwise
if msgPerContentTopic.hasKey(msg.contentTopic):
msgPerContentTopic[msg.contentTopic] += 1
else:
msgPerContentTopic[msg.contentTopic] = 1
node.subscribe((kind: PubsubSub, topic: pubsubTopic), some(handler))
when isMainModule:
# known issue: confutils.nim(775, 17) Error: can raise an unlisted exception: ref IOError
{.pop.}
let confRes = NetworkMonitorConf.loadConfig()
if confRes.isErr():
error "could not load cli variables", err=confRes.error
quit(1)
let conf = confRes.get()
info "cli flags", conf=conf
if conf.logLevel != LogLevel.NONE:
setLogLevel(conf.logLevel)
# list of peers that we have discovered/connected
var allPeersInfo = CustomPeersTableRef()
# content topic and the number of messages that were received
var msgPerContentTopic = ContentTopicMessageTableRef()
# start metrics server
if conf.metricsServer:
let res = startMetricsServer(conf.metricsServerAddress, Port(conf.metricsServerPort))
if res.isErr():
error "could not start metrics server", err=res.error
quit(1)
# start rest server for custom metrics
let res = startRestApiServer(conf, allPeersInfo, msgPerContentTopic)
if res.isErr():
error "could not start rest api server", err=res.error
quit(1)
# create a rest client
let clientRest = RestClientRef.new(url="http://ip-api.com",
connectTimeout=ctime.seconds(2))
if clientRest.isErr():
error "could not start rest api client", err=res.error
quit(1)
let restClient = clientRest.get()
# start waku node
let nodeRes = initAndStartApp(conf)
if nodeRes.isErr():
error "could not start node"
quit 1
let (node, discv5) = nodeRes.get()
waitFor node.mountRelay()
waitFor node.mountLibp2pPing()
# Subscribe the node to the default pubsubtopic, to count messages
subscribeAndHandleMessages(node, DefaultPubsubTopic, msgPerContentTopic)
# spawn the routine that crawls the network
# TODO: split into 3 routines (discovery, connections, ip2location)
asyncSpawn crawlNetwork(node, discv5, restClient, conf, allPeersInfo)
runForever()