expose more libp2p performance and queuing metrics (#678)
* gossipsub: add duplicate arrival metrics. Adds counters for received (deduplicated) messages and for duplicates recognized by the seen cache. Note that duplicates that are not recognized (those arriving after seenTTL) are not counted as duplicates here either.
* gossipsub: add mcache (message cache for responding to IWANT) stats. It is generally assumed that IWANT messages arrive while mcache still holds the message; these stats verify that assumption.
* libp2p: add internal TX queuing stats. Messages are queued in TX before being written to the stream, but until now there were no statistics about these queues. This patch adds queue-length and queuing-time statistics.
* add a Grafana libp2p dashboard built on the newly exposed metrics.
* enable libp2p_mplex_metrics in the nimble test.

Signed-off-by: Csaba Kiraly <csaba.kiraly@gmail.com>
parent 868ecab54f
commit 9973b9466d
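A note on the pattern used throughout this patch: the new mplex queue metrics are declared with nim-metrics and compiled in only when the corresponding define (-d:libp2p_mplex_metrics) is set, so builds without the flag pay nothing; the new gossipsub counters are always on. (nim-metrics itself additionally requires -d:metrics for collection to be active.) A minimal sketch of the conditional pattern, using an illustrative demo_events counter that is not part of the patch:

import metrics

when defined(libp2p_mplex_metrics):
  declareCounter demo_events, "sketch: a counter compiled in only on demand"

proc onEvent() =
  when defined(libp2p_mplex_metrics):
    demo_events.inc()  # this statement only exists when the define is set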
@@ -27,7 +27,7 @@ const nimflags =
 
 proc runTest(filename: string, verify: bool = true, sign: bool = true,
              moreoptions: string = "") =
-  var excstr = "nim c --opt:speed -d:debug -d:libp2p_agents_metrics -d:libp2p_protobuf_metrics -d:libp2p_network_protocols_metrics "
+  var excstr = "nim c --opt:speed -d:debug -d:libp2p_agents_metrics -d:libp2p_protobuf_metrics -d:libp2p_network_protocols_metrics -d:libp2p_mplex_metrics "
   excstr.add(" " & getEnv("NIMFLAGS") & " ")
   excstr.add(" " & nimflags & " ")
   excstr.add(" -d:libp2p_pubsub_sign=" & $sign)
@@ -21,6 +21,12 @@ export connection
 logScope:
   topics = "libp2p mplexchannel"
 
+when defined(libp2p_mplex_metrics):
+  declareHistogram libp2p_mplex_qlen, "message queue length",
+    buckets = [0.0, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0]
+  declareCounter libp2p_mplex_qlenclose, "closed because of max queuelen"
+  declareHistogram libp2p_mplex_qtime, "message queuing time"
+
 when defined(libp2p_network_protocols_metrics):
   declareCounter libp2p_protocols_bytes, "total sent or received bytes", ["protocol", "direction"]
 
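libp2p_mplex_qlen uses explicit power-of-two buckets; Prometheus histogram buckets are cumulative, each counting observations less than or equal to its upper bound, with an implicit +Inf bucket on top. A small sketch under those semantics (demo_qlen is an illustrative name, not part of the patch):

import metrics

declareHistogram demo_qlen, "sketch: queue length distribution",
  buckets = [0.0, 1.0, 2.0, 4.0, 8.0]

demo_qlen.observe(0)    # counts toward le=0.0 and, cumulatively, every larger bucket
demo_qlen.observe(3)    # first matching bound is le=4.0
demo_qlen.observe(100)  # beyond all bounds: only the implicit le=+Inf bucket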
@@ -187,6 +193,8 @@ proc prepareWrite(s: LPChannel, msg: seq[byte]): Future[void] {.async.} =
   if s.writes >= MaxWrites:
     debug "Closing connection, too many in-flight writes on channel",
       s, conn = s.conn, writes = s.writes
+    when defined(libp2p_mplex_metrics):
+      libp2p_mplex_qlenclose.inc()
     await s.reset()
     await s.conn.close()
     return
@@ -201,8 +209,14 @@ proc completeWrite(
   try:
     s.writes += 1
 
-    await fut
+    when defined(libp2p_mplex_metrics):
+      libp2p_mplex_qlen.observe(s.writes.int64 - 1)
+      libp2p_mplex_qtime.time:
+        await fut
+    else:
+      await fut
 
-    when defined(libp2p_network_protocols_metrics):
+    when defined(libp2p_network_protocol_metrics):
       if s.tag.len > 0:
         libp2p_protocols_bytes.inc(msgLen.int64, labelValues=[s.tag, "out"])
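Two details of the instrumentation above: libp2p_mplex_qlen.observe(s.writes.int64 - 1) samples how many other writes were already in flight when this one entered the queue (the write itself is excluded), and libp2p_mplex_qtime.time: is the nim-metrics template that times its body and records the elapsed seconds in the histogram, which works across await as shown. A self-contained sketch of the timing template (demo names are illustrative, not part of the patch):

import chronos, metrics

declareHistogram demo_qtime, "sketch: seconds spent waiting for a write"

proc demoWait(fut: Future[void]) {.async.} =
  demo_qtime.time:  # observes the wall-clock duration of the block
    await fut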
@@ -38,6 +38,8 @@ logScope:
 declareCounter(libp2p_gossipsub_failed_publish, "number of failed publish")
 declareCounter(libp2p_gossipsub_invalid_topic_subscription, "number of invalid topic subscriptions that happened")
 declareCounter(libp2p_gossipsub_duplicate_during_validation, "number of duplicates received during message validation")
+declareCounter(libp2p_gossipsub_duplicate, "number of duplicates received")
+declareCounter(libp2p_gossipsub_received, "number of messages received (deduplicated)")
 
 proc init*(_: type[GossipSubParams]): GossipSubParams =
   GossipSubParams(
@@ -385,9 +387,13 @@ method rpcHandler*(g: GossipSub,
 
       g.validationSeen.withValue(msgIdSalted, seen): seen[].incl(peer)
 
+      libp2p_gossipsub_duplicate.inc()
+
       # onto the next message
       continue
 
+    libp2p_gossipsub_received.inc()
+
     # avoid processing messages we are not interested in
     if msg.topicIDs.allIt(it notin g.topics):
       debug "Dropping message of topic without subscription", msgId = shortLog(msgId), peer
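Taken together, the two new counters make the duplicate share of incoming traffic directly computable: a message recognized by the seen cache bumps libp2p_gossipsub_duplicate and is dropped, while a message that passes the check bumps libp2p_gossipsub_received, so roughly

    duplicate share ≈ libp2p_gossipsub_duplicate / (libp2p_gossipsub_duplicate + libp2p_gossipsub_received)

As the commit message notes, duplicates arriving after seenTTL are not recognized and land in the received count, so this ratio is a lower bound on the true duplication.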
@@ -25,6 +25,7 @@ declareGauge(libp2p_gossipsub_no_peers_topics, "number of topics in mesh with no
 declareGauge(libp2p_gossipsub_low_peers_topics, "number of topics in mesh with at least one but below dlow peers")
 declareGauge(libp2p_gossipsub_healthy_peers_topics, "number of topics in mesh with at least dlow peers (but below dhigh)")
 declareCounter(libp2p_gossipsub_above_dhigh_condition, "number of above dhigh pruning branches ran", labels = ["topic"])
+declareSummary(libp2p_gossipsub_mcache_hit, "ratio of successful IWANT message cache lookups")
 
 proc grafted*(g: GossipSub, p: PubSubPeer, topic: string) {.raises: [Defect].} =
   g.withPeerStats(p.peerId) do (stats: var PeerStats):
@@ -276,12 +277,15 @@ proc handleIWant*(g: GossipSub,
       trace "peer sent iwant", peer, messageID = mid
       let msg = g.mcache.get(mid)
       if msg.isSome:
+        libp2p_gossipsub_mcache_hit.observe(1)
         # avoid spam
         if peer.iWantBudget > 0:
           messages.add(msg.get())
           dec peer.iWantBudget
         else:
           break
+      else:
+        libp2p_gossipsub_mcache_hit.observe(0)
   return messages
 
 proc commitMetrics(metrics: var MeshMetrics) {.raises: [Defect].} =
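Since the summary records 1 for an mcache hit and 0 for a miss, its _sum is the number of hits and its _count the number of IWANT lookups, so _sum / _count is exactly the hit ratio that the metric's help string promises. If the assumption stated in the commit message holds (IWANT requests arrive while mcache still holds the message), this ratio should stay near 1; a sustained drop would mean peers are requesting messages after they have expired from the cache.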
File diff suppressed because it is too large (the Grafana libp2p dashboard JSON added by this commit).