swarm: add ip_version to metrics (#2114)

* add ip_version to swarm metrics

* use "unknown" as a default for the IP version

---------

Co-authored-by: Marten Seemann <martenseemann@gmail.com>
This commit is contained in:
Sukun 2023-02-19 04:41:18 +05:30 committed by GitHub
parent 32f2f25529
commit d9004d21b4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 246 additions and 36 deletions

View File

@ -464,7 +464,7 @@
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "libp2p_swarm_connections_opened_total{dir=\"inbound\"} - libp2p_swarm_connections_closed_total{dir=\"inbound\"}",
"expr": "sum by (transport, security, muxer) (libp2p_swarm_connections_opened_total{dir=\"inbound\"}) - sum by (transport, security, muxer) (libp2p_swarm_connections_closed_total{dir=\"inbound\"})",
"legendFormat": "{{transport}} {{security}} {{muxer}}",
"range": true,
"refId": "A"
@ -691,7 +691,7 @@
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "libp2p_swarm_connections_opened_total{dir=\"outbound\"} - libp2p_swarm_connections_closed_total{dir=\"outbound\"}",
"expr": "sum by (transport, security, muxer)(libp2p_swarm_connections_opened_total{dir=\"outbound\"}) - sum by (transport, security, muxer) (libp2p_swarm_connections_closed_total{dir=\"outbound\"})",
"legendFormat": "{{transport}} {{security}} {{muxer}}",
"range": true,
"refId": "A"
@ -1289,7 +1289,7 @@
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "rate(libp2p_swarm_connections_opened_total{dir=\"inbound\"}[$__rate_interval])",
"expr": "sum (rate(libp2p_swarm_connections_opened_total{dir=\"inbound\"}[$__rate_interval])) by (transport, security, muxer)",
"legendFormat": "{{transport}} {{security}} {{muxer}}",
"range": true,
"refId": "A"
@ -1515,7 +1515,7 @@
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "rate(libp2p_swarm_connections_opened_total{dir=\"outbound\"}[$__rate_interval])",
"expr": "sum (rate(libp2p_swarm_connections_opened_total{dir=\"outbound\"}[$__rate_interval])) by (transport, security, muxer)",
"legendFormat": "{{transport}} {{security}} {{muxer}}",
"range": true,
"refId": "A"
@ -1713,7 +1713,7 @@
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "increase(libp2p_swarm_connections_opened_total{dir=\"inbound\"}[$__range])",
"expr": "sum (increase(libp2p_swarm_connections_opened_total{dir=\"inbound\"}[$__range])) by (transport, security, muxer)",
"legendFormat": "{{transport}} {{security}} {{muxer}}",
"range": true,
"refId": "A"
@ -1913,7 +1913,7 @@
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "increase(libp2p_swarm_connections_opened_total{dir=\"outbound\"}[$__range])",
"expr": "sum (increase(libp2p_swarm_connections_opened_total{dir=\"outbound\"}[$__range])) by (transport, security, muxer)",
"legendFormat": "{{transport}} {{security}} {{muxer}}",
"range": true,
"refId": "A"
@ -1941,7 +1941,38 @@
},
"mappings": []
},
"overrides": []
"overrides": [
{
"matcher": {
"id": "byName",
"options": "ip4"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "light-blue",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "ip6"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "super-light-purple",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 8,
@ -1949,6 +1980,164 @@
"x": 0,
"y": 51
},
"id": 32,
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"pieType": "pie",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "sum by (ip_version) (libp2p_swarm_connections_opened_total{dir=\"inbound\"}) - sum by (ip_version) (libp2p_swarm_connections_closed_total{dir=\"inbound\"})",
"legendFormat": "{{ip_version}}",
"range": true,
"refId": "A"
}
],
"title": "New Inbound Connections: IP Version",
"type": "piechart"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
}
},
"mappings": []
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "ip6"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "super-light-purple",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "ip4"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "light-blue",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 51
},
"id": 34,
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"pieType": "pie",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "sum by (ip_version) (libp2p_swarm_connections_opened_total{dir=\"outbound\"}) - sum by (ip_version) (libp2p_swarm_connections_closed_total{dir=\"outbound\"})",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "New Outbound Connections: IP Version",
"type": "piechart"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
}
},
"mappings": []
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 59
},
"id": 15,
"options": {
"legend": {
@ -2137,7 +2326,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 51
"y": 59
},
"id": 17,
"options": {
@ -2220,7 +2409,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 59
"y": 67
},
"id": 25,
"options": {
@ -2296,6 +2485,6 @@
"timezone": "",
"title": "libp2p Swarm",
"uid": "a15PyhO4z",
"version": 68,
"version": 12,
"weekStart": ""
}

View File

@ -61,7 +61,7 @@ func (c *Conn) Close() error {
func (c *Conn) doClose() {
if c.swarm.metricsTracer != nil {
c.swarm.metricsTracer.ClosedConnection(c.stat.Direction, time.Since(c.stat.Stats.Opened), c.ConnState())
c.swarm.metricsTracer.ClosedConnection(c.stat.Direction, time.Since(c.stat.Stats.Opened), c.ConnState(), c.LocalMultiaddr())
}
c.swarm.removeConn(c)

View File

@ -500,8 +500,8 @@ func (s *Swarm) dialAddr(ctx context.Context, p peer.ID, addr ma.Multiaddr) (tra
canonicallog.LogPeerStatus(100, connC.RemotePeer(), connC.RemoteMultiaddr(), "connection_status", "established", "dir", "outbound")
if s.metricsTracer != nil {
connState := connC.ConnState()
s.metricsTracer.OpenedConnection(network.DirOutbound, connC.RemotePublicKey(), connState)
s.metricsTracer.CompletedHandshake(time.Since(start), connState)
s.metricsTracer.OpenedConnection(network.DirOutbound, connC.RemotePublicKey(), connState, connC.LocalMultiaddr())
s.metricsTracer.CompletedHandshake(time.Since(start), connState, connC.LocalMultiaddr())
}
// Trust the transport? Yeah... right.

View File

@ -131,7 +131,7 @@ func (s *Swarm) AddListenAddr(a ma.Multiaddr) error {
}
canonicallog.LogPeerStatus(100, c.RemotePeer(), c.RemoteMultiaddr(), "connection_status", "established", "dir", "inbound")
if s.metricsTracer != nil {
s.metricsTracer.OpenedConnection(network.DirInbound, c.RemotePublicKey(), c.ConnState())
s.metricsTracer.OpenedConnection(network.DirInbound, c.RemotePublicKey(), c.ConnState(), c.LocalMultiaddr())
}
log.Debugf("swarm listener accepted connection: %s <-> %s", c.LocalMultiaddr(), c.RemoteMultiaddr())

View File

@ -26,7 +26,7 @@ var (
Name: "connections_opened_total",
Help: "Connections Opened",
},
[]string{"dir", "transport", "security", "muxer"},
[]string{"dir", "transport", "security", "muxer", "ip_version"},
)
keyTypes = prometheus.NewCounterVec(
prometheus.CounterOpts{
@ -42,7 +42,7 @@ var (
Name: "connections_closed_total",
Help: "Connections Closed",
},
[]string{"dir", "transport", "security", "muxer"},
[]string{"dir", "transport", "security", "muxer", "ip_version"},
)
dialError = prometheus.NewCounterVec(
prometheus.CounterOpts{
@ -50,7 +50,7 @@ var (
Name: "dial_errors_total",
Help: "Dial Error",
},
[]string{"transport", "error"},
[]string{"transport", "error", "ip_version"},
)
connDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
@ -59,7 +59,7 @@ var (
Help: "Duration of a Connection",
Buckets: prometheus.ExponentialBuckets(1.0/16, 2, 25), // up to 24 days
},
[]string{"dir", "transport", "security", "muxer"},
[]string{"dir", "transport", "security", "muxer", "ip_version"},
)
connHandshakeLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
@ -68,7 +68,7 @@ var (
Help: "Duration of the libp2p Handshake",
Buckets: prometheus.ExponentialBuckets(0.001, 1.3, 35),
},
[]string{"transport", "security", "muxer"},
[]string{"transport", "security", "muxer", "ip_version"},
)
)
@ -79,9 +79,9 @@ func initMetrics() {
}
type MetricsTracer interface {
OpenedConnection(network.Direction, crypto.PubKey, network.ConnectionState)
ClosedConnection(network.Direction, time.Duration, network.ConnectionState)
CompletedHandshake(time.Duration, network.ConnectionState)
OpenedConnection(network.Direction, crypto.PubKey, network.ConnectionState, ma.Multiaddr)
ClosedConnection(network.Direction, time.Duration, network.ConnectionState, ma.Multiaddr)
CompletedHandshake(time.Duration, network.ConnectionState, ma.Multiaddr)
FailedDialing(ma.Multiaddr, error)
}
@ -108,12 +108,28 @@ func appendConnectionState(tags []string, cs network.ConnectionState) []string {
return tags
}
func (m *metricsTracer) OpenedConnection(dir network.Direction, p crypto.PubKey, cs network.ConnectionState) {
// getIPVersion classifies addr by the first IP component encountered while
// walking it: "ip4" for an IPv4 component, "ip6" for an IPv6 component.
// If the walk finds neither, the result is "unknown".
func getIPVersion(addr ma.Multiaddr) string {
	ipVersion := "unknown"
	ma.ForEach(addr, func(comp ma.Component) bool {
		switch comp.Protocol().Code {
		case ma.P_IP4:
			ipVersion = "ip4"
		case ma.P_IP6:
			ipVersion = "ip6"
		default:
			// Not an IP component: ask ForEach for the next one.
			return true
		}
		// An IP component was recorded; report false to ForEach.
		return false
	})
	return ipVersion
}
func (m *metricsTracer) OpenedConnection(dir network.Direction, p crypto.PubKey, cs network.ConnectionState, laddr ma.Multiaddr) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
*tags = append(*tags, metricshelper.GetDirection(dir))
*tags = appendConnectionState(*tags, cs)
*tags = append(*tags, getIPVersion(laddr))
connsOpened.WithLabelValues(*tags...).Inc()
*tags = (*tags)[:0]
@ -122,25 +138,23 @@ func (m *metricsTracer) OpenedConnection(dir network.Direction, p crypto.PubKey,
keyTypes.WithLabelValues(*tags...).Inc()
}
func (m *metricsTracer) ClosedConnection(dir network.Direction, duration time.Duration, cs network.ConnectionState) {
func (m *metricsTracer) ClosedConnection(dir network.Direction, duration time.Duration, cs network.ConnectionState, laddr ma.Multiaddr) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
*tags = append(*tags, metricshelper.GetDirection(dir))
*tags = appendConnectionState(*tags, cs)
*tags = append(*tags, getIPVersion(laddr))
connsClosed.WithLabelValues(*tags...).Inc()
*tags = (*tags)[:0]
*tags = append(*tags, metricshelper.GetDirection(dir))
*tags = appendConnectionState(*tags, cs)
connDuration.WithLabelValues(*tags...).Observe(duration.Seconds())
}
func (m *metricsTracer) CompletedHandshake(t time.Duration, cs network.ConnectionState) {
// CompletedHandshake observes the handshake duration t, in seconds, on the
// connHandshakeLatency histogram. The label values are whatever
// appendConnectionState derives from cs, followed by the IP version of laddr.
func (m *metricsTracer) CompletedHandshake(t time.Duration, cs network.ConnectionState, laddr ma.Multiaddr) {
	// Scratch slice borrowed from metricshelper and handed back on return.
	labels := metricshelper.GetStringSlice()
	defer metricshelper.PutStringSlice(labels)

	ipVersion := getIPVersion(laddr)

	// The IP version goes last, after the connection-state labels.
	*labels = appendConnectionState(*labels, cs)
	*labels = append(*labels, ipVersion)

	elapsed := t.Seconds()
	connHandshakeLatency.WithLabelValues(*labels...).Observe(elapsed)
}
@ -171,5 +185,6 @@ func (m *metricsTracer) FailedDialing(addr ma.Multiaddr, err error) {
defer metricshelper.PutStringSlice(tags)
*tags = append(*tags, transport, e)
*tags = append(*tags, getIPVersion(addr))
dialError.WithLabelValues(*tags...).Inc()
}

View File

@ -29,13 +29,15 @@ func BenchmarkMetricsConnOpen(b *testing.B) {
}
_, pub, err := crypto.GenerateEd25519Key(rand.Reader)
require.NoError(b, err)
quicAddr := ma.StringCast("/ip4/1.2.3.4/udp/1/quic")
tcpAddr := ma.StringCast("/ip4/1.2.3.4/tcp/1/")
tr := NewMetricsTracer()
for i := 0; i < b.N; i++ {
switch i % 2 {
case 0:
tr.OpenedConnection(network.DirInbound, pub, quicConnState)
tr.OpenedConnection(network.DirInbound, pub, quicConnState, quicAddr)
case 1:
tr.OpenedConnection(network.DirInbound, pub, tcpConnState)
tr.OpenedConnection(network.DirInbound, pub, tcpConnState, tcpAddr)
}
}
}
@ -77,12 +79,16 @@ func TestMetricsNoAllocNoCover(t *testing.T) {
}
tests := map[string]func(){
"OpenedConnection": func() { mt.OpenedConnection(randItem(directions), randItem(keys), randItem(connections)) },
"ClosedConnection": func() {
mt.ClosedConnection(randItem(directions), time.Duration(mrand.Intn(100))*time.Second, randItem(connections))
"OpenedConnection": func() {
mt.OpenedConnection(randItem(directions), randItem(keys), randItem(connections), randItem(addrs))
},
"CompletedHandshake": func() { mt.CompletedHandshake(time.Duration(mrand.Intn(100))*time.Second, randItem(connections)) },
"FailedDialing": func() { mt.FailedDialing(randItem(addrs), randItem(errors)) },
"ClosedConnection": func() {
mt.ClosedConnection(randItem(directions), time.Duration(mrand.Intn(100))*time.Second, randItem(connections), randItem(addrs))
},
"CompletedHandshake": func() {
mt.CompletedHandshake(time.Duration(mrand.Intn(100))*time.Second, randItem(connections), randItem(addrs))
},
"FailedDialing": func() { mt.FailedDialing(randItem(addrs), randItem(errors)) },
}
for method, f := range tests {