From 81931e52c3a0ba920af6e89ae2ea56ff64147c3c Mon Sep 17 00:00:00 2001 From: cskh Date: Thu, 11 Aug 2022 22:09:56 -0400 Subject: [PATCH] feat(telemetry): add labels to serf and memberlist metrics (#14161) * feat(telemetry): add labels to serf and memberlist metrics * changelog * doc update Co-authored-by: R.B. Boyer <4903+rboyer@users.noreply.github.com> --- .changelog/14161.txt | 3 +++ agent/consul/client_serf.go | 2 ++ agent/consul/config.go | 2 ++ agent/consul/server_oss.go | 15 +++++++++++++++ agent/consul/server_serf.go | 10 +++++++--- go.mod | 6 +++--- go.sum | 10 ++++++---- website/content/docs/agent/telemetry.mdx | 4 ++++ 8 files changed, 42 insertions(+), 10 deletions(-) create mode 100644 .changelog/14161.txt diff --git a/.changelog/14161.txt b/.changelog/14161.txt new file mode 100644 index 0000000000..2926ffbe9b --- /dev/null +++ b/.changelog/14161.txt @@ -0,0 +1,3 @@ +```release-note:improvement +metrics: add labels of segment, partition, network area, network (lan or wan) to serf and memberlist metrics +``` diff --git a/agent/consul/client_serf.go b/agent/consul/client_serf.go index 55df7a5471..05db21e2f3 100644 --- a/agent/consul/client_serf.go +++ b/agent/consul/client_serf.go @@ -62,6 +62,8 @@ func (c *Client) setupSerf(conf *serf.Config, ch chan serf.Event, path string) ( return nil, err } + addSerfMetricsLabels(conf, false, "", "", "") + addEnterpriseSerfTags(conf.Tags, c.config.AgentEnterpriseMeta()) conf.ReconnectTimeoutOverride = libserf.NewReconnectOverride(c.logger) diff --git a/agent/consul/config.go b/agent/consul/config.go index b897c4f232..38063f808a 100644 --- a/agent/consul/config.go +++ b/agent/consul/config.go @@ -584,6 +584,7 @@ func CloneSerfLANConfig(base *serf.Config) *serf.Config { cfg.MemberlistConfig.ProbeTimeout = base.MemberlistConfig.ProbeTimeout cfg.MemberlistConfig.SuspicionMult = base.MemberlistConfig.SuspicionMult cfg.MemberlistConfig.RetransmitMult = base.MemberlistConfig.RetransmitMult + 
cfg.MemberlistConfig.MetricLabels = base.MemberlistConfig.MetricLabels // agent/keyring.go cfg.MemberlistConfig.Keyring = base.MemberlistConfig.Keyring @@ -593,6 +594,7 @@ func CloneSerfLANConfig(base *serf.Config) *serf.Config { cfg.ReapInterval = base.ReapInterval cfg.TombstoneTimeout = base.TombstoneTimeout cfg.MemberlistConfig.SecretKey = base.MemberlistConfig.SecretKey + cfg.MetricLabels = base.MetricLabels return cfg } diff --git a/agent/consul/server_oss.go b/agent/consul/server_oss.go index 5ae2fc3ea6..4ae524b65c 100644 --- a/agent/consul/server_oss.go +++ b/agent/consul/server_oss.go @@ -159,3 +159,18 @@ func (s *Server) addEnterpriseStats(stats map[string]map[string]string) { func getSerfMemberEnterpriseMeta(member serf.Member) *acl.EnterpriseMeta { return structs.NodeEnterpriseMetaInDefaultPartition() } + +func addSerfMetricsLabels(conf *serf.Config, wan bool, segment string, partition string, areaID string) { + conf.MetricLabels = []metrics.Label{} + + networkMetric := metrics.Label{ + Name: "network", + } + if wan { + networkMetric.Value = "wan" + } else { + networkMetric.Value = "lan" + } + + conf.MetricLabels = append(conf.MetricLabels, networkMetric) +} diff --git a/agent/consul/server_serf.go b/agent/consul/server_serf.go index 5e29b47dd2..b9c8ad95f9 100644 --- a/agent/consul/server_serf.go +++ b/agent/consul/server_serf.go @@ -8,6 +8,7 @@ import ( "strings" "time" + "github.com/armon/go-metrics" "github.com/hashicorp/go-hclog" "github.com/hashicorp/memberlist" "github.com/hashicorp/raft" @@ -177,9 +178,10 @@ func (s *Server) setupSerfConfig(opts setupSerfOptions) (*serf.Config, error) { if opts.WAN { nt, err := memberlist.NewNetTransport(&memberlist.NetTransportConfig{ - BindAddrs: []string{conf.MemberlistConfig.BindAddr}, - BindPort: conf.MemberlistConfig.BindPort, - Logger: conf.MemberlistConfig.Logger, + BindAddrs: []string{conf.MemberlistConfig.BindAddr}, + BindPort: conf.MemberlistConfig.BindPort, + Logger: conf.MemberlistConfig.Logger, + 
MetricLabels: []metrics.Label{{Name: "network", Value: "wan"}}, }) if err != nil { return nil, err @@ -230,6 +232,8 @@ func (s *Server) setupSerfConfig(opts setupSerfOptions) (*serf.Config, error) { conf.ReconnectTimeoutOverride = libserf.NewReconnectOverride(s.logger) + addSerfMetricsLabels(conf, opts.WAN, "", "", "") + addEnterpriseSerfTags(conf.Tags, s.config.AgentEnterpriseMeta()) if s.config.OverrideInitialSerfTags != nil { diff --git a/go.mod b/go.mod index cb048763d6..e2fbafed4b 100644 --- a/go.mod +++ b/go.mod @@ -45,11 +45,11 @@ require ( github.com/hashicorp/golang-lru v0.5.4 github.com/hashicorp/hcl v1.0.0 github.com/hashicorp/hil v0.0.0-20200423225030-a18a1cd20038 - github.com/hashicorp/memberlist v0.3.1 + github.com/hashicorp/memberlist v0.4.0 github.com/hashicorp/raft v1.3.9 github.com/hashicorp/raft-autopilot v0.1.6 github.com/hashicorp/raft-boltdb/v2 v2.2.2 - github.com/hashicorp/serf v0.9.8 + github.com/hashicorp/serf v0.10.0 github.com/hashicorp/vault/api v1.0.5-0.20200717191844-f687267c8086 github.com/hashicorp/vault/sdk v0.1.14-0.20200519221838-e0cfd64bc267 github.com/hashicorp/yamux v0.0.0-20210826001029-26ff87cf9493 @@ -77,7 +77,7 @@ require ( golang.org/x/net v0.0.0-20211216030914-fe4d6282115f golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d golang.org/x/sync v0.0.0-20210220032951-036812b2e83c - golang.org/x/sys v0.0.0-20220412211240-33da011f77ad + golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10 golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e google.golang.org/genproto v0.0.0-20200623002339-fbb79eadd5eb google.golang.org/grpc v1.37.1 diff --git a/go.sum b/go.sum index 8f2afaa45d..ee3e0beda1 100644 --- a/go.sum +++ b/go.sum @@ -364,8 +364,9 @@ github.com/hashicorp/mdns v1.0.1/go.mod h1:4gW7WsVCke5TE7EPeYliwHlRUyBtfCwuFwuMg github.com/hashicorp/mdns v1.0.4 h1:sY0CMhFmjIPDMlTB+HfymFHCaYLhgifZ0QhjaYKD/UQ= github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc= github.com/hashicorp/memberlist 
v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= -github.com/hashicorp/memberlist v0.3.1 h1:MXgUXLqva1QvpVEDQW1IQLG0wivQAtmFlHRQ+1vWZfM= github.com/hashicorp/memberlist v0.3.1/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= +github.com/hashicorp/memberlist v0.4.0 h1:k3uda5gZcltmafuFF+UFqNEl5PrH+yPZ4zkjp1f/H/8= +github.com/hashicorp/memberlist v0.4.0/go.mod h1:yvyXLpo0QaGE59Y7hDTsTzDD25JYBZ4mHgHUZ8lrOI0= github.com/hashicorp/raft v1.1.0/go.mod h1:4Ak7FSPnuvmb0GV6vgIAJ4vYT4bek9bb6Q+7HVbyzqM= github.com/hashicorp/raft v1.1.1/go.mod h1:vPAJM8Asw6u8LxC3eJCUZmRP/E4QmUGE1R7g7k8sG/8= github.com/hashicorp/raft v1.2.0/go.mod h1:vPAJM8Asw6u8LxC3eJCUZmRP/E4QmUGE1R7g7k8sG/8= @@ -380,8 +381,8 @@ github.com/hashicorp/raft-boltdb v0.0.0-20211202195631-7d34b9fb3f42/go.mod h1:wc github.com/hashicorp/raft-boltdb/v2 v2.2.2 h1:rlkPtOllgIcKLxVT4nutqlTH2NRFn+tO1wwZk/4Dxqw= github.com/hashicorp/raft-boltdb/v2 v2.2.2/go.mod h1:N8YgaZgNJLpZC+h+by7vDu5rzsRgONThTEeUS3zWbfY= github.com/hashicorp/serf v0.9.7/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4= -github.com/hashicorp/serf v0.9.8 h1:JGklO/2Drf1QGa312EieQN3zhxQ+aJg6pG+aC3MFaVo= -github.com/hashicorp/serf v0.9.8/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4= +github.com/hashicorp/serf v0.10.0 h1:89qvvpfMQnz6c2y4pv7j2vUUmeT1+5TSZMexuTbtsPs= +github.com/hashicorp/serf v0.10.0/go.mod h1:bXN03oZc5xlH46k/K1qTrpXb9ERKyY1/i/N5mxvgrZw= github.com/hashicorp/vault/api v1.0.5-0.20200717191844-f687267c8086 h1:OKsyxKi2sNmqm1Gv93adf2AID2FOBFdCbbZn9fGtIdg= github.com/hashicorp/vault/api v1.0.5-0.20200717191844-f687267c8086/go.mod h1:R3Umvhlxi2TN7Ex2hzOowyeNb+SfbVWI973N+ctaFMk= github.com/hashicorp/vault/sdk v0.1.14-0.20200519221838-e0cfd64bc267 h1:e1ok06zGrWJW91rzRroyl5nRNqraaBe4d5hiKcVZuHM= @@ -793,8 +794,9 @@ golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys 
v0.0.0-20210816074244-15123e1e1f71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211013075003-97ac67df715c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220412211240-33da011f77ad h1:ntjMns5wyP/fN65tdBD4g8J5w8n015+iIIs9rtjXkY0= golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10 h1:WIoqL4EROvwiPdUtaip4VcDdpZ4kha7wBWZrbVKCIZg= +golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1 h1:v+OssWQX+hTHEmOBgwxdZxK4zHq3yOs8F9J7mk0PY8E= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/website/content/docs/agent/telemetry.mdx b/website/content/docs/agent/telemetry.mdx index 575f3d7e5b..8b4f923435 100644 --- a/website/content/docs/agent/telemetry.mdx +++ b/website/content/docs/agent/telemetry.mdx @@ -605,6 +605,10 @@ Any metric in this section can be turned off with the [`prefix_filter`](/docs/ag ## Cluster Health These metrics give insight into the health of the cluster as a whole. +Query for the `consul.memberlist.*` and `consul.serf.*` metrics can be appended +with certain labels to further distinguish data between different gossip pools. +The supported label for OSS is `network`, while `segment`, `partition`, `area` +are allowed for <EnterpriseAlert inline />. 
| Metric | Description | Unit | Type | |----------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------|---------|