Merge pull request #9088 from hashicorp/mkcp/telemetry/add-key-metrics-definitions

Add prometheus definitions for key metrics.
This commit is contained in:
Kit Patella 2020-11-05 12:20:05 -08:00 committed by GitHub
commit b3ecdcc0c0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 244 additions and 65 deletions

3
go.mod
View File

@ -13,7 +13,7 @@ require (
github.com/NYTimes/gziphandler v1.0.1
github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6 // indirect
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e
github.com/armon/go-metrics v0.3.5-0.20200914211745-2bc64ebd2914
github.com/armon/go-metrics v0.3.5-0.20201104215618-6fd5a4ddf425
github.com/armon/go-radix v1.0.0
github.com/aws/aws-sdk-go v1.25.41
github.com/coredns/coredns v1.1.2
@ -63,7 +63,6 @@ require (
github.com/joyent/triton-go v1.7.1-0.20200416154420-6801d15b779f // indirect
github.com/konsorten/go-windows-terminal-sequences v1.0.2 // indirect
github.com/kr/text v0.1.0
github.com/mattn/go-colorable v0.1.7 // indirect
github.com/miekg/dns v1.1.31
github.com/mitchellh/cli v1.1.0
github.com/mitchellh/copystructure v1.0.0

6
go.sum
View File

@ -59,8 +59,8 @@ github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5
github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=
github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878/go.mod h1:3AMJUQhVx52RsWOnlkpikZr01T/yAVN2gn0861vByNg=
github.com/armon/go-metrics v0.3.0/go.mod h1:zXjbSimjXTd7vOpY8B0/2LpvNvDoXBuplAD+gJD3GYs=
github.com/armon/go-metrics v0.3.5-0.20200914211745-2bc64ebd2914 h1:Yiw8vrY+7jX6pCOdAkIUNU8QBS9c6HJAct+K36MeANw=
github.com/armon/go-metrics v0.3.5-0.20200914211745-2bc64ebd2914/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc=
github.com/armon/go-metrics v0.3.5-0.20201104215618-6fd5a4ddf425 h1:23nUvGE+8HYFc0AUXuYxgFws6IdyzOrSJJmKfPMJmi8=
github.com/armon/go-metrics v0.3.5-0.20201104215618-6fd5a4ddf425/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc=
github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
github.com/armon/go-radix v1.0.0 h1:F4z6KzEeeQIMeLFa97iZU6vupzoecKdU5TX24SNppXI=
github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
@ -345,8 +345,6 @@ github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaO
github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
github.com/mattn/go-colorable v0.1.6 h1:6Su7aK7lXmJ/U79bYtBjLNaha4Fs1Rg9plHpcH+vvnE=
github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-colorable v0.1.7 h1:bQGKb3vps/j0E9GfJQ03JyhRuxsvdAanXlT9BTw3mdw=
github.com/mattn/go-colorable v0.1.7/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84=

View File

@ -280,8 +280,75 @@ func prometheusSink(cfg TelemetryConfig, hostname string) (metrics.MetricSink, e
if cfg.PrometheusRetentionTime.Nanoseconds() < 1 {
return nil, nil
}
// TODO(kit) define these in vars in the package/file they're used
gaugeDefs := []prometheus.GaugeDefinition{
{
Name: []string{"consul", "autopilot", "healthy"},
Help: "This tracks the overall health of the local server cluster. 1 if all servers are healthy, 0 if one or more are unhealthy.",
},
}
// TODO(kit) define these in vars in the package/file they're used
counterDefs := []prometheus.CounterDefinition{
{
Name: []string{"consul", "raft", "apply"},
Help: "This counts the number of Raft transactions occurring over the interval.",
},
{
Name: []string{"consul", "raft", "state", "candidate"},
Help: "This increments whenever a Consul server starts an election.",
},
{
Name: []string{"consul", "raft", "state", "leader"},
Help: "This increments whenever a Consul server becomes a leader.",
},
{
Name: []string{"consul", "client", "api", "catalog_register"},
Help: "Increments whenever a Consul agent receives a catalog register request.",
},
{
Name: []string{"consul", "runtime", "total_gc_pause_ns"},
Help: "Number of nanoseconds consumed by stop-the-world garbage collection (GC) pauses since Consul started.",
},
{
Name: []string{"consul", "client", "rpc"},
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server.",
},
{
Name: []string{"consul", "client", "rpc", "exceeded"},
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server gets rate limited by that agent's limits configuration.",
},
{
Name: []string{"consul", "client", "rpc", "failed"},
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server and fails.",
},
}
// TODO(kit) define these in vars in the package/file they're used
summaryDefs := []prometheus.SummaryDefinition{
{
Name: []string{"consul", "kvs", "apply"},
Help: "This measures the time it takes to complete an update to the KV store.",
},
{
Name: []string{"consul", "txn", "apply"},
Help: "This measures the time spent applying a transaction operation.",
},
{
Name: []string{"consul", "raft", "commitTime"},
Help: "This measures the time it takes to commit a new entry to the Raft log on the leader.",
},
{
Name: []string{"consul", "raft", "leader", "lastContact"},
Help: "Measures the time since the leader was last able to contact the follower nodes when checking its leader lease.",
},
}
prometheusOpts := prometheus.PrometheusOpts{
Expiration: cfg.PrometheusRetentionTime,
Expiration: cfg.PrometheusRetentionTime,
GaugeDefinitions: gaugeDefs,
CounterDefinitions: counterDefs,
SummaryDefinitions: summaryDefs,
}
sink, err := prometheus.NewPrometheusSinkFrom(prometheusOpts)
if err != nil {

View File

@ -5,6 +5,7 @@ package prometheus
import (
"fmt"
"log"
"math"
"regexp"
"strings"
"sync"
@ -29,6 +30,26 @@ type PrometheusOpts struct {
// untracked. If the value is zero, a metric is never expired.
Expiration time.Duration
Registerer prometheus.Registerer
// GaugeDefinitions, SummaryDefinitions, and CounterDefinitions allow us to pre-declare metrics by giving their Name, Help, and ConstLabels to
// the PrometheusSink when it is created. Metrics declared in this way will be initialized at zero and will not be
// deleted when their expiry is reached.
// - Gauges and Summaries will be set to NaN when they expire.
// - Counters continue to Collect their last known value.
// Ex:
// PrometheusOpts{
// Expiration: 10 * time.Second,
// GaugeDefinitions: []GaugeDefinition{
// {
// Name: []string{ "application", "component", "measurement"},
// Help: "application_component_measurement provides an example of how to declare static metrics",
// ConstLabels: []metrics.Label{ { Name: "my_label", Value: "does_not_change" }, },
// },
// },
// }
GaugeDefinitions []GaugeDefinition
SummaryDefinitions []SummaryDefinition
CounterDefinitions []CounterDefinition
}
type PrometheusSink struct {
@ -39,19 +60,44 @@ type PrometheusSink struct {
expiration time.Duration
}
type PrometheusGauge struct {
// GaugeDefinition can be provided to PrometheusOpts to declare a constant gauge that is not deleted on expiry.
// Gauges declared this way are created and stored in the sink by NewPrometheusSinkFrom, before any value is set.
type GaugeDefinition struct {
// Name holds the key parts of the metric. Together with ConstLabels they are flattened into the Prometheus
// metric name and the hash under which the sink stores the gauge.
Name []string
// ConstLabels are the fixed labels attached to the gauge when it is created.
ConstLabels []metrics.Label
// Help is the help text handed to Prometheus for this gauge.
Help string
}
// gauge wraps a prometheus.Gauge with the bookkeeping the sink uses to decide whether the metric has expired.
type gauge struct {
prometheus.Gauge
// updatedAt is set to time.Now() whenever SetGaugeWithLabels writes the gauge; Collect compares it against the
// sink's expiration. Gauges created from a GaugeDefinition start with the zero time.
updatedAt time.Time
// canDelete is set if the metric is created during runtime so we know it's ephemeral and can delete it on expiry.
canDelete bool
}
type PrometheusSummary struct {
// SummaryDefinition can be provided to PrometheusOpts to declare a constant summary that is not deleted on expiry.
// Summaries declared this way are created and stored in the sink by NewPrometheusSinkFrom, before any sample is added.
type SummaryDefinition struct {
// Name holds the key parts of the metric. Together with ConstLabels they are flattened into the Prometheus
// metric name and the hash under which the sink stores the summary.
Name []string
// ConstLabels are the fixed labels attached to the summary when it is created.
ConstLabels []metrics.Label
// Help is the help text handed to Prometheus for this summary.
Help string
}
// summary wraps a prometheus.Summary with the bookkeeping the sink uses to decide whether the metric has expired.
type summary struct {
prometheus.Summary
// updatedAt is set to time.Now() whenever AddSampleWithLabels observes a sample; Collect compares it against the
// sink's expiration. Summaries created from a SummaryDefinition start with the zero time.
updatedAt time.Time
// canDelete is true only for summaries created at runtime by AddSampleWithLabels; Collect removes those from the
// sink once they expire, while pre-declared summaries are kept.
canDelete bool
}
type PrometheusCounter struct {
// CounterDefinition can be provided to PrometheusOpts to declare a constant counter that is not deleted on expiry.
// Counters declared this way are created and stored in the sink by NewPrometheusSinkFrom, before any increment.
type CounterDefinition struct {
// Name holds the key parts of the metric. Together with ConstLabels they are flattened into the Prometheus
// metric name and the hash under which the sink stores the counter.
Name []string
// ConstLabels are the fixed labels attached to the counter when it is created.
ConstLabels []metrics.Label
// Help is the help text handed to Prometheus for this counter.
Help string
}
// counter wraps a prometheus.Counter with the bookkeeping the sink uses to decide whether the metric has expired.
type counter struct {
prometheus.Counter
// updatedAt is set to time.Now() whenever IncrCounterWithLabels adds to the counter; Collect compares it against
// the sink's expiration. Counters created from a CounterDefinition start with the zero time.
updatedAt time.Time
// canDelete is true only for counters created at runtime by IncrCounterWithLabels; Collect removes those from the
// sink once they expire, while pre-declared counters are kept and continue to report their last value.
canDelete bool
}
// NewPrometheusSink creates a new PrometheusSink using the default options.
@ -68,6 +114,10 @@ func NewPrometheusSinkFrom(opts PrometheusOpts) (*PrometheusSink, error) {
expiration: opts.Expiration,
}
initGauges(&sink.gauges, opts.GaugeDefinitions)
initSummaries(&sink.summaries, opts.SummaryDefinitions)
initCounters(&sink.counters, opts.CounterDefinitions)
reg := opts.Registerer
if reg == nil {
reg = prometheus.DefaultRegisterer
@ -90,43 +140,101 @@ func (p *PrometheusSink) Collect(c chan<- prometheus.Metric) {
expire := p.expiration != 0
now := time.Now()
p.gauges.Range(func(k, v interface{}) bool {
if v != nil {
lastUpdate := v.(*PrometheusGauge).updatedAt
if expire && lastUpdate.Add(p.expiration).Before(now) {
p.gauges.Delete(k)
} else {
v.(*PrometheusGauge).Collect(c)
}
if v == nil {
return true
}
g := v.(*gauge)
lastUpdate := g.updatedAt
if expire && lastUpdate.Add(p.expiration).Before(now) {
if g.canDelete {
p.gauges.Delete(k)
return true
}
// We have not observed the gauge this interval so we don't know its value.
g.Set(math.NaN())
}
g.Collect(c)
return true
})
p.summaries.Range(func(k, v interface{}) bool {
if v != nil {
lastUpdate := v.(*PrometheusSummary).updatedAt
if expire && lastUpdate.Add(p.expiration).Before(now) {
p.summaries.Delete(k)
} else {
v.(*PrometheusSummary).Collect(c)
}
if v == nil {
return true
}
s := v.(*summary)
lastUpdate := s.updatedAt
if expire && lastUpdate.Add(p.expiration).Before(now) {
if s.canDelete {
p.summaries.Delete(k)
return true
}
// We have observed nothing in this interval.
s.Observe(math.NaN())
}
s.Collect(c)
return true
})
p.counters.Range(func(k, v interface{}) bool {
if v != nil {
lastUpdate := v.(*PrometheusCounter).updatedAt
if expire && lastUpdate.Add(p.expiration).Before(now) {
p.counters.Delete(k)
} else {
v.(*PrometheusCounter).Collect(c)
}
if v == nil {
return true
}
count := v.(*counter)
lastUpdate := count.updatedAt
if expire && lastUpdate.Add(p.expiration).Before(now) {
if count.canDelete {
p.counters.Delete(k)
return true
}
// Counters remain at their previous value when not observed, so we do not set it to NaN.
}
count.Collect(c)
return true
})
}
// initGauges pre-populates m with one gauge per definition so that each declared gauge exists in the sink before
// any value has been reported for it. Entries are keyed by the hash that flattenKey derives from the definition's
// Name and ConstLabels. Each stored entry has canDelete unset and updatedAt left at its zero value.
func initGauges(m *sync.Map, gauges []GaugeDefinition) {
	for i := range gauges {
		def := gauges[i]
		name, id := flattenKey(def.Name, def.ConstLabels)
		opts := prometheus.GaugeOpts{
			Name:        name,
			Help:        def.Help,
			ConstLabels: prometheusLabels(def.ConstLabels),
		}
		entry := &gauge{Gauge: prometheus.NewGauge(opts)}
		m.Store(id, entry)
	}
}
// initSummaries pre-populates m with one summary per definition so that each declared summary exists in the sink
// before any sample has been observed. Entries are keyed by the hash that flattenKey derives from the definition's
// Name and ConstLabels. The summaries use the same MaxAge and Objectives as those created at runtime by
// AddSampleWithLabels. Each stored entry has canDelete unset and updatedAt left at its zero value.
func initSummaries(m *sync.Map, summaries []SummaryDefinition) {
	for i := range summaries {
		def := summaries[i]
		name, id := flattenKey(def.Name, def.ConstLabels)
		opts := prometheus.SummaryOpts{
			Name:        name,
			Help:        def.Help,
			MaxAge:      10 * time.Second,
			ConstLabels: prometheusLabels(def.ConstLabels),
			Objectives:  map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
		}
		entry := &summary{Summary: prometheus.NewSummary(opts)}
		m.Store(id, entry)
	}
}
// initCounters pre-populates m with one counter per definition so that each declared counter exists in the sink
// before it has ever been incremented. Entries are keyed by the hash that flattenKey derives from the definition's
// Name and ConstLabels. Each stored entry has canDelete unset and updatedAt left at its zero value.
func initCounters(m *sync.Map, counters []CounterDefinition) {
	for i := range counters {
		def := counters[i]
		name, id := flattenKey(def.Name, def.ConstLabels)
		opts := prometheus.CounterOpts{
			Name:        name,
			Help:        def.Help,
			ConstLabels: prometheusLabels(def.ConstLabels),
		}
		entry := &counter{Counter: prometheus.NewCounter(opts)}
		m.Store(id, entry)
	}
}
var forbiddenChars = regexp.MustCompile("[ .=\\-/]")
func (p *PrometheusSink) flattenKey(parts []string, labels []metrics.Label) (string, string) {
func flattenKey(parts []string, labels []metrics.Label) (string, string) {
key := strings.Join(parts, "_")
key = forbiddenChars.ReplaceAllString(key, "_")
@ -151,7 +259,7 @@ func (p *PrometheusSink) SetGauge(parts []string, val float32) {
}
func (p *PrometheusSink) SetGaugeWithLabels(parts []string, val float32, labels []metrics.Label) {
key, hash := p.flattenKey(parts, labels)
key, hash := flattenKey(parts, labels)
pg, ok := p.gauges.Load(hash)
// The sync.Map underlying gauges stores pointers to our structs. If we need to make updates,
@ -161,10 +269,12 @@ func (p *PrometheusSink) SetGaugeWithLabels(parts []string, val float32, labels
// so there's no issues there. It's possible for racy updates to occur to the updatedAt
// value, but since we're always setting it to time.Now(), it doesn't really matter.
if ok {
localGauge := *pg.(*PrometheusGauge)
localGauge := *pg.(*gauge)
localGauge.Set(float64(val))
localGauge.updatedAt = time.Now()
p.gauges.Store(hash, &localGauge)
// The gauge does not exist, create the gauge and allow it to be deleted
} else {
g := prometheus.NewGauge(prometheus.GaugeOpts{
Name: key,
@ -172,8 +282,10 @@ func (p *PrometheusSink) SetGaugeWithLabels(parts []string, val float32, labels
ConstLabels: prometheusLabels(labels),
})
g.Set(float64(val))
pg = &PrometheusGauge{
g, time.Now(),
pg = &gauge{
Gauge: g,
updatedAt: time.Now(),
canDelete: true,
}
p.gauges.Store(hash, pg)
}
@ -184,14 +296,17 @@ func (p *PrometheusSink) AddSample(parts []string, val float32) {
}
func (p *PrometheusSink) AddSampleWithLabels(parts []string, val float32, labels []metrics.Label) {
key, hash := p.flattenKey(parts, labels)
key, hash := flattenKey(parts, labels)
ps, ok := p.summaries.Load(hash)
// Does the summary already exist for this sample type?
if ok {
localSummary := *ps.(*PrometheusSummary)
localSummary := *ps.(*summary)
localSummary.Observe(float64(val))
localSummary.updatedAt = time.Now()
p.summaries.Store(hash, &localSummary)
// The summary does not exist, create the Summary and allow it to be deleted
} else {
s := prometheus.NewSummary(prometheus.SummaryOpts{
Name: key,
@ -201,8 +316,10 @@ func (p *PrometheusSink) AddSampleWithLabels(parts []string, val float32, labels
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
})
s.Observe(float64(val))
ps = &PrometheusSummary{
s, time.Now(),
ps = &summary{
Summary: s,
updatedAt: time.Now(),
canDelete: true,
}
p.summaries.Store(hash, ps)
}
@ -219,14 +336,17 @@ func (p *PrometheusSink) IncrCounter(parts []string, val float32) {
}
func (p *PrometheusSink) IncrCounterWithLabels(parts []string, val float32, labels []metrics.Label) {
key, hash := p.flattenKey(parts, labels)
key, hash := flattenKey(parts, labels)
pc, ok := p.counters.Load(hash)
// Does the counter exist?
if ok {
localCounter := *pc.(*PrometheusCounter)
localCounter := *pc.(*counter)
localCounter.Add(float64(val))
localCounter.updatedAt = time.Now()
p.counters.Store(hash, &localCounter)
// The counter does not exist yet, create it and allow it to be deleted
} else {
c := prometheus.NewCounter(prometheus.CounterOpts{
Name: key,
@ -234,13 +354,17 @@ func (p *PrometheusSink) IncrCounterWithLabels(parts []string, val float32, labe
ConstLabels: prometheusLabels(labels),
})
c.Add(float64(val))
pc = &PrometheusCounter{
c, time.Now(),
pc = &counter{
Counter: c,
updatedAt: time.Now(),
canDelete: true,
}
p.counters.Store(hash, pc)
}
}
// PrometheusPushSink wraps a normal prometheus sink and provides an address and facilities to export it to an address
// on an interval.
type PrometheusPushSink struct {
*PrometheusSink
pusher *push.Pusher
@ -249,7 +373,8 @@ type PrometheusPushSink struct {
stopChan chan struct{}
}
func NewPrometheusPushSink(address string, pushIterval time.Duration, name string) (*PrometheusPushSink, error) {
// NewPrometheusPushSink creates a PrometheusPushSink by taking an address, interval, and destination name.
func NewPrometheusPushSink(address string, pushInterval time.Duration, name string) (*PrometheusPushSink, error) {
promSink := &PrometheusSink{
gauges: sync.Map{},
summaries: sync.Map{},
@ -263,7 +388,7 @@ func NewPrometheusPushSink(address string, pushIterval time.Duration, name strin
promSink,
pusher,
address,
pushIterval,
pushInterval,
make(chan struct{}),
}

View File

@ -10,7 +10,6 @@ import (
"os"
"strconv"
"strings"
"sync"
"syscall"
"unsafe"
@ -28,7 +27,6 @@ const (
backgroundRed = 0x40
backgroundIntensity = 0x80
backgroundMask = (backgroundRed | backgroundBlue | backgroundGreen | backgroundIntensity)
commonLvbUnderscore = 0x8000
cENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x4
)
@ -95,7 +93,6 @@ type Writer struct {
oldattr word
oldpos coord
rest bytes.Buffer
mutex sync.Mutex
}
// NewColorable returns new instance of Writer which handles escape sequence from File.
@ -435,8 +432,6 @@ func atoiWithDefault(s string, def int) (int, error) {
// Write writes data on console
func (w *Writer) Write(data []byte) (n int, err error) {
w.mutex.Lock()
defer w.mutex.Unlock()
var csbi consoleScreenBufferInfo
procGetConsoleScreenBufferInfo.Call(uintptr(w.handle), uintptr(unsafe.Pointer(&csbi)))
@ -688,19 +683,14 @@ loop:
switch {
case n == 0 || n == 100:
attr = w.oldattr
case n == 4:
attr |= commonLvbUnderscore
case (1 <= n && n <= 3) || n == 5:
case 1 <= n && n <= 5:
attr |= foregroundIntensity
case n == 7 || n == 27:
attr =
(attr &^ (foregroundMask | backgroundMask)) |
((attr & foregroundMask) << 4) |
((attr & backgroundMask) >> 4)
case n == 22:
attr &^= foregroundIntensity
case n == 24:
attr &^= commonLvbUnderscore
case n == 7:
attr = ((attr & foregroundMask) << 4) | ((attr & backgroundMask) >> 4)
case n == 22 || n == 25:
attr |= foregroundIntensity
case n == 27:
attr = ((attr & foregroundMask) << 4) | ((attr & backgroundMask) >> 4)
case 30 <= n && n <= 37:
attr &= backgroundMask
if (n-30)&1 != 0 {

4
vendor/modules.txt vendored
View File

@ -32,7 +32,7 @@ github.com/NYTimes/gziphandler
github.com/StackExchange/wmi
# github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e
github.com/armon/circbuf
# github.com/armon/go-metrics v0.3.5-0.20200914211745-2bc64ebd2914
# github.com/armon/go-metrics v0.3.5-0.20201104215618-6fd5a4ddf425
github.com/armon/go-metrics
github.com/armon/go-metrics/circonus
github.com/armon/go-metrics/datadog
@ -316,7 +316,7 @@ github.com/konsorten/go-windows-terminal-sequences
github.com/kr/text
# github.com/linode/linodego v0.7.1
github.com/linode/linodego
# github.com/mattn/go-colorable v0.1.7
# github.com/mattn/go-colorable v0.1.6
github.com/mattn/go-colorable
# github.com/mattn/go-isatty v0.0.12
github.com/mattn/go-isatty