mirror of
https://github.com/status-im/consul.git
synced 2025-01-12 14:55:02 +00:00
0f48b7af5e
* Add Enabler interface to turn sink on/off * Use h for hcpProviderImpl vars, fix PR feeback and fix errors * Keep nil check in exporter and fix tests * Clarify comment and fix function name * Use disable instead of enable * Fix errors nit in otlp_transform * Add test for refreshInterval of updateConfig * Add disabled field in MetricsConfig struct * Fix PR feedback: improve comment and remove double colons * Fix deps test which requires a maybe * Update hcp-sdk-go to v0.61.0 * use disabled flag in telemetry_config.go * Handle 4XX errors in telemetry_provider * Fix deps test * Check 4XX instead * Run make go-mod-tidy
295 lines
9.9 KiB
Go
295 lines
9.9 KiB
Go
// Copyright (c) HashiCorp, Inc.
|
|
// SPDX-License-Identifier: BUSL-1.1
|
|
|
|
package telemetry
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"errors"
|
|
"regexp"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
gometrics "github.com/armon/go-metrics"
|
|
"go.opentelemetry.io/otel/attribute"
|
|
otelmetric "go.opentelemetry.io/otel/metric"
|
|
otelsdk "go.opentelemetry.io/otel/sdk/metric"
|
|
"go.opentelemetry.io/otel/sdk/resource"
|
|
|
|
"github.com/hashicorp/go-hclog"
|
|
)
|
|
|
|
const (
|
|
// defaultExportInterval is a default time interval between export of aggregated metrics.
|
|
// At the time of writing this is the same as the otelsdk.Reader's default export interval.
|
|
defaultExportInterval = 60 * time.Second
|
|
|
|
// defaultExportTimeout is the time the otelsdk.Reader waits on an export before cancelling it.
|
|
// At the time of writing this is the same as the otelsdk.Reader's default export timeout default.
|
|
//
|
|
// note: in practice we are more likely to hit the http.Client Timeout in telemetry.MetricsClient.
|
|
// That http.Client Timeout is 15 seconds (at the time of writing). The otelsdk.Reader will use
|
|
// defaultExportTimeout for the entire Export call, but since the http.Client's Timeout is 15s,
|
|
// we should hit that first before reaching the 30 second timeout set here.
|
|
defaultExportTimeout = 30 * time.Second
|
|
)
|
|
|
|
// Disabled should be implemented to turn on/off metrics processing
|
|
type Disabled interface {
|
|
// IsDisabled() can return true disallow the sink from accepting metrics.
|
|
IsDisabled() bool
|
|
}
|
|
|
|
// ConfigProvider is required to provide custom metrics processing.
|
|
type ConfigProvider interface {
|
|
Disabled
|
|
// GetLabels should return a set of OTEL attributes added by default all metrics.
|
|
GetLabels() map[string]string
|
|
|
|
// GetFilters should return filtesr that are required to enable metric processing.
|
|
// Filters act as an allowlist to collect only the required metrics.
|
|
GetFilters() *regexp.Regexp
|
|
}
|
|
|
|
// OTELSinkOpts is used to provide configuration when initializing an OTELSink using NewOTELSink.
|
|
type OTELSinkOpts struct {
|
|
Reader otelsdk.Reader
|
|
ConfigProvider ConfigProvider
|
|
}
|
|
|
|
// OTELSink captures and aggregates telemetry data as per the OpenTelemetry (OTEL) specification.
|
|
// Metric data is exported in OpenTelemetry Protocol (OTLP) wire format.
|
|
// This should be used as a Go Metrics backend, as it implements the MetricsSink interface.
|
|
type OTELSink struct {
|
|
// spaceReplacer cleans the flattened key by removing any spaces.
|
|
spaceReplacer *strings.Replacer
|
|
logger hclog.Logger
|
|
cfgProvider ConfigProvider
|
|
|
|
// meterProvider is an OTEL MeterProvider, the entrypoint to the OTEL Metrics SDK.
|
|
// It handles reading/export of aggregated metric data.
|
|
// It enables creation and usage of an OTEL Meter.
|
|
meterProvider *otelsdk.MeterProvider
|
|
|
|
// meter is an OTEL Meter, which enables the creation of OTEL instruments.
|
|
meter *otelmetric.Meter
|
|
|
|
// Instrument stores contain an OTEL Instrument per metric name (<name, instrument>)
|
|
// for each gauge, counter and histogram types.
|
|
// An instrument allows us to record a measurement for a particular metric, and continuously aggregates metrics.
|
|
// We lazy load the creation of these intruments until a metric is seen, and use them repeatedly to record measurements.
|
|
gaugeInstruments map[string]otelmetric.Float64ObservableGauge
|
|
counterInstruments map[string]otelmetric.Float64Counter
|
|
histogramInstruments map[string]otelmetric.Float64Histogram
|
|
|
|
// gaugeStore is required to hold last-seen values of gauges
|
|
// This is a workaround, as OTEL currently does not have synchronous gauge instruments.
|
|
// It only allows the registration of "callbacks", which obtain values when the callback is called.
|
|
// We must hold gauge values until the callback is called, when the measurement is exported, and can be removed.
|
|
gaugeStore *gaugeStore
|
|
|
|
mutex sync.Mutex
|
|
}
|
|
|
|
// NewOTELReader returns a configured OTEL PeriodicReader to export metrics every X seconds.
|
|
// It configures the reader with a custom OTELExporter with a MetricsClient to transform and export
|
|
// metrics in OTLP format to an external url.
|
|
func NewOTELReader(client MetricsClient, endpointProvider EndpointProvider) otelsdk.Reader {
|
|
return otelsdk.NewPeriodicReader(
|
|
newOTELExporter(client, endpointProvider),
|
|
otelsdk.WithInterval(defaultExportInterval),
|
|
otelsdk.WithTimeout(defaultExportTimeout),
|
|
)
|
|
}
|
|
|
|
// NewOTELSink returns a sink which fits the Go Metrics MetricsSink interface.
|
|
// It sets up a MeterProvider and Meter, key pieces of the OTEL Metrics SDK which
|
|
// enable us to create OTEL Instruments to record measurements.
|
|
func NewOTELSink(ctx context.Context, opts *OTELSinkOpts) (*OTELSink, error) {
|
|
if opts.Reader == nil {
|
|
return nil, errors.New("ferror: provide valid reader")
|
|
}
|
|
|
|
if opts.ConfigProvider == nil {
|
|
return nil, errors.New("ferror: provide valid config provider")
|
|
}
|
|
|
|
logger := hclog.FromContext(ctx).Named("otel_sink")
|
|
|
|
// Setup OTEL Metrics SDK to aggregate, convert and export metrics periodically.
|
|
res := resource.NewSchemaless()
|
|
meterProvider := otelsdk.NewMeterProvider(otelsdk.WithResource(res), otelsdk.WithReader(opts.Reader))
|
|
meter := meterProvider.Meter("github.com/hashicorp/consul/agent/hcp/telemetry")
|
|
|
|
return &OTELSink{
|
|
cfgProvider: opts.ConfigProvider,
|
|
spaceReplacer: strings.NewReplacer(" ", "_"),
|
|
logger: logger,
|
|
meterProvider: meterProvider,
|
|
meter: &meter,
|
|
gaugeStore: NewGaugeStore(),
|
|
gaugeInstruments: make(map[string]otelmetric.Float64ObservableGauge, 0),
|
|
counterInstruments: make(map[string]otelmetric.Float64Counter, 0),
|
|
histogramInstruments: make(map[string]otelmetric.Float64Histogram, 0),
|
|
}, nil
|
|
}
|
|
|
|
// SetGauge emits a Consul gauge metric.
|
|
func (o *OTELSink) SetGauge(key []string, val float32) {
|
|
o.SetGaugeWithLabels(key, val, nil)
|
|
}
|
|
|
|
// AddSample emits a Consul histogram metric.
|
|
func (o *OTELSink) AddSample(key []string, val float32) {
|
|
o.AddSampleWithLabels(key, val, nil)
|
|
}
|
|
|
|
// IncrCounter emits a Consul counter metric.
|
|
func (o *OTELSink) IncrCounter(key []string, val float32) {
|
|
o.IncrCounterWithLabels(key, val, nil)
|
|
}
|
|
|
|
// AddSampleWithLabels emits a Consul gauge metric that gets
|
|
// registed by an OpenTelemetry Histogram instrument.
|
|
func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometrics.Label) {
|
|
if o.cfgProvider.IsDisabled() {
|
|
return
|
|
}
|
|
|
|
k := o.flattenKey(key)
|
|
if !o.allowedMetric(k) {
|
|
return
|
|
}
|
|
|
|
// Set value in global Gauge store.
|
|
o.gaugeStore.Set(k, float64(val), o.labelsToAttributes(labels))
|
|
|
|
o.mutex.Lock()
|
|
defer o.mutex.Unlock()
|
|
|
|
// If instrument does not exist, create it and register callback to emit last value in global Gauge store.
|
|
if _, ok := o.gaugeInstruments[k]; !ok {
|
|
// The registration of a callback only needs to happen once, when the instrument is created.
|
|
// The callback will be triggered every export cycle for that metric.
|
|
// It must be explicitly de-registered to be removed (which we do not do), to ensure new gauge values are exported every cycle.
|
|
inst, err := (*o.meter).Float64ObservableGauge(k, otelmetric.WithFloat64Callback(o.gaugeStore.gaugeCallback(k)))
|
|
if err != nil {
|
|
o.logger.Error("Failed to create gauge instrument", "error", err)
|
|
return
|
|
}
|
|
o.gaugeInstruments[k] = inst
|
|
}
|
|
}
|
|
|
|
// AddSampleWithLabels emits a Consul sample metric that gets registed by an OpenTelemetry Histogram instrument.
|
|
func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gometrics.Label) {
|
|
if o.cfgProvider.IsDisabled() {
|
|
return
|
|
}
|
|
|
|
k := o.flattenKey(key)
|
|
if !o.allowedMetric(k) {
|
|
return
|
|
}
|
|
|
|
o.mutex.Lock()
|
|
defer o.mutex.Unlock()
|
|
|
|
inst, ok := o.histogramInstruments[k]
|
|
if !ok {
|
|
histogram, err := (*o.meter).Float64Histogram(k)
|
|
if err != nil {
|
|
o.logger.Error("Failed create histogram instrument", "error", err)
|
|
return
|
|
}
|
|
inst = histogram
|
|
o.histogramInstruments[k] = inst
|
|
}
|
|
|
|
attrs := o.labelsToAttributes(labels)
|
|
inst.Record(context.TODO(), float64(val), otelmetric.WithAttributes(attrs...))
|
|
}
|
|
|
|
// IncrCounterWithLabels emits a Consul counter metric that gets registed by an OpenTelemetry Histogram instrument.
|
|
func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gometrics.Label) {
|
|
if o.cfgProvider.IsDisabled() {
|
|
return
|
|
}
|
|
|
|
k := o.flattenKey(key)
|
|
if !o.allowedMetric(k) {
|
|
return
|
|
}
|
|
|
|
o.mutex.Lock()
|
|
defer o.mutex.Unlock()
|
|
|
|
inst, ok := o.counterInstruments[k]
|
|
if !ok {
|
|
counter, err := (*o.meter).Float64Counter(k)
|
|
if err != nil {
|
|
o.logger.Error("Failed to create counter instrument:", "error", err)
|
|
return
|
|
}
|
|
|
|
inst = counter
|
|
o.counterInstruments[k] = inst
|
|
}
|
|
|
|
attrs := o.labelsToAttributes(labels)
|
|
inst.Add(context.TODO(), float64(val), otelmetric.WithAttributes(attrs...))
|
|
}
|
|
|
|
// EmitKey unsupported.
|
|
func (o *OTELSink) EmitKey(key []string, val float32) {}
|
|
|
|
// flattenKey key along with its labels.
|
|
func (o *OTELSink) flattenKey(parts []string) string {
|
|
buf := &bytes.Buffer{}
|
|
joined := strings.Join(parts, ".")
|
|
|
|
o.spaceReplacer.WriteString(buf, joined)
|
|
|
|
return buf.String()
|
|
}
|
|
|
|
// filter checks the filter allowlist, if it exists, to verify if this metric should be recorded.
|
|
func (o *OTELSink) allowedMetric(key string) bool {
|
|
if filters := o.cfgProvider.GetFilters(); filters != nil {
|
|
return filters.MatchString(key)
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
// labelsToAttributes converts go metrics and provider labels into OTEL format []attributes.KeyValue
|
|
func (o *OTELSink) labelsToAttributes(goMetricsLabels []gometrics.Label) []attribute.KeyValue {
|
|
providerLabels := o.cfgProvider.GetLabels()
|
|
|
|
length := len(goMetricsLabels) + len(providerLabels)
|
|
if length == 0 {
|
|
return []attribute.KeyValue{}
|
|
}
|
|
|
|
attrs := make([]attribute.KeyValue, 0, length)
|
|
// Convert provider labels to OTEL attributes.
|
|
for _, label := range goMetricsLabels {
|
|
attrs = append(attrs, attribute.KeyValue{
|
|
Key: attribute.Key(label.Name),
|
|
Value: attribute.StringValue(label.Value),
|
|
})
|
|
}
|
|
|
|
// Convert provider labels to OTEL attributes.
|
|
for k, v := range providerLabels {
|
|
attrs = append(attrs, attribute.KeyValue{
|
|
Key: attribute.Key(k),
|
|
Value: attribute.StringValue(v),
|
|
})
|
|
}
|
|
|
|
return attrs
|
|
}
|