2018-06-08 13:10:15 +01:00
package lib
import (
2018-06-14 13:52:48 +01:00
"reflect"
2018-06-08 13:10:15 +01:00
"time"
metrics "github.com/armon/go-metrics"
"github.com/armon/go-metrics/circonus"
"github.com/armon/go-metrics/datadog"
"github.com/armon/go-metrics/prometheus"
)
2018-06-14 13:52:48 +01:00
// TelemetryConfig is embedded in config.RuntimeConfig and holds the
// configuration variables for go-metrics. It is a separate struct to allow it
// to be exported as JSON and passed to other processes like managed connect
// proxies so they can inherit the agent's telemetry config.
//
// It is in lib package rather than agent/config because we need to use it in
// the shared InitTelemetry functions below, but we can't import agent/config
// due to a dependency cycle.
type TelemetryConfig struct {
	// Disable may be set to true to have InitTelemetry skip initialization
	// and return a nil sink.
	Disable bool

	// Circonus*: see https://github.com/circonus-labs/circonus-gometrics
	// for more details on the various configuration options.
	// Valid configuration combinations:
	//    - CirconusAPIToken
	//      metric management enabled (search for existing check or create a new one)
	//    - CirconusSubmissionURL
	//      metric management disabled (use check with specified submission_url,
	//      broker must be using a public SSL certificate)
	//    - CirconusAPIToken + CirconusSubmissionURL
	//      metric management enabled (use check with specified submission_url)
	//    - CirconusAPIToken + CirconusCheckID
	//      metric management enabled (use check with specified id)

	// CirconusAPIApp is an app name associated with API token.
	// Default: "consul"
	//
	// hcl: telemetry { circonus_api_app = string }
	CirconusAPIApp string `json:"circonus_api_app,omitempty" mapstructure:"circonus_api_app"`

	// CirconusAPIToken is a valid API Token used to create/manage check. If provided,
	// metric management is enabled.
	// Default: none
	//
	// hcl: telemetry { circonus_api_token = string }
	CirconusAPIToken string `json:"circonus_api_token,omitempty" mapstructure:"circonus_api_token"`

	// CirconusAPIURL is the base URL to use for contacting the Circonus API.
	// Default: "https://api.circonus.com/v2"
	//
	// NOTE(review): the json/mapstructure key below is "circonus_apiurl" (no
	// underscore between "api" and "url"), unlike the hcl key. Confirm this is
	// intentional before changing either side.
	//
	// hcl: telemetry { circonus_api_url = string }
	CirconusAPIURL string `json:"circonus_apiurl,omitempty" mapstructure:"circonus_apiurl"`

	// CirconusBrokerID is an explicit broker to use when creating a new check. The numeric portion
	// of broker._cid. If metric management is enabled and neither a Submission URL nor Check ID
	// is provided, an attempt will be made to search for an existing check using Instance ID and
	// Search Tag. If one is not found, a new HTTPTRAP check will be created.
	// Default: use Select Tag if provided, otherwise, a random Enterprise Broker associated
	// with the specified API token or the default Circonus Broker.
	// Default: none
	//
	// hcl: telemetry { circonus_broker_id = string }
	CirconusBrokerID string `json:"circonus_broker_id,omitempty" mapstructure:"circonus_broker_id"`

	// CirconusBrokerSelectTag is a special tag which will be used to select a broker when
	// a Broker ID is not provided. The best use of this is as a hint for which broker
	// should be used based on *where* this particular instance is running.
	// (e.g. a specific geo location or datacenter, dc:sfo)
	// Default: none
	//
	// hcl: telemetry { circonus_broker_select_tag = string }
	CirconusBrokerSelectTag string `json:"circonus_broker_select_tag,omitempty" mapstructure:"circonus_broker_select_tag"`

	// CirconusCheckDisplayName is the name for the check which will be displayed in the Circonus UI.
	// Default: "Consul" (applied by circonusSink when this field is empty)
	//
	// hcl: telemetry { circonus_check_display_name = string }
	CirconusCheckDisplayName string `json:"circonus_check_display_name,omitempty" mapstructure:"circonus_check_display_name"`

	// CirconusCheckForceMetricActivation will force enabling metrics, as they are encountered,
	// if the metric already exists and is NOT active. If check management is enabled, the default
	// behavior is to add new metrics as they are encountered. If the metric already exists in the
	// check, it will *NOT* be activated. This setting overrides that behavior.
	// Default: "false"
	//
	// hcl: telemetry { circonus_check_force_metric_activation = (true|false) }
	CirconusCheckForceMetricActivation string `json:"circonus_check_force_metric_activation,omitempty" mapstructure:"circonus_check_force_metric_activation"`

	// CirconusCheckID is the check id (not check bundle id) from a previously created
	// HTTPTRAP check. The numeric portion of the check._cid field.
	// Default: none
	//
	// hcl: telemetry { circonus_check_id = string }
	CirconusCheckID string `json:"circonus_check_id,omitempty" mapstructure:"circonus_check_id"`

	// CirconusCheckInstanceID serves to uniquely identify the metrics coming from this "instance".
	// It can be used to maintain metric continuity with transient or ephemeral instances as
	// they move around within an infrastructure.
	// Default: hostname:app
	//
	// hcl: telemetry { circonus_check_instance_id = string }
	CirconusCheckInstanceID string `json:"circonus_check_instance_id,omitempty" mapstructure:"circonus_check_instance_id"`

	// CirconusCheckSearchTag is a special tag which, when coupled with the instance id, helps to
	// narrow down the search results when neither a Submission URL or Check ID is provided.
	// Default: service:app (e.g. service:consul)
	//
	// hcl: telemetry { circonus_check_search_tag = string }
	CirconusCheckSearchTag string `json:"circonus_check_search_tag,omitempty" mapstructure:"circonus_check_search_tag"`

	// CirconusCheckTags holds the tags to apply to the check. The value is
	// handed unchanged to the Circonus check manager as its Check.Tags setting.
	// NOTE(review): the Consul telemetry docs describe this as a comma
	// separated list with no default; confirm against the Circonus client.
	//
	// hcl: telemetry { circonus_check_tags = string }
	CirconusCheckTags string `json:"circonus_check_tags,omitempty" mapstructure:"circonus_check_tags"`

	// CirconusSubmissionInterval is the interval at which metrics are submitted to Circonus.
	// Default: 10s
	//
	// hcl: telemetry { circonus_submission_interval = "duration" }
	CirconusSubmissionInterval string `json:"circonus_submission_interval,omitempty" mapstructure:"circonus_submission_interval"`

	// CirconusSubmissionURL is the check.config.submission_url field from a
	// previously created HTTPTRAP check.
	// Default: none
	//
	// hcl: telemetry { circonus_submission_url = string }
	CirconusSubmissionURL string `json:"circonus_submission_url,omitempty" mapstructure:"circonus_submission_url"`

	// DisableCompatOneNine is a flag to stop emitting metrics that have been deprecated in version 1.9.
	//
	// hcl: telemetry { disable_compat_1.9 = (true|false) }
	DisableCompatOneNine bool `json:"disable_compat_1.9,omitempty" mapstructure:"disable_compat_1.9"`

	// DisableHostname will disable hostname prefixing for all metrics.
	//
	// hcl: telemetry { disable_hostname = (true|false) }
	DisableHostname bool `json:"disable_hostname,omitempty" mapstructure:"disable_hostname"`

	// DogstatsdAddr is the address of a dogstatsd instance. If provided,
	// metrics will be sent to that instance.
	//
	// hcl: telemetry { dogstatsd_addr = string }
	DogstatsdAddr string `json:"dogstatsd_addr,omitempty" mapstructure:"dogstatsd_addr"`

	// DogstatsdTags are the global tags that should be sent with each packet to dogstatsd.
	// It is a list of strings, where each string looks like "my_tag_name:my_tag_value".
	//
	// hcl: telemetry { dogstatsd_tags = []string }
	DogstatsdTags []string `json:"dogstatsd_tags,omitempty" mapstructure:"dogstatsd_tags"`

	// PrometheusRetentionTime is the retention time for prometheus metrics if greater than 0.
	// A value of 0 (or less) disables Prometheus support. Regarding Prometheus, it is considered
	// a good practice to put large values here (such as a few days), and at least the interval
	// between prometheus requests.
	//
	// hcl: telemetry { prometheus_retention_time = "duration" }
	PrometheusRetentionTime time.Duration `json:"prometheus_retention_time,omitempty" mapstructure:"prometheus_retention_time"`

	// FilterDefault is the default for whether to allow a metric that's not
	// covered by the filter.
	//
	// hcl: telemetry { filter_default = (true|false) }
	FilterDefault bool `json:"filter_default,omitempty" mapstructure:"filter_default"`

	// AllowedPrefixes is a list of filter rules to apply for allowing metrics
	// by prefix. Use the 'prefix_filter' option and prefix rules with '+' to be
	// included.
	//
	// hcl: telemetry { prefix_filter = []string{"+<expr>", "+<expr>", ...} }
	AllowedPrefixes []string `json:"allowed_prefixes,omitempty" mapstructure:"allowed_prefixes"`

	// BlockedPrefixes is a list of filter rules to apply for blocking metrics
	// by prefix. Use the 'prefix_filter' option and prefix rules with '-' to be
	// excluded.
	//
	// hcl: telemetry { prefix_filter = []string{"-<expr>", "-<expr>", ...} }
	BlockedPrefixes []string `json:"blocked_prefixes,omitempty" mapstructure:"blocked_prefixes"`

	// MetricsPrefix is the prefix used to write stats values to.
	// Default: "consul."
	//
	// hcl: telemetry { metrics_prefix = string }
	MetricsPrefix string `json:"metrics_prefix,omitempty" mapstructure:"metrics_prefix"`

	// StatsdAddr is the address of a statsd instance. If provided,
	// metrics will be sent to that instance.
	//
	// hcl: telemetry { statsd_address = string }
	StatsdAddr string `json:"statsd_address,omitempty" mapstructure:"statsd_address"`

	// StatsiteAddr is the address of a statsite instance. If provided,
	// metrics will be streamed to that instance.
	//
	// hcl: telemetry { statsite_address = string }
	StatsiteAddr string `json:"statsite_address,omitempty" mapstructure:"statsite_address"`
}
// MergeDefaults copies any non-zero field from defaults into the current
// config.
func ( c * TelemetryConfig ) MergeDefaults ( defaults * TelemetryConfig ) {
if defaults == nil {
return
}
cfgPtrVal := reflect . ValueOf ( c )
cfgVal := cfgPtrVal . Elem ( )
otherVal := reflect . ValueOf ( * defaults )
for i := 0 ; i < cfgVal . NumField ( ) ; i ++ {
f := cfgVal . Field ( i )
if ! f . IsValid ( ) || ! f . CanSet ( ) {
continue
}
// See if the current value is a zero-value, if _not_ skip it
//
// No built in way to check for zero-values for all types so only
// implementing this for the types we actually have for now. Test failure
// should catch the case where we add new types later.
switch f . Kind ( ) {
case reflect . Slice :
if ! f . IsNil ( ) {
continue
}
case reflect . Int , reflect . Int64 : // time.Duration == int64
if f . Int ( ) != 0 {
continue
}
case reflect . String :
if f . String ( ) != "" {
continue
}
case reflect . Bool :
2019-07-20 15:37:19 +02:00
if f . Bool ( ) {
2018-06-14 13:52:48 +01:00
continue
}
default :
// Needs implementing, should be caught by tests.
continue
}
// It's zero, copy it from defaults
f . Set ( otherVal . Field ( i ) )
}
}
func statsiteSink ( cfg TelemetryConfig , hostname string ) ( metrics . MetricSink , error ) {
addr := cfg . StatsiteAddr
2018-06-08 13:10:15 +01:00
if addr == "" {
return nil , nil
}
return metrics . NewStatsiteSink ( addr )
}
2018-06-14 13:52:48 +01:00
func statsdSink ( cfg TelemetryConfig , hostname string ) ( metrics . MetricSink , error ) {
addr := cfg . StatsdAddr
2018-06-08 13:10:15 +01:00
if addr == "" {
return nil , nil
}
return metrics . NewStatsdSink ( addr )
}
2018-06-14 13:52:48 +01:00
func dogstatdSink ( cfg TelemetryConfig , hostname string ) ( metrics . MetricSink , error ) {
addr := cfg . DogstatsdAddr
2018-06-08 13:10:15 +01:00
if addr == "" {
return nil , nil
}
sink , err := datadog . NewDogStatsdSink ( addr , hostname )
if err != nil {
return nil , err
}
2018-06-14 13:52:48 +01:00
sink . SetTags ( cfg . DogstatsdTags )
2018-06-08 13:10:15 +01:00
return sink , nil
}
2018-06-14 13:52:48 +01:00
func prometheusSink ( cfg TelemetryConfig , hostname string ) ( metrics . MetricSink , error ) {
if cfg . PrometheusRetentionTime . Nanoseconds ( ) < 1 {
2018-06-08 13:10:15 +01:00
return nil , nil
}
2020-11-05 12:20:05 -08:00
// TODO(kit) define these in vars in the package/file they're used
gaugeDefs := [ ] prometheus . GaugeDefinition {
{
Name : [ ] string { "consul" , "autopilot" , "healthy" } ,
Help : "This tracks the overall health of the local server cluster. 1 if all servers are healthy, 0 if one or more are unhealthy." ,
} ,
}
// TODO(kit) define these in vars in the package/file they're used
counterDefs := [ ] prometheus . CounterDefinition {
{
Name : [ ] string { "consul" , "raft" , "apply" } ,
Help : "This counts the number of Raft transactions occurring over the interval." ,
} ,
{
Name : [ ] string { "consul" , "raft" , "state" , "candidate" } ,
Help : "This increments whenever a Consul server starts an election." ,
} ,
{
Name : [ ] string { "consul" , "raft" , "state" , "leader" } ,
Help : "This increments whenever a Consul server becomes a leader." ,
} ,
{
Name : [ ] string { "consul" , "client" , "api" , "catalog_register" } ,
Help : "Increments whenever a Consul agent receives a catalog register request." ,
} ,
{
Name : [ ] string { "consul" , "runtime" , "total_gc_pause_ns" } ,
Help : "Number of nanoseconds consumed by stop-the-world garbage collection (GC) pauses since Consul started." ,
} ,
{
Name : [ ] string { "consul" , "client" , "rpc" } ,
Help : "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server." ,
} ,
{
Name : [ ] string { "consul" , "client" , "rpc" , "exceeded" } ,
Help : "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server gets rate limited by that agent's limits configuration." ,
} ,
{
Name : [ ] string { "consul" , "client" , "rpc" , "failed" } ,
Help : "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server and fails." ,
} ,
}
// TODO(kit) define these in vars in the package/file they're used
summaryDefs := [ ] prometheus . SummaryDefinition {
{
Name : [ ] string { "consul" , "kvs" , "apply" } ,
Help : "This measures the time it takes to complete an update to the KV store." ,
} ,
{
Name : [ ] string { "consul" , "txn" , "apply" } ,
Help : "This measures the time spent applying a transaction operation." ,
} ,
{
Name : [ ] string { "consul" , "raft" , "commitTime" } ,
Help : "This measures the time it takes to commit a new entry to the Raft log on the leader." ,
} ,
{
Name : [ ] string { "consul" , "raft" , "leader" , "lastContact" } ,
Help : "Measures the time since the leader was last able to contact the follower nodes when checking its leader lease." ,
} ,
}
2018-06-08 13:10:15 +01:00
prometheusOpts := prometheus . PrometheusOpts {
2020-11-05 12:20:05 -08:00
Expiration : cfg . PrometheusRetentionTime ,
GaugeDefinitions : gaugeDefs ,
CounterDefinitions : counterDefs ,
SummaryDefinitions : summaryDefs ,
2018-06-08 13:10:15 +01:00
}
sink , err := prometheus . NewPrometheusSinkFrom ( prometheusOpts )
if err != nil {
return nil , err
}
return sink , nil
}
2018-06-14 13:52:48 +01:00
func circonusSink ( cfg TelemetryConfig , hostname string ) ( metrics . MetricSink , error ) {
token := cfg . CirconusAPIToken
url := cfg . CirconusSubmissionURL
2018-06-08 13:10:15 +01:00
if token == "" && url == "" {
return nil , nil
}
conf := & circonus . Config { }
2018-06-14 13:52:48 +01:00
conf . Interval = cfg . CirconusSubmissionInterval
2018-06-08 13:10:15 +01:00
conf . CheckManager . API . TokenKey = token
2018-06-14 13:52:48 +01:00
conf . CheckManager . API . TokenApp = cfg . CirconusAPIApp
conf . CheckManager . API . URL = cfg . CirconusAPIURL
2018-06-08 13:10:15 +01:00
conf . CheckManager . Check . SubmissionURL = url
2018-06-14 13:52:48 +01:00
conf . CheckManager . Check . ID = cfg . CirconusCheckID
conf . CheckManager . Check . ForceMetricActivation = cfg . CirconusCheckForceMetricActivation
conf . CheckManager . Check . InstanceID = cfg . CirconusCheckInstanceID
conf . CheckManager . Check . SearchTag = cfg . CirconusCheckSearchTag
conf . CheckManager . Check . DisplayName = cfg . CirconusCheckDisplayName
conf . CheckManager . Check . Tags = cfg . CirconusCheckTags
conf . CheckManager . Broker . ID = cfg . CirconusBrokerID
conf . CheckManager . Broker . SelectTag = cfg . CirconusBrokerSelectTag
2018-06-08 13:10:15 +01:00
if conf . CheckManager . Check . DisplayName == "" {
conf . CheckManager . Check . DisplayName = "Consul"
}
if conf . CheckManager . API . TokenApp == "" {
conf . CheckManager . API . TokenApp = "consul"
}
if conf . CheckManager . Check . SearchTag == "" {
conf . CheckManager . Check . SearchTag = "service:consul"
}
sink , err := circonus . NewCirconusSink ( conf )
if err != nil {
return nil , err
}
sink . Start ( )
return sink , nil
}
2018-06-13 16:59:22 +01:00
// InitTelemetry configures go-metrics based on map of telemetry config
2018-06-14 13:52:48 +01:00
// values as returned by Runtimecfg.Config().
func InitTelemetry ( cfg TelemetryConfig ) ( * metrics . InmemSink , error ) {
2020-08-17 14:12:04 -04:00
if cfg . Disable {
return nil , nil
}
2018-06-08 13:10:15 +01:00
// Setup telemetry
// Aggregate on 10 second intervals for 1 minute. Expose the
// metrics over stderr when there is a SIGUSR1 received.
memSink := metrics . NewInmemSink ( 10 * time . Second , time . Minute )
metrics . DefaultInmemSignal ( memSink )
2018-06-14 13:52:48 +01:00
metricsConf := metrics . DefaultConfig ( cfg . MetricsPrefix )
metricsConf . EnableHostname = ! cfg . DisableHostname
metricsConf . FilterDefault = cfg . FilterDefault
metricsConf . AllowedPrefixes = cfg . AllowedPrefixes
metricsConf . BlockedPrefixes = cfg . BlockedPrefixes
2018-06-08 13:10:15 +01:00
var sinks metrics . FanoutSink
2020-06-23 13:18:22 -04:00
addSink := func ( fn func ( TelemetryConfig , string ) ( metrics . MetricSink , error ) ) error {
2018-06-08 13:10:15 +01:00
s , err := fn ( cfg , metricsConf . HostName )
if err != nil {
return err
}
if s != nil {
sinks = append ( sinks , s )
}
return nil
}
2020-06-23 13:18:22 -04:00
if err := addSink ( statsiteSink ) ; err != nil {
2018-06-08 13:10:15 +01:00
return nil , err
}
2020-06-23 13:18:22 -04:00
if err := addSink ( statsdSink ) ; err != nil {
2018-06-08 13:10:15 +01:00
return nil , err
}
2020-06-23 13:18:22 -04:00
if err := addSink ( dogstatdSink ) ; err != nil {
2018-06-08 13:10:15 +01:00
return nil , err
}
2020-06-23 13:18:22 -04:00
if err := addSink ( circonusSink ) ; err != nil {
2018-06-08 13:10:15 +01:00
return nil , err
}
2020-06-23 13:18:22 -04:00
if err := addSink ( prometheusSink ) ; err != nil {
2018-06-08 13:10:15 +01:00
return nil , err
}
if len ( sinks ) > 0 {
sinks = append ( sinks , memSink )
metrics . NewGlobal ( metricsConf , sinks )
} else {
metricsConf . EnableHostname = false
metrics . NewGlobal ( metricsConf , memSink )
}
return memSink , nil
}