2023-03-28 18:39:22 +00:00
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
2020-08-08 01:08:43 +00:00
package agent
import (
2023-05-30 18:43:29 +00:00
"context"
2020-08-08 01:08:43 +00:00
"fmt"
"io"
"net"
2020-10-14 20:47:16 +00:00
"sync"
2020-08-08 01:08:43 +00:00
"time"
HCP Telemetry Feature (#17460)
* Move hcp client to subpackage hcpclient (#16800)
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] OTELExporter (#17128)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* [HCP Observability] OTELSink (#17159)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO, rebase and cleanup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Separate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Fix nits
* [HCP Observability] Init OTELSink in Telemetry (#17162)
* Move hcp client to subpackage hcpclient (#16800)
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* [HCP Observability] OTELExporter (#17128)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Initialize OTELSink with sync.Map for all the instrument stores.
* Added telemetry agent to client and init sink in deps
* Fixed client
* Initialize sink in deps
* init sink in telemetry library
* Init deps before telemetry
* Use concrete telemetry.OtelSink type
* add /v1/metrics
* Avoid returning err for telemetry init
* move sink init within the IsCloudEnabled()
* Use HCPSinkOpts in deps instead
* update golden test for configuration file
* Switch to using extra sinks in the telemetry library
* keep name MetricsConfig
* fix log in verifyCCMRegistration
* Set logger in context
* pass around MetricSink in deps
* Fix imports
* Rebased onto otel sink pr
* Fix URL in test
* [HCP Observability] OTELSink (#17159)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Fix nits
* pass extraSinks as function param instead
* Add default interval as package export
* remove verifyCCM func
* Add clusterID
* Fix import and add t.Parallel() for missing tests
* Kick Vercel CI
* Remove scheme from endpoint path, and fix error logging
* return metrics.MetricSink for sink method
* Update SDK
* [HCP Observability] Metrics filtering and Labels in Go Metrics sink (#17184)
* Move hcp client to subpackage hcpclient (#16800)
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* [HCP Observability] OTELExporter (#17128)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Initialize OTELSink with sync.Map for all the instrument stores.
* Added telemetry agent to client and init sink in deps
* Fixed client
* Initalize sink in deps
* init sink in telemetry library
* Init deps before telemetry
* Use concrete telemetry.OtelSink type
* add /v1/metrics
* Avoid returning err for telemetry init
* move sink init within the IsCloudEnabled()
* Use HCPSinkOpts in deps instead
* update golden test for configuration file
* Switch to using extra sinks in the telemetry library
* keep name MetricsConfig
* fix log in verifyCCMRegistration
* Set logger in context
* pass around MetricSink in deps
* Fix imports
* Rebased onto otel sink pr
* Fix URL in test
* [HCP Observability] OTELSink (#17159)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Fix nits
* pass extraSinks as function param instead
* Add default interval as package export
* remove verifyCCM func
* Add clusterID
* Fix import and add t.Parallel() for missing tests
* Kick Vercel CI
* Remove scheme from endpoint path, and fix error logging
* return metrics.MetricSink for sink method
* Update SDK
* Added telemetry agent to client and init sink in deps
* Add node_id and __replica__ default labels
* add function for default labels and set x-hcp-resource-id
* Fix labels tests
* Commit suggestion for getDefaultLabels
Co-authored-by: Joshua Timmons <joshua.timmons1@gmail.com>
* Fixed server.id, and t.Parallel()
* Make defaultLabels a method on the TelemetryConfig object
* Rename FilterList to lowercase filterList
* Cleanup filter implementation by combining regex into a single one, and making the type lowercase
* Fix append
* use regex directly for filters
* Fix x-resource-id test to use mocked value
* Fix log.Error formats
* Forgot the len(opts.Label) optimization
* Use cfg.NodeID instead
---------
Co-authored-by: Joshua Timmons <joshua.timmons1@gmail.com>
* remove replic tag (#17484)
* [HCP Observability] Add custom metrics for OTEL sink, improve logging, upgrade modules and cleanup metrics client (#17455)
* Add custom metrics for Exporter and transform operations
* Improve deps logging
Run go mod tidy
* Upgrade SDK and OTEL
* Remove the partial success implementation and check for HTTP status code in metrics client
* Add x-channel
* cleanup logs in deps.go based on PR feedback
* Change to debug log and lowercase
* address test operation feedback
* use GetHumanVersion on version
* Fix error wrapping
* Fix metric names
* [HCP Observability] Turn off retries for now until dynamically configurable (#17496)
* Remove retries for now until dynamic configuration is possible
* Clarify comment
* Update changelog
* improve changelog
---------
Co-authored-by: Joshua Timmons <joshua.timmons1@gmail.com>
2023-05-29 20:11:08 +00:00
"github.com/armon/go-metrics"
2020-11-13 02:12:12 +00:00
"github.com/armon/go-metrics/prometheus"
2020-10-05 21:31:35 +00:00
"github.com/hashicorp/go-hclog"
2023-02-23 14:07:17 +00:00
wal "github.com/hashicorp/raft-wal"
"github.com/hashicorp/raft-wal/verifier"
2020-10-05 21:31:35 +00:00
"google.golang.org/grpc/grpclog"
2020-08-08 01:08:43 +00:00
autoconf "github.com/hashicorp/consul/agent/auto-config"
"github.com/hashicorp/consul/agent/cache"
"github.com/hashicorp/consul/agent/config"
2020-09-14 22:31:07 +00:00
"github.com/hashicorp/consul/agent/consul"
2021-02-25 21:22:30 +00:00
"github.com/hashicorp/consul/agent/consul/fsm"
2023-01-06 18:33:53 +00:00
"github.com/hashicorp/consul/agent/consul/rate"
proxycfg: server-local config entry data sources
This is the OSS portion of enterprise PR 2056.
This commit provides server-local implementations of the proxycfg.ConfigEntry
and proxycfg.ConfigEntryList interfaces, that source data from streaming events.
It makes use of the LocalMaterializer type introduced for peering replication,
adding the necessary support for authorization.
It also adds support for "wildcard" subscriptions (within a topic) to the event
publisher, as this is needed to fetch service-resolvers for all services when
configuring mesh gateways.
Currently, events will be emitted for just the ingress-gateway, service-resolver,
and mesh config entry types, as these are the only entries required by proxycfg
— the events will be emitted on topics named IngressGateway, ServiceResolver,
and MeshConfig topics respectively.
Though these events will only be consumed "locally" for now, they can also be
consumed via the gRPC endpoint (confirmed using grpcurl) so using them from
client agents should be a case of swapping the LocalMaterializer for an
RPCMaterializer.
2022-07-01 15:09:47 +00:00
"github.com/hashicorp/consul/agent/consul/stream"
2021-02-25 21:22:30 +00:00
"github.com/hashicorp/consul/agent/consul/usagemetrics"
2022-09-09 14:02:01 +00:00
"github.com/hashicorp/consul/agent/consul/xdscapacity"
"github.com/hashicorp/consul/agent/grpc-external/limiter"
2022-10-11 22:00:32 +00:00
grpcInt "github.com/hashicorp/consul/agent/grpc-internal"
2023-01-05 10:21:27 +00:00
"github.com/hashicorp/consul/agent/grpc-internal/balancer"
2022-07-13 15:33:48 +00:00
"github.com/hashicorp/consul/agent/grpc-internal/resolver"
2022-10-11 22:00:32 +00:00
grpcWare "github.com/hashicorp/consul/agent/grpc-middleware"
2022-10-18 19:05:09 +00:00
"github.com/hashicorp/consul/agent/hcp"
2021-02-25 21:22:30 +00:00
"github.com/hashicorp/consul/agent/local"
2020-08-08 01:08:43 +00:00
"github.com/hashicorp/consul/agent/pool"
2020-08-27 15:23:52 +00:00
"github.com/hashicorp/consul/agent/router"
2022-04-06 21:33:05 +00:00
"github.com/hashicorp/consul/agent/rpc/middleware"
2021-02-25 21:22:30 +00:00
"github.com/hashicorp/consul/agent/submatview"
2020-08-08 01:08:43 +00:00
"github.com/hashicorp/consul/agent/token"
2021-05-14 18:59:13 +00:00
"github.com/hashicorp/consul/agent/xds"
2020-08-08 01:08:43 +00:00
"github.com/hashicorp/consul/ipaddr"
"github.com/hashicorp/consul/lib"
2023-05-30 18:43:29 +00:00
"github.com/hashicorp/consul/lib/hoststats"
2020-08-08 01:08:43 +00:00
"github.com/hashicorp/consul/logging"
"github.com/hashicorp/consul/tlsutil"
)
// BaseDeps is the dependency bundle returned by NewBaseDeps. It embeds
// consul.Deps, so that type's fields (for example the Logger that NewBaseDeps
// assigns) are promoted onto BaseDeps.
//
// TODO: BaseDeps should be renamed in the future once more of Agent.Start
// has been moved out in front of Agent.New, and we can better see the setup
// dependencies.
type BaseDeps struct {
2020-09-14 22:31:07 +00:00
consul . Deps // TODO: un-embed
2022-05-19 20:03:46 +00:00
// NOTE(review): presumably the runtime configuration produced by the
// ConfigLoader; the assignment is not visible in this part of the file.
RuntimeConfig * config . RuntimeConfig
// NOTE(review): presumably the result of telemetry initialisation; confirm
// against the remainder of NewBaseDeps.
MetricsConfig * lib . MetricsConfig
AutoConfig * autoconf . AutoConfig // TODO: use an interface
Cache * cache . Cache
ViewStore * submatview . Store
// WatchedFiles is copied by NewBaseDeps from the WatchedFiles field of the
// config load result.
WatchedFiles [ ] string
2023-02-28 10:18:38 +00:00
// NOTE(review): these look like cleanup callbacks that undo a balancer and
// resolver registration; confirm where they are assigned and invoked.
deregisterBalancer , deregisterResolver func ( )
2023-05-30 18:43:29 +00:00
// NOTE(review): presumably cancels the context driving the host stats
// collector; confirm against the code that sets it.
stopHostCollector context . CancelFunc
2020-08-17 18:12:04 +00:00
}
2020-12-21 18:25:32 +00:00
// ConfigLoader loads configuration from the given source and returns the
// load result, or an error if loading failed. NewBaseDeps calls it with a nil
// source and reads the result's RuntimeConfig, WatchedFiles and Warnings.
type ConfigLoader func ( source config . Source ) ( config . LoadResult , error )
2020-08-08 01:08:43 +00:00
2022-10-24 22:02:38 +00:00
func NewBaseDeps ( configLoader ConfigLoader , logOut io . Writer , providedLogger hclog . InterceptLogger ) ( BaseDeps , error ) {
2020-08-08 01:08:43 +00:00
d := BaseDeps { }
2020-12-21 18:25:32 +00:00
result , err := configLoader ( nil )
2020-08-08 01:08:43 +00:00
if err != nil {
return d , err
}
2022-03-31 19:11:49 +00:00
d . WatchedFiles = result . WatchedFiles
2020-12-21 18:25:32 +00:00
cfg := result . RuntimeConfig
2020-08-19 17:17:05 +00:00
logConf := cfg . Logging
logConf . Name = logging . Agent
2022-10-24 22:02:38 +00:00
if providedLogger != nil {
d . Logger = providedLogger
} else {
d . Logger , err = logging . Setup ( logConf , logOut )
if err != nil {
return d , err
}
2020-08-08 01:08:43 +00:00
}
2021-04-26 15:57:07 +00:00
grpcLogInitOnce . Do ( func ( ) {
grpclog . SetLoggerV2 ( logging . NewGRPCLogger ( cfg . Logging . LogLevel , d . Logger ) )
} )
2020-08-08 01:08:43 +00:00
2020-12-21 18:25:32 +00:00
for _ , w := range result . Warnings {
2020-08-08 01:08:43 +00:00
d . Logger . Warn ( w )
}
cfg . NodeID , err = newNodeIDFromConfig ( cfg , d . Logger )
if err != nil {
return d , fmt . Errorf ( "failed to setup node ID: %w" , err )
}
2021-10-13 16:25:30 +00:00
isServer := result . RuntimeConfig . ServerMode
2023-02-23 14:07:17 +00:00
gauges , counters , summaries := getPrometheusDefs ( cfg , isServer )
2020-11-16 20:44:47 +00:00
cfg . Telemetry . PrometheusOpts . GaugeDefinitions = gauges
cfg . Telemetry . PrometheusOpts . CounterDefinitions = counters
cfg . Telemetry . PrometheusOpts . SummaryDefinitions = summaries
2022-05-19 20:03:46 +00:00
HCP Telemetry Feature (#17460)
* Move hcp client to subpackage hcpclient (#16800)
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] OTELExporter (#17128)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* [HCP Observability] OTELSink (#17159)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Fix nits
* [HCP Observability] Init OTELSink in Telemetry (#17162)
* Move hcp client to subpackage hcpclient (#16800)
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* [HCP Observability] OTELExporter (#17128)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Initialize OTELSink with sync.Map for all the instrument stores.
* Added telemetry agent to client and init sink in deps
* Fixed client
* Initalize sink in deps
* init sink in telemetry library
* Init deps before telemetry
* Use concrete telemetry.OtelSink type
* add /v1/metrics
* Avoid returning err for telemetry init
* move sink init within the IsCloudEnabled()
* Use HCPSinkOpts in deps instead
* update golden test for configuration file
* Switch to using extra sinks in the telemetry library
* keep name MetricsConfig
* fix log in verifyCCMRegistration
* Set logger in context
* pass around MetricSink in deps
* Fix imports
* Rebased onto otel sink pr
* Fix URL in test
* [HCP Observability] OTELSink (#17159)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Fix nits
* pass extraSinks as function param instead
* Add default interval as package export
* remove verifyCCM func
* Add clusterID
* Fix import and add t.Parallel() for missing tests
* Kick Vercel CI
* Remove scheme from endpoint path, and fix error logging
* return metrics.MetricSink for sink method
* Update SDK
* [HCP Observability] Metrics filtering and Labels in Go Metrics sink (#17184)
* Move hcp client to subpackage hcpclient (#16800)
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* [HCP Observability] OTELExporter (#17128)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Initialize OTELSink with sync.Map for all the instrument stores.
* Added telemetry agent to client and init sink in deps
* Fixed client
* Initalize sink in deps
* init sink in telemetry library
* Init deps before telemetry
* Use concrete telemetry.OtelSink type
* add /v1/metrics
* Avoid returning err for telemetry init
* move sink init within the IsCloudEnabled()
* Use HCPSinkOpts in deps instead
* update golden test for configuration file
* Switch to using extra sinks in the telemetry library
* keep name MetricsConfig
* fix log in verifyCCMRegistration
* Set logger in context
* pass around MetricSink in deps
* Fix imports
* Rebased onto otel sink pr
* Fix URL in test
* [HCP Observability] OTELSink (#17159)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Fix nits
* pass extraSinks as function param instead
* Add default interval as package export
* remove verifyCCM func
* Add clusterID
* Fix import and add t.Parallel() for missing tests
* Kick Vercel CI
* Remove scheme from endpoint path, and fix error logging
* return metrics.MetricSink for sink method
* Update SDK
* Added telemetry agent to client and init sink in deps
* Add node_id and __replica__ default labels
* add function for default labels and set x-hcp-resource-id
* Fix labels tests
* Commit suggestion for getDefaultLabels
Co-authored-by: Joshua Timmons <joshua.timmons1@gmail.com>
* Fixed server.id, and t.Parallel()
* Make defaultLabels a method on the TelemetryConfig object
* Rename FilterList to lowercase filterList
* Cleanup filter implemetation by combining regex into a single one, and making the type lowercase
* Fix append
* use regex directly for filters
* Fix x-resource-id test to use mocked value
* Fix log.Error formats
* Forgot the len(opts.Label) optimization)
* Use cfg.NodeID instead
---------
Co-authored-by: Joshua Timmons <joshua.timmons1@gmail.com>
* remove replic tag (#17484)
* [HCP Observability] Add custom metrics for OTEL sink, improve logging, upgrade modules and cleanup metrics client (#17455)
* Add custom metrics for Exporter and transform operations
* Improve deps logging
Run go mod tidy
* Upgrade SDK and OTEL
* Remove the partial success implemetation and check for HTTP status code in metrics client
* Add x-channel
* cleanup logs in deps.go based on PR feedback
* Change to debug log and lowercase
* address test operation feedback
* use GetHumanVersion on version
* Fix error wrapping
* Fix metric names
* [HCP Observability] Turn off retries for now until dynamically configurable (#17496)
* Remove retries for now until dynamic configuration is possible
* Clarify comment
* Update changelog
* improve changelog
---------
Co-authored-by: Joshua Timmons <joshua.timmons1@gmail.com>
2023-05-29 20:11:08 +00:00
var extraSinks [ ] metrics . MetricSink
if cfg . IsCloudEnabled ( ) {
d . HCP , err = hcp . NewDeps ( cfg . Cloud , d . Logger . Named ( "hcp" ) , cfg . NodeID )
if err != nil {
return d , err
}
if d . HCP . Sink != nil {
extraSinks = append ( extraSinks , d . HCP . Sink )
}
}
d . MetricsConfig , err = lib . InitTelemetry ( cfg . Telemetry , d . Logger , extraSinks ... )
2020-08-08 01:08:43 +00:00
if err != nil {
return d , fmt . Errorf ( "failed to initialize telemetry: %w" , err )
}
2023-05-30 18:43:29 +00:00
if ! cfg . Telemetry . Disable && cfg . Telemetry . EnableHostMetrics {
ctx , cancel := context . WithCancel ( context . Background ( ) )
hoststats . NewCollector ( ctx , d . Logger , cfg . DataDir )
d . stopHostCollector = cancel
}
2020-08-08 01:08:43 +00:00
2022-03-18 10:46:58 +00:00
d . TLSConfigurator , err = tlsutil . NewConfigurator ( cfg . TLS , d . Logger )
2020-08-08 01:08:43 +00:00
if err != nil {
return d , err
}
d . RuntimeConfig = cfg
d . Tokens = new ( token . Store )
2020-08-17 23:30:25 +00:00
2021-02-12 17:43:36 +00:00
cfg . Cache . Logger = d . Logger . Named ( "cache" )
2020-08-08 01:08:43 +00:00
// cache-types are not registered yet, but they won't be used until the components are started.
d . Cache = cache . New ( cfg . Cache )
2021-02-25 21:22:30 +00:00
d . ViewStore = submatview . NewStore ( d . Logger . Named ( "viewstore" ) )
2020-08-08 01:08:43 +00:00
d . ConnPool = newConnPool ( cfg , d . Logger , d . TLSConfigurator )
2023-05-11 16:08:57 +00:00
agentType := "client"
if cfg . ServerMode {
agentType = "server"
}
2023-01-05 10:21:27 +00:00
resolverBuilder := resolver . NewServerResolverBuilder ( resolver . Config {
2023-05-11 16:08:57 +00:00
AgentType : agentType ,
Datacenter : cfg . Datacenter ,
2021-08-24 21:28:44 +00:00
// Set the authority to something sufficiently unique so any usage in
// tests would be self-isolating in the global resolver map, while also
// not incurring a huge penalty for non-test code.
Authority : cfg . Datacenter + "." + string ( cfg . NodeID ) ,
} )
2023-01-05 10:21:27 +00:00
resolver . Register ( resolverBuilder )
2023-02-28 10:18:38 +00:00
d . deregisterResolver = func ( ) {
resolver . Deregister ( resolverBuilder . Authority ( ) )
}
2023-01-05 10:21:27 +00:00
balancerBuilder := balancer . NewBuilder (
resolverBuilder . Authority ( ) ,
d . Logger . Named ( "grpc.balancer" ) ,
)
balancerBuilder . Register ( )
2023-02-28 10:18:38 +00:00
d . deregisterBalancer = balancerBuilder . Deregister
2023-01-05 10:21:27 +00:00
2022-10-11 22:00:32 +00:00
d . GRPCConnPool = grpcInt . NewClientConnPool ( grpcInt . ClientConnPoolConfig {
2023-01-05 10:21:27 +00:00
Servers : resolverBuilder ,
2021-08-24 21:28:44 +00:00
SrcAddr : d . ConnPool . SrcAddr ,
2022-10-11 22:00:32 +00:00
TLSWrapper : grpcInt . TLSWrapper ( d . TLSConfigurator . OutgoingRPCWrapper ( ) ) ,
ALPNWrapper : grpcInt . ALPNWrapper ( d . TLSConfigurator . OutgoingALPNRPCWrapper ( ) ) ,
2021-08-24 21:28:44 +00:00
UseTLSForDC : d . TLSConfigurator . UseTLS ,
DialingFromServer : cfg . ServerMode ,
DialingFromDatacenter : cfg . Datacenter ,
} )
2023-01-05 10:21:27 +00:00
d . LeaderForwarder = resolverBuilder
d . Router = router . NewRouter (
d . Logger ,
cfg . Datacenter ,
fmt . Sprintf ( "%s.%s" , cfg . NodeName , cfg . Datacenter ) ,
grpcInt . NewTracker ( resolverBuilder , balancerBuilder ) ,
)
2020-08-27 15:23:52 +00:00
2021-05-17 20:01:32 +00:00
// this needs to happen prior to creating auto-config as some of the dependencies
// must also be passed to auto-config
d , err = initEnterpriseBaseDeps ( d , cfg )
if err != nil {
return d , err
}
2020-08-08 01:08:43 +00:00
acConf := autoconf . Config {
2021-05-17 20:01:32 +00:00
DirectRPC : d . ConnPool ,
Logger : d . Logger ,
Loader : configLoader ,
ServerProvider : d . Router ,
TLSConfigurator : d . TLSConfigurator ,
Cache : d . Cache ,
Tokens : d . Tokens ,
2021-05-20 14:07:23 +00:00
EnterpriseConfig : initEnterpriseAutoConfig ( d . EnterpriseDeps , cfg ) ,
2020-08-08 01:08:43 +00:00
}
2021-05-17 20:01:32 +00:00
2020-08-08 01:08:43 +00:00
d . AutoConfig , err = autoconf . New ( acConf )
if err != nil {
return d , err
}
2022-04-06 21:33:05 +00:00
d . NewRequestRecorderFunc = middleware . NewRequestRecorder
d . GetNetRPCInterceptorFunc = middleware . GetNetRPCInterceptor
proxycfg: server-local config entry data sources
This is the OSS portion of enterprise PR 2056.
This commit provides server-local implementations of the proxycfg.ConfigEntry
and proxycfg.ConfigEntryList interfaces, that source data from streaming events.
It makes use of the LocalMaterializer type introduced for peering replication,
adding the necessary support for authorization.
It also adds support for "wildcard" subscriptions (within a topic) to the event
publisher, as this is needed to fetch service-resolvers for all services when
configuring mesh gateways.
Currently, events will be emitted for just the ingress-gateway, service-resolver,
and mesh config entry types, as these are the only entries required by proxycfg
— the events will be emitted on topics named IngressGateway, ServiceResolver,
and MeshConfig topics respectively.
Though these events will only be consumed "locally" for now, they can also be
consumed via the gRPC endpoint (confirmed using grpcurl) so using them from
client agents should be a case of swapping the LocalMaterializer for an
RPCMaterializer.
2022-07-01 15:09:47 +00:00
d . EventPublisher = stream . NewEventPublisher ( 10 * time . Second )
2022-09-09 14:02:01 +00:00
d . XDSStreamLimiter = limiter . NewSessionLimiter ( )
2021-05-17 20:01:32 +00:00
return d , nil
2020-08-08 01:08:43 +00:00
}
2023-02-28 10:18:38 +00:00
// Close cleans up any state and goroutines associated to bd's members not
// handled by something else (e.g. the agent stop channel).
func ( bd BaseDeps ) Close ( ) {
bd . AutoConfig . Stop ( )
bd . MetricsConfig . Cancel ( )
2023-05-30 18:43:29 +00:00
for _ , fn := range [ ] func ( ) { bd . deregisterBalancer , bd . deregisterResolver , bd . stopHostCollector } {
if fn != nil {
fn ( )
}
2023-02-28 10:18:38 +00:00
}
}
2021-04-26 15:57:07 +00:00
// grpcLogInitOnce guards the installation of the gRPC logger in NewBaseDeps so
// that it happens only once. The test suite calls NewBaseDeps in many tests,
// and re-initializing the logger each time causes data races.
var grpcLogInitOnce sync . Once
2020-08-08 01:08:43 +00:00
func newConnPool ( config * config . RuntimeConfig , logger hclog . Logger , tls * tlsutil . Configurator ) * pool . ConnPool {
var rpcSrcAddr * net . TCPAddr
if ! ipaddr . IsAny ( config . RPCBindAddr ) {
rpcSrcAddr = & net . TCPAddr { IP : config . RPCBindAddr . IP }
}
pool := & pool . ConnPool {
2022-04-21 20:21:35 +00:00
Server : config . ServerMode ,
SrcAddr : rpcSrcAddr ,
Logger : logger . StandardLogger ( & hclog . StandardLoggerOptions { InferLevels : true } ) ,
TLSConfigurator : tls ,
Datacenter : config . Datacenter ,
2022-11-24 15:13:02 +00:00
RPCHoldTimeout : config . RPCHoldTimeout ,
2022-04-21 20:21:35 +00:00
MaxQueryTime : config . MaxQueryTime ,
DefaultQueryTime : config . DefaultQueryTime ,
2020-08-08 01:08:43 +00:00
}
2022-10-18 19:05:09 +00:00
pool . SetRPCClientTimeout ( config . RPCClientTimeout )
2020-08-08 01:08:43 +00:00
if config . ServerMode {
pool . MaxTime = 2 * time . Minute
pool . MaxStreams = 64
} else {
2020-09-14 22:31:07 +00:00
// MaxTime controls how long we keep an idle connection open to a server.
// 127s was chosen as the first prime above 120s
// (arbitrarily chose to use a prime) with the intent of reusing
// connections who are used by once-a-minute cron(8) jobs *and* who
// use a 60s jitter window (e.g. in vixie cron job execution can
// drift by up to 59s per job, or 119s for a once-a-minute cron job).
2020-08-08 01:08:43 +00:00
pool . MaxTime = 127 * time . Second
pool . MaxStreams = 32
}
return pool
}
2020-10-14 20:47:16 +00:00
2020-11-13 02:12:12 +00:00
// getPrometheusDefs reaches into every slice of prometheus defs we've defined in each part of the agent, and appends
2022-10-12 19:17:58 +00:00
// all of our slices into one nice slice of definitions per metric type for the Consul agent to pass to go-metrics.
2023-02-23 14:07:17 +00:00
func getPrometheusDefs ( cfg * config . RuntimeConfig , isServer bool ) ( [ ] prometheus . GaugeDefinition , [ ] prometheus . CounterDefinition , [ ] prometheus . SummaryDefinition ) {
2021-05-04 14:36:53 +00:00
// TODO: "raft..." metrics come from the raft lib and we should migrate these to a telemetry
// package within. In the mean time, we're going to define a few here because they're key to monitoring Consul.
raftGauges := [ ] prometheus . GaugeDefinition {
{
Name : [ ] string { "raft" , "fsm" , "lastRestoreDuration" } ,
Help : "This measures how long the last FSM restore (from disk or leader) took." ,
} ,
{
Name : [ ] string { "raft" , "leader" , "oldestLogAge" } ,
Help : "This measures how old the oldest log in the leader's log store is." ,
} ,
}
2022-06-03 17:07:37 +00:00
serverGauges := [ ] prometheus . GaugeDefinition {
{
Name : [ ] string { "server" , "isLeader" } ,
Help : "Tracks if the server is a leader." ,
} ,
}
2020-11-16 22:01:12 +00:00
// Build slice of slices for all gauge definitions
2020-11-13 02:12:12 +00:00
var gauges = [ ] [ ] prometheus . GaugeDefinition {
2020-11-14 00:26:08 +00:00
cache . Gauges ,
2020-11-13 02:12:12 +00:00
consul . RPCGauges ,
consul . SessionGauges ,
2022-10-11 22:00:32 +00:00
grpcWare . StatsGauges ,
2021-05-14 18:59:13 +00:00
xds . StatsGauges ,
2020-11-13 02:12:12 +00:00
usagemetrics . Gauges ,
2021-04-23 21:05:33 +00:00
consul . ReplicationGauges ,
2021-10-27 19:23:29 +00:00
CertExpirationGauges ,
2020-12-09 14:16:53 +00:00
Gauges ,
2021-05-04 14:36:53 +00:00
raftGauges ,
2022-06-03 17:07:37 +00:00
serverGauges ,
2020-11-13 02:12:12 +00:00
}
2021-05-04 14:36:53 +00:00
2023-05-30 18:43:29 +00:00
if cfg . Telemetry . EnableHostMetrics {
gauges = append ( gauges , hoststats . Gauges )
}
2021-10-13 16:25:30 +00:00
// TODO(ffmmm): conditionally add only leader specific metrics to gauges, counters, summaries, etc
if isServer {
2021-10-19 20:49:23 +00:00
gauges = append ( gauges ,
consul . AutopilotGauges ,
2022-07-22 19:05:08 +00:00
consul . LeaderCertExpirationGauges ,
2022-09-09 14:02:01 +00:00
consul . LeaderPeeringMetrics ,
xdscapacity . StatsGauges ,
)
2021-10-13 16:25:30 +00:00
}
2023-02-23 14:07:17 +00:00
if isServer && cfg . RaftLogStoreConfig . Verification . Enabled {
verifierGauges := make ( [ ] prometheus . GaugeDefinition , 0 )
for _ , d := range verifier . MetricDefinitions . Gauges {
verifierGauges = append ( verifierGauges , prometheus . GaugeDefinition {
Name : [ ] string { "raft" , "logstore" , "verifier" , d . Name } ,
Help : d . Desc ,
} )
}
gauges = append ( gauges , verifierGauges )
}
if isServer && cfg . RaftLogStoreConfig . Backend == consul . LogStoreBackendWAL {
walGauges := make ( [ ] prometheus . GaugeDefinition , 0 )
for _ , d := range wal . MetricDefinitions . Gauges {
walGauges = append ( walGauges , prometheus . GaugeDefinition {
Name : [ ] string { "raft" , "wal" , d . Name } ,
Help : d . Desc ,
} )
}
gauges = append ( gauges , walGauges )
}
2020-11-16 22:01:12 +00:00
// Flatten definitions
// NOTE(kit): Do we actually want to create a set here so we can ensure definition names are unique?
2020-11-13 02:12:12 +00:00
var gaugeDefs [ ] prometheus . GaugeDefinition
for _ , g := range gauges {
2020-11-13 21:18:04 +00:00
// Set Consul to each definition's namespace
2020-11-16 22:01:12 +00:00
// TODO(kit): Prepending the service to each definition should be handled by go-metrics
2020-11-13 21:18:04 +00:00
var withService [ ] prometheus . GaugeDefinition
for _ , gauge := range g {
2023-02-23 14:07:17 +00:00
gauge . Name = append ( [ ] string { cfg . Telemetry . MetricsPrefix } , gauge . Name ... )
2020-11-13 21:18:04 +00:00
withService = append ( withService , gauge )
}
gaugeDefs = append ( gaugeDefs , withService ... )
2020-11-13 02:12:12 +00:00
}
raftCounters := [ ] prometheus . CounterDefinition {
2020-11-14 00:26:08 +00:00
// TODO(kit): "raft..." metrics come from the raft lib and we should migrate these to a telemetry
// package within. In the mean time, we're going to define a few here because they're key to monitoring Consul.
2020-11-13 02:12:12 +00:00
{
2020-11-13 21:18:04 +00:00
Name : [ ] string { "raft" , "apply" } ,
2020-11-13 02:12:12 +00:00
Help : "This counts the number of Raft transactions occurring over the interval." ,
} ,
{
2020-11-13 21:18:04 +00:00
Name : [ ] string { "raft" , "state" , "candidate" } ,
2020-11-13 02:12:12 +00:00
Help : "This increments whenever a Consul server starts an election." ,
} ,
{
2020-11-13 21:18:04 +00:00
Name : [ ] string { "raft" , "state" , "leader" } ,
2020-11-13 02:12:12 +00:00
Help : "This increments whenever a Consul server becomes a leader." ,
} ,
}
var counters = [ ] [ ] prometheus . CounterDefinition {
CatalogCounters ,
2020-11-14 00:26:08 +00:00
cache . Counters ,
2020-11-13 02:12:12 +00:00
consul . ACLCounters ,
consul . CatalogCounters ,
consul . ClientCounters ,
consul . RPCCounters ,
2022-10-11 22:00:32 +00:00
grpcWare . StatsCounters ,
2020-11-13 02:12:12 +00:00
local . StateCounters ,
2022-09-09 14:02:01 +00:00
xds . StatsCounters ,
2020-11-13 02:12:12 +00:00
raftCounters ,
2023-01-06 18:33:53 +00:00
rate . Counters ,
2020-11-13 02:12:12 +00:00
}
2023-02-23 14:07:17 +00:00
// For some unknown reason, we seem to add the raft counters above without
// checking if this is a server like we do above for some of the summaries
// above. We should probably fix that but I want to not change behavior right
// now. If we are a server, add summaries for WAL and verifier metrics.
if isServer && cfg . RaftLogStoreConfig . Verification . Enabled {
verifierCounters := make ( [ ] prometheus . CounterDefinition , 0 )
for _ , d := range verifier . MetricDefinitions . Counters {
verifierCounters = append ( verifierCounters , prometheus . CounterDefinition {
Name : [ ] string { "raft" , "logstore" , "verifier" , d . Name } ,
Help : d . Desc ,
} )
}
counters = append ( counters , verifierCounters )
}
if isServer && cfg . RaftLogStoreConfig . Backend == consul . LogStoreBackendWAL {
walCounters := make ( [ ] prometheus . CounterDefinition , 0 )
for _ , d := range wal . MetricDefinitions . Counters {
walCounters = append ( walCounters , prometheus . CounterDefinition {
Name : [ ] string { "raft" , "wal" , d . Name } ,
Help : d . Desc ,
} )
}
counters = append ( counters , walCounters )
}
2020-11-16 22:01:12 +00:00
// Flatten definitions
// NOTE(kit): Do we actually want to create a set here so we can ensure definition names are unique?
2020-11-13 02:12:12 +00:00
var counterDefs [ ] prometheus . CounterDefinition
for _ , c := range counters {
2020-11-16 22:01:12 +00:00
// TODO(kit): Prepending the service to each definition should be handled by go-metrics
2020-11-13 21:18:04 +00:00
var withService [ ] prometheus . CounterDefinition
for _ , counter := range c {
2023-02-23 14:07:17 +00:00
counter . Name = append ( [ ] string { cfg . Telemetry . MetricsPrefix } , counter . Name ... )
2020-11-13 21:18:04 +00:00
withService = append ( withService , counter )
}
counterDefs = append ( counterDefs , withService ... )
2020-11-13 02:12:12 +00:00
}
raftSummaries := [ ] prometheus . SummaryDefinition {
2020-11-14 00:26:08 +00:00
// TODO(kit): "raft..." metrics come from the raft lib and we should migrate these to a telemetry
// package within. In the mean time, we're going to define a few here because they're key to monitoring Consul.
2020-11-13 02:12:12 +00:00
{
2020-11-13 21:18:04 +00:00
Name : [ ] string { "raft" , "commitTime" } ,
2020-11-13 02:12:12 +00:00
Help : "This measures the time it takes to commit a new entry to the Raft log on the leader." ,
} ,
{
2020-11-13 21:18:04 +00:00
Name : [ ] string { "raft" , "leader" , "lastContact" } ,
2020-11-13 02:12:12 +00:00
Help : "Measures the time since the leader was last able to contact the follower nodes when checking its leader lease." ,
} ,
2021-05-04 14:36:53 +00:00
{
Name : [ ] string { "raft" , "snapshot" , "persist" } ,
Help : "Measures the time it takes raft to write a new snapshot to disk." ,
} ,
{
Name : [ ] string { "raft" , "rpc" , "installSnapshot" } ,
Help : "Measures the time it takes the raft leader to install a snapshot on a follower that is catching up after being down or has just joined the cluster." ,
} ,
2020-11-13 02:12:12 +00:00
}
var summaries = [ ] [ ] prometheus . SummaryDefinition {
HTTPSummaries ,
consul . ACLSummaries ,
consul . ACLEndpointSummaries ,
consul . CatalogSummaries ,
consul . FederationStateSummaries ,
consul . IntentionSummaries ,
consul . KVSummaries ,
2020-11-14 00:26:08 +00:00
consul . LeaderSummaries ,
2020-11-13 02:12:12 +00:00
consul . PreparedQuerySummaries ,
consul . RPCSummaries ,
2020-11-14 00:26:08 +00:00
consul . SegmentOSSSummaries ,
2020-11-13 02:12:12 +00:00
consul . SessionSummaries ,
2020-11-14 00:26:08 +00:00
consul . SessionEndpointSummaries ,
2020-11-13 02:12:12 +00:00
consul . TxnSummaries ,
2020-11-14 00:26:08 +00:00
fsm . CommandsSummaries ,
fsm . SnapshotSummaries ,
2020-11-13 02:12:12 +00:00
raftSummaries ,
2022-10-12 19:17:58 +00:00
xds . StatsSummaries ,
2020-11-13 02:12:12 +00:00
}
2020-11-16 22:01:12 +00:00
// Flatten definitions
// NOTE(kit): Do we actually want to create a set here so we can ensure definition names are unique?
2020-11-13 02:12:12 +00:00
var summaryDefs [ ] prometheus . SummaryDefinition
for _ , s := range summaries {
2020-11-16 22:01:12 +00:00
// TODO(kit): Prepending the service to each definition should be handled by go-metrics
2020-11-13 21:18:04 +00:00
var withService [ ] prometheus . SummaryDefinition
for _ , summary := range s {
2023-02-23 14:07:17 +00:00
summary . Name = append ( [ ] string { cfg . Telemetry . MetricsPrefix } , summary . Name ... )
2020-11-13 21:18:04 +00:00
withService = append ( withService , summary )
}
summaryDefs = append ( summaryDefs , withService ... )
2020-11-13 02:12:12 +00:00
}
2020-11-16 20:44:47 +00:00
return gaugeDefs , counterDefs , summaryDefs
2020-11-13 02:12:12 +00:00
}