2023-03-28 19:39:22 +01:00
|
|
|
// Copyright (c) HashiCorp, Inc.
|
2023-08-11 09:12:13 -04:00
|
|
|
// SPDX-License-Identifier: BUSL-1.1
|
2023-03-28 19:39:22 +01:00
|
|
|
|
2022-09-26 14:58:15 -04:00
|
|
|
package hcp
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2024-01-30 09:40:36 -06:00
|
|
|
"reflect"
|
2022-09-26 14:58:15 -04:00
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
|
Move HCP Manager lifecycle management out of Link controller (#20401)
* Add function to get update channel for watching HCP Link
* Add MonitorHCPLink function
This function can be called in a goroutine to manage the lifecycle
of the HCP manager.
* Update HCP Manager config in link monitor before starting
This updates HCPMonitorLink so it updates the HCP manager
with an HCP client and management token when a Link is upserted.
* Let MonitorHCPManager handle lifecycle instead of link controller
* Remove cleanup from Link controller and move it to MonitorHCPLink
Previously, the Link Controller was responsible for cleaning up the
HCP-related files on the file system. This change makes it so
MonitorHCPLink handles this cleanup. As a result, we are able to remove
the PlacementEachServer placement strategy for the Link controller
because it no longer needs to do this per-node cleanup.
* Remove HCP Manager dependency from Link Controller
The Link controller does not need to have HCP Manager
as a dependency anymore, so this removes that dependency
in order to simplify the design.
* Add Linked prefix to Linked status variables
This is in preparation for adding a new status type to the
Link resource.
* Add new "validated" status type to link resource
The link resource controller will now set a "validated" status
in addition to the "linked" status. This is needed so that other
components (eg the HCP manager) know when the Link is ready to link
with HCP.
* Fix tests
* Handle new 'EndOfSnapshot' WatchList event
* Fix watch test
* Remove unnecessary config from TestAgent_scadaProvider
Since the Scada provider is now started on agent startup
regardless of whether a cloud config is provided, this removes
the cloud config override from the relevant test.
This change is not exactly related to the changes from this PR,
but rather is something small and sort of related that was noticed
while working on this PR.
* Simplify link watch test and remove sleep from link watch
This updates the link watch test so that it uses more mocks
and does not require setting up the infrastructure for the HCP Link
controller.
This also removes the time.Sleep delay in the link watcher loop in favor
of an error counter. When we receive 10 consecutive errors, we shut down
the link watcher loop.
* Add better logging for link validation. Remove EndOfSnapshot test.
* Refactor link monitor test into a table test
* Add some clarifying comments to link monitor
* Simplify link watch test
* Test a bunch more errors cases in link monitor test
* Use exponential backoff instead of errorCounter in LinkWatch
* Move link watch and link monitor into a single goroutine called from server.go
* Refactor HCP link watcher to use single go-routine.
Previously, if the WatchClient errored, we would've never recovered
because we never retry to create the stream. With this change,
we have a single goroutine that runs for the life of the server agent
and if the WatchClient stream ever errors, we retry the creation
of the stream with an exponential backoff.
2024-02-12 10:48:23 -05:00
|
|
|
"github.com/hashicorp/go-hclog"
|
|
|
|
|
HCP Telemetry Feature (#17460)
* Move hcp client to subpackage hcpclient (#16800)
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] OTELExporter (#17128)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* [HCP Observability] OTELSink (#17159)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO, rebase and cleanup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Separate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Fix nits
* [HCP Observability] Init OTELSink in Telemetry (#17162)
* Move hcp client to subpackage hcpclient (#16800)
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* [HCP Observability] OTELExporter (#17128)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Initialize OTELSink with sync.Map for all the instrument stores.
* Added telemetry agent to client and init sink in deps
* Fixed client
* Initialize sink in deps
* init sink in telemetry library
* Init deps before telemetry
* Use concrete telemetry.OtelSink type
* add /v1/metrics
* Avoid returning err for telemetry init
* move sink init within the IsCloudEnabled()
* Use HCPSinkOpts in deps instead
* update golden test for configuration file
* Switch to using extra sinks in the telemetry library
* keep name MetricsConfig
* fix log in verifyCCMRegistration
* Set logger in context
* pass around MetricSink in deps
* Fix imports
* Rebased onto otel sink pr
* Fix URL in test
* [HCP Observability] OTELSink (#17159)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Fix nits
* pass extraSinks as function param instead
* Add default interval as package export
* remove verifyCCM func
* Add clusterID
* Fix import and add t.Parallel() for missing tests
* Kick Vercel CI
* Remove scheme from endpoint path, and fix error logging
* return metrics.MetricSink for sink method
* Update SDK
* [HCP Observability] Metrics filtering and Labels in Go Metrics sink (#17184)
* Move hcp client to subpackage hcpclient (#16800)
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* [HCP Observability] OTELExporter (#17128)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Initialize OTELSink with sync.Map for all the instrument stores.
* Added telemetry agent to client and init sink in deps
* Fixed client
* Initalize sink in deps
* init sink in telemetry library
* Init deps before telemetry
* Use concrete telemetry.OtelSink type
* add /v1/metrics
* Avoid returning err for telemetry init
* move sink init within the IsCloudEnabled()
* Use HCPSinkOpts in deps instead
* update golden test for configuration file
* Switch to using extra sinks in the telemetry library
* keep name MetricsConfig
* fix log in verifyCCMRegistration
* Set logger in context
* pass around MetricSink in deps
* Fix imports
* Rebased onto otel sink pr
* Fix URL in test
* [HCP Observability] OTELSink (#17159)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Fix nits
* pass extraSinks as function param instead
* Add default interval as package export
* remove verifyCCM func
* Add clusterID
* Fix import and add t.Parallel() for missing tests
* Kick Vercel CI
* Remove scheme from endpoint path, and fix error logging
* return metrics.MetricSink for sink method
* Update SDK
* Added telemetry agent to client and init sink in deps
* Add node_id and __replica__ default labels
* add function for default labels and set x-hcp-resource-id
* Fix labels tests
* Commit suggestion for getDefaultLabels
Co-authored-by: Joshua Timmons <joshua.timmons1@gmail.com>
* Fixed server.id, and t.Parallel()
* Make defaultLabels a method on the TelemetryConfig object
* Rename FilterList to lowercase filterList
* Cleanup filter implementation by combining regex into a single one, and making the type lowercase
* Fix append
* use regex directly for filters
* Fix x-resource-id test to use mocked value
* Fix log.Error formats
* Forgot the len(opts.Label) optimization)
* Use cfg.NodeID instead
---------
Co-authored-by: Joshua Timmons <joshua.timmons1@gmail.com>
* remove replic tag (#17484)
* [HCP Observability] Add custom metrics for OTEL sink, improve logging, upgrade modules and cleanup metrics client (#17455)
* Add custom metrics for Exporter and transform operations
* Improve deps logging
Run go mod tidy
* Upgrade SDK and OTEL
* Remove the partial success implementation and check for HTTP status code in metrics client
* Add x-channel
* cleanup logs in deps.go based on PR feedback
* Change to debug log and lowercase
* address test operation feedback
* use GetHumanVersion on version
* Fix error wrapping
* Fix metric names
* [HCP Observability] Turn off retries for now until dynamically configurable (#17496)
* Remove retries for now until dynamic configuration is possible
* Clarify comment
* Update changelog
* improve changelog
---------
Co-authored-by: Joshua Timmons <joshua.timmons1@gmail.com>
2023-05-29 16:11:08 -04:00
|
|
|
hcpclient "github.com/hashicorp/consul/agent/hcp/client"
|
2024-01-08 09:49:29 -06:00
|
|
|
"github.com/hashicorp/consul/agent/hcp/config"
|
|
|
|
"github.com/hashicorp/consul/agent/hcp/scada"
|
2022-09-26 14:58:15 -04:00
|
|
|
"github.com/hashicorp/consul/lib"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Default bounds used by ManagerConfig.nextHeartbeat when the corresponding
// MinInterval / MaxInterval field of the config is left unset.
var (
	// defaultManagerMinInterval is the fallback lower bound for the heartbeat wait.
	defaultManagerMinInterval = 45 * time.Minute
	// defaultManagerMaxInterval is the fallback upper bound for the heartbeat wait.
	defaultManagerMaxInterval = 75 * time.Minute
)
|
|
|
|
|
2024-01-30 09:40:36 -06:00
|
|
|
// Compile-time assertion that *HCPManager satisfies the Manager interface.
var _ Manager = (*HCPManager)(nil)
|
|
|
|
|
2022-09-26 14:58:15 -04:00
|
|
|
type ManagerConfig struct {
|
2024-01-16 10:46:12 -06:00
|
|
|
Client hcpclient.Client
|
|
|
|
CloudConfig config.CloudConfig
|
|
|
|
SCADAProvider scada.Provider
|
2024-01-30 09:40:36 -06:00
|
|
|
TelemetryProvider TelemetryProvider
|
2022-09-26 14:58:15 -04:00
|
|
|
|
2024-01-19 13:58:49 -05:00
|
|
|
StatusFn StatusCallback
|
|
|
|
// Idempotent function to upsert the HCP management token. This will be called periodically in
|
|
|
|
// the manager's main loop.
|
|
|
|
ManagementTokenUpserterFn ManagementTokenUpserter
|
2024-01-30 09:40:36 -06:00
|
|
|
ManagementTokenDeleterFn ManagementTokenDeleter
|
2024-01-19 13:58:49 -05:00
|
|
|
MinInterval time.Duration
|
|
|
|
MaxInterval time.Duration
|
2022-09-26 14:58:15 -04:00
|
|
|
|
|
|
|
Logger hclog.Logger
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cfg *ManagerConfig) enabled() bool {
|
|
|
|
return cfg.Client != nil && cfg.StatusFn != nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cfg *ManagerConfig) nextHeartbeat() time.Duration {
|
|
|
|
min := cfg.MinInterval
|
|
|
|
if min == 0 {
|
|
|
|
min = defaultManagerMinInterval
|
|
|
|
}
|
|
|
|
|
|
|
|
max := cfg.MaxInterval
|
|
|
|
if max == 0 {
|
|
|
|
max = defaultManagerMaxInterval
|
|
|
|
}
|
|
|
|
if max < min {
|
|
|
|
max = min
|
|
|
|
}
|
|
|
|
return min + lib.RandomStagger(max-min)
|
|
|
|
}
|
|
|
|
|
HCP Telemetry Feature (#17460)
* Move hcp client to subpackage hcpclient (#16800)
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] OTELExporter (#17128)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* [HCP Observability] OTELSink (#17159)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Fix nits
* [HCP Observability] Init OTELSink in Telemetry (#17162)
* Move hcp client to subpackage hcpclient (#16800)
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* [HCP Observability] OTELExporter (#17128)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Initialize OTELSink with sync.Map for all the instrument stores.
* Added telemetry agent to client and init sink in deps
* Fixed client
* Initalize sink in deps
* init sink in telemetry library
* Init deps before telemetry
* Use concrete telemetry.OtelSink type
* add /v1/metrics
* Avoid returning err for telemetry init
* move sink init within the IsCloudEnabled()
* Use HCPSinkOpts in deps instead
* update golden test for configuration file
* Switch to using extra sinks in the telemetry library
* keep name MetricsConfig
* fix log in verifyCCMRegistration
* Set logger in context
* pass around MetricSink in deps
* Fix imports
* Rebased onto otel sink pr
* Fix URL in test
* [HCP Observability] OTELSink (#17159)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Fix nits
* pass extraSinks as function param instead
* Add default interval as package export
* remove verifyCCM func
* Add clusterID
* Fix import and add t.Parallel() for missing tests
* Kick Vercel CI
* Remove scheme from endpoint path, and fix error logging
* return metrics.MetricSink for sink method
* Update SDK
* [HCP Observability] Metrics filtering and Labels in Go Metrics sink (#17184)
* Move hcp client to subpackage hcpclient (#16800)
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* [HCP Observability] OTELExporter (#17128)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Initialize OTELSink with sync.Map for all the instrument stores.
* Added telemetry agent to client and init sink in deps
* Fixed client
* Initalize sink in deps
* init sink in telemetry library
* Init deps before telemetry
* Use concrete telemetry.OtelSink type
* add /v1/metrics
* Avoid returning err for telemetry init
* move sink init within the IsCloudEnabled()
* Use HCPSinkOpts in deps instead
* update golden test for configuration file
* Switch to using extra sinks in the telemetry library
* keep name MetricsConfig
* fix log in verifyCCMRegistration
* Set logger in context
* pass around MetricSink in deps
* Fix imports
* Rebased onto otel sink pr
* Fix URL in test
* [HCP Observability] OTELSink (#17159)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Fix nits
* pass extraSinks as function param instead
* Add default interval as package export
* remove verifyCCM func
* Add clusterID
* Fix import and add t.Parallel() for missing tests
* Kick Vercel CI
* Remove scheme from endpoint path, and fix error logging
* return metrics.MetricSink for sink method
* Update SDK
* Added telemetry agent to client and init sink in deps
* Add node_id and __replica__ default labels
* add function for default labels and set x-hcp-resource-id
* Fix labels tests
* Commit suggestion for getDefaultLabels
Co-authored-by: Joshua Timmons <joshua.timmons1@gmail.com>
* Fixed server.id, and t.Parallel()
* Make defaultLabels a method on the TelemetryConfig object
* Rename FilterList to lowercase filterList
* Cleanup filter implemetation by combining regex into a single one, and making the type lowercase
* Fix append
* use regex directly for filters
* Fix x-resource-id test to use mocked value
* Fix log.Error formats
* Forgot the len(opts.Label) optimization)
* Use cfg.NodeID instead
---------
Co-authored-by: Joshua Timmons <joshua.timmons1@gmail.com>
* remove replic tag (#17484)
* [HCP Observability] Add custom metrics for OTEL sink, improve logging, upgrade modules and cleanup metrics client (#17455)
* Add custom metrics for Exporter and transform operations
* Improve deps logging
Run go mod tidy
* Upgrade SDK and OTEL
* Remove the partial success implemetation and check for HTTP status code in metrics client
* Add x-channel
* cleanup logs in deps.go based on PR feedback
* Change to debug log and lowercase
* address test operation feedback
* use GetHumanVersion on version
* Fix error wrapping
* Fix metric names
* [HCP Observability] Turn off retries for now until dynamically configurable (#17496)
* Remove retries for now until dynamic configuration is possible
* Clarify comment
* Update changelog
* improve changelog
---------
Co-authored-by: Joshua Timmons <joshua.timmons1@gmail.com>
2023-05-29 16:11:08 -04:00
|
|
|
type StatusCallback func(context.Context) (hcpclient.ServerStatus, error)
|
2024-01-19 13:58:49 -05:00
|
|
|
type ManagementTokenUpserter func(name, secretId string) error
|
2024-01-30 09:40:36 -06:00
|
|
|
type ManagementTokenDeleter func(secretId string) error
|
|
|
|
|
|
|
|
//go:generate mockery --name Manager --with-expecter --inpackage
|
|
|
|
type Manager interface {
|
|
|
|
Start(context.Context) error
|
|
|
|
Stop() error
|
|
|
|
GetCloudConfig() config.CloudConfig
|
|
|
|
UpdateConfig(hcpclient.Client, config.CloudConfig)
|
|
|
|
}
|
2022-09-26 14:58:15 -04:00
|
|
|
|
2024-01-30 09:40:36 -06:00
|
|
|
type HCPManager struct {
|
2022-09-26 14:58:15 -04:00
|
|
|
logger hclog.Logger
|
|
|
|
|
2024-01-29 16:31:44 -06:00
|
|
|
running bool
|
|
|
|
runLock sync.RWMutex
|
|
|
|
|
2022-09-26 14:58:15 -04:00
|
|
|
cfg ManagerConfig
|
|
|
|
cfgMu sync.RWMutex
|
|
|
|
|
|
|
|
updateCh chan struct{}
|
2024-01-30 09:40:36 -06:00
|
|
|
stopCh chan struct{}
|
2022-09-26 14:58:15 -04:00
|
|
|
|
|
|
|
// testUpdateSent is set by unit tests to signal when the manager's status update has triggered
|
|
|
|
testUpdateSent chan struct{}
|
|
|
|
}
|
|
|
|
|
2024-01-16 10:46:12 -06:00
|
|
|
// NewManager returns a Manager initialized with the given configuration.
|
2024-01-30 09:40:36 -06:00
|
|
|
func NewManager(cfg ManagerConfig) *HCPManager {
|
|
|
|
return &HCPManager{
|
2022-09-26 14:58:15 -04:00
|
|
|
logger: cfg.Logger,
|
|
|
|
cfg: cfg,
|
|
|
|
|
|
|
|
updateCh: make(chan struct{}, 1),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-01-29 16:31:44 -06:00
|
|
|
// Start executes the logic for connecting to HCP and sending periodic server updates. If the
|
|
|
|
// manager has been previously started, it will not start again.
|
2024-01-30 09:40:36 -06:00
|
|
|
func (m *HCPManager) Start(ctx context.Context) error {
|
2024-01-29 16:31:44 -06:00
|
|
|
// Check if the manager has already started
|
|
|
|
changed := m.setRunning(true)
|
|
|
|
if !changed {
|
|
|
|
m.logger.Trace("HCP manager already started")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-09-26 14:58:15 -04:00
|
|
|
var err error
|
2024-01-29 16:31:44 -06:00
|
|
|
m.logger.Info("HCP manager starting")
|
2022-09-26 14:58:15 -04:00
|
|
|
|
2024-01-08 09:49:29 -06:00
|
|
|
// Update and start the SCADA provider
|
|
|
|
err = m.startSCADAProvider()
|
|
|
|
if err != nil {
|
2024-01-16 10:46:12 -06:00
|
|
|
m.logger.Error("failed to start scada provider", "error", err)
|
2024-01-29 16:31:44 -06:00
|
|
|
m.setRunning(false)
|
2024-01-16 10:46:12 -06:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update and start the telemetry provider to enable the HCP metrics sink
|
|
|
|
if err := m.startTelemetryProvider(ctx); err != nil {
|
|
|
|
m.logger.Error("failed to update telemetry config provider", "error", err)
|
2024-01-29 16:31:44 -06:00
|
|
|
m.setRunning(false)
|
2024-01-16 10:46:12 -06:00
|
|
|
return err
|
2024-01-08 09:49:29 -06:00
|
|
|
}
|
|
|
|
|
2022-09-26 14:58:15 -04:00
|
|
|
// immediately send initial update
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
2024-01-29 16:31:44 -06:00
|
|
|
m.setRunning(false)
|
2024-01-16 10:46:12 -06:00
|
|
|
return nil
|
2024-01-30 09:40:36 -06:00
|
|
|
case <-m.stopCh:
|
|
|
|
return nil
|
2022-09-26 14:58:15 -04:00
|
|
|
case <-m.updateCh: // empty the update chan if there is a queued update to prevent repeated update in main loop
|
|
|
|
err = m.sendUpdate()
|
2024-01-29 16:31:44 -06:00
|
|
|
if err != nil {
|
|
|
|
m.setRunning(false)
|
|
|
|
return err
|
|
|
|
}
|
2022-09-26 14:58:15 -04:00
|
|
|
default:
|
|
|
|
err = m.sendUpdate()
|
2024-01-29 16:31:44 -06:00
|
|
|
if err != nil {
|
|
|
|
m.setRunning(false)
|
|
|
|
return err
|
|
|
|
}
|
2022-09-26 14:58:15 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// main loop
|
2024-01-29 16:31:44 -06:00
|
|
|
go func() {
|
|
|
|
for {
|
|
|
|
m.cfgMu.RLock()
|
|
|
|
cfg := m.cfg
|
|
|
|
m.cfgMu.RUnlock()
|
|
|
|
|
|
|
|
// Check for configured management token from HCP and upsert it if found
|
|
|
|
if hcpManagement := cfg.CloudConfig.ManagementToken; len(hcpManagement) > 0 {
|
|
|
|
if cfg.ManagementTokenUpserterFn != nil {
|
|
|
|
upsertTokenErr := cfg.ManagementTokenUpserterFn("HCP Management Token", hcpManagement)
|
|
|
|
if upsertTokenErr != nil {
|
|
|
|
m.logger.Error("failed to upsert HCP management token", "err", upsertTokenErr)
|
|
|
|
}
|
|
|
|
}
|
2024-01-19 13:58:49 -05:00
|
|
|
}
|
|
|
|
|
2024-01-29 16:31:44 -06:00
|
|
|
nextUpdate := cfg.nextHeartbeat()
|
|
|
|
if err != nil {
|
|
|
|
m.logger.Error("failed to send server status to HCP", "err", err, "next_heartbeat", nextUpdate.String())
|
|
|
|
}
|
2022-09-26 14:58:15 -04:00
|
|
|
|
2024-01-29 16:31:44 -06:00
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
m.setRunning(false)
|
|
|
|
return
|
2022-09-26 14:58:15 -04:00
|
|
|
|
2024-01-30 09:40:36 -06:00
|
|
|
case <-m.stopCh:
|
|
|
|
return
|
|
|
|
|
2024-01-29 16:31:44 -06:00
|
|
|
case <-m.updateCh:
|
|
|
|
err = m.sendUpdate()
|
2022-09-26 14:58:15 -04:00
|
|
|
|
2024-01-29 16:31:44 -06:00
|
|
|
case <-time.After(nextUpdate):
|
|
|
|
err = m.sendUpdate()
|
|
|
|
}
|
2022-09-26 14:58:15 -04:00
|
|
|
}
|
2024-01-29 16:31:44 -06:00
|
|
|
}()
|
|
|
|
|
|
|
|
return err
|
2022-09-26 14:58:15 -04:00
|
|
|
}
|
|
|
|
|
2024-01-30 09:40:36 -06:00
|
|
|
func (m *HCPManager) startSCADAProvider() error {
|
2024-01-08 09:49:29 -06:00
|
|
|
provider := m.cfg.SCADAProvider
|
|
|
|
if provider == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the SCADA provider configuration with HCP configurations
|
|
|
|
m.logger.Debug("updating scada provider with HCP configuration")
|
|
|
|
err := provider.UpdateHCPConfig(m.cfg.CloudConfig)
|
|
|
|
if err != nil {
|
|
|
|
m.logger.Error("failed to update scada provider with HCP configuration", "err", err)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the SCADA provider metadata
|
|
|
|
provider.UpdateMeta(map[string]string{
|
|
|
|
"consul_server_id": string(m.cfg.CloudConfig.NodeID),
|
|
|
|
})
|
|
|
|
|
|
|
|
// Start the SCADA provider
|
|
|
|
err = provider.Start()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2024-01-16 10:46:12 -06:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2024-01-30 09:40:36 -06:00
|
|
|
func (m *HCPManager) startTelemetryProvider(ctx context.Context) error {
|
|
|
|
if m.cfg.TelemetryProvider == nil || reflect.ValueOf(m.cfg.TelemetryProvider).IsNil() {
|
2024-01-16 10:46:12 -06:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2024-01-30 09:40:36 -06:00
|
|
|
m.cfg.TelemetryProvider.Start(ctx, &HCPProviderCfg{
|
2024-01-16 10:46:12 -06:00
|
|
|
HCPClient: m.cfg.Client,
|
|
|
|
HCPConfig: &m.cfg.CloudConfig,
|
|
|
|
})
|
2024-01-08 09:49:29 -06:00
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2024-01-30 09:40:36 -06:00
|
|
|
func (m *HCPManager) GetCloudConfig() config.CloudConfig {
|
2024-01-29 16:31:44 -06:00
|
|
|
m.cfgMu.RLock()
|
|
|
|
defer m.cfgMu.RUnlock()
|
|
|
|
|
|
|
|
return m.cfg.CloudConfig
|
|
|
|
}
|
|
|
|
|
2024-01-30 09:40:36 -06:00
|
|
|
func (m *HCPManager) UpdateConfig(client hcpclient.Client, cloudCfg config.CloudConfig) {
|
2022-09-26 14:58:15 -04:00
|
|
|
m.cfgMu.Lock()
|
2024-01-29 16:31:44 -06:00
|
|
|
// Save original values
|
|
|
|
originalCfg := m.cfg.CloudConfig
|
|
|
|
originalClient := m.cfg.Client
|
|
|
|
|
|
|
|
// Update with new values
|
|
|
|
m.cfg.Client = client
|
|
|
|
m.cfg.CloudConfig = cloudCfg
|
|
|
|
m.cfgMu.Unlock()
|
|
|
|
|
|
|
|
// Send update if already running and values were updated
|
|
|
|
if m.isRunning() && (originalClient != client || originalCfg != cloudCfg) {
|
|
|
|
m.SendUpdate()
|
|
|
|
}
|
2022-09-26 14:58:15 -04:00
|
|
|
}
|
|
|
|
|
2024-01-30 09:40:36 -06:00
|
|
|
func (m *HCPManager) SendUpdate() {
|
2022-09-26 14:58:15 -04:00
|
|
|
m.logger.Debug("HCP triggering status update")
|
|
|
|
select {
|
|
|
|
case m.updateCh <- struct{}{}:
|
|
|
|
// trigger update
|
|
|
|
default:
|
|
|
|
// if chan is full then there is already an update triggered that will soon
|
|
|
|
// be acted on so don't bother blocking.
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO: we should have retried on failures here with backoff but take into
|
|
|
|
// account that if a new update is triggered while we are still retrying we
|
|
|
|
// should not start another retry loop. Something like have a "dirty" flag which
|
|
|
|
// we mark on first PushUpdate and then a retry timer as well as the interval
|
|
|
|
// and a "isRetrying" state or something so that we attempt to send update, but
|
|
|
|
// then fetch fresh info on each attempt to send so if we are already in a retry
|
|
|
|
// backoff a new push is a no-op.
|
2024-01-30 09:40:36 -06:00
|
|
|
func (m *HCPManager) sendUpdate() error {
|
2022-09-26 14:58:15 -04:00
|
|
|
m.cfgMu.RLock()
|
|
|
|
cfg := m.cfg
|
|
|
|
m.cfgMu.RUnlock()
|
|
|
|
|
|
|
|
if !cfg.enabled() {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if m.testUpdateSent != nil {
|
|
|
|
defer func() {
|
|
|
|
select {
|
|
|
|
case m.testUpdateSent <- struct{}{}:
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
}
|
|
|
|
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
|
|
defer cancel()
|
|
|
|
|
|
|
|
s, err := cfg.StatusFn(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2024-01-29 16:31:44 -06:00
|
|
|
return cfg.Client.PushServerStatus(ctx, &s)
|
|
|
|
}
|
|
|
|
|
2024-01-30 09:40:36 -06:00
|
|
|
func (m *HCPManager) isRunning() bool {
|
2024-01-29 16:31:44 -06:00
|
|
|
m.runLock.RLock()
|
|
|
|
defer m.runLock.RUnlock()
|
|
|
|
return m.running
|
|
|
|
}
|
|
|
|
|
|
|
|
// setRunning sets the running status of the manager to the given value. If the
|
|
|
|
// given value is the same as the current running status, it returns false. If
|
|
|
|
// current status is updated to the given status, it returns true.
|
2024-01-30 09:40:36 -06:00
|
|
|
func (m *HCPManager) setRunning(r bool) bool {
|
2024-01-29 16:31:44 -06:00
|
|
|
m.runLock.Lock()
|
|
|
|
defer m.runLock.Unlock()
|
|
|
|
|
|
|
|
if m.running == r {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2024-01-30 09:40:36 -06:00
|
|
|
// Initialize or close the stop channel depending what running status
|
|
|
|
// we're transitioning to. Channel must be initialized on start since
|
|
|
|
// a provider can be stopped and started multiple times.
|
|
|
|
if r {
|
|
|
|
m.stopCh = make(chan struct{})
|
|
|
|
} else {
|
|
|
|
close(m.stopCh)
|
|
|
|
}
|
|
|
|
|
2024-01-29 16:31:44 -06:00
|
|
|
m.running = r
|
|
|
|
return true
|
2022-09-26 14:58:15 -04:00
|
|
|
}
|
2024-01-30 09:40:36 -06:00
|
|
|
|
|
|
|
// Stop stops the manager's main loop that sends updates
|
|
|
|
// and stops the SCADA provider and telemetry provider.
|
|
|
|
func (m *HCPManager) Stop() error {
|
|
|
|
changed := m.setRunning(false)
|
|
|
|
if !changed {
|
|
|
|
m.logger.Trace("HCP manager already stopped")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
m.logger.Info("HCP manager stopping")
|
|
|
|
|
|
|
|
m.cfgMu.RLock()
|
|
|
|
defer m.cfgMu.RUnlock()
|
|
|
|
|
|
|
|
if m.cfg.SCADAProvider != nil {
|
|
|
|
m.cfg.SCADAProvider.Stop()
|
|
|
|
}
|
|
|
|
|
|
|
|
if m.cfg.TelemetryProvider != nil && !reflect.ValueOf(m.cfg.TelemetryProvider).IsNil() {
|
|
|
|
m.cfg.TelemetryProvider.Stop()
|
|
|
|
}
|
|
|
|
|
|
|
|
if m.cfg.ManagementTokenDeleterFn != nil && m.cfg.CloudConfig.ManagementToken != "" {
|
|
|
|
err := m.cfg.ManagementTokenDeleterFn(m.cfg.CloudConfig.ManagementToken)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
m.logger.Info("HCP manager stopped")
|
|
|
|
return nil
|
|
|
|
}
|