2023-08-11 09:12:13 -04:00
|
|
|
// Copyright (c) HashiCorp, Inc.
|
|
|
|
// SPDX-License-Identifier: BUSL-1.1
|
|
|
|
|
2023-04-27 14:27:39 -06:00
|
|
|
package bootstrap
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2024-01-24 09:51:43 -06:00
|
|
|
"errors"
|
2023-04-27 14:27:39 -06:00
|
|
|
"os"
|
|
|
|
"path/filepath"
|
|
|
|
"testing"
|
2024-01-24 09:51:43 -06:00
|
|
|
"time"
|
2023-04-27 14:27:39 -06:00
|
|
|
|
Move HCP Manager lifecycle management out of Link controller (#20401)
* Add function to get update channel for watching HCP Link
* Add MonitorHCPLink function
This function can be called in a goroutine to manage the lifecycle
of the HCP manager.
* Update HCP Manager config in link monitor before starting
This updates HCPMonitorLink so it updates the HCP manager
with an HCP client and management token when a Link is upserted.
* Let MonitorHCPManager handle lifecycle instead of link controller
* Remove cleanup from Link controller and move it to MonitorHCPLink
Previously, the Link Controller was responsible for cleaning up the
HCP-related files on the file system. This change makes it so
MonitorHCPLink handles this cleanup. As a result, we are able to remove
the PlacementEachServer placement strategy for the Link controller
because it no longer needs to do this per-node cleanup.
* Remove HCP Manager dependency from Link Controller
The Link controller does not need to have HCP Manager
as a dependency anymore, so this removes that dependency
in order to simplify the design.
* Add Linked prefix to Linked status variables
This is in preparation for adding a new status type to the
Link resource.
* Add new "validated" status type to link resource
The link resource controller will now set a "validated" status
in addition to the "linked" status. This is needed so that other
components (eg the HCP manager) know when the Link is ready to link
with HCP.
* Fix tests
* Handle new 'EndOfSnapshot' WatchList event
* Fix watch test
* Remove unnecessary config from TestAgent_scadaProvider
Since the Scada provider is now started on agent startup
regardless of whether a cloud config is provided, this removes
the cloud config override from the relevant test.
This change is not exactly related to the changes from this PR,
but rather is something small and sort of related that was noticed
while working on this PR.
* Simplify link watch test and remove sleep from link watch
This updates the link watch test so that it uses more mocks
and does not require setting up the infrastructure for the HCP Link
controller.
This also removes the time.Sleep delay in the link watcher loop in favor
of an error counter. When we receive 10 consecutive errors, we shut down
the link watcher loop.
* Add better logging for link validation. Remove EndOfSnapshot test.
* Refactor link monitor test into a table test
* Add some clarifying comments to link monitor
* Simplify link watch test
* Test a bunch more errors cases in link monitor test
* Use exponential backoff instead of errorCounter in LinkWatch
* Move link watch and link monitor into a single goroutine called from server.go
* Refactor HCP link watcher to use single go-routine.
Previously, if the WatchClient errored, we would've never recovered
because we never retry to create the stream. With this change,
we have a single goroutine that runs for the life of the server agent
and if the WatchClient stream ever errors, we retry the creation
of the stream with an exponential backoff.
2024-02-12 10:48:23 -05:00
|
|
|
"github.com/mitchellh/cli"
|
|
|
|
"github.com/stretchr/testify/mock"
|
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
|
|
|
|
"github.com/hashicorp/go-hclog"
|
|
|
|
"github.com/hashicorp/go-uuid"
|
|
|
|
|
|
|
|
"github.com/hashicorp/consul/agent/hcp/bootstrap/constants"
|
HCP Telemetry Feature (#17460)
* Move hcp client to subpackage hcpclient (#16800)
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] OTELExporter (#17128)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* [HCP Observability] OTELSink (#17159)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Fix nits
* [HCP Observability] Init OTELSink in Telemetry (#17162)
* Move hcp client to subpackage hcpclient (#16800)
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* [HCP Observability] OTELExporter (#17128)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Initialize OTELSink with sync.Map for all the instrument stores.
* Added telemetry agent to client and init sink in deps
* Fixed client
* Initalize sink in deps
* init sink in telemetry library
* Init deps before telemetry
* Use concrete telemetry.OtelSink type
* add /v1/metrics
* Avoid returning err for telemetry init
* move sink init within the IsCloudEnabled()
* Use HCPSinkOpts in deps instead
* update golden test for configuration file
* Switch to using extra sinks in the telemetry library
* keep name MetricsConfig
* fix log in verifyCCMRegistration
* Set logger in context
* pass around MetricSink in deps
* Fix imports
* Rebased onto otel sink pr
* Fix URL in test
* [HCP Observability] OTELSink (#17159)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Fix nits
* pass extraSinks as function param instead
* Add default interval as package export
* remove verifyCCM func
* Add clusterID
* Fix import and add t.Parallel() for missing tests
* Kick Vercel CI
* Remove scheme from endpoint path, and fix error logging
* return metrics.MetricSink for sink method
* Update SDK
* [HCP Observability] Metrics filtering and Labels in Go Metrics sink (#17184)
* Move hcp client to subpackage hcpclient (#16800)
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* [HCP Observability] New MetricsClient (#17100)
* Client configured with TLS using HCP config and retry/throttle
* Add tests and godoc for metrics client
* close body after request
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* remove clone
* Extract CloudConfig and mock for future PR
* Switch to hclog.FromContext
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* [HCP Observability] OTELExporter (#17128)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Initialize OTELSink with sync.Map for all the instrument stores.
* Added telemetry agent to client and init sink in deps
* Fixed client
* Initalize sink in deps
* init sink in telemetry library
* Init deps before telemetry
* Use concrete telemetry.OtelSink type
* add /v1/metrics
* Avoid returning err for telemetry init
* move sink init within the IsCloudEnabled()
* Use HCPSinkOpts in deps instead
* update golden test for configuration file
* Switch to using extra sinks in the telemetry library
* keep name MetricsConfig
* fix log in verifyCCMRegistration
* Set logger in context
* pass around MetricSink in deps
* Fix imports
* Rebased onto otel sink pr
* Fix URL in test
* [HCP Observability] OTELSink (#17159)
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Create new OTELExporter which uses the MetricsClient
Add transform because the conversion is in an /internal package
* Fix lint error
* early return when there are no metrics
* Add NewOTELExporter() function
* Downgrade to metrics SDK version: v1.15.0-rc.1
* Fix imports
* fix small nits with comments and url.URL
* Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile
* Cleanup error handling and clarify empty metrics case
* Fix input/expected naming in otel_transform_test.go
* add comment for metric tracking
* Add a general isEmpty method
* Add clear error types
* update to latest version 1.15.0 of OTEL
* Client configured with TLS using HCP config and retry/throttle
* run go mod tidy
* Remove one abstraction to use the config from deps
* Address PR feedback
* Initialize OTELSink with sync.Map for all the instrument stores.
* Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests.
* Switch to mutex instead of sync.Map to avoid type assertion
* Add gauge store
* Clarify comments
* return concrete sink type
* Fix lint errors
* Move gauge store to be within sink
* Use context.TODO,rebase and clenaup opts handling
* Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
* Fix imports
* Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx
* Add lots of documentation to the OTELSink
* Fix gauge store comment and check ok
* Add select and ctx.Done() check to gauge callback
* use require.Equal for attributes
* Fixed import naming
* Remove float64 calls and add a NewGaugeStore method
* Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store
* Generate 100 gauge operations
* Seperate the labels into goroutines in sink test
* Generate kv store for the test case keys to avoid using uuid
* Added a race test with 300 samples for OTELSink
* Do not pass in waitgroup and use error channel instead.
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel
* Fix nits
* pass extraSinks as function param instead
* Add default interval as package export
* remove verifyCCM func
* Add clusterID
* Fix import and add t.Parallel() for missing tests
* Kick Vercel CI
* Remove scheme from endpoint path, and fix error logging
* return metrics.MetricSink for sink method
* Update SDK
* Added telemetry agent to client and init sink in deps
* Add node_id and __replica__ default labels
* add function for default labels and set x-hcp-resource-id
* Fix labels tests
* Commit suggestion for getDefaultLabels
Co-authored-by: Joshua Timmons <joshua.timmons1@gmail.com>
* Fixed server.id, and t.Parallel()
* Make defaultLabels a method on the TelemetryConfig object
* Rename FilterList to lowercase filterList
* Cleanup filter implemetation by combining regex into a single one, and making the type lowercase
* Fix append
* use regex directly for filters
* Fix x-resource-id test to use mocked value
* Fix log.Error formats
* Forgot the len(opts.Label) optimization)
* Use cfg.NodeID instead
---------
Co-authored-by: Joshua Timmons <joshua.timmons1@gmail.com>
* remove replic tag (#17484)
* [HCP Observability] Add custom metrics for OTEL sink, improve logging, upgrade modules and cleanup metrics client (#17455)
* Add custom metrics for Exporter and transform operations
* Improve deps logging
Run go mod tidy
* Upgrade SDK and OTEL
* Remove the partial success implemetation and check for HTTP status code in metrics client
* Add x-channel
* cleanup logs in deps.go based on PR feedback
* Change to debug log and lowercase
* address test operation feedback
* use GetHumanVersion on version
* Fix error wrapping
* Fix metric names
* [HCP Observability] Turn off retries for now until dynamically configurable (#17496)
* Remove retries for now until dynamic configuration is possible
* Clarify comment
* Update changelog
* improve changelog
---------
Co-authored-by: Joshua Timmons <joshua.timmons1@gmail.com>
2023-05-29 16:11:08 -04:00
|
|
|
hcpclient "github.com/hashicorp/consul/agent/hcp/client"
|
2023-04-27 14:27:39 -06:00
|
|
|
"github.com/hashicorp/consul/lib"
|
2024-01-24 09:51:43 -06:00
|
|
|
"github.com/hashicorp/consul/sdk/testutil"
|
2023-04-27 14:27:39 -06:00
|
|
|
"github.com/hashicorp/consul/tlsutil"
|
|
|
|
)
|
|
|
|
|
|
|
|
func Test_loadPersistedBootstrapConfig(t *testing.T) {
|
|
|
|
type expect struct {
|
|
|
|
loaded bool
|
|
|
|
warning string
|
|
|
|
}
|
|
|
|
type testCase struct {
|
2023-07-21 10:33:22 -07:00
|
|
|
existingCluster bool
|
|
|
|
disableManagementToken bool
|
|
|
|
mutateFn func(t *testing.T, dir string)
|
|
|
|
expect expect
|
2023-04-27 14:27:39 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
run := func(t *testing.T, tc testCase) {
|
2024-01-24 09:51:43 -06:00
|
|
|
dataDir := testutil.TempDir(t, "load-bootstrap-cfg")
|
2023-04-27 14:27:39 -06:00
|
|
|
|
Move HCP Manager lifecycle management out of Link controller (#20401)
* Add function to get update channel for watching HCP Link
* Add MonitorHCPLink function
This function can be called in a goroutine to manage the lifecycle
of the HCP manager.
* Update HCP Manager config in link monitor before starting
This updates HCPMonitorLink so it updates the HCP manager
with an HCP client and management token when a Link is upserted.
* Let MonitorHCPManager handle lifecycle instead of link controller
* Remove cleanup from Link controller and move it to MonitorHCPLink
Previously, the Link Controller was responsible for cleaning up the
HCP-related files on the file system. This change makes it so
MonitorHCPLink handles this cleanup. As a result, we are able to remove
the PlacementEachServer placement strategy for the Link controller
because it no longer needs to do this per-node cleanup.
* Remove HCP Manager dependency from Link Controller
The Link controller does not need to have HCP Manager
as a dependency anymore, so this removes that dependency
in order to simplify the design.
* Add Linked prefix to Linked status variables
This is in preparation for adding a new status type to the
Link resource.
* Add new "validated" status type to link resource
The link resource controller will now set a "validated" status
in addition to the "linked" status. This is needed so that other
components (eg the HCP manager) know when the Link is ready to link
with HCP.
* Fix tests
* Handle new 'EndOfSnapshot' WatchList event
* Fix watch test
* Remove unnecessary config from TestAgent_scadaProvider
Since the Scada provider is now started on agent startup
regardless of whether a cloud config is provided, this removes
the cloud config override from the relevant test.
This change is not exactly related to the changes from this PR,
but rather is something small and sort of related that was noticed
while working on this PR.
* Simplify link watch test and remove sleep from link watch
This updates the link watch test so that it uses more mocks
and does not require setting up the infrastructure for the HCP Link
controller.
This also removes the time.Sleep delay in the link watcher loop in favor
of an error counter. When we receive 10 consecutive errors, we shut down
the link watcher loop.
* Add better logging for link validation. Remove EndOfSnapshot test.
* Refactor link monitor test into a table test
* Add some clarifying comments to link monitor
* Simplify link watch test
* Test a bunch more errors cases in link monitor test
* Use exponential backoff instead of errorCounter in LinkWatch
* Move link watch and link monitor into a single goroutine called from server.go
* Refactor HCP link watcher to use single go-routine.
Previously, if the WatchClient errored, we would've never recovered
because we never retry to create the stream. With this change,
we have a single goroutine that runs for the life of the server agent
and if the WatchClient stream ever errors, we retry the creation
of the stream with an exponential backoff.
2024-02-12 10:48:23 -05:00
|
|
|
dir := filepath.Join(dataDir, constants.SubDir)
|
2023-04-27 14:27:39 -06:00
|
|
|
|
|
|
|
// Do some common setup as if we received config from HCP and persisted it to disk.
|
|
|
|
require.NoError(t, lib.EnsurePath(dir, true))
|
2023-07-21 10:33:22 -07:00
|
|
|
require.NoError(t, persistSuccessMarker(dir))
|
2023-04-27 14:27:39 -06:00
|
|
|
|
|
|
|
if !tc.existingCluster {
|
|
|
|
caCert, caKey, err := tlsutil.GenerateCA(tlsutil.CAOpts{})
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
serverCert, serverKey, err := testLeaf(caCert, caKey)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.NoError(t, persistTLSCerts(dir, serverCert, serverKey, []string{caCert}))
|
|
|
|
|
|
|
|
cfgJSON := `{"bootstrap_expect": 8}`
|
|
|
|
require.NoError(t, persistBootstrapConfig(dir, cfgJSON))
|
|
|
|
}
|
|
|
|
|
2023-07-21 10:33:22 -07:00
|
|
|
var token string
|
|
|
|
if !tc.disableManagementToken {
|
2024-01-24 09:51:43 -06:00
|
|
|
var err error
|
2023-07-21 10:33:22 -07:00
|
|
|
token, err = uuid.GenerateUUID()
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.NoError(t, persistManagementToken(dir, token))
|
|
|
|
}
|
2023-04-27 14:27:39 -06:00
|
|
|
|
|
|
|
// Optionally mutate the persisted data to trigger errors while loading.
|
|
|
|
if tc.mutateFn != nil {
|
|
|
|
tc.mutateFn(t, dir)
|
|
|
|
}
|
|
|
|
|
|
|
|
ui := cli.NewMockUi()
|
2024-01-24 09:51:43 -06:00
|
|
|
cfg, loaded := LoadPersistedBootstrapConfig(dataDir, ui)
|
2023-04-27 14:27:39 -06:00
|
|
|
require.Equal(t, tc.expect.loaded, loaded, ui.ErrorWriter.String())
|
|
|
|
if loaded {
|
|
|
|
require.Equal(t, token, cfg.ManagementToken)
|
|
|
|
require.Empty(t, ui.ErrorWriter.String())
|
|
|
|
} else {
|
|
|
|
require.Nil(t, cfg)
|
|
|
|
require.Contains(t, ui.ErrorWriter.String(), tc.expect.warning)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
tt := map[string]testCase{
|
|
|
|
"existing cluster with valid files": {
|
|
|
|
existingCluster: true,
|
|
|
|
// Don't mutate, files from setup are valid.
|
|
|
|
mutateFn: nil,
|
|
|
|
expect: expect{
|
|
|
|
loaded: true,
|
|
|
|
warning: "",
|
|
|
|
},
|
|
|
|
},
|
2023-07-21 10:33:22 -07:00
|
|
|
"existing cluster no token": {
|
|
|
|
existingCluster: true,
|
|
|
|
disableManagementToken: true,
|
2023-04-27 14:27:39 -06:00
|
|
|
expect: expect{
|
2023-07-21 10:33:22 -07:00
|
|
|
loaded: false,
|
2023-04-27 14:27:39 -06:00
|
|
|
},
|
|
|
|
},
|
|
|
|
"existing cluster no files": {
|
|
|
|
existingCluster: true,
|
|
|
|
mutateFn: func(t *testing.T, dir string) {
|
|
|
|
// Remove all files
|
|
|
|
require.NoError(t, os.RemoveAll(dir))
|
|
|
|
},
|
|
|
|
expect: expect{
|
|
|
|
loaded: false,
|
|
|
|
// No warnings since we assume we need to fetch config from HCP for the first time.
|
|
|
|
warning: "",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"new cluster with valid files": {
|
|
|
|
// Don't mutate, files from setup are valid.
|
|
|
|
mutateFn: nil,
|
|
|
|
expect: expect{
|
|
|
|
loaded: true,
|
|
|
|
warning: "",
|
|
|
|
},
|
|
|
|
},
|
2023-07-21 10:33:22 -07:00
|
|
|
"new cluster with no token": {
|
|
|
|
disableManagementToken: true,
|
|
|
|
expect: expect{
|
|
|
|
loaded: false,
|
|
|
|
},
|
|
|
|
},
|
2023-04-27 14:27:39 -06:00
|
|
|
"new cluster some files": {
|
|
|
|
mutateFn: func(t *testing.T, dir string) {
|
|
|
|
// Remove one of the required files
|
2024-01-24 09:51:43 -06:00
|
|
|
require.NoError(t, os.Remove(filepath.Join(dir, CertFileName)))
|
2023-04-27 14:27:39 -06:00
|
|
|
},
|
|
|
|
expect: expect{
|
|
|
|
loaded: false,
|
|
|
|
warning: "configuration files on disk are incomplete",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"new cluster no files": {
|
|
|
|
mutateFn: func(t *testing.T, dir string) {
|
|
|
|
// Remove all files
|
|
|
|
require.NoError(t, os.RemoveAll(dir))
|
|
|
|
},
|
|
|
|
expect: expect{
|
|
|
|
loaded: false,
|
|
|
|
// No warnings since we assume we need to fetch config from HCP for the first time.
|
|
|
|
warning: "",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"new cluster invalid cert": {
|
|
|
|
mutateFn: func(t *testing.T, dir string) {
|
2024-01-24 09:51:43 -06:00
|
|
|
name := filepath.Join(dir, CertFileName)
|
2023-04-27 14:27:39 -06:00
|
|
|
require.NoError(t, os.WriteFile(name, []byte("not-a-cert"), 0600))
|
|
|
|
},
|
|
|
|
expect: expect{
|
|
|
|
loaded: false,
|
|
|
|
warning: "invalid server certificate",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"new cluster invalid CA": {
|
|
|
|
mutateFn: func(t *testing.T, dir string) {
|
2024-01-24 09:51:43 -06:00
|
|
|
name := filepath.Join(dir, CAFileName)
|
2023-04-27 14:27:39 -06:00
|
|
|
require.NoError(t, os.WriteFile(name, []byte("not-a-ca-cert"), 0600))
|
|
|
|
},
|
|
|
|
expect: expect{
|
|
|
|
loaded: false,
|
|
|
|
warning: "invalid CA certificate",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"existing cluster invalid token": {
|
|
|
|
existingCluster: true,
|
|
|
|
mutateFn: func(t *testing.T, dir string) {
|
2024-01-24 09:51:43 -06:00
|
|
|
name := filepath.Join(dir, TokenFileName)
|
2023-04-27 14:27:39 -06:00
|
|
|
require.NoError(t, os.WriteFile(name, []byte("not-a-uuid"), 0600))
|
|
|
|
},
|
|
|
|
expect: expect{
|
|
|
|
loaded: false,
|
|
|
|
warning: "is not a valid UUID",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
for name, tc := range tt {
|
|
|
|
t.Run(name, func(t *testing.T) {
|
|
|
|
run(t, tc)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
2024-01-24 09:51:43 -06:00
|
|
|
|
|
|
|
func TestFetchBootstrapConfig(t *testing.T) {
|
|
|
|
type testCase struct {
|
|
|
|
expectFetchErr error
|
|
|
|
expectRetry bool
|
|
|
|
}
|
|
|
|
|
|
|
|
run := func(t *testing.T, tc testCase) {
|
|
|
|
ui := cli.NewMockUi()
|
|
|
|
dataDir := testutil.TempDir(t, "fetch-bootstrap-cfg")
|
|
|
|
clientM := hcpclient.NewMockClient(t)
|
|
|
|
|
|
|
|
if tc.expectFetchErr != nil && tc.expectRetry {
|
|
|
|
clientM.On("FetchBootstrap", mock.Anything).
|
|
|
|
Return(nil, tc.expectFetchErr)
|
|
|
|
} else if tc.expectFetchErr != nil && !tc.expectRetry {
|
|
|
|
clientM.On("FetchBootstrap", mock.Anything).
|
|
|
|
Return(nil, tc.expectFetchErr).Once()
|
|
|
|
} else {
|
|
|
|
validToken, err := uuid.GenerateUUID()
|
|
|
|
require.NoError(t, err)
|
|
|
|
clientM.EXPECT().FetchBootstrap(mock.Anything).Return(&hcpclient.BootstrapConfig{
|
|
|
|
ManagementToken: validToken,
|
|
|
|
ConsulConfig: "{}",
|
|
|
|
}, nil).Once()
|
|
|
|
}
|
|
|
|
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
|
|
|
defer cancel()
|
|
|
|
cfg, err := FetchBootstrapConfig(ctx, clientM, dataDir, ui)
|
|
|
|
|
|
|
|
if tc.expectFetchErr == nil {
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.NotNil(t, cfg)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
require.Error(t, err)
|
|
|
|
require.Nil(t, cfg)
|
|
|
|
if tc.expectRetry {
|
|
|
|
require.ErrorIs(t, err, context.DeadlineExceeded)
|
|
|
|
} else {
|
|
|
|
require.ErrorIs(t, err, tc.expectFetchErr)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
tt := map[string]testCase{
|
|
|
|
"success": {},
|
|
|
|
"unauthorized": {
|
|
|
|
expectFetchErr: hcpclient.ErrUnauthorized,
|
|
|
|
},
|
|
|
|
"forbidden": {
|
|
|
|
expectFetchErr: hcpclient.ErrForbidden,
|
|
|
|
},
|
|
|
|
"retryable fetch error": {
|
|
|
|
expectFetchErr: errors.New("error"),
|
|
|
|
expectRetry: true,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
for name, tc := range tt {
|
|
|
|
t.Run(name, func(t *testing.T) {
|
|
|
|
run(t, tc)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestLoadManagementToken(t *testing.T) {
|
|
|
|
type testCase struct {
|
|
|
|
skipHCPConfigDir bool
|
|
|
|
skipTokenFile bool
|
|
|
|
tokenFileContent string
|
|
|
|
skipBootstrap bool
|
|
|
|
}
|
|
|
|
|
|
|
|
validToken, err := uuid.GenerateUUID()
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
run := func(t *testing.T, tc testCase) {
|
|
|
|
dataDir := testutil.TempDir(t, "load-management-token")
|
|
|
|
|
Move HCP Manager lifecycle management out of Link controller (#20401)
* Add function to get update channel for watching HCP Link
* Add MonitorHCPLink function
This function can be called in a goroutine to manage the lifecycle
of the HCP manager.
* Update HCP Manager config in link monitor before starting
This updates HCPMonitorLink so it updates the HCP manager
with an HCP client and management token when a Link is upserted.
* Let MonitorHCPManager handle lifecycle instead of link controller
* Remove cleanup from Link controller and move it to MonitorHCPLink
Previously, the Link Controller was responsible for cleaning up the
HCP-related files on the file system. This change makes it so
MonitorHCPLink handles this cleanup. As a result, we are able to remove
the PlacementEachServer placement strategy for the Link controller
because it no longer needs to do this per-node cleanup.
* Remove HCP Manager dependency from Link Controller
The Link controller does not need to have HCP Manager
as a dependency anymore, so this removes that dependency
in order to simplify the design.
* Add Linked prefix to Linked status variables
This is in preparation for adding a new status type to the
Link resource.
* Add new "validated" status type to link resource
The link resource controller will now set a "validated" status
in addition to the "linked" status. This is needed so that other
components (eg the HCP manager) know when the Link is ready to link
with HCP.
* Fix tests
* Handle new 'EndOfSnapshot' WatchList event
* Fix watch test
* Remove unnecessary config from TestAgent_scadaProvider
Since the Scada provider is now started on agent startup
regardless of whether a cloud config is provided, this removes
the cloud config override from the relevant test.
This change is not exactly related to the changes from this PR,
but rather is something small and sort of related that was noticed
while working on this PR.
* Simplify link watch test and remove sleep from link watch
This updates the link watch test so that it uses more mocks
and does not require setting up the infrastructure for the HCP Link
controller.
This also removes the time.Sleep delay in the link watcher loop in favor
of an error counter. When we receive 10 consecutive errors, we shut down
the link watcher loop.
* Add better logging for link validation. Remove EndOfSnapshot test.
* Refactor link monitor test into a table test
* Add some clarifying comments to link monitor
* Simplify link watch test
* Test a bunch more errors cases in link monitor test
* Use exponential backoff instead of errorCounter in LinkWatch
* Move link watch and link monitor into a single goroutine called from server.go
* Refactor HCP link watcher to use single go-routine.
Previously, if the WatchClient errored, we would've never recovered
because we never retry to create the stream. With this change,
we have a single goroutine that runs for the life of the server agent
and if the WatchClient stream ever errors, we retry the creation
of the stream with an exponential backoff.
2024-02-12 10:48:23 -05:00
|
|
|
hcpCfgDir := filepath.Join(dataDir, constants.SubDir)
|
2024-01-24 09:51:43 -06:00
|
|
|
if !tc.skipHCPConfigDir {
|
|
|
|
err := os.Mkdir(hcpCfgDir, 0755)
|
|
|
|
require.NoError(t, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
tokenFilePath := filepath.Join(hcpCfgDir, TokenFileName)
|
|
|
|
if !tc.skipTokenFile {
|
|
|
|
err := os.WriteFile(tokenFilePath, []byte(tc.tokenFileContent), 0600)
|
|
|
|
require.NoError(t, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
clientM := hcpclient.NewMockClient(t)
|
|
|
|
if !tc.skipBootstrap {
|
|
|
|
clientM.EXPECT().FetchBootstrap(mock.Anything).Return(&hcpclient.BootstrapConfig{
|
|
|
|
ManagementToken: validToken,
|
|
|
|
ConsulConfig: "{}",
|
|
|
|
}, nil).Once()
|
|
|
|
}
|
|
|
|
|
|
|
|
token, err := LoadManagementToken(context.Background(), hclog.NewNullLogger(), clientM, dataDir)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Equal(t, validToken, token)
|
|
|
|
|
|
|
|
bytes, err := os.ReadFile(tokenFilePath)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Equal(t, validToken, string(bytes))
|
|
|
|
}
|
|
|
|
|
|
|
|
tt := map[string]testCase{
|
|
|
|
"token configured": {
|
|
|
|
skipBootstrap: true,
|
|
|
|
tokenFileContent: validToken,
|
|
|
|
},
|
|
|
|
"no token configured": {
|
|
|
|
skipTokenFile: true,
|
|
|
|
},
|
|
|
|
"invalid token configured": {
|
|
|
|
tokenFileContent: "invalid",
|
|
|
|
},
|
|
|
|
"no hcp-config directory": {
|
|
|
|
skipHCPConfigDir: true,
|
|
|
|
skipTokenFile: true,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
for name, tc := range tt {
|
|
|
|
t.Run(name, func(t *testing.T) {
|
|
|
|
run(t, tc)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|