// Copyright (c) HashiCorp, Inc. // SPDX-License-Identifier: BUSL-1.1 package autoconf import ( "context" "crypto/x509" "fmt" "net" "os" "path/filepath" "sync" "testing" "time" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" "github.com/hashicorp/consul/agent/cache" cachetype "github.com/hashicorp/consul/agent/cache-types" "github.com/hashicorp/consul/agent/config" "github.com/hashicorp/consul/agent/connect" "github.com/hashicorp/consul/agent/leafcert" "github.com/hashicorp/consul/agent/metadata" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/token" "github.com/hashicorp/consul/lib/retry" "github.com/hashicorp/consul/proto/private/pbautoconf" "github.com/hashicorp/consul/proto/private/pbconfig" "github.com/hashicorp/consul/sdk/testutil" testretry "github.com/hashicorp/consul/sdk/testutil/retry" ) type configLoader struct { opts config.LoadOpts } func (c *configLoader) Load(source config.Source) (config.LoadResult, error) { opts := c.opts opts.DefaultConfig = source return config.Load(opts) } func (c *configLoader) addConfigHCL(cfg string) { c.opts.HCL = append(c.opts.HCL, cfg) } func requireChanNotReady(t *testing.T, ch <-chan struct{}) { select { case <-ch: require.Fail(t, "chan is ready when it shouldn't be") default: return } } func requireChanReady(t *testing.T, ch <-chan struct{}) { select { case <-ch: return default: require.Fail(t, "chan is not ready when it should be") } } func waitForChan(timer *time.Timer, ch <-chan struct{}) bool { select { case <-timer.C: return false case <-ch: return true } } func waitForChans(timeout time.Duration, chans ...<-chan struct{}) bool { timer := time.NewTimer(timeout) defer timer.Stop() for _, ch := range chans { if !waitForChan(timer, ch) { return false } } return true } func TestNew(t *testing.T) { type testCase struct { modify func(*Config) err string validate func(t *testing.T, ac *AutoConfig) } cases := map[string]testCase{ "no-direct-rpc": { modify: func(c *Config) { c.DirectRPC = nil }, err: "must provide a direct RPC delegate", }, "no-config-loader": { modify: func(c *Config) { c.Loader = nil }, err: "must provide a config loader", }, "no-cache": { modify: func(c *Config) { c.Cache = nil }, err: "must provide a cache", }, "no-tls-configurator": { modify: func(c *Config) { c.TLSConfigurator = nil }, err: "must provide a TLS configurator", }, "no-tokens": { modify: func(c *Config) { c.Tokens = nil }, err: "must provide a token store", }, "ok": { validate: func(t *testing.T, ac *AutoConfig) { t.Helper() require.NotNil(t, ac.logger) require.NotNil(t, ac.acConfig.Waiter) require.Equal(t, time.Minute, ac.acConfig.FallbackRetry) require.Equal(t, 10*time.Second, ac.acConfig.FallbackLeeway) }, }, } for name, tcase := range cases { t.Run(name, func(t *testing.T) { cfg := Config{ Loader: func(source config.Source) (result config.LoadResult, err error) { return config.LoadResult{}, nil }, DirectRPC: newMockDirectRPC(t), Tokens: newMockTokenStore(t), Cache: newMockCache(t), TLSConfigurator: newMockTLSConfigurator(t), ServerProvider: newMockServerProvider(t), EnterpriseConfig: newEnterpriseConfig(t), } if tcase.modify != nil { tcase.modify(&cfg) } ac, err := New(cfg) if tcase.err != "" { testutil.RequireErrorContains(t, err, tcase.err) } else { require.NoError(t, err) require.NotNil(t, ac) if tcase.validate != nil { tcase.validate(t, ac) } } }) } } func TestReadConfig(t *testing.T) { // just testing that some auto config source gets injected ac := AutoConfig{ autoConfigSource: config.LiteralSource{ Name: autoConfigFileName, Config: config.Config{NodeName: stringPointer("hobbiton")}, }, logger: testutil.Logger(t), acConfig: Config{ Loader: func(source config.Source) (config.LoadResult, error) { r := config.LoadResult{} cfg, _, err := source.Parse() if err != nil { return r, err } r.RuntimeConfig = &config.RuntimeConfig{ DevMode: true, NodeName: *cfg.NodeName, } return r, nil }, }, } cfg, err := ac.ReadConfig() require.NoError(t, err) require.NotNil(t, cfg) require.Equal(t, "hobbiton", cfg.NodeName) require.True(t, cfg.DevMode) require.Same(t, ac.config, cfg) } func setupRuntimeConfig(t *testing.T) *configLoader { t.Helper() dataDir := testutil.TempDir(t, "auto-config") opts := config.LoadOpts{ FlagValues: config.FlagValuesTarget{ Config: config.Config{ DataDir: &dataDir, Datacenter: stringPointer("dc1"), NodeName: stringPointer("autoconf"), BindAddr: stringPointer("127.0.0.1"), }, }, } return &configLoader{opts: opts} } func TestInitialConfiguration_disabled(t *testing.T) { mcfg := newMockedConfig(t) mcfg.loader.addConfigHCL(` primary_datacenter = "primary" auto_config = { enabled = false } `) ac, err := New(mcfg.Config) require.NoError(t, err) require.NotNil(t, ac) cfg, err := ac.InitialConfiguration(context.Background()) require.NoError(t, err) require.NotNil(t, cfg) require.Equal(t, "primary", cfg.PrimaryDatacenter) require.NoFileExists(t, filepath.Join(*mcfg.loader.opts.FlagValues.DataDir, autoConfigFileName)) } func TestInitialConfiguration_cancelled(t *testing.T) { if testing.Short() { t.Skip("too slow for testing.Short") } mcfg := newMockedConfig(t) loader := setupRuntimeConfig(t) loader.addConfigHCL(` primary_datacenter = "primary" auto_config = { enabled = true intro_token = "blarg" server_addresses = ["127.0.0.1:8300"] } verify_outgoing = true `) mcfg.Config.Loader = loader.Load expectedRequest := pbautoconf.AutoConfigRequest{ Datacenter: "dc1", Node: "autoconf", JWT: "blarg", } mcfg.directRPC.On("RPC", "dc1", "autoconf", &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 8300}, "AutoConfig.InitialConfiguration", &expectedRequest, mock.Anything).Return(fmt.Errorf("injected error")).Times(0).Maybe() mcfg.serverProvider.On("FindLANServer").Return(nil).Times(0).Maybe() ac, err := New(mcfg.Config) require.NoError(t, err) require.NotNil(t, ac) ctx, cancelFn := context.WithDeadline(context.Background(), time.Now().Add(100*time.Millisecond)) defer cancelFn() cfg, err := ac.InitialConfiguration(ctx) testutil.RequireErrorContains(t, err, context.DeadlineExceeded.Error()) require.Nil(t, cfg) } func TestInitialConfiguration_restored(t *testing.T) { mcfg := newMockedConfig(t) loader := setupRuntimeConfig(t) loader.addConfigHCL(` auto_config = { enabled = true intro_token ="blarg" server_addresses = ["127.0.0.1:8300"] } verify_outgoing = true `) mcfg.Config.Loader = loader.Load indexedRoots, cert, extraCACerts := mcfg.setupInitialTLS(t, "autoconf", "dc1", "secret") // persist an auto config response to the data dir where it is expected persistedFile := filepath.Join(*loader.opts.FlagValues.DataDir, autoConfigFileName) response := &pbautoconf.AutoConfigResponse{ Config: &pbconfig.Config{ PrimaryDatacenter: "primary", TLS: &pbconfig.TLS{ VerifyServerHostname: true, }, ACL: &pbconfig.ACL{ Tokens: &pbconfig.ACLTokens{ Agent: "secret", }, }, }, CARoots: mustTranslateCARootsToProtobuf(t, indexedRoots), Certificate: mustTranslateIssuedCertToProtobuf(t, cert), ExtraCACertificates: extraCACerts, } data, err := pbMarshaler.Marshal(response) require.NoError(t, err) require.NoError(t, os.WriteFile(persistedFile, data, 0600)) // recording the initial configuration even when restoring is going to update // the agent token in the token store mcfg.tokens.On("UpdateAgentToken", "secret", token.TokenSourceConfig).Return(true).Once() // prepopulation is going to grab the token to populate the correct cache key mcfg.tokens.On("AgentToken").Return("secret").Times(0) ac, err := New(mcfg.Config) require.NoError(t, err) require.NotNil(t, ac) cfg, err := ac.InitialConfiguration(context.Background()) require.NoError(t, err, data) require.NotNil(t, cfg) require.Equal(t, "primary", cfg.PrimaryDatacenter) } func TestInitialConfiguration_success(t *testing.T) { mcfg := newMockedConfig(t) loader := setupRuntimeConfig(t) loader.addConfigHCL(` auto_config = { enabled = true intro_token ="blarg" server_addresses = ["127.0.0.1:8300"] } verify_outgoing = true `) mcfg.Config.Loader = loader.Load indexedRoots, cert, extraCerts := mcfg.setupInitialTLS(t, "autoconf", "dc1", "secret") // this gets called when InitialConfiguration is invoked to record the token from the // auto-config response mcfg.tokens.On("UpdateAgentToken", "secret", token.TokenSourceConfig).Return(true).Once() // prepopulation is going to grab the token to populate the correct cache key mcfg.tokens.On("AgentToken").Return("secret").Times(0) // no server provider mcfg.serverProvider.On("FindLANServer").Return(nil).Times(0) populateResponse := func(args mock.Arguments) { resp, ok := args.Get(5).(*pbautoconf.AutoConfigResponse) require.True(t, ok) resp.Config = &pbconfig.Config{ PrimaryDatacenter: "primary", TLS: &pbconfig.TLS{ VerifyServerHostname: true, }, ACL: &pbconfig.ACL{ Tokens: &pbconfig.ACLTokens{ Agent: "secret", }, }, } resp.CARoots = mustTranslateCARootsToProtobuf(t, indexedRoots) resp.Certificate = mustTranslateIssuedCertToProtobuf(t, cert) resp.ExtraCACertificates = extraCerts } expectedRequest := pbautoconf.AutoConfigRequest{ Datacenter: "dc1", Node: "autoconf", JWT: "blarg", } mcfg.directRPC.On( "RPC", "dc1", "autoconf", &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 8300}, "AutoConfig.InitialConfiguration", &expectedRequest, &pbautoconf.AutoConfigResponse{}).Return(nil).Run(populateResponse) ac, err := New(mcfg.Config) require.NoError(t, err) require.NotNil(t, ac) cfg, err := ac.InitialConfiguration(context.Background()) require.NoError(t, err) require.NotNil(t, cfg) require.Equal(t, "primary", cfg.PrimaryDatacenter) // the file was written to. persistedFile := filepath.Join(*loader.opts.FlagValues.DataDir, autoConfigFileName) require.FileExists(t, persistedFile) } func TestInitialConfiguration_retries(t *testing.T) { mcfg := newMockedConfig(t) loader := setupRuntimeConfig(t) loader.addConfigHCL(` auto_config = { enabled = true intro_token ="blarg" server_addresses = [ "198.18.0.1:8300", "198.18.0.2:8398", "198.18.0.3:8399", "127.0.0.1:1234" ] } verify_outgoing = true `) mcfg.Config.Loader = loader.Load // reduce the retry wait times to make this test run faster mcfg.Config.Waiter = &retry.Waiter{MinFailures: 2, MaxWait: time.Millisecond} indexedRoots, cert, extraCerts := mcfg.setupInitialTLS(t, "autoconf", "dc1", "secret") // this gets called when InitialConfiguration is invoked to record the token from the // auto-config response mcfg.tokens.On("UpdateAgentToken", "secret", token.TokenSourceConfig).Return(true).Once() // prepopulation is going to grab the token to populate the correct cache key mcfg.tokens.On("AgentToken").Return("secret").Times(0) // no server provider mcfg.serverProvider.On("FindLANServer").Return(nil).Times(0) populateResponse := func(args mock.Arguments) { resp, ok := args.Get(5).(*pbautoconf.AutoConfigResponse) require.True(t, ok) resp.Config = &pbconfig.Config{ PrimaryDatacenter: "primary", TLS: &pbconfig.TLS{ VerifyServerHostname: true, }, ACL: &pbconfig.ACL{ Tokens: &pbconfig.ACLTokens{ Agent: "secret", }, }, } resp.CARoots = mustTranslateCARootsToProtobuf(t, indexedRoots) resp.Certificate = mustTranslateIssuedCertToProtobuf(t, cert) resp.ExtraCACertificates = extraCerts } expectedRequest := pbautoconf.AutoConfigRequest{ Datacenter: "dc1", Node: "autoconf", JWT: "blarg", } // basically the 198.18.0.* addresses should fail indefinitely. the first time through the // outer loop we inject a failure for the DNS resolution of localhost to 127.0.0.1. Then // the second time through the outer loop we allow the localhost one to work. mcfg.directRPC.On( "RPC", "dc1", "autoconf", &net.TCPAddr{IP: net.IPv4(198, 18, 0, 1), Port: 8300}, "AutoConfig.InitialConfiguration", &expectedRequest, &pbautoconf.AutoConfigResponse{}).Return(fmt.Errorf("injected failure")).Times(0) mcfg.directRPC.On( "RPC", "dc1", "autoconf", &net.TCPAddr{IP: net.IPv4(198, 18, 0, 2), Port: 8398}, "AutoConfig.InitialConfiguration", &expectedRequest, &pbautoconf.AutoConfigResponse{}).Return(fmt.Errorf("injected failure")).Times(0) mcfg.directRPC.On( "RPC", "dc1", "autoconf", &net.TCPAddr{IP: net.IPv4(198, 18, 0, 3), Port: 8399}, "AutoConfig.InitialConfiguration", &expectedRequest, &pbautoconf.AutoConfigResponse{}).Return(fmt.Errorf("injected failure")).Times(0) mcfg.directRPC.On( "RPC", "dc1", "autoconf", &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 1234}, "AutoConfig.InitialConfiguration", &expectedRequest, &pbautoconf.AutoConfigResponse{}).Return(fmt.Errorf("injected failure")).Once() mcfg.directRPC.On( "RPC", "dc1", "autoconf", &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 1234}, "AutoConfig.InitialConfiguration", &expectedRequest, &pbautoconf.AutoConfigResponse{}).Return(nil).Run(populateResponse).Once() ac, err := New(mcfg.Config) require.NoError(t, err) require.NotNil(t, ac) cfg, err := ac.InitialConfiguration(context.Background()) require.NoError(t, err) require.NotNil(t, cfg) require.Equal(t, "primary", cfg.PrimaryDatacenter) // the file was written to. persistedFile := filepath.Join(*loader.opts.FlagValues.DataDir, autoConfigFileName) require.FileExists(t, persistedFile) } func TestGoRoutineManagement(t *testing.T) { mcfg := newMockedConfig(t) loader := setupRuntimeConfig(t) loader.addConfigHCL(` auto_config = { enabled = true intro_token ="blarg" server_addresses = ["127.0.0.1:8300"] } verify_outgoing = true `) mcfg.Config.Loader = loader.Load // prepopulation is going to grab the token to populate the correct cache key mcfg.tokens.On("AgentToken").Return("secret").Times(0) ac, err := New(mcfg.Config) require.NoError(t, err) // priming the config so some other requests will work properly that need to // read from the configuration. We are going to avoid doing InitialConfiguration // for this test as we only are really concerned with the go routine management _, err = ac.ReadConfig() require.NoError(t, err) var rootsCtx context.Context var leafCtx context.Context var ctxLock sync.Mutex rootsReq := ac.caRootsRequest() mcfg.cache.On("Notify", mock.Anything, cachetype.ConnectCARootName, &rootsReq, rootsWatchID, mock.Anything, ).Return(nil).Times(2).Run(func(args mock.Arguments) { ctxLock.Lock() rootsCtx = args.Get(0).(context.Context) ctxLock.Unlock() }) leafReq := ac.leafCertRequest() mcfg.leafCerts.On("Notify", mock.Anything, &leafReq, leafWatchID, mock.Anything, ).Return(nil).Times(2).Run(func(args mock.Arguments) { ctxLock.Lock() leafCtx = args.Get(0).(context.Context) ctxLock.Unlock() }) // we will start/stop things twice mcfg.tokens.On("Notify", token.TokenKindAgent).Return(token.Notifier{}).Times(2) mcfg.tokens.On("StopNotify", token.Notifier{}).Times(2) mcfg.tlsCfg.On("AutoEncryptCert").Return(&x509.Certificate{ NotAfter: time.Now().Add(10 * time.Minute), }).Times(0) // ensure that auto-config isn't running require.False(t, ac.IsRunning()) // ensure that nothing bad happens and that it reports as stopped require.False(t, ac.Stop()) // ensure that the Done chan also reports that things are not running // in other words the chan is immediately selectable requireChanReady(t, ac.Done()) // start auto-config ctx, cancel := context.WithCancel(context.Background()) defer cancel() require.NoError(t, ac.Start(ctx)) waitForContexts := func() bool { ctxLock.Lock() defer ctxLock.Unlock() return !(rootsCtx == nil || leafCtx == nil) } // wait for the cache notifications to get started require.Eventually(t, waitForContexts, 100*time.Millisecond, 10*time.Millisecond) // hold onto the Done chan to test for the go routine exiting done := ac.Done() // ensure we report as running require.True(t, ac.IsRunning()) // ensure the done chan is not selectable yet requireChanNotReady(t, done) // ensure we error if we attempt to start again err = ac.Start(ctx) testutil.RequireErrorContains(t, err, "AutoConfig is already running") // now stop things - it should return true indicating that it was running // when we attempted to stop it. require.True(t, ac.Stop()) // ensure that the go routine shuts down - it will close the done chan. Also it should cancel // the cache watches by cancelling the context it passed into the Notify call. require.True(t, waitForChans(100*time.Millisecond, done, leafCtx.Done(), rootsCtx.Done()), "AutoConfig didn't shut down") require.False(t, ac.IsRunning()) // restart it require.NoError(t, ac.Start(ctx)) // get the new Done chan done = ac.Done() // ensure that context cancellation causes us to stop as well cancel() require.True(t, waitForChans(100*time.Millisecond, done)) } type testAutoConfig struct { mcfg *mockedConfig ac *AutoConfig tokenUpdates chan struct{} originalToken string stop func() initialRoots *structs.IndexedCARoots initialCert *structs.IssuedCert extraCerts []string } func startedAutoConfig(t *testing.T, autoEncrypt bool) testAutoConfig { t.Helper() mcfg := newMockedConfig(t) loader := setupRuntimeConfig(t) if !autoEncrypt { loader.addConfigHCL(` auto_config = { enabled = true intro_token ="blarg" server_addresses = ["127.0.0.1:8300"] } verify_outgoing = true `) } else { loader.addConfigHCL(` auto_encrypt { tls = true } verify_outgoing = true `) } mcfg.Config.Loader = loader.Load mcfg.Config.FallbackLeeway = time.Nanosecond originalToken := "a5deaa25-11ca-48bf-a979-4c3a7aa4b9a9" if !autoEncrypt { // this gets called when InitialConfiguration is invoked to record the token from the // auto-config response mcfg.tokens.On("UpdateAgentToken", originalToken, token.TokenSourceConfig).Return(true).Once() } // we expect this to be retrieved twice: first during cache prepopulation // and then again when setting up the cache watch for the leaf cert. // However one of those expectations is setup in the expectInitialTLS // method so we only need one more here mcfg.tokens.On("AgentToken").Return(originalToken).Once() if autoEncrypt { // when using AutoEncrypt we also have to grab the token once more // when setting up the initial RPC as the ACL token is what is used // to authorize the request. mcfg.tokens.On("AgentToken").Return(originalToken).Once() } // this is called once during Start to initialze the token watches tokenUpdateCh := make(chan struct{}) tokenNotifier := token.Notifier{ Ch: tokenUpdateCh, } mcfg.tokens.On("Notify", token.TokenKindAgent).Once().Return(tokenNotifier) mcfg.tokens.On("StopNotify", tokenNotifier).Once() // expect the roots watch on the cache mcfg.cache.On("Notify", mock.Anything, cachetype.ConnectCARootName, &structs.DCSpecificRequest{Datacenter: "dc1"}, rootsWatchID, mock.Anything, ).Return(nil).Once() mcfg.leafCerts.On("Notify", mock.Anything, &leafcert.ConnectCALeafRequest{ Datacenter: "dc1", Agent: "autoconf", Token: originalToken, DNSSAN: defaultDNSSANs, IPSAN: defaultIPSANs, }, leafWatchID, mock.Anything, ).Return(nil).Once() // override the server provider - most of the other tests set it up so that this // always returns no server (simulating a state where we haven't joined gossip). // this seems like a good place to ensure this other way of finding server information // works mcfg.serverProvider.On("FindLANServer").Once().Return(&metadata.Server{ Addr: &net.TCPAddr{IP: net.IPv4(198, 18, 0, 1), Port: 8300}, }) indexedRoots, cert, extraCerts := mcfg.setupInitialTLS(t, "autoconf", "dc1", originalToken) mcfg.tlsCfg.On("AutoEncryptCert").Return(&x509.Certificate{ NotAfter: cert.ValidBefore, }).Once() populateResponse := func(args mock.Arguments) { method := args.String(3) switch method { case "AutoConfig.InitialConfiguration": resp, ok := args.Get(5).(*pbautoconf.AutoConfigResponse) require.True(t, ok) resp.Config = &pbconfig.Config{ PrimaryDatacenter: "primary", TLS: &pbconfig.TLS{ VerifyServerHostname: true, }, ACL: &pbconfig.ACL{ Tokens: &pbconfig.ACLTokens{ Agent: originalToken, }, }, } resp.CARoots = mustTranslateCARootsToProtobuf(t, indexedRoots) resp.Certificate = mustTranslateIssuedCertToProtobuf(t, cert) resp.ExtraCACertificates = extraCerts case "AutoEncrypt.Sign": resp, ok := args.Get(5).(*structs.SignedResponse) require.True(t, ok) *resp = structs.SignedResponse{ VerifyServerHostname: true, ConnectCARoots: *indexedRoots, IssuedCert: *cert, ManualCARoots: extraCerts, } } } if !autoEncrypt { expectedRequest := pbautoconf.AutoConfigRequest{ Datacenter: "dc1", Node: "autoconf", JWT: "blarg", } mcfg.directRPC.On( "RPC", "dc1", "autoconf", &net.TCPAddr{IP: net.IPv4(198, 18, 0, 1), Port: 8300}, "AutoConfig.InitialConfiguration", &expectedRequest, &pbautoconf.AutoConfigResponse{}).Return(nil).Run(populateResponse).Once() } else { expectedRequest := structs.CASignRequest{ WriteRequest: structs.WriteRequest{Token: originalToken}, Datacenter: "dc1", // TODO (autoconf) Maybe in the future we should populate a CSR // and do some manual parsing/verification of the contents. The // bits not having to do with the signing key such as the requested // SANs and CN. For now though the mockDirectRPC type will empty // the CSR so we have to pass in an empty string to the expectation. CSR: "", } mcfg.directRPC.On( "RPC", "dc1", "autoconf", // reusing the same name to prevent needing more configurability &net.TCPAddr{IP: net.IPv4(198, 18, 0, 1), Port: 8300}, "AutoEncrypt.Sign", &expectedRequest, &structs.SignedResponse{}).Return(nil).Run(populateResponse) } ac, err := New(mcfg.Config) require.NoError(t, err) require.NotNil(t, ac) cfg, err := ac.InitialConfiguration(context.Background()) require.NoError(t, err) require.NotNil(t, cfg) if !autoEncrypt { // auto-encrypt doesn't modify the config but rather sets the value // in the TLS configurator require.True(t, cfg.TLS.InternalRPC.VerifyServerHostname) } ctx, cancel := context.WithCancel(context.Background()) require.NoError(t, ac.Start(ctx)) t.Cleanup(func() { done := ac.Done() cancel() timer := time.NewTimer(1 * time.Second) defer timer.Stop() select { case <-done: // do nothing case <-timer.C: t.Fatalf("AutoConfig wasn't stopped within 1 second after test completion") } }) return testAutoConfig{ mcfg: mcfg, ac: ac, tokenUpdates: tokenUpdateCh, originalToken: originalToken, initialRoots: indexedRoots, initialCert: cert, extraCerts: extraCerts, stop: cancel, } } // this test ensures that the cache watches are restarted with // the updated token after receiving a token update func TestTokenUpdate(t *testing.T) { testAC := startedAutoConfig(t, false) newToken := "1a4cc445-86ed-46b4-a355-bbf5a11dddb0" rootsCtx, rootsCancel := context.WithCancel(context.Background()) testAC.mcfg.cache.On("Notify", mock.Anything, cachetype.ConnectCARootName, &structs.DCSpecificRequest{Datacenter: testAC.ac.config.Datacenter}, rootsWatchID, mock.Anything, ).Return(nil).Once().Run(func(args mock.Arguments) { rootsCancel() }) leafCtx, leafCancel := context.WithCancel(context.Background()) testAC.mcfg.leafCerts.On("Notify", mock.Anything, &leafcert.ConnectCALeafRequest{ Datacenter: "dc1", Agent: "autoconf", Token: newToken, DNSSAN: defaultDNSSANs, IPSAN: defaultIPSANs, }, leafWatchID, mock.Anything, ).Return(nil).Once().Run(func(args mock.Arguments) { leafCancel() }) // this will be retrieved once when resetting the leaf cert watch testAC.mcfg.tokens.On("AgentToken").Return(newToken).Once() // send the notification about the token update testAC.tokenUpdates <- struct{}{} // wait for the leaf cert watches require.True(t, waitForChans(100*time.Millisecond, leafCtx.Done(), rootsCtx.Done()), "New cache watches were not started within 100ms") } func TestRootsUpdate(t *testing.T) { testAC := startedAutoConfig(t, false) secondCA := connect.TestCA(t, testAC.initialRoots.Roots[0]) secondRoots := structs.IndexedCARoots{ ActiveRootID: secondCA.ID, TrustDomain: connect.TestClusterID, Roots: []*structs.CARoot{ secondCA, testAC.initialRoots.Roots[0], }, QueryMeta: structs.QueryMeta{ Index: 99, }, } updatedCtx, cancel := context.WithCancel(context.Background()) testAC.mcfg.tlsCfg.On("UpdateAutoTLS", testAC.extraCerts, []string{secondCA.RootCert, testAC.initialRoots.Roots[0].RootCert}, testAC.initialCert.CertPEM, "redacted", true, ).Return(nil).Once().Run(func(args mock.Arguments) { cancel() }) // when a cache event comes in we end up recalculating the fallback timer which requires this call testAC.mcfg.tlsCfg.On("AutoEncryptCert").Return(&x509.Certificate{ NotAfter: time.Now().Add(10 * time.Minute), }).Once() req := structs.DCSpecificRequest{Datacenter: "dc1"} require.True(t, testAC.mcfg.cache.sendNotification(context.Background(), req.CacheInfo().Key, cache.UpdateEvent{ CorrelationID: rootsWatchID, Result: &secondRoots, Meta: cache.ResultMeta{ Index: secondRoots.Index, }, })) require.True(t, waitForChans(100*time.Millisecond, updatedCtx.Done()), "TLS certificates were not updated within the alotted time") // persisting these to disk happens right after the chan we are waiting for will have fired above // however there is no deterministic way to know once its been written outside of maybe a filesystem // event notifier. That seems a little heavy handed just for this and especially to do in any sort // of cross platform way. testretry.Run(t, func(r *testretry.R) { resp, err := testAC.ac.readPersistedAutoConfig() require.NoError(r, err) require.Equal(r, secondRoots.ActiveRootID, resp.CARoots.GetActiveRootID()) }) } func TestCertUpdate(t *testing.T) { testAC := startedAutoConfig(t, false) secondCert := newLeaf(t, "autoconf", "dc1", testAC.initialRoots.Roots[0], 99, 10*time.Minute) updatedCtx, cancel := context.WithCancel(context.Background()) testAC.mcfg.tlsCfg.On("UpdateAutoTLS", testAC.extraCerts, []string{testAC.initialRoots.Roots[0].RootCert}, secondCert.CertPEM, "redacted", true, ).Return(nil).Once().Run(func(args mock.Arguments) { cancel() }) // when a cache event comes in we end up recalculating the fallback timer which requires this call testAC.mcfg.tlsCfg.On("AutoEncryptCert").Return(&x509.Certificate{ NotAfter: secondCert.ValidBefore, }).Once() req := leafcert.ConnectCALeafRequest{ Datacenter: "dc1", Agent: "autoconf", Token: testAC.originalToken, DNSSAN: defaultDNSSANs, IPSAN: defaultIPSANs, } require.True(t, testAC.mcfg.leafCerts.sendNotification(context.Background(), req.Key(), cache.UpdateEvent{ CorrelationID: leafWatchID, Result: secondCert, Meta: cache.ResultMeta{ Index: secondCert.ModifyIndex, }, })) require.True(t, waitForChans(100*time.Millisecond, updatedCtx.Done()), "TLS certificates were not updated within the alotted time") // persisting these to disk happens after all the things we would wait for in assertCertUpdated // will have fired. There is no deterministic way to know once its been written so we wrap // this in a retry. testretry.Run(t, func(r *testretry.R) { resp, err := testAC.ac.readPersistedAutoConfig() require.NoError(r, err) // ensure the roots got persisted to disk require.Equal(r, secondCert.CertPEM, resp.Certificate.GetCertPEM()) }) } func TestFallback(t *testing.T) { testAC := startedAutoConfig(t, false) // at this point everything is operating normally and we are just // waiting for events. We are going to send a new cert that is basically // already expired and then allow the fallback routine to kick in. secondCert := newLeaf(t, "autoconf", "dc1", testAC.initialRoots.Roots[0], 100, time.Nanosecond) secondCA := caRootRoundtrip(t, connect.TestCA(t, testAC.initialRoots.Roots[0])) secondRoots := caRootsRoundtrip(t, &structs.IndexedCARoots{ ActiveRootID: secondCA.ID, TrustDomain: connect.TestClusterID, Roots: []*structs.CARoot{ secondCA, testAC.initialRoots.Roots[0], }, QueryMeta: structs.QueryMeta{ Index: 101, }, }) thirdCert := newLeaf(t, "autoconf", "dc1", secondCA, 102, 10*time.Minute) // setup the expectation for when the certs got updated initially updatedCtx, updateCancel := context.WithCancel(context.Background()) testAC.mcfg.tlsCfg.On("UpdateAutoTLS", testAC.extraCerts, []string{testAC.initialRoots.Roots[0].RootCert}, secondCert.CertPEM, "redacted", true, ).Return(nil).Once().Run(func(args mock.Arguments) { updateCancel() }) // when a cache event comes in we end up recalculating the fallback timer which requires this call testAC.mcfg.tlsCfg.On("AutoEncryptCert").Return(&x509.Certificate{ NotAfter: secondCert.ValidBefore, }).Times(2) fallbackCtx, fallbackCancel := context.WithCancel(context.Background()) // also testing here that we can change server IPs for ongoing operations testAC.mcfg.serverProvider.On("FindLANServer").Once().Return(&metadata.Server{ Addr: &net.TCPAddr{IP: net.IPv4(198, 18, 23, 2), Port: 8300}, }) // after sending the notification for the cert update another InitialConfiguration RPC // will be made to pull down the latest configuration. So we need to set up the response // for the second RPC populateResponse := func(args mock.Arguments) { resp, ok := args.Get(5).(*pbautoconf.AutoConfigResponse) require.True(t, ok) resp.Config = &pbconfig.Config{ PrimaryDatacenter: "primary", TLS: &pbconfig.TLS{ VerifyServerHostname: true, }, ACL: &pbconfig.ACL{ Tokens: &pbconfig.ACLTokens{ Agent: testAC.originalToken, }, }, } resp.CARoots = mustTranslateCARootsToProtobuf(t, secondRoots) resp.Certificate = mustTranslateIssuedCertToProtobuf(t, thirdCert) resp.ExtraCACertificates = testAC.extraCerts fallbackCancel() } expectedRequest := pbautoconf.AutoConfigRequest{ Datacenter: "dc1", Node: "autoconf", JWT: "blarg", } testAC.mcfg.directRPC.On( "RPC", "dc1", "autoconf", &net.TCPAddr{IP: net.IPv4(198, 18, 23, 2), Port: 8300}, "AutoConfig.InitialConfiguration", &expectedRequest, &pbautoconf.AutoConfigResponse{}).Return(nil).Run(populateResponse).Once() // this gets called when InitialConfiguration is invoked to record the token from the // auto-config response which is how the Fallback for auto-config works testAC.mcfg.tokens.On("UpdateAgentToken", testAC.originalToken, token.TokenSourceConfig).Return(true).Once() testAC.mcfg.expectInitialTLS(t, "autoconf", "dc1", testAC.originalToken, secondCA, secondRoots, thirdCert, testAC.extraCerts) // after the second RPC we now will use the new certs validity period in the next run loop iteration testAC.mcfg.tlsCfg.On("AutoEncryptCert").Return(&x509.Certificate{ NotAfter: time.Now().Add(10 * time.Minute), }).Once() // now that all the mocks are set up we can trigger the whole thing by sending the second expired cert // as a cache update event. req := leafcert.ConnectCALeafRequest{ Datacenter: "dc1", Agent: "autoconf", Token: testAC.originalToken, DNSSAN: defaultDNSSANs, IPSAN: defaultIPSANs, } require.True(t, testAC.mcfg.leafCerts.sendNotification(context.Background(), req.Key(), cache.UpdateEvent{ CorrelationID: leafWatchID, Result: secondCert, Meta: cache.ResultMeta{ Index: secondCert.ModifyIndex, }, })) // wait for the TLS certificates to get updated require.True(t, waitForChans(100*time.Millisecond, updatedCtx.Done()), "TLS certificates were not updated within the alotted time") // now wait for the fallback routine to be invoked require.True(t, waitForChans(100*time.Millisecond, fallbackCtx.Done()), "fallback routines did not get invoked within the alotted time") testAC.stop() <-testAC.ac.done resp, err := testAC.ac.readPersistedAutoConfig() require.NoError(t, err) // ensure the roots got persisted to disk require.Equal(t, thirdCert.CertPEM, resp.Certificate.GetCertPEM()) require.Equal(t, secondRoots.ActiveRootID, resp.CARoots.GetActiveRootID()) } func TestIntroToken(t *testing.T) { tokenFile := testutil.TempFile(t, "intro-token") t.Cleanup(func() { os.Remove(tokenFile.Name()) }) tokenFileEmpty := testutil.TempFile(t, "intro-token-empty") t.Cleanup(func() { os.Remove(tokenFileEmpty.Name()) }) tokenFromFile := "8ae34d3a-8adf-446a-b236-69874597cb5b" tokenFromConfig := "3ad9b572-ea42-4e47-9cd0-53a398a98abf" require.NoError(t, os.WriteFile(tokenFile.Name(), []byte(tokenFromFile), 0600)) type testCase struct { config *config.RuntimeConfig err string token string } cases := map[string]testCase{ "config": { config: &config.RuntimeConfig{ AutoConfig: config.AutoConfig{ IntroToken: tokenFromConfig, IntroTokenFile: tokenFile.Name(), }, }, token: tokenFromConfig, }, "file": { config: &config.RuntimeConfig{ AutoConfig: config.AutoConfig{ IntroTokenFile: tokenFile.Name(), }, }, token: tokenFromFile, }, "file-empty": { config: &config.RuntimeConfig{ AutoConfig: config.AutoConfig{ IntroTokenFile: tokenFileEmpty.Name(), }, }, err: "intro_token_file did not contain any token", }, } for name, tcase := range cases { t.Run(name, func(t *testing.T) { ac := AutoConfig{ config: tcase.config, } token, err := ac.introToken() if tcase.err != "" { testutil.RequireErrorContains(t, err, tcase.err) } else { require.NoError(t, err) require.Equal(t, tcase.token, token) } }) } }