From 6e62166f6d13da7b52d5684139bdd1b797711a7c Mon Sep 17 00:00:00 2001 From: Kyle Havlovitz Date: Mon, 30 Nov 2020 14:49:28 -0800 Subject: [PATCH] Merge pull request #9009 from hashicorp/update-secondary-ca connect: Fix an issue with updating CA config in a secondary datacenter --- agent/connect/ca/provider_consul_test.go | 25 +- agent/connect/ca/provider_vault.go | 2 +- agent/connect/ca/testing.go | 29 + agent/connect/testing_ca.go | 24 +- agent/connect/testing_spiffe.go | 8 +- agent/consul/connect_ca_endpoint.go | 215 +--- agent/consul/connect_ca_endpoint_test.go | 134 +++ agent/consul/leader.go | 6 +- agent/consul/leader_connect.go | 764 +-------------- agent/consul/leader_connect_ca.go | 1142 ++++++++++++++++++++++ agent/consul/leader_connect_ca_test.go | 285 ++++++ agent/consul/leader_connect_test.go | 24 +- agent/consul/server.go | 22 +- agent/consul/server_connect.go | 2 +- agent/consul/server_test.go | 2 +- agent/structs/acl.go | 6 +- agent/structs/connect_ca.go | 10 + 17 files changed, 1675 insertions(+), 1025 deletions(-) create mode 100644 agent/consul/leader_connect_ca.go create mode 100644 agent/consul/leader_connect_ca_test.go diff --git a/agent/connect/ca/provider_consul_test.go b/agent/connect/ca/provider_consul_test.go index 13ac312564..0a642e5cf8 100644 --- a/agent/connect/ca/provider_consul_test.go +++ b/agent/connect/ca/provider_consul_test.go @@ -21,30 +21,7 @@ func (c *consulCAMockDelegate) State() *state.Store { } func (c *consulCAMockDelegate) ApplyCARequest(req *structs.CARequest) (interface{}, error) { - idx, _, err := c.state.CAConfig(nil) - if err != nil { - return nil, err - } - - switch req.Op { - case structs.CAOpSetProviderState: - _, err := c.state.CASetProviderState(idx+1, req.ProviderState) - if err != nil { - return nil, err - } - - return true, nil - case structs.CAOpDeleteProviderState: - if err := c.state.CADeleteProviderState(req.ProviderState.ID); err != nil { - return nil, err - } - - return true, nil - case structs.CAOpIncrementProviderSerialNumber: - return uint64(2), nil - default: - return nil, fmt.Errorf("Invalid CA operation '%s'", req.Op) - } + return ApplyCARequestToStore(c.state, req) } func newMockDelegate(t *testing.T, conf *structs.CAConfiguration) *consulCAMockDelegate { diff --git a/agent/connect/ca/provider_vault.go b/agent/connect/ca/provider_vault.go index ea39801a21..0510fe8665 100644 --- a/agent/connect/ca/provider_vault.go +++ b/agent/connect/ca/provider_vault.go @@ -135,7 +135,7 @@ func (v *VaultProvider) renewToken(ctx context.Context, watcher *vaultapi.Lifeti go watcher.Start() case <-watcher.RenewCh(): - v.logger.Error("Successfully renewed token for Vault provider") + v.logger.Info("Successfully renewed token for Vault provider") } } } diff --git a/agent/connect/ca/testing.go b/agent/connect/ca/testing.go index 25533f8dd4..83b52d60d5 100644 --- a/agent/connect/ca/testing.go +++ b/agent/connect/ca/testing.go @@ -8,6 +8,8 @@ import ( "sync" "github.com/hashicorp/consul/agent/connect" + "github.com/hashicorp/consul/agent/consul/state" + "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/sdk/freeport" "github.com/hashicorp/consul/sdk/testutil/retry" "github.com/hashicorp/go-hclog" @@ -234,3 +236,30 @@ func (v *TestVaultServer) Stop() error { return nil } + +func ApplyCARequestToStore(store *state.Store, req *structs.CARequest) (interface{}, error) { + idx, _, err := store.CAConfig(nil) + if err != nil { + return nil, err + } + + switch req.Op { + case structs.CAOpSetProviderState: + _, err := store.CASetProviderState(idx+1, req.ProviderState) + if err != nil { + return nil, err + } + + return true, nil + case structs.CAOpDeleteProviderState: + if err := store.CADeleteProviderState(req.ProviderState.ID); err != nil { + return nil, err + } + + return true, nil + case structs.CAOpIncrementProviderSerialNumber: + return uint64(2), nil + default: + return nil, fmt.Errorf("Invalid CA operation '%s'", req.Op) + } +} diff --git a/agent/connect/testing_ca.go b/agent/connect/testing_ca.go index e623c1872f..4851b9db55 100644 --- a/agent/connect/testing_ca.go +++ b/agent/connect/testing_ca.go @@ -56,7 +56,7 @@ func ValidateLeaf(caPEM string, leafPEM string, intermediatePEMs []string) error return err } -func testCA(t testing.T, xc *structs.CARoot, keyType string, keyBits int) *structs.CARoot { +func testCA(t testing.T, xc *structs.CARoot, keyType string, keyBits int, ttl time.Duration) *structs.CARoot { var result structs.CARoot result.Active = true result.Name = fmt.Sprintf("Test CA %d", atomic.AddUint64(&testCACounter, 1)) @@ -76,6 +76,14 @@ func testCA(t testing.T, xc *structs.CARoot, keyType string, keyBits int) *struc id := &SpiffeIDSigning{ClusterID: TestClusterID, Domain: "consul"} // Create the CA cert + now := time.Now() + before := now + after := now + if ttl != 0 { + after = after.Add(ttl) + } else { + after = after.AddDate(10, 0, 0) + } template := x509.Certificate{ SerialNumber: sn, Subject: pkix.Name{CommonName: result.Name}, @@ -85,8 +93,8 @@ func testCA(t testing.T, xc *structs.CARoot, keyType string, keyBits int) *struc x509.KeyUsageCRLSign | x509.KeyUsageDigitalSignature, IsCA: true, - NotAfter: time.Now().AddDate(10, 0, 0), - NotBefore: time.Now(), + NotAfter: after, + NotBefore: before, AuthorityKeyId: testKeyID(t, signer.Public()), SubjectKeyId: testKeyID(t, signer.Public()), } @@ -159,13 +167,19 @@ func testCA(t testing.T, xc *structs.CARoot, keyType string, keyBits int) *struc // that is cross-signed with the previous cert, and this will be set as // SigningCert. func TestCA(t testing.T, xc *structs.CARoot) *structs.CARoot { - return testCA(t, xc, DefaultPrivateKeyType, DefaultPrivateKeyBits) + return testCA(t, xc, DefaultPrivateKeyType, DefaultPrivateKeyBits, 0) +} + +// TestCAWithTTL is similar to TestCA, except that it +// takes a custom duration for the lifetime of the certificate. +func TestCAWithTTL(t testing.T, xc *structs.CARoot, ttl time.Duration) *structs.CARoot { + return testCA(t, xc, DefaultPrivateKeyType, DefaultPrivateKeyBits, ttl) } // TestCAWithKeyType is similar to TestCA, except that it // takes two additional arguments to override the default private key type and size. func TestCAWithKeyType(t testing.T, xc *structs.CARoot, keyType string, keyBits int) *structs.CARoot { - return testCA(t, xc, keyType, keyBits) + return testCA(t, xc, keyType, keyBits, 0) } // testCertID is an interface to be implemented the various spiffe ID / CertURI types diff --git a/agent/connect/testing_spiffe.go b/agent/connect/testing_spiffe.go index c7fa6f753f..4a0577880c 100644 --- a/agent/connect/testing_spiffe.go +++ b/agent/connect/testing_spiffe.go @@ -12,10 +12,16 @@ func TestSpiffeIDService(t testing.T, service string) *SpiffeIDService { // TestSpiffeIDServiceWithHost returns a SPIFFE ID representing a service with // the specified trust domain. func TestSpiffeIDServiceWithHost(t testing.T, service, host string) *SpiffeIDService { + return TestSpiffeIDServiceWithHostDC(t, service, host, "dc1") +} + +// TestSpiffeIDServiceWithHostDC returns a SPIFFE ID representing a service with +// the specified trust domain for the given datacenter. +func TestSpiffeIDServiceWithHostDC(t testing.T, service, host, datacenter string) *SpiffeIDService { return &SpiffeIDService{ Host: host, Namespace: "default", - Datacenter: "dc1", + Datacenter: datacenter, Service: service, } } diff --git a/agent/consul/connect_ca_endpoint.go b/agent/consul/connect_ca_endpoint.go index e7ae0c7237..ffecaae1c2 100644 --- a/agent/consul/connect_ca_endpoint.go +++ b/agent/consul/connect_ca_endpoint.go @@ -3,14 +3,12 @@ package consul import ( "errors" "fmt" - "reflect" "time" "github.com/hashicorp/go-hclog" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/connect" - "github.com/hashicorp/consul/agent/connect/ca" "github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/go-memdb" @@ -103,216 +101,7 @@ func (s *ConnectCA) ConfigurationSet( return acl.ErrPermissionDenied } - // Exit early if it's a no-op change - state := s.srv.fsm.State() - confIdx, config, err := state.CAConfig(nil) - if err != nil { - return err - } - - // Don't allow state changes. Either it needs to be empty or the same to allow - // read-modify-write loops that don't touch the State field. - if len(args.Config.State) > 0 && - !reflect.DeepEqual(args.Config.State, config.State) { - return ErrStateReadOnly - } - - // Don't allow users to change the ClusterID. - args.Config.ClusterID = config.ClusterID - if args.Config.Provider == config.Provider && reflect.DeepEqual(args.Config.Config, config.Config) { - return nil - } - - // If the provider hasn't changed, we need to load the current Provider state - // so it can decide if it needs to change resources or not based on the config - // change. - if args.Config.Provider == config.Provider { - // Note this is a shallow copy since the State method doc requires the - // provider return a map that will not be further modified and should not - // modify the one we pass to Configure. - args.Config.State = config.State - } - - // Create a new instance of the provider described by the config - // and get the current active root CA. This acts as a good validation - // of the config and makes sure the provider is functioning correctly - // before we commit any changes to Raft. - newProvider, err := s.srv.createCAProvider(args.Config) - if err != nil { - return fmt.Errorf("could not initialize provider: %v", err) - } - pCfg := ca.ProviderConfig{ - ClusterID: args.Config.ClusterID, - Datacenter: s.srv.config.Datacenter, - // This endpoint can be called in a secondary DC too so set this correctly. - IsPrimary: s.srv.config.Datacenter == s.srv.config.PrimaryDatacenter, - RawConfig: args.Config.Config, - State: args.Config.State, - } - if err := newProvider.Configure(pCfg); err != nil { - return fmt.Errorf("error configuring provider: %v", err) - } - - // Set up a defer to clean up the new provider if we exit early due to an error. - cleanupNewProvider := true - defer func() { - if cleanupNewProvider { - if err := newProvider.Cleanup(); err != nil { - s.logger.Warn("failed to clean up CA provider while handling startup failure", "provider", newProvider, "error", err) - } - } - }() - - if err := newProvider.GenerateRoot(); err != nil { - return fmt.Errorf("error generating CA root certificate: %v", err) - } - - newRootPEM, err := newProvider.ActiveRoot() - if err != nil { - return err - } - - newActiveRoot, err := parseCARoot(newRootPEM, args.Config.Provider, args.Config.ClusterID) - if err != nil { - return err - } - - // See if the provider needs to persist any state along with the config - pState, err := newProvider.State() - if err != nil { - return fmt.Errorf("error getting provider state: %v", err) - } - args.Config.State = pState - - // Compare the new provider's root CA ID to the current one. If they - // match, just update the existing provider with the new config. - // If they don't match, begin the root rotation process. - _, root, err := state.CARootActive(nil) - if err != nil { - return err - } - - // If the root didn't change or if this is a secondary DC, just update the - // config and return. - if (s.srv.config.Datacenter != s.srv.config.PrimaryDatacenter) || - root != nil && root.ID == newActiveRoot.ID { - args.Op = structs.CAOpSetConfig - resp, err := s.srv.raftApply(structs.ConnectCARequestType, args) - if err != nil { - return err - } - if respErr, ok := resp.(error); ok { - return respErr - } - - // If the config has been committed, update the local provider instance - cleanupNewProvider = false - s.srv.setCAProvider(newProvider, newActiveRoot) - - s.logger.Info("CA provider config updated") - - return nil - } - - // At this point, we know the config change has trigged a root rotation, - // either by swapping the provider type or changing the provider's config - // to use a different root certificate. - - // First up, sanity check that the current provider actually supports - // cross-signing. - oldProvider, _ := s.srv.getCAProvider() - if oldProvider == nil { - return fmt.Errorf("internal error: CA provider is nil") - } - canXSign, err := oldProvider.SupportsCrossSigning() - if err != nil { - return fmt.Errorf("CA provider error: %s", err) - } - if !canXSign && !args.Config.ForceWithoutCrossSigning { - return errors.New("The current CA Provider does not support cross-signing. " + - "You can try again with ForceWithoutCrossSigningSet but this may cause " + - "disruption - see documentation for more.") - } - if !canXSign && args.Config.ForceWithoutCrossSigning { - s.logger.Warn("current CA doesn't support cross signing but " + - "CA reconfiguration forced anyway with ForceWithoutCrossSigning") - } - - // If it's a config change that would trigger a rotation (different provider/root): - // 1. Get the root from the new provider. - // 2. Call CrossSignCA on the old provider to sign the new root with the old one to - // get a cross-signed certificate. - // 3. Take the active root for the new provider and append the intermediate from step 2 - // to its list of intermediates. - newRoot, err := connect.ParseCert(newRootPEM) - if err != nil { - return err - } - - if canXSign { - // Have the old provider cross-sign the new root - xcCert, err := oldProvider.CrossSignCA(newRoot) - if err != nil { - return err - } - - // Add the cross signed cert to the new CA's intermediates (to be attached - // to leaf certs). - newActiveRoot.IntermediateCerts = []string{xcCert} - } - - intermediate, err := newProvider.GenerateIntermediate() - if err != nil { - return err - } - if intermediate != newRootPEM { - newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediate) - } - - // Update the roots and CA config in the state store at the same time - idx, roots, err := state.CARoots(nil) - if err != nil { - return err - } - - var newRoots structs.CARoots - for _, r := range roots { - newRoot := *r - if newRoot.Active { - newRoot.Active = false - newRoot.RotatedOutAt = time.Now() - } - newRoots = append(newRoots, &newRoot) - } - newRoots = append(newRoots, newActiveRoot) - - args.Op = structs.CAOpSetRootsAndConfig - args.Index = idx - args.Config.ModifyIndex = confIdx - args.Roots = newRoots - resp, err := s.srv.raftApply(structs.ConnectCARequestType, args) - if err != nil { - return err - } - if respErr, ok := resp.(error); ok { - return respErr - } - if respOk, ok := resp.(bool); ok && !respOk { - return fmt.Errorf("could not atomically update roots and config") - } - - // If the config has been committed, update the local provider instance - // and call teardown on the old provider - cleanupNewProvider = false - s.srv.setCAProvider(newProvider, newActiveRoot) - - if err := oldProvider.Cleanup(); err != nil { - s.logger.Warn("failed to clean up old provider", "provider", config.Provider) - } - - s.logger.Info("CA rotated to new root under provider", "provider", args.Config.Provider) - - return nil + return s.srv.caManager.UpdateConfiguration(args) } // Roots returns the currently trusted root certificates. @@ -438,7 +227,7 @@ func (s *ConnectCA) SignIntermediate( return acl.ErrPermissionDenied } - provider, _ := s.srv.getCAProvider() + provider, _ := s.srv.caManager.getCAProvider() if provider == nil { return fmt.Errorf("internal error: CA provider is nil") } diff --git a/agent/consul/connect_ca_endpoint_test.go b/agent/consul/connect_ca_endpoint_test.go index 4e2885e473..00cfbfa3e4 100644 --- a/agent/consul/connect_ca_endpoint_test.go +++ b/agent/consul/connect_ca_endpoint_test.go @@ -492,6 +492,140 @@ func TestConnectCAConfig_TriggerRotation(t *testing.T) { } } +func TestConnectCAConfig_UpdateSecondary(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + require := require.New(t) + + // Initialize primary as the primary DC + dir1, s1 := testServerWithConfig(t, func(c *Config) { + c.Datacenter = "primary" + }) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + + testrpc.WaitForLeader(t, s1.RPC, "primary") + + // secondary as a secondary DC + dir2, s2 := testServerWithConfig(t, func(c *Config) { + c.Datacenter = "secondary" + c.PrimaryDatacenter = "primary" + }) + defer os.RemoveAll(dir2) + defer s2.Shutdown() + codec := rpcClient(t, s2) + defer codec.Close() + + // Create the WAN link + joinWAN(t, s2, s1) + testrpc.WaitForLeader(t, s2.RPC, "secondary") + + // Capture the current root + rootList, activeRoot, err := getTestRoots(s1, "primary") + require.NoError(err) + require.Len(rootList.Roots, 1) + rootCert := activeRoot + + waitForActiveCARoot(t, s1, rootCert) + waitForActiveCARoot(t, s2, rootCert) + + // Capture the current intermediate + rootList, activeRoot, err = getTestRoots(s2, "secondary") + require.NoError(err) + require.Len(rootList.Roots, 1) + require.Len(activeRoot.IntermediateCerts, 1) + oldIntermediatePEM := activeRoot.IntermediateCerts[0] + + // Update the secondary CA config to use a new private key, which should + // cause a re-signing with a new intermediate. + _, newKey, err := connect.GeneratePrivateKey() + assert.NoError(err) + newConfig := &structs.CAConfiguration{ + Provider: "consul", + Config: map[string]interface{}{ + "PrivateKey": newKey, + "RootCert": "", + "RotationPeriod": 90 * 24 * time.Hour, + }, + } + { + args := &structs.CARequest{ + Datacenter: "secondary", + Config: newConfig, + } + var reply interface{} + + require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.ConfigurationSet", args, &reply)) + } + + // Make sure the new intermediate has replaced the old one in the active root, + // and that the root itself hasn't changed. + var newIntermediatePEM string + { + args := &structs.DCSpecificRequest{ + Datacenter: "secondary", + } + var reply structs.IndexedCARoots + require.Nil(msgpackrpc.CallWithCodec(codec, "ConnectCA.Roots", args, &reply)) + require.Len(reply.Roots, 1) + require.Len(reply.Roots[0].IntermediateCerts, 1) + newIntermediatePEM = reply.Roots[0].IntermediateCerts[0] + require.NotEqual(oldIntermediatePEM, newIntermediatePEM) + require.Equal(reply.Roots[0].RootCert, rootCert.RootCert) + } + + // Verify the new config was set. + { + args := &structs.DCSpecificRequest{ + Datacenter: "secondary", + } + var reply structs.CAConfiguration + require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.ConfigurationGet", args, &reply)) + + actual, err := ca.ParseConsulCAConfig(reply.Config) + require.NoError(err) + expected, err := ca.ParseConsulCAConfig(newConfig.Config) + require.NoError(err) + assert.Equal(reply.Provider, newConfig.Provider) + assert.Equal(actual, expected) + } + + // Verify that new leaf certs get the new intermediate bundled + { + // Generate a CSR and request signing + spiffeId := connect.TestSpiffeIDServiceWithHostDC(t, "web", connect.TestClusterID+".consul", "secondary") + csr, _ := connect.TestCSR(t, spiffeId) + args := &structs.CASignRequest{ + Datacenter: "secondary", + CSR: csr, + } + var reply structs.IssuedCert + require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.Sign", args, &reply)) + + // Verify the leaf cert has the new intermediate. + { + roots := x509.NewCertPool() + assert.True(roots.AppendCertsFromPEM([]byte(rootCert.RootCert))) + leaf, err := connect.ParseCert(reply.CertPEM) + require.NoError(err) + + intermediates := x509.NewCertPool() + require.True(intermediates.AppendCertsFromPEM([]byte(newIntermediatePEM))) + + _, err = leaf.Verify(x509.VerifyOptions{ + Roots: roots, + Intermediates: intermediates, + }) + require.NoError(err) + } + + // Verify other fields + assert.Equal("web", reply.Service) + assert.Equal(spiffeId.URI().String(), reply.ServiceURI) + } +} + // Test CA signing func TestConnectCASign(t *testing.T) { t.Parallel() diff --git a/agent/consul/leader.go b/agent/consul/leader.go index bd95260acc..928d8dab03 100644 --- a/agent/consul/leader.go +++ b/agent/consul/leader.go @@ -334,8 +334,7 @@ func (s *Server) establishLeadership() error { s.getOrCreateAutopilotConfig() s.autopilot.Start() - // todo(kyhavlov): start a goroutine here for handling periodic CA rotation - if err := s.initializeCA(); err != nil { + if err := s.caManager.InitializeCA(); err != nil { return err } @@ -371,7 +370,8 @@ func (s *Server) revokeLeadership() { s.stopConnectLeader() - s.setCAProvider(nil, nil) + s.caManager.setCAProvider(nil, nil) + s.caManager.setState(CAStateUninitialized, false) s.stopACLTokenReaping() diff --git a/agent/consul/leader_connect.go b/agent/consul/leader_connect.go index 4739bd4254..976b5fa2c6 100644 --- a/agent/consul/leader_connect.go +++ b/agent/consul/leader_connect.go @@ -4,17 +4,13 @@ import ( "bytes" "context" "fmt" - "reflect" - "strings" "time" "golang.org/x/time/rate" - "github.com/hashicorp/consul/agent/connect" "github.com/hashicorp/consul/agent/connect/ca" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/logging" - uuid "github.com/hashicorp/go-uuid" ) const ( @@ -37,76 +33,36 @@ var ( maxRetryBackoff = 256 ) -// initializeCAConfig is used to initialize the CA config if necessary -// when setting up the CA during establishLeadership -func (s *Server) initializeCAConfig() (*structs.CAConfiguration, error) { - state := s.fsm.State() - _, config, err := state.CAConfig(nil) - if err != nil { - return nil, err - } - if config == nil { - config = s.config.CAConfig - if config.ClusterID == "" { - id, err := uuid.GenerateUUID() - if err != nil { - return nil, err - } - config.ClusterID = id - } - } else if _, ok := config.Config["IntermediateCertTTL"]; !ok { - dup := *config - copied := make(map[string]interface{}) - for k, v := range dup.Config { - copied[k] = v - } - copied["IntermediateCertTTL"] = connect.DefaultIntermediateCertTTL.String() - dup.Config = copied - config = &dup - } else { - return config, nil +// startConnectLeader starts multi-dc connect leader routines. +func (s *Server) startConnectLeader() { + if !s.config.ConnectEnabled { + return } - req := structs.CARequest{ - Op: structs.CAOpSetConfig, - Config: config, - } - if resp, err := s.raftApply(structs.ConnectCARequestType, req); err != nil { - return nil, err - } else if respErr, ok := resp.(error); ok { - return nil, respErr + // Start the Connect secondary DC actions if enabled. + if s.config.Datacenter != s.config.PrimaryDatacenter { + s.leaderRoutineManager.Start(secondaryCARootWatchRoutineName, s.caManager.secondaryCARootWatch) + s.leaderRoutineManager.Start(intentionReplicationRoutineName, s.replicateIntentions) } - return config, nil + s.leaderRoutineManager.Start(intermediateCertRenewWatchRoutineName, s.caManager.intermediateCertRenewalWatch) + s.leaderRoutineManager.Start(caRootPruningRoutineName, s.runCARootPruning) } -// parseCARoot returns a filled-in structs.CARoot from a raw PEM value. -func parseCARoot(pemValue, provider, clusterID string) (*structs.CARoot, error) { - id, err := connect.CalculateCertFingerprint(pemValue) - if err != nil { - return nil, fmt.Errorf("error parsing root fingerprint: %v", err) +// stopConnectLeader stops connect specific leader functions. +func (s *Server) stopConnectLeader() { + s.leaderRoutineManager.Stop(secondaryCARootWatchRoutineName) + s.leaderRoutineManager.Stop(intentionReplicationRoutineName) + s.leaderRoutineManager.Stop(intermediateCertRenewWatchRoutineName) + s.leaderRoutineManager.Stop(caRootPruningRoutineName) + + // If the provider implements NeedsStop, we call Stop to perform any shutdown actions. + provider, _ := s.caManager.getCAProvider() + if provider != nil { + if needsStop, ok := provider.(ca.NeedsStop); ok { + needsStop.Stop() + } } - rootCert, err := connect.ParseCert(pemValue) - if err != nil { - return nil, fmt.Errorf("error parsing root cert: %v", err) - } - keyType, keyBits, err := connect.KeyInfoFromCert(rootCert) - if err != nil { - return nil, fmt.Errorf("error extracting root key info: %v", err) - } - return &structs.CARoot{ - ID: id, - Name: fmt.Sprintf("%s CA Root Cert", strings.Title(provider)), - SerialNumber: rootCert.SerialNumber.Uint64(), - SigningKeyID: connect.EncodeSigningKeyID(rootCert.SubjectKeyId), - ExternalTrustDomain: clusterID, - NotBefore: rootCert.NotBefore, - NotAfter: rootCert.NotAfter, - RootCert: pemValue, - PrivateKeyType: keyType, - PrivateKeyBits: keyBits, - Active: true, - }, nil } // createProvider returns a connect CA provider from the given config. @@ -131,483 +87,6 @@ func (s *Server) createCAProvider(conf *structs.CAConfiguration) (ca.Provider, e return p, nil } -// getCAProvider is being called while holding caProviderReconfigurationLock -// which means it must never take that lock itself or call anything that does. -func (s *Server) getCAProvider() (ca.Provider, *structs.CARoot) { - retries := 0 - var result ca.Provider - var resultRoot *structs.CARoot - for result == nil { - s.caProviderLock.RLock() - result = s.caProvider - resultRoot = s.caProviderRoot - s.caProviderLock.RUnlock() - - // In cases where an agent is started with managed proxies, we may ask - // for the provider before establishLeadership completes. If we're the - // leader, then wait and get the provider again - if result == nil && s.IsLeader() && retries < 10 { - retries++ - time.Sleep(50 * time.Millisecond) - continue - } - - break - } - - return result, resultRoot -} - -// setCAProvider is being called while holding caProviderReconfigurationLock -// which means it must never take that lock itself or call anything that does. -func (s *Server) setCAProvider(newProvider ca.Provider, root *structs.CARoot) { - s.caProviderLock.Lock() - defer s.caProviderLock.Unlock() - s.caProvider = newProvider - s.caProviderRoot = root -} - -// initializeCA sets up the CA provider when gaining leadership, either bootstrapping -// the CA if this is the primary DC or making a remote RPC for intermediate signing -// if this is a secondary DC. -func (s *Server) initializeCA() error { - connectLogger := s.loggers.Named(logging.Connect) - // Bail if connect isn't enabled. - if !s.config.ConnectEnabled { - return nil - } - - // Initialize the provider based on the current config. - conf, err := s.initializeCAConfig() - if err != nil { - return err - } - provider, err := s.createCAProvider(conf) - if err != nil { - return err - } - - s.caProviderReconfigurationLock.Lock() - defer s.caProviderReconfigurationLock.Unlock() - s.setCAProvider(provider, nil) - - // If this isn't the primary DC, run the secondary DC routine if the primary has already been upgraded to at least 1.6.0 - if s.config.PrimaryDatacenter != s.config.Datacenter { - versionOk, foundPrimary := ServersInDCMeetMinimumVersion(s, s.config.PrimaryDatacenter, minMultiDCConnectVersion) - if !foundPrimary { - connectLogger.Warn("primary datacenter is configured but unreachable - deferring initialization of the secondary datacenter CA") - // return nil because we will initialize the secondary CA later - return nil - } else if !versionOk { - // return nil because we will initialize the secondary CA later - connectLogger.Warn("servers in the primary datacenter are not at least at the minimum version - deferring initialization of the secondary datacenter CA", - "min_version", minMultiDCConnectVersion.String(), - ) - return nil - } - - // Get the root CA to see if we need to refresh our intermediate. - args := structs.DCSpecificRequest{ - Datacenter: s.config.PrimaryDatacenter, - } - var roots structs.IndexedCARoots - if err := s.forwardDC("ConnectCA.Roots", s.config.PrimaryDatacenter, &args, &roots); err != nil { - return err - } - - // Configure the CA provider and initialize the intermediate certificate if necessary. - if err := s.initializeSecondaryProvider(provider, roots); err != nil { - return fmt.Errorf("error configuring provider: %v", err) - } - if err := s.initializeSecondaryCA(provider, roots); err != nil { - return err - } - - connectLogger.Info("initialized secondary datacenter CA with provider", "provider", conf.Provider) - return nil - } - - return s.initializeRootCA(provider, conf) -} - -// initializeRootCA runs the initialization logic for a root CA. -// It is being called while holding caProviderReconfigurationLock -// which means it must never take that lock itself or call anything that does. -func (s *Server) initializeRootCA(provider ca.Provider, conf *structs.CAConfiguration) error { - connectLogger := s.loggers.Named(logging.Connect) - pCfg := ca.ProviderConfig{ - ClusterID: conf.ClusterID, - Datacenter: s.config.Datacenter, - IsPrimary: true, - RawConfig: conf.Config, - State: conf.State, - } - if err := provider.Configure(pCfg); err != nil { - return fmt.Errorf("error configuring provider: %v", err) - } - if err := provider.GenerateRoot(); err != nil { - return fmt.Errorf("error generating CA root certificate: %v", err) - } - - // Get the active root cert from the CA - rootPEM, err := provider.ActiveRoot() - if err != nil { - return fmt.Errorf("error getting root cert: %v", err) - } - rootCA, err := parseCARoot(rootPEM, conf.Provider, conf.ClusterID) - if err != nil { - return err - } - - // Also create the intermediate CA, which is the one that actually signs leaf certs - interPEM, err := provider.GenerateIntermediate() - if err != nil { - return fmt.Errorf("error generating intermediate cert: %v", err) - } - _, err = connect.ParseCert(interPEM) - if err != nil { - return fmt.Errorf("error getting intermediate cert: %v", err) - } - - // If the provider has state to persist and it's changed or new then update - // CAConfig. - pState, err := provider.State() - if err != nil { - return fmt.Errorf("error getting provider state: %v", err) - } - if !reflect.DeepEqual(conf.State, pState) { - // Update the CAConfig in raft to persist the provider state - conf.State = pState - req := structs.CARequest{ - Op: structs.CAOpSetConfig, - Config: conf, - } - if _, err = s.raftApply(structs.ConnectCARequestType, req); err != nil { - return fmt.Errorf("error persisting provider state: %v", err) - } - } - - // Check if the CA root is already initialized and exit if it is, - // adding on any existing intermediate certs since they aren't directly - // tied to the provider. - // Every change to the CA after this initial bootstrapping should - // be done through the rotation process. - state := s.fsm.State() - _, activeRoot, err := state.CARootActive(nil) - if err != nil { - return err - } - if activeRoot != nil { - // This state shouldn't be possible to get into because we update the root and - // CA config in the same FSM operation. - if activeRoot.ID != rootCA.ID { - return fmt.Errorf("stored CA root %q is not the active root (%s)", rootCA.ID, activeRoot.ID) - } - - rootCA.IntermediateCerts = activeRoot.IntermediateCerts - s.setCAProvider(provider, rootCA) - - return nil - } - - // Get the highest index - idx, _, err := state.CARoots(nil) - if err != nil { - return err - } - - // Store the root cert in raft - resp, err := s.raftApply(structs.ConnectCARequestType, &structs.CARequest{ - Op: structs.CAOpSetRoots, - Index: idx, - Roots: []*structs.CARoot{rootCA}, - }) - if err != nil { - connectLogger.Error("Raft apply failed", "error", err) - return err - } - if respErr, ok := resp.(error); ok { - return respErr - } - - s.setCAProvider(provider, rootCA) - - connectLogger.Info("initialized primary datacenter CA with provider", "provider", conf.Provider) - - return nil -} - -// initializeSecondaryCA runs the routine for generating an intermediate CA CSR and getting -// it signed by the primary DC if the root CA of the primary DC has changed since the last -// intermediate. -// It is being called while holding caProviderReconfigurationLock -// which means it must never take that lock itself or call anything that does. -func (s *Server) initializeSecondaryCA(provider ca.Provider, primaryRoots structs.IndexedCARoots) error { - activeIntermediate, err := provider.ActiveIntermediate() - if err != nil { - return err - } - - var ( - storedRootID string - expectedSigningKeyID string - currentSigningKeyID string - activeSecondaryRoot *structs.CARoot - ) - if activeIntermediate != "" { - // In the event that we already have an intermediate, we must have - // already replicated some primary root information locally, so check - // to see if we're up to date by fetching the rootID and the - // signingKeyID used in the secondary. - // - // Note that for the same rootID the primary representation of the root - // will have a different SigningKeyID field than the secondary - // representation of the same root. This is because it's derived from - // the intermediate which is different in all datacenters. - storedRoot, err := provider.ActiveRoot() - if err != nil { - return err - } - - storedRootID, err = connect.CalculateCertFingerprint(storedRoot) - if err != nil { - return fmt.Errorf("error parsing root fingerprint: %v, %#v", err, storedRoot) - } - - intermediateCert, err := connect.ParseCert(activeIntermediate) - if err != nil { - return fmt.Errorf("error parsing active intermediate cert: %v", err) - } - expectedSigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId) - - // This will fetch the secondary's exact current representation of the - // active root. Note that this data should only be used if the IDs - // match, otherwise it's out of date and should be regenerated. - _, activeSecondaryRoot, err = s.fsm.State().CARootActive(nil) - if err != nil { - return err - } - if activeSecondaryRoot != nil { - currentSigningKeyID = activeSecondaryRoot.SigningKeyID - } - } - - // Determine which of the provided PRIMARY representations of roots is the - // active one. We'll use this as a template to generate any new root - // representations meant for this secondary. - var newActiveRoot *structs.CARoot - for _, root := range primaryRoots.Roots { - if root.ID == primaryRoots.ActiveRootID && root.Active { - newActiveRoot = root - break - } - } - if newActiveRoot == nil { - return fmt.Errorf("primary datacenter does not have an active root CA for Connect") - } - - // Get a signed intermediate from the primary DC if the provider - // hasn't been initialized yet or if the primary's root has changed. - needsNewIntermediate := false - if activeIntermediate == "" || storedRootID != primaryRoots.ActiveRootID { - needsNewIntermediate = true - } - - // Also we take this opportunity to correct an incorrectly persisted SigningKeyID - // in secondary datacenters (see PR-6513). - if expectedSigningKeyID != "" && currentSigningKeyID != expectedSigningKeyID { - needsNewIntermediate = true - } - - newIntermediate := false - if needsNewIntermediate { - if err := s.getIntermediateCASigned(provider, newActiveRoot); err != nil { - return err - } - newIntermediate = true - } else { - // Discard the primary's representation since our local one is - // sufficiently up to date. - newActiveRoot = activeSecondaryRoot - } - - // Update the roots list in the state store if there's a new active root. - state := s.fsm.State() - _, activeRoot, err := state.CARootActive(nil) - if err != nil { - return err - } - if activeRoot == nil || activeRoot.ID != newActiveRoot.ID || newIntermediate { - if err := s.persistNewRoot(provider, newActiveRoot); err != nil { - return err - } - } - - s.setCAProvider(provider, newActiveRoot) - return nil -} - -// persistNewRoot is being called while holding caProviderReconfigurationLock -// which means it must never take that lock itself or call anything that does. -func (s *Server) persistNewRoot(provider ca.Provider, newActiveRoot *structs.CARoot) error { - connectLogger := s.loggers.Named(logging.Connect) - state := s.fsm.State() - idx, oldRoots, err := state.CARoots(nil) - if err != nil { - return err - } - - _, config, err := state.CAConfig(nil) - if err != nil { - return err - } - if config == nil { - return fmt.Errorf("local CA not initialized yet") - } - newConf := *config - newConf.ClusterID = newActiveRoot.ExternalTrustDomain - - // Persist any state the provider needs us to - newConf.State, err = provider.State() - if err != nil { - return fmt.Errorf("error getting provider state: %v", err) - } - - // Copy the root list and append the new active root, updating the old root - // with the time it was rotated out. - var newRoots structs.CARoots - for _, r := range oldRoots { - newRoot := *r - if newRoot.Active { - newRoot.Active = false - newRoot.RotatedOutAt = time.Now() - } - if newRoot.ExternalTrustDomain == "" { - newRoot.ExternalTrustDomain = config.ClusterID - } - newRoots = append(newRoots, &newRoot) - } - newRoots = append(newRoots, newActiveRoot) - - args := &structs.CARequest{ - Op: structs.CAOpSetRootsAndConfig, - Index: idx, - Roots: newRoots, - Config: &newConf, - } - resp, err := s.raftApply(structs.ConnectCARequestType, &args) - if err != nil { - return err - } - if respErr, ok := resp.(error); ok { - return respErr - } - if respOk, ok := resp.(bool); ok && !respOk { - return fmt.Errorf("could not atomically update roots and config") - } - - connectLogger.Info("updated root certificates from primary datacenter") - return nil -} - -// getIntermediateCAPrimary regenerates the intermediate cert in the primary datacenter. -// This is only run for CAs that require an intermediary in the primary DC, such as Vault. -// This function is being called while holding caProviderReconfigurationLock -// which means it must never take that lock itself or call anything that does. -func (s *Server) getIntermediateCAPrimary(provider ca.Provider, newActiveRoot *structs.CARoot) error { - connectLogger := s.loggers.Named(logging.Connect) - // Generate and sign an intermediate cert using the root CA. - intermediatePEM, err := provider.GenerateIntermediate() - if err != nil { - return fmt.Errorf("error generating new intermediate cert: %v", err) - } - - intermediateCert, err := connect.ParseCert(intermediatePEM) - if err != nil { - return fmt.Errorf("error parsing intermediate cert: %v", err) - } - - // Append the new intermediate to our local active root entry. This is - // where the root representations start to diverge. - newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediatePEM) - newActiveRoot.SigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId) - - connectLogger.Info("generated new intermediate certificate for primary datacenter") - return nil -} - -// getIntermediateCASigned is being called while holding caProviderReconfigurationLock -// which means it must never take that lock itself or call anything that does. -func (s *Server) getIntermediateCASigned(provider ca.Provider, newActiveRoot *structs.CARoot) error { - connectLogger := s.loggers.Named(logging.Connect) - csr, err := provider.GenerateIntermediateCSR() - if err != nil { - return err - } - - var intermediatePEM string - if err := s.forwardDC("ConnectCA.SignIntermediate", s.config.PrimaryDatacenter, s.generateCASignRequest(csr), &intermediatePEM); err != nil { - // this is a failure in the primary and shouldn't be capable of erroring out our establishing leadership - connectLogger.Warn("Primary datacenter refused to sign our intermediate CA certificate", "error", err) - return nil - } - - if err := provider.SetIntermediate(intermediatePEM, newActiveRoot.RootCert); err != nil { - return fmt.Errorf("Failed to set the intermediate certificate with the CA provider: %v", err) - } - - intermediateCert, err := connect.ParseCert(intermediatePEM) - if err != nil { - return fmt.Errorf("error parsing intermediate cert: %v", err) - } - - // Append the new intermediate to our local active root entry. This is - // where the root representations start to diverge. - newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediatePEM) - newActiveRoot.SigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId) - - connectLogger.Info("received new intermediate certificate from primary datacenter") - return nil -} - -func (s *Server) generateCASignRequest(csr string) *structs.CASignRequest { - return &structs.CASignRequest{ - Datacenter: s.config.PrimaryDatacenter, - CSR: csr, - WriteRequest: structs.WriteRequest{Token: s.tokens.ReplicationToken()}, - } -} - -// startConnectLeader starts multi-dc connect leader routines. -func (s *Server) startConnectLeader() { - // Start the Connect secondary DC actions if enabled. - if s.config.ConnectEnabled && s.config.Datacenter != s.config.PrimaryDatacenter { - s.leaderRoutineManager.Start(secondaryCARootWatchRoutineName, s.secondaryCARootWatch) - s.leaderRoutineManager.Start(intentionReplicationRoutineName, s.replicateIntentions) - } - - s.leaderRoutineManager.Start(intermediateCertRenewWatchRoutineName, s.intermediateCertRenewalWatch) - s.leaderRoutineManager.Start(caRootPruningRoutineName, s.runCARootPruning) -} - -// stopConnectLeader stops connect specific leader functions. -func (s *Server) stopConnectLeader() { - s.leaderRoutineManager.Stop(secondaryCARootWatchRoutineName) - s.leaderRoutineManager.Stop(intentionReplicationRoutineName) - s.leaderRoutineManager.Stop(intermediateCertRenewWatchRoutineName) - s.leaderRoutineManager.Stop(caRootPruningRoutineName) - - // If the provider implements NeedsStop, we call Stop to perform any shutdown actions. - s.caProviderReconfigurationLock.Lock() - defer s.caProviderReconfigurationLock.Unlock() - provider, _ := s.getCAProvider() - if provider != nil { - if needsStop, ok := provider.(ca.NeedsStop); ok { - needsStop.Stop() - } - } -} - func (s *Server) runCARootPruning(ctx context.Context) error { ticker := time.NewTicker(caRootPruneInterval) defer ticker.Stop() @@ -677,155 +156,6 @@ func (s *Server) pruneCARoots() error { return nil } -// intermediateCertRenewalWatch checks the intermediate cert for -// expiration. As soon as more than half the time a cert is valid has passed, -// it will try to renew it. -func (s *Server) intermediateCertRenewalWatch(ctx context.Context) error { - connectLogger := s.loggers.Named(logging.Connect) - isPrimary := s.config.Datacenter == s.config.PrimaryDatacenter - - for { - select { - case <-ctx.Done(): - return nil - case <-time.After(structs.IntermediateCertRenewInterval): - retryLoopBackoffAbortOnSuccess(ctx, func() error { - s.caProviderReconfigurationLock.Lock() - defer s.caProviderReconfigurationLock.Unlock() - - provider, _ := s.getCAProvider() - if provider == nil { - // this happens when leadership is being revoked and this go routine will be stopped - return nil - } - // If this isn't the primary, make sure the CA has been initialized. - if !isPrimary && !s.configuredSecondaryCA() { - return fmt.Errorf("secondary CA is not yet configured.") - } - - state := s.fsm.State() - _, activeRoot, err := state.CARootActive(nil) - if err != nil { - return err - } - - // If this is the primary, check if this is a provider that uses an intermediate cert. If - // it isn't, we don't need to check for a renewal. - if isPrimary { - _, config, err := state.CAConfig(nil) - if err != nil { - return err - } - - if _, ok := ca.PrimaryIntermediateProviders[config.Provider]; !ok { - return nil - } - } - - activeIntermediate, err := provider.ActiveIntermediate() - if err != nil { - return err - } - - if activeIntermediate == "" { - return fmt.Errorf("datacenter doesn't have an active intermediate.") - } - - intermediateCert, err := connect.ParseCert(activeIntermediate) - if err != nil { - return fmt.Errorf("error parsing active intermediate cert: %v", err) - } - - if lessThanHalfTimePassed(time.Now(), intermediateCert.NotBefore.Add(ca.CertificateTimeDriftBuffer), - intermediateCert.NotAfter) { - return nil - } - - renewalFunc := s.getIntermediateCAPrimary - if !isPrimary { - renewalFunc = s.getIntermediateCASigned - } - if err := renewalFunc(provider, activeRoot); err != nil { - return err - } - - if err := s.persistNewRoot(provider, activeRoot); err != nil { - return err - } - - s.setCAProvider(provider, activeRoot) - return nil - }, func(err error) { - connectLogger.Error("error renewing intermediate certs", - "routine", intermediateCertRenewWatchRoutineName, - "error", err, - ) - }) - } - } -} - -// secondaryCARootWatch maintains a blocking query to the primary datacenter's -// ConnectCA.Roots endpoint to monitor when it needs to request a new signed -// intermediate certificate. -func (s *Server) secondaryCARootWatch(ctx context.Context) error { - connectLogger := s.loggers.Named(logging.Connect) - args := structs.DCSpecificRequest{ - Datacenter: s.config.PrimaryDatacenter, - QueryOptions: structs.QueryOptions{ - // the maximum time the primary roots watch query can block before returning - MaxQueryTime: s.config.MaxQueryTime, - }, - } - - connectLogger.Debug("starting Connect CA root replication from primary datacenter", "primary", s.config.PrimaryDatacenter) - - retryLoopBackoff(ctx, func() error { - var roots structs.IndexedCARoots - if err := s.forwardDC("ConnectCA.Roots", s.config.PrimaryDatacenter, &args, &roots); err != nil { - return fmt.Errorf("Error retrieving the primary datacenter's roots: %v", err) - } - - // Check to see if the primary has been upgraded in case we're waiting to switch to - // secondary mode. - provider, _ := s.getCAProvider() - if provider == nil { - // this happens when leadership is being revoked and this go routine will be stopped - return nil - } - if !s.configuredSecondaryCA() { - versionOk, primaryFound := ServersInDCMeetMinimumVersion(s, s.config.PrimaryDatacenter, minMultiDCConnectVersion) - if !primaryFound { - return fmt.Errorf("Primary datacenter is unreachable - deferring secondary CA initialization") - } - - if versionOk { - if err := s.initializeSecondaryProvider(provider, roots); err != nil { - return fmt.Errorf("Failed to initialize secondary CA provider: %v", err) - } - } - } - - // Run the secondary CA init routine to see if we need to request a new - // intermediate. - if s.configuredSecondaryCA() { - if err := s.initializeSecondaryCA(provider, roots); err != nil { - return fmt.Errorf("Failed to initialize the secondary CA: %v", err) - } - } - - args.QueryOptions.MinQueryIndex = nextIndexVal(args.QueryOptions.MinQueryIndex, roots.QueryMeta.Index) - return nil - }, func(err error) { - connectLogger.Error("CA root replication failed, will retry", - "routine", secondaryCARootWatchRoutineName, - "error", err, - ) - }) - - return nil -} - // replicateIntentions executes a blocking query to the primary datacenter to replicate // the intentions there to the local state. func (s *Server) replicateIntentions(ctx context.Context) error { @@ -1014,46 +344,6 @@ func nextIndexVal(prevIdx, idx uint64) uint64 { return idx } -// initializeSecondaryProvider configures the given provider for a secondary, non-root datacenter. -// It is being called while holding caProviderReconfigurationLock which means -// it must never take that lock itself or call anything that does. -func (s *Server) initializeSecondaryProvider(provider ca.Provider, roots structs.IndexedCARoots) error { - if roots.TrustDomain == "" { - return fmt.Errorf("trust domain from primary datacenter is not initialized") - } - - clusterID := strings.Split(roots.TrustDomain, ".")[0] - _, conf, err := s.fsm.State().CAConfig(nil) - if err != nil { - return err - } - - pCfg := ca.ProviderConfig{ - ClusterID: clusterID, - Datacenter: s.config.Datacenter, - IsPrimary: false, - RawConfig: conf.Config, - State: conf.State, - } - if err := provider.Configure(pCfg); err != nil { - return fmt.Errorf("error configuring provider: %v", err) - } - - s.actingSecondaryLock.Lock() - s.actingSecondaryCA = true - s.actingSecondaryLock.Unlock() - - return nil -} - -// configuredSecondaryCA is being called while holding caProviderReconfigurationLock -// which means it must never take that lock itself or call anything that does. -func (s *Server) configuredSecondaryCA() bool { - s.actingSecondaryLock.RLock() - defer s.actingSecondaryLock.RUnlock() - return s.actingSecondaryCA -} - // halfTime returns a duration that is half the time between notBefore and // notAfter. func halfTime(notBefore, notAfter time.Time) time.Duration { @@ -1069,3 +359,11 @@ func lessThanHalfTimePassed(now, notBefore, notAfter time.Time) bool { t := notBefore.Add(halfTime(notBefore, notAfter)) return t.Sub(now) > 0 } + +func (s *Server) generateCASignRequest(csr string) *structs.CASignRequest { + return &structs.CASignRequest{ + Datacenter: s.config.PrimaryDatacenter, + CSR: csr, + WriteRequest: structs.WriteRequest{Token: s.tokens.ReplicationToken()}, + } +} diff --git a/agent/consul/leader_connect_ca.go b/agent/consul/leader_connect_ca.go new file mode 100644 index 0000000000..a00ffe2b5e --- /dev/null +++ b/agent/consul/leader_connect_ca.go @@ -0,0 +1,1142 @@ +package consul + +import ( + "context" + "errors" + "fmt" + "reflect" + "strings" + "sync" + "time" + + "github.com/hashicorp/consul/agent/connect" + "github.com/hashicorp/consul/agent/connect/ca" + "github.com/hashicorp/consul/agent/consul/state" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/go-hclog" + uuid "github.com/hashicorp/go-uuid" +) + +type CAState string + +const ( + CAStateUninitialized CAState = "UNINITIALIZED" + CAStateInitializing = "INITIALIZING" + CAStateReady = "READY" + CAStateRenewIntermediate = "RENEWING" + CAStateReconfig = "RECONFIGURING" +) + +// caServerDelegate is an interface for server operations for facilitating +// easier testing. +type caServerDelegate interface { + State() *state.Store + IsLeader() bool + + createCAProvider(conf *structs.CAConfiguration) (ca.Provider, error) + forwardDC(method, dc string, args interface{}, reply interface{}) error + generateCASignRequest(csr string) *structs.CASignRequest + raftApply(t structs.MessageType, msg interface{}) (interface{}, error) + + checkServersProvider +} + +// CAManager is a wrapper around CA operations such as updating roots, an intermediate +// or the configuration. All operations should go through the CAManager in order to +// avoid data races. +type CAManager struct { + delegate caServerDelegate + serverConf *Config + logger hclog.Logger + + providerLock sync.RWMutex + // provider is the current CA provider in use for Connect. This is + // only non-nil when we are the leader. + provider ca.Provider + // providerRoot is the CARoot that was stored along with the ca.Provider + // active. It's only updated in lock-step with the provider. This prevents + // races between state updates to active roots and the fetch of the provider + // instance. + providerRoot *structs.CARoot + + // stateLock protects the internal state used for administrative CA tasks. + stateLock sync.Mutex + state CAState + primaryRoots structs.IndexedCARoots // The most recently seen state of the root CAs from the primary datacenter. + actingSecondaryCA bool // True if this datacenter has been initialized as a secondary CA. +} + +type caDelegateWithState struct { + *Server +} + +func (c *caDelegateWithState) State() *state.Store { + return c.fsm.State() +} + +func NewCAManager(delegate caServerDelegate, logger hclog.Logger, config *Config) *CAManager { + return &CAManager{ + delegate: delegate, + logger: logger, + serverConf: config, + state: CAStateUninitialized, + } +} + +func (c *CAManager) reset() { + c.state = CAStateUninitialized + c.primaryRoots = structs.IndexedCARoots{} + c.actingSecondaryCA = false + c.setCAProvider(nil, nil) +} + +// setState attempts to update the CA state to the given state. +// If the current state is not READY, this will fail. The only exception is when +// the current state is UNINITIALIZED, and the function is called with CAStateInitializing. +func (c *CAManager) setState(newState CAState, validateState bool) error { + c.stateLock.Lock() + defer c.stateLock.Unlock() + state := c.state + + if !validateState || state == CAStateReady || (state == CAStateUninitialized && newState == CAStateInitializing) { + c.state = newState + } else { + return fmt.Errorf("CA is already in state %q", state) + } + return nil +} + +// setPrimaryRoots updates the most recently seen roots from the primary. +func (c *CAManager) setPrimaryRoots(newRoots structs.IndexedCARoots) error { + c.stateLock.Lock() + defer c.stateLock.Unlock() + + if c.state == CAStateInitializing || c.state == CAStateReconfig { + c.primaryRoots = newRoots + } else { + return fmt.Errorf("Cannot update primary roots in state %q", c.state) + } + return nil +} + +func (c *CAManager) getPrimaryRoots() structs.IndexedCARoots { + c.stateLock.Lock() + defer c.stateLock.Unlock() + return c.primaryRoots +} + +// initializeCAConfig is used to initialize the CA config if necessary +// when setting up the CA during establishLeadership. The state should be set to +// non-ready before calling this. +func (c *CAManager) initializeCAConfig() (*structs.CAConfiguration, error) { + state := c.delegate.State() + _, config, err := state.CAConfig(nil) + if err != nil { + return nil, err + } + if config == nil { + config = c.serverConf.CAConfig + if config.ClusterID == "" { + id, err := uuid.GenerateUUID() + if err != nil { + return nil, err + } + config.ClusterID = id + } + } else if _, ok := config.Config["IntermediateCertTTL"]; !ok { + dup := *config + copied := make(map[string]interface{}) + for k, v := range dup.Config { + copied[k] = v + } + copied["IntermediateCertTTL"] = connect.DefaultIntermediateCertTTL.String() + dup.Config = copied + config = &dup + } else { + return config, nil + } + + req := structs.CARequest{ + Op: structs.CAOpSetConfig, + Config: config, + } + if resp, err := c.delegate.raftApply(structs.ConnectCARequestType, req); err != nil { + return nil, err + } else if respErr, ok := resp.(error); ok { + return nil, respErr + } + + return config, nil +} + +// parseCARoot returns a filled-in structs.CARoot from a raw PEM value. +func parseCARoot(pemValue, provider, clusterID string) (*structs.CARoot, error) { + id, err := connect.CalculateCertFingerprint(pemValue) + if err != nil { + return nil, fmt.Errorf("error parsing root fingerprint: %v", err) + } + rootCert, err := connect.ParseCert(pemValue) + if err != nil { + return nil, fmt.Errorf("error parsing root cert: %v", err) + } + keyType, keyBits, err := connect.KeyInfoFromCert(rootCert) + if err != nil { + return nil, fmt.Errorf("error extracting root key info: %v", err) + } + return &structs.CARoot{ + ID: id, + Name: fmt.Sprintf("%s CA Root Cert", strings.Title(provider)), + SerialNumber: rootCert.SerialNumber.Uint64(), + SigningKeyID: connect.EncodeSigningKeyID(rootCert.SubjectKeyId), + ExternalTrustDomain: clusterID, + NotBefore: rootCert.NotBefore, + NotAfter: rootCert.NotAfter, + RootCert: pemValue, + PrivateKeyType: keyType, + PrivateKeyBits: keyBits, + Active: true, + }, nil +} + +// getCAProvider returns the currently active instance of the CA Provider, +// as well as the active root. +func (c *CAManager) getCAProvider() (ca.Provider, *structs.CARoot) { + retries := 0 + var result ca.Provider + var resultRoot *structs.CARoot + for result == nil { + c.providerLock.RLock() + result = c.provider + resultRoot = c.providerRoot + c.providerLock.RUnlock() + + // In cases where an agent is started with managed proxies, we may ask + // for the provider before establishLeadership completes. If we're the + // leader, then wait and get the provider again + if result == nil && c.delegate.IsLeader() && retries < 10 { + retries++ + time.Sleep(50 * time.Millisecond) + continue + } + + break + } + + return result, resultRoot +} + +// setCAProvider is being called while holding the stateLock +// which means it must never take that lock itself or call anything that does. +func (c *CAManager) setCAProvider(newProvider ca.Provider, root *structs.CARoot) { + c.providerLock.Lock() + c.provider = newProvider + c.providerRoot = root + c.providerLock.Unlock() +} + +// InitializeCA sets up the CA provider when gaining leadership, either bootstrapping +// the CA if this is the primary DC or making a remote RPC for intermediate signing +// if this is a secondary DC. +func (c *CAManager) InitializeCA() error { + // Bail if connect isn't enabled. + if !c.serverConf.ConnectEnabled { + return nil + } + + // Update the state before doing anything else. + err := c.setState(CAStateInitializing, true) + if err != nil { + return err + } + defer c.setState(CAStateReady, false) + + // Initialize the provider based on the current config. + conf, err := c.initializeCAConfig() + if err != nil { + return err + } + provider, err := c.delegate.createCAProvider(conf) + if err != nil { + return err + } + + c.setCAProvider(provider, nil) + + // Run the root CA initialization if this is the primary DC. + if c.serverConf.PrimaryDatacenter == c.serverConf.Datacenter { + return c.initializeRootCA(provider, conf) + } + + // If this isn't the primary DC, run the secondary DC routine if the primary has already been upgraded to at least 1.6.0 + versionOk, foundPrimary := ServersInDCMeetMinimumVersion(c.delegate, c.serverConf.PrimaryDatacenter, minMultiDCConnectVersion) + if !foundPrimary { + c.logger.Warn("primary datacenter is configured but unreachable - deferring initialization of the secondary datacenter CA") + // return nil because we will initialize the secondary CA later + return nil + } else if !versionOk { + // return nil because we will initialize the secondary CA later + c.logger.Warn("servers in the primary datacenter are not at least at the minimum version - deferring initialization of the secondary datacenter CA", + "min_version", minMultiDCConnectVersion.String(), + ) + return nil + } + + // Get the root CA to see if we need to refresh our intermediate. + args := structs.DCSpecificRequest{ + Datacenter: c.serverConf.PrimaryDatacenter, + } + var roots structs.IndexedCARoots + if err := c.delegate.forwardDC("ConnectCA.Roots", c.serverConf.PrimaryDatacenter, &args, &roots); err != nil { + return err + } + if err := c.setPrimaryRoots(roots); err != nil { + return err + } + + // Configure the CA provider and initialize the intermediate certificate if necessary. + if err := c.initializeSecondaryProvider(provider, roots); err != nil { + return fmt.Errorf("error configuring provider: %v", err) + } + if err := c.initializeSecondaryCA(provider, nil); err != nil { + return err + } + + c.logger.Info("initialized secondary datacenter CA with provider", "provider", conf.Provider) + return nil +} + +// initializeRootCA runs the initialization logic for a root CA. It should only +// be called while the state lock is held by setting the state to non-ready. +func (c *CAManager) initializeRootCA(provider ca.Provider, conf *structs.CAConfiguration) error { + pCfg := ca.ProviderConfig{ + ClusterID: conf.ClusterID, + Datacenter: c.serverConf.Datacenter, + IsPrimary: true, + RawConfig: conf.Config, + State: conf.State, + } + if err := provider.Configure(pCfg); err != nil { + return fmt.Errorf("error configuring provider: %v", err) + } + if err := provider.GenerateRoot(); err != nil { + return fmt.Errorf("error generating CA root certificate: %v", err) + } + + // Get the active root cert from the CA + rootPEM, err := provider.ActiveRoot() + if err != nil { + return fmt.Errorf("error getting root cert: %v", err) + } + rootCA, err := parseCARoot(rootPEM, conf.Provider, conf.ClusterID) + if err != nil { + return err + } + + // Also create the intermediate CA, which is the one that actually signs leaf certs + interPEM, err := provider.GenerateIntermediate() + if err != nil { + return fmt.Errorf("error generating intermediate cert: %v", err) + } + _, err = connect.ParseCert(interPEM) + if err != nil { + return fmt.Errorf("error getting intermediate cert: %v", err) + } + + // If the provider has state to persist and it's changed or new then update + // CAConfig. + pState, err := provider.State() + if err != nil { + return fmt.Errorf("error getting provider state: %v", err) + } + if !reflect.DeepEqual(conf.State, pState) { + // Update the CAConfig in raft to persist the provider state + conf.State = pState + req := structs.CARequest{ + Op: structs.CAOpSetConfig, + Config: conf, + } + if _, err = c.delegate.raftApply(structs.ConnectCARequestType, req); err != nil { + return fmt.Errorf("error persisting provider state: %v", err) + } + } + + // Check if the CA root is already initialized and exit if it is, + // adding on any existing intermediate certs since they aren't directly + // tied to the provider. + // Every change to the CA after this initial bootstrapping should + // be done through the rotation process. + state := c.delegate.State() + _, activeRoot, err := state.CARootActive(nil) + if err != nil { + return err + } + if activeRoot != nil { + // This state shouldn't be possible to get into because we update the root and + // CA config in the same FSM operation. + if activeRoot.ID != rootCA.ID { + return fmt.Errorf("stored CA root %q is not the active root (%s)", rootCA.ID, activeRoot.ID) + } + + rootCA.IntermediateCerts = activeRoot.IntermediateCerts + c.setCAProvider(provider, rootCA) + + return nil + } + + // Get the highest index + idx, _, err := state.CARoots(nil) + if err != nil { + return err + } + + // Store the root cert in raft + resp, err := c.delegate.raftApply(structs.ConnectCARequestType, &structs.CARequest{ + Op: structs.CAOpSetRoots, + Index: idx, + Roots: []*structs.CARoot{rootCA}, + }) + if err != nil { + c.logger.Error("Raft apply failed", "error", err) + return err + } + if respErr, ok := resp.(error); ok { + return respErr + } + + c.setCAProvider(provider, rootCA) + + c.logger.Info("initialized primary datacenter CA with provider", "provider", conf.Provider) + + return nil +} + +// initializeSecondaryCA runs the routine for generating an intermediate CA CSR and getting +// it signed by the primary DC if the root CA of the primary DC has changed since the last +// intermediate. It should only be called while the state lock is held by setting the state +// to non-ready. +func (c *CAManager) initializeSecondaryCA(provider ca.Provider, config *structs.CAConfiguration) error { + activeIntermediate, err := provider.ActiveIntermediate() + if err != nil { + return err + } + + var ( + storedRootID string + expectedSigningKeyID string + currentSigningKeyID string + activeSecondaryRoot *structs.CARoot + ) + if activeIntermediate != "" { + // In the event that we already have an intermediate, we must have + // already replicated some primary root information locally, so check + // to see if we're up to date by fetching the rootID and the + // signingKeyID used in the secondary. + // + // Note that for the same rootID the primary representation of the root + // will have a different SigningKeyID field than the secondary + // representation of the same root. This is because it's derived from + // the intermediate which is different in all datacenters. + storedRoot, err := provider.ActiveRoot() + if err != nil { + return err + } + + storedRootID, err = connect.CalculateCertFingerprint(storedRoot) + if err != nil { + return fmt.Errorf("error parsing root fingerprint: %v, %#v", err, storedRoot) + } + + intermediateCert, err := connect.ParseCert(activeIntermediate) + if err != nil { + return fmt.Errorf("error parsing active intermediate cert: %v", err) + } + expectedSigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId) + + // This will fetch the secondary's exact current representation of the + // active root. Note that this data should only be used if the IDs + // match, otherwise it's out of date and should be regenerated. + _, activeSecondaryRoot, err = c.delegate.State().CARootActive(nil) + if err != nil { + return err + } + if activeSecondaryRoot != nil { + currentSigningKeyID = activeSecondaryRoot.SigningKeyID + } + } + + // Determine which of the provided PRIMARY representations of roots is the + // active one. We'll use this as a template to generate any new root + // representations meant for this secondary. + var newActiveRoot *structs.CARoot + primaryRoots := c.getPrimaryRoots() + for _, root := range primaryRoots.Roots { + if root.ID == primaryRoots.ActiveRootID && root.Active { + newActiveRoot = root + break + } + } + if newActiveRoot == nil { + return fmt.Errorf("primary datacenter does not have an active root CA for Connect") + } + + // Get a signed intermediate from the primary DC if the provider + // hasn't been initialized yet or if the primary's root has changed. + needsNewIntermediate := false + if activeIntermediate == "" || storedRootID != primaryRoots.ActiveRootID { + needsNewIntermediate = true + } + + // Also we take this opportunity to correct an incorrectly persisted SigningKeyID + // in secondary datacenters (see PR-6513). + if expectedSigningKeyID != "" && currentSigningKeyID != expectedSigningKeyID { + needsNewIntermediate = true + } + + newIntermediate := false + if needsNewIntermediate { + if err := c.getIntermediateCASigned(provider, newActiveRoot); err != nil { + return err + } + newIntermediate = true + } else { + // Discard the primary's representation since our local one is + // sufficiently up to date. + newActiveRoot = activeSecondaryRoot + } + + // Update the roots list in the state store if there's a new active root. + state := c.delegate.State() + _, activeRoot, err := state.CARootActive(nil) + if err != nil { + return err + } + + // Determine whether a root update is needed, and persist the roots/config accordingly. + var newRoot *structs.CARoot + if activeRoot == nil || activeRoot.ID != newActiveRoot.ID || newIntermediate { + newRoot = newActiveRoot + } + if err := c.persistNewRootAndConfig(provider, newRoot, config); err != nil { + return err + } + + c.setCAProvider(provider, newActiveRoot) + return nil +} + +// persistNewRootAndConfig should only be called while the state lock is held +// by setting the state to non-ready. +// If newActiveRoot is non-nil, it will be appended to the current roots list. +// If config is non-nil, it will be used to overwrite the existing config. +func (c *CAManager) persistNewRootAndConfig(provider ca.Provider, newActiveRoot *structs.CARoot, config *structs.CAConfiguration) error { + state := c.delegate.State() + idx, oldRoots, err := state.CARoots(nil) + if err != nil { + return err + } + + // Look up the existing CA config if a new one wasn't provided. + var newConf structs.CAConfiguration + _, storedConfig, err := state.CAConfig(nil) + if err != nil { + return err + } + if storedConfig == nil { + return fmt.Errorf("local CA not initialized yet") + } + // Exit early if the change is a no-op. + if newActiveRoot == nil && config != nil && config.Provider == storedConfig.Provider && reflect.DeepEqual(config.Config, storedConfig.Config) { + return nil + } + + if config != nil { + newConf = *config + } else { + newConf = *storedConfig + } + + // Update the trust domain for the config if there's a new root, or keep the old + // one if the root isn't being updated. + newConf.ModifyIndex = storedConfig.ModifyIndex + if newActiveRoot != nil { + newConf.ClusterID = newActiveRoot.ExternalTrustDomain + } else { + _, activeRoot, err := state.CARootActive(nil) + if err != nil { + return err + } + newConf.ClusterID = activeRoot.ExternalTrustDomain + } + + // Persist any state the provider needs us to + newConf.State, err = provider.State() + if err != nil { + return fmt.Errorf("error getting provider state: %v", err) + } + + // If there's a new active root, copy the root list and append it, updating + // the old root with the time it was rotated out. + var newRoots structs.CARoots + for _, r := range oldRoots { + newRoot := *r + if newRoot.Active { + newRoot.Active = false + newRoot.RotatedOutAt = time.Now() + } + if newRoot.ExternalTrustDomain == "" { + newRoot.ExternalTrustDomain = newConf.ClusterID + } + newRoots = append(newRoots, &newRoot) + } + if newActiveRoot != nil { + newRoots = append(newRoots, newActiveRoot) + } + + args := &structs.CARequest{ + Op: structs.CAOpSetRootsAndConfig, + Index: idx, + Roots: newRoots, + Config: &newConf, + } + resp, err := c.delegate.raftApply(structs.ConnectCARequestType, args) + if err != nil { + return err + } + if respErr, ok := resp.(error); ok { + return respErr + } + if respOk, ok := resp.(bool); ok && !respOk { + return fmt.Errorf("could not atomically update roots and config") + } + + c.logger.Info("updated root certificates from primary datacenter") + return nil +} + +func (c *CAManager) UpdateConfiguration(args *structs.CARequest) error { + // Attempt to update the state first. + if err := c.setState(CAStateReconfig, true); err != nil { + return err + } + defer c.setState(CAStateReady, false) + + // Exit early if it's a no-op change + state := c.delegate.State() + confIdx, config, err := state.CAConfig(nil) + if err != nil { + return err + } + + // Don't allow state changes. Either it needs to be empty or the same to allow + // read-modify-write loops that don't touch the State field. + if len(args.Config.State) > 0 && + !reflect.DeepEqual(args.Config.State, config.State) { + return ErrStateReadOnly + } + + // Don't allow users to change the ClusterID. + args.Config.ClusterID = config.ClusterID + if args.Config.Provider == config.Provider && reflect.DeepEqual(args.Config.Config, config.Config) { + return nil + } + + // If the provider hasn't changed, we need to load the current Provider state + // so it can decide if it needs to change resources or not based on the config + // change. + if args.Config.Provider == config.Provider { + // Note this is a shallow copy since the State method doc requires the + // provider return a map that will not be further modified and should not + // modify the one we pass to Configure. + args.Config.State = config.State + } + + // Create a new instance of the provider described by the config + // and get the current active root CA. This acts as a good validation + // of the config and makes sure the provider is functioning correctly + // before we commit any changes to Raft. + newProvider, err := c.delegate.createCAProvider(args.Config) + if err != nil { + return fmt.Errorf("could not initialize provider: %v", err) + } + pCfg := ca.ProviderConfig{ + ClusterID: args.Config.ClusterID, + Datacenter: c.serverConf.Datacenter, + // This endpoint can be called in a secondary DC too so set this correctly. + IsPrimary: c.serverConf.Datacenter == c.serverConf.PrimaryDatacenter, + RawConfig: args.Config.Config, + State: args.Config.State, + } + if err := newProvider.Configure(pCfg); err != nil { + return fmt.Errorf("error configuring provider: %v", err) + } + + // Set up a defer to clean up the new provider if we exit early due to an error. + cleanupNewProvider := true + defer func() { + if cleanupNewProvider { + if err := newProvider.Cleanup(); err != nil { + c.logger.Warn("failed to clean up CA provider while handling startup failure", "provider", newProvider, "error", err) + } + } + }() + + // If this is a secondary, just check if the intermediate needs to be regenerated. + if c.serverConf.Datacenter != c.serverConf.PrimaryDatacenter { + if err := c.initializeSecondaryCA(newProvider, args.Config); err != nil { + return fmt.Errorf("Error updating secondary datacenter CA config: %v", err) + } + cleanupNewProvider = false + c.logger.Info("Secondary CA provider config updated") + return nil + } + + if err := newProvider.GenerateRoot(); err != nil { + return fmt.Errorf("error generating CA root certificate: %v", err) + } + + newRootPEM, err := newProvider.ActiveRoot() + if err != nil { + return err + } + + newActiveRoot, err := parseCARoot(newRootPEM, args.Config.Provider, args.Config.ClusterID) + if err != nil { + return err + } + + // See if the provider needs to persist any state along with the config + pState, err := newProvider.State() + if err != nil { + return fmt.Errorf("error getting provider state: %v", err) + } + args.Config.State = pState + + // Compare the new provider's root CA ID to the current one. If they + // match, just update the existing provider with the new config. + // If they don't match, begin the root rotation process. + _, root, err := state.CARootActive(nil) + if err != nil { + return err + } + + // If the root didn't change, just update the config and return. + if root != nil && root.ID == newActiveRoot.ID { + args.Op = structs.CAOpSetConfig + resp, err := c.delegate.raftApply(structs.ConnectCARequestType, args) + if err != nil { + return err + } + if respErr, ok := resp.(error); ok { + return respErr + } + + // If the config has been committed, update the local provider instance + cleanupNewProvider = false + c.setCAProvider(newProvider, newActiveRoot) + + c.logger.Info("CA provider config updated") + + return nil + } + + // At this point, we know the config change has trigged a root rotation, + // either by swapping the provider type or changing the provider's config + // to use a different root certificate. + + // First up, sanity check that the current provider actually supports + // cross-signing. + oldProvider, _ := c.getCAProvider() + if oldProvider == nil { + return fmt.Errorf("internal error: CA provider is nil") + } + canXSign, err := oldProvider.SupportsCrossSigning() + if err != nil { + return fmt.Errorf("CA provider error: %s", err) + } + if !canXSign && !args.Config.ForceWithoutCrossSigning { + return errors.New("The current CA Provider does not support cross-signing. " + + "You can try again with ForceWithoutCrossSigningSet but this may cause " + + "disruption - see documentation for more.") + } + if !canXSign && args.Config.ForceWithoutCrossSigning { + c.logger.Warn("current CA doesn't support cross signing but " + + "CA reconfiguration forced anyway with ForceWithoutCrossSigning") + } + + // If it's a config change that would trigger a rotation (different provider/root): + // 1. Get the root from the new provider. + // 2. Call CrossSignCA on the old provider to sign the new root with the old one to + // get a cross-signed certificate. + // 3. Take the active root for the new provider and append the intermediate from step 2 + // to its list of intermediates. + newRoot, err := connect.ParseCert(newRootPEM) + if err != nil { + return err + } + + if canXSign { + // Have the old provider cross-sign the new root + xcCert, err := oldProvider.CrossSignCA(newRoot) + if err != nil { + return err + } + + // Add the cross signed cert to the new CA's intermediates (to be attached + // to leaf certs). + newActiveRoot.IntermediateCerts = []string{xcCert} + } + + intermediate, err := newProvider.GenerateIntermediate() + if err != nil { + return err + } + if intermediate != newRootPEM { + newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediate) + } + + // Update the roots and CA config in the state store at the same time + idx, roots, err := state.CARoots(nil) + if err != nil { + return err + } + + var newRoots structs.CARoots + for _, r := range roots { + newRoot := *r + if newRoot.Active { + newRoot.Active = false + newRoot.RotatedOutAt = time.Now() + } + newRoots = append(newRoots, &newRoot) + } + newRoots = append(newRoots, newActiveRoot) + + args.Op = structs.CAOpSetRootsAndConfig + args.Index = idx + args.Config.ModifyIndex = confIdx + args.Roots = newRoots + resp, err := c.delegate.raftApply(structs.ConnectCARequestType, args) + if err != nil { + return err + } + if respErr, ok := resp.(error); ok { + return respErr + } + if respOk, ok := resp.(bool); ok && !respOk { + return fmt.Errorf("could not atomically update roots and config") + } + + // If the config has been committed, update the local provider instance + // and call teardown on the old provider + cleanupNewProvider = false + c.setCAProvider(newProvider, newActiveRoot) + + if err := oldProvider.Cleanup(); err != nil { + c.logger.Warn("failed to clean up old provider", "provider", config.Provider) + } + + c.logger.Info("CA rotated to new root under provider", "provider", args.Config.Provider) + + return nil +} + +// getIntermediateCAPrimary regenerates the intermediate cert in the primary datacenter. +// This is only run for CAs that require an intermediary in the primary DC, such as Vault. +// It should only be called while the state lock is held by setting the state to non-ready. +func (c *CAManager) getIntermediateCAPrimary(provider ca.Provider, newActiveRoot *structs.CARoot) error { + // Generate and sign an intermediate cert using the root CA. + intermediatePEM, err := provider.GenerateIntermediate() + if err != nil { + return fmt.Errorf("error generating new intermediate cert: %v", err) + } + + intermediateCert, err := connect.ParseCert(intermediatePEM) + if err != nil { + return fmt.Errorf("error parsing intermediate cert: %v", err) + } + + // Append the new intermediate to our local active root entry. This is + // where the root representations start to diverge. + newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediatePEM) + newActiveRoot.SigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId) + + c.logger.Info("generated new intermediate certificate for primary datacenter") + return nil +} + +// getIntermediateCASigned should only be called while the state lock is held by +// setting the state to non-ready. +func (c *CAManager) getIntermediateCASigned(provider ca.Provider, newActiveRoot *structs.CARoot) error { + csr, err := provider.GenerateIntermediateCSR() + if err != nil { + return err + } + + var intermediatePEM string + if err := c.delegate.forwardDC("ConnectCA.SignIntermediate", c.serverConf.PrimaryDatacenter, c.delegate.generateCASignRequest(csr), &intermediatePEM); err != nil { + // this is a failure in the primary and shouldn't be capable of erroring out our establishing leadership + c.logger.Warn("Primary datacenter refused to sign our intermediate CA certificate", "error", err) + return nil + } + + if err := provider.SetIntermediate(intermediatePEM, newActiveRoot.RootCert); err != nil { + return fmt.Errorf("Failed to set the intermediate certificate with the CA provider: %v", err) + } + + intermediateCert, err := connect.ParseCert(intermediatePEM) + if err != nil { + return fmt.Errorf("error parsing intermediate cert: %v", err) + } + + // Append the new intermediate to our local active root entry. This is + // where the root representations start to diverge. + newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediatePEM) + newActiveRoot.SigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId) + + c.logger.Info("received new intermediate certificate from primary datacenter") + return nil +} + +// intermediateCertRenewalWatch periodically attempts to renew the intermediate cert. +func (c *CAManager) intermediateCertRenewalWatch(ctx context.Context) error { + isPrimary := c.serverConf.Datacenter == c.serverConf.PrimaryDatacenter + + for { + select { + case <-ctx.Done(): + return nil + case <-time.After(structs.IntermediateCertRenewInterval): + retryLoopBackoffAbortOnSuccess(ctx, func() error { + return c.RenewIntermediate(ctx, isPrimary) + }, func(err error) { + c.logger.Error("error renewing intermediate certs", + "routine", intermediateCertRenewWatchRoutineName, + "error", err, + ) + }) + } + } +} + +// RenewIntermediate checks the intermediate cert for +// expiration. If more than half the time a cert is valid has passed, +// it will try to renew it. +func (c *CAManager) RenewIntermediate(ctx context.Context, isPrimary bool) error { + // Grab the 'lock' right away so the provider/config can't be changed out while we check + // the intermediate. + if err := c.setState(CAStateRenewIntermediate, true); err != nil { + return err + } + defer c.setState(CAStateReady, false) + + provider, _ := c.getCAProvider() + if provider == nil { + // this happens when leadership is being revoked and this go routine will be stopped + return nil + } + // If this isn't the primary, make sure the CA has been initialized. + if !isPrimary && !c.configuredSecondaryCA() { + return fmt.Errorf("secondary CA is not yet configured.") + } + + state := c.delegate.State() + _, root, err := state.CARootActive(nil) + if err != nil { + return err + } + activeRoot := root.Clone() + + // If this is the primary, check if this is a provider that uses an intermediate cert. If + // it isn't, we don't need to check for a renewal. + if isPrimary { + _, config, err := state.CAConfig(nil) + if err != nil { + return err + } + + if _, ok := ca.PrimaryIntermediateProviders[config.Provider]; !ok { + return nil + } + } + + activeIntermediate, err := provider.ActiveIntermediate() + if err != nil { + return err + } + + if activeIntermediate == "" { + return fmt.Errorf("datacenter doesn't have an active intermediate.") + } + + intermediateCert, err := connect.ParseCert(activeIntermediate) + if err != nil { + return fmt.Errorf("error parsing active intermediate cert: %v", err) + } + + if lessThanHalfTimePassed(time.Now(), intermediateCert.NotBefore.Add(ca.CertificateTimeDriftBuffer), + intermediateCert.NotAfter) { + return nil + } + + // Enough time has passed, go ahead with getting a new intermediate. + renewalFunc := c.getIntermediateCAPrimary + if !isPrimary { + renewalFunc = c.getIntermediateCASigned + } + errCh := make(chan error, 1) + go func() { + errCh <- renewalFunc(provider, activeRoot) + }() + + // Wait for the renewal func to return or for the context to be canceled. + select { + case <-ctx.Done(): + return ctx.Err() + case err := <-errCh: + if err != nil { + return err + } + } + + if err := c.persistNewRootAndConfig(provider, activeRoot, nil); err != nil { + return err + } + + c.setCAProvider(provider, activeRoot) + return nil +} + +// secondaryCARootWatch maintains a blocking query to the primary datacenter's +// ConnectCA.Roots endpoint to monitor when it needs to request a new signed +// intermediate certificate. +func (c *CAManager) secondaryCARootWatch(ctx context.Context) error { + args := structs.DCSpecificRequest{ + Datacenter: c.serverConf.PrimaryDatacenter, + QueryOptions: structs.QueryOptions{ + // the maximum time the primary roots watch query can block before returning + MaxQueryTime: c.serverConf.MaxQueryTime, + }, + } + + c.logger.Debug("starting Connect CA root replication from primary datacenter", "primary", c.serverConf.PrimaryDatacenter) + + retryLoopBackoff(ctx, func() error { + var roots structs.IndexedCARoots + if err := c.delegate.forwardDC("ConnectCA.Roots", c.serverConf.PrimaryDatacenter, &args, &roots); err != nil { + return fmt.Errorf("Error retrieving the primary datacenter's roots: %v", err) + } + + // Return if the context has been canceled while waiting on the RPC. + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + // Attempt to update the roots using the returned data. + if err := c.UpdateRoots(roots); err != nil { + return err + } + args.QueryOptions.MinQueryIndex = nextIndexVal(args.QueryOptions.MinQueryIndex, roots.QueryMeta.Index) + return nil + }, func(err error) { + c.logger.Error("CA root replication failed, will retry", + "routine", secondaryCARootWatchRoutineName, + "error", err, + ) + }) + + return nil +} + +// UpdateRoots updates the cached roots from the primary and regenerates the intermediate +// certificate if necessary. +func (c *CAManager) UpdateRoots(roots structs.IndexedCARoots) error { + // Update the state first to claim the 'lock'. + if err := c.setState(CAStateReconfig, true); err != nil { + return err + } + defer c.setState(CAStateReady, false) + + // Update the cached primary roots now that the lock is held. + if err := c.setPrimaryRoots(roots); err != nil { + return err + } + + // Check to see if the primary has been upgraded in case we're waiting to switch to + // secondary mode. + provider, _ := c.getCAProvider() + if provider == nil { + // this happens when leadership is being revoked and this go routine will be stopped + return nil + } + if !c.configuredSecondaryCA() { + versionOk, primaryFound := ServersInDCMeetMinimumVersion(c.delegate, c.serverConf.PrimaryDatacenter, minMultiDCConnectVersion) + if !primaryFound { + return fmt.Errorf("Primary datacenter is unreachable - deferring secondary CA initialization") + } + + if versionOk { + if err := c.initializeSecondaryProvider(provider, roots); err != nil { + return fmt.Errorf("Failed to initialize secondary CA provider: %v", err) + } + } + } + + // Run the secondary CA init routine to see if we need to request a new + // intermediate. + if c.configuredSecondaryCA() { + if err := c.initializeSecondaryCA(provider, nil); err != nil { + return fmt.Errorf("Failed to initialize the secondary CA: %v", err) + } + } + + return nil +} + +// initializeSecondaryProvider configures the given provider for a secondary, non-root datacenter. +func (c *CAManager) initializeSecondaryProvider(provider ca.Provider, roots structs.IndexedCARoots) error { + if roots.TrustDomain == "" { + return fmt.Errorf("trust domain from primary datacenter is not initialized") + } + + clusterID := strings.Split(roots.TrustDomain, ".")[0] + _, conf, err := c.delegate.State().CAConfig(nil) + if err != nil { + return err + } + + pCfg := ca.ProviderConfig{ + ClusterID: clusterID, + Datacenter: c.serverConf.Datacenter, + IsPrimary: false, + RawConfig: conf.Config, + State: conf.State, + } + if err := provider.Configure(pCfg); err != nil { + return fmt.Errorf("error configuring provider: %v", err) + } + + return c.setSecondaryCA() +} + +// setSecondaryCA sets the flag for acting as a secondary CA to true. +func (c *CAManager) setSecondaryCA() error { + c.stateLock.Lock() + defer c.stateLock.Unlock() + + if c.state == CAStateInitializing || c.state == CAStateReconfig { + c.actingSecondaryCA = true + } else { + return fmt.Errorf("Cannot update secondary CA flag in state %q", c.state) + } + + return nil +} + +// configuredSecondaryCA returns true if we have been initialized as a secondary datacenter's CA. +func (c *CAManager) configuredSecondaryCA() bool { + c.stateLock.Lock() + defer c.stateLock.Unlock() + return c.actingSecondaryCA +} diff --git a/agent/consul/leader_connect_ca_test.go b/agent/consul/leader_connect_ca_test.go new file mode 100644 index 0000000000..cc1ba16afa --- /dev/null +++ b/agent/consul/leader_connect_ca_test.go @@ -0,0 +1,285 @@ +package consul + +import ( + "context" + "crypto/x509" + "errors" + "fmt" + "testing" + "time" + + "github.com/hashicorp/consul/agent/connect" + ca "github.com/hashicorp/consul/agent/connect/ca" + "github.com/hashicorp/consul/agent/consul/state" + "github.com/hashicorp/consul/agent/metadata" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/sdk/testutil" + "github.com/hashicorp/go-version" + "github.com/hashicorp/serf/serf" + "github.com/stretchr/testify/require" +) + +// TODO(kyhavlov): replace with t.Deadline() +const CATestTimeout = 7 * time.Second + +type mockCAServerDelegate struct { + t *testing.T + config *Config + store *state.Store + primaryRoot *structs.CARoot + callbackCh chan string +} + +func NewMockCAServerDelegate(t *testing.T, config *Config) *mockCAServerDelegate { + store, err := state.NewStateStore(nil) + require.NoError(t, err) + delegate := &mockCAServerDelegate{ + t: t, + config: config, + store: store, + primaryRoot: connect.TestCAWithTTL(t, nil, 1*time.Second), + callbackCh: make(chan string, 0), + } + delegate.store.CASetConfig(1, testCAConfig()) + + return delegate +} + +func (m *mockCAServerDelegate) State() *state.Store { + return m.store +} + +func (m *mockCAServerDelegate) IsLeader() bool { + return true +} + +func (m *mockCAServerDelegate) CheckServers(datacenter string, fn func(*metadata.Server) bool) { + ver, _ := version.NewVersion("1.6.0") + fn(&metadata.Server{ + Status: serf.StatusAlive, + Build: *ver, + }) +} + +func (m *mockCAServerDelegate) ApplyCARequest(req *structs.CARequest) (interface{}, error) { + return ca.ApplyCARequestToStore(m.store, req) +} + +func (m *mockCAServerDelegate) createCAProvider(conf *structs.CAConfiguration) (ca.Provider, error) { + return &mockCAProvider{ + callbackCh: m.callbackCh, + rootPEM: m.primaryRoot.RootCert, + }, nil +} + +func (m *mockCAServerDelegate) forwardDC(method, dc string, args interface{}, reply interface{}) error { + switch method { + case "ConnectCA.Roots": + roots := reply.(*structs.IndexedCARoots) + roots.TrustDomain = connect.TestClusterID + roots.Roots = []*structs.CARoot{m.primaryRoot} + roots.ActiveRootID = m.primaryRoot.ID + case "ConnectCA.SignIntermediate": + r := reply.(*string) + *r = m.primaryRoot.RootCert + default: + return fmt.Errorf("received call to unsupported method %q", method) + } + + m.callbackCh <- fmt.Sprintf("forwardDC/%s", method) + + return nil +} + +func (m *mockCAServerDelegate) generateCASignRequest(csr string) *structs.CASignRequest { + return &structs.CASignRequest{ + Datacenter: m.config.PrimaryDatacenter, + CSR: csr, + } +} + +func (m *mockCAServerDelegate) raftApply(t structs.MessageType, msg interface{}) (interface{}, error) { + if t == structs.ConnectCARequestType { + req := msg.(*structs.CARequest) + act, err := m.store.CARootSetCAS(1, req.Index, req.Roots) + require.NoError(m.t, err) + require.True(m.t, act) + + act, err = m.store.CACheckAndSetConfig(1, req.Config.ModifyIndex, req.Config) + require.NoError(m.t, err) + require.True(m.t, act) + } else { + return nil, fmt.Errorf("got invalid MessageType %v", t) + } + m.callbackCh <- fmt.Sprintf("raftApply/%v", t) + return nil, nil +} + +// mockCAProvider mocks an empty provider implementation with a channel in order to coordinate +// waiting for certain methods to be called. +type mockCAProvider struct { + callbackCh chan string + rootPEM string +} + +func (m *mockCAProvider) Configure(cfg ca.ProviderConfig) error { return nil } +func (m *mockCAProvider) State() (map[string]string, error) { return nil, nil } +func (m *mockCAProvider) GenerateRoot() error { return nil } +func (m *mockCAProvider) ActiveRoot() (string, error) { return m.rootPEM, nil } +func (m *mockCAProvider) GenerateIntermediateCSR() (string, error) { + m.callbackCh <- "provider/GenerateIntermediateCSR" + return "", nil +} +func (m *mockCAProvider) SetIntermediate(intermediatePEM, rootPEM string) error { + m.callbackCh <- "provider/SetIntermediate" + return nil +} +func (m *mockCAProvider) ActiveIntermediate() (string, error) { return m.rootPEM, nil } +func (m *mockCAProvider) GenerateIntermediate() (string, error) { return "", nil } +func (m *mockCAProvider) Sign(*x509.CertificateRequest) (string, error) { return "", nil } +func (m *mockCAProvider) SignIntermediate(*x509.CertificateRequest) (string, error) { return "", nil } +func (m *mockCAProvider) CrossSignCA(*x509.Certificate) (string, error) { return "", nil } +func (m *mockCAProvider) SupportsCrossSigning() (bool, error) { return false, nil } +func (m *mockCAProvider) Cleanup() error { return nil } + +func waitForCh(t *testing.T, ch chan string, expected string) { + select { + case op := <-ch: + if op != expected { + t.Fatalf("got unexpected op %q, wanted %q", op, expected) + } + case <-time.After(CATestTimeout): + t.Fatalf("never got op %q", expected) + } +} + +func waitForEmptyCh(t *testing.T, ch chan string) { + select { + case op := <-ch: + t.Fatalf("got unexpected op %q", op) + case <-time.After(1 * time.Second): + } +} + +func testCAConfig() *structs.CAConfiguration { + return &structs.CAConfiguration{ + ClusterID: connect.TestClusterID, + Provider: "mock", + Config: map[string]interface{}{ + "LeafCertTTL": "72h", + "IntermediateCertTTL": "2160h", + }, + } +} + +// initTestManager initializes a CAManager with a mockCAServerDelegate, consuming +// the ops that come through the channels and returning when initialization has finished. +func initTestManager(t *testing.T, manager *CAManager, delegate *mockCAServerDelegate) { + initCh := make(chan struct{}) + go func() { + require.NoError(t, manager.InitializeCA()) + close(initCh) + }() + for i := 0; i < 5; i++ { + select { + case <-delegate.callbackCh: + case <-time.After(CATestTimeout): + t.Fatal("failed waiting for initialization events") + } + } + select { + case <-initCh: + case <-time.After(CATestTimeout): + t.Fatal("failed waiting for initialization") + } +} + +func TestCAManager_Initialize(t *testing.T) { + conf := DefaultConfig() + conf.ConnectEnabled = true + conf.PrimaryDatacenter = "dc1" + conf.Datacenter = "dc2" + delegate := NewMockCAServerDelegate(t, conf) + manager := NewCAManager(delegate, testutil.Logger(t), conf) + + // Call InitializeCA and then confirm the RPCs and provider calls + // happen in the expected order. + require.EqualValues(t, CAStateUninitialized, manager.state) + errCh := make(chan error) + go func() { + errCh <- manager.InitializeCA() + }() + + waitForCh(t, delegate.callbackCh, "forwardDC/ConnectCA.Roots") + require.EqualValues(t, CAStateInitializing, manager.state) + waitForCh(t, delegate.callbackCh, "provider/GenerateIntermediateCSR") + waitForCh(t, delegate.callbackCh, "forwardDC/ConnectCA.SignIntermediate") + waitForCh(t, delegate.callbackCh, "provider/SetIntermediate") + waitForCh(t, delegate.callbackCh, fmt.Sprintf("raftApply/%v", structs.ConnectCARequestType)) + waitForEmptyCh(t, delegate.callbackCh) + + // Make sure the InitializeCA call returned successfully. + select { + case err := <-errCh: + require.NoError(t, err) + case <-time.After(CATestTimeout): + t.Fatal("never got result from errCh") + } + + require.EqualValues(t, CAStateReady, manager.state) +} + +func TestCAManager_UpdateConfigWhileRenewIntermediate(t *testing.T) { + // No parallel execution because we change globals + // Set the interval and drift buffer low for renewing the cert. + origInterval := structs.IntermediateCertRenewInterval + origDriftBuffer := ca.CertificateTimeDriftBuffer + defer func() { + structs.IntermediateCertRenewInterval = origInterval + ca.CertificateTimeDriftBuffer = origDriftBuffer + }() + structs.IntermediateCertRenewInterval = time.Millisecond + ca.CertificateTimeDriftBuffer = 0 + + conf := DefaultConfig() + conf.ConnectEnabled = true + conf.PrimaryDatacenter = "dc1" + conf.Datacenter = "dc2" + delegate := NewMockCAServerDelegate(t, conf) + manager := NewCAManager(delegate, testutil.Logger(t), conf) + initTestManager(t, manager, delegate) + + // Wait half the TTL for the cert to need renewing. + time.Sleep(500 * time.Millisecond) + + // Call RenewIntermediate and then confirm the RPCs and provider calls + // happen in the expected order. + errCh := make(chan error) + go func() { + errCh <- manager.RenewIntermediate(context.TODO(), false) + }() + + waitForCh(t, delegate.callbackCh, "provider/GenerateIntermediateCSR") + + // Call UpdateConfiguration while RenewIntermediate is still in-flight to + // make sure we get an error about the state being occupied. + go func() { + require.EqualValues(t, CAStateRenewIntermediate, manager.state) + require.Error(t, errors.New("already in state"), manager.UpdateConfiguration(&structs.CARequest{})) + }() + + waitForCh(t, delegate.callbackCh, "forwardDC/ConnectCA.SignIntermediate") + waitForCh(t, delegate.callbackCh, "provider/SetIntermediate") + waitForCh(t, delegate.callbackCh, fmt.Sprintf("raftApply/%v", structs.ConnectCARequestType)) + waitForEmptyCh(t, delegate.callbackCh) + + // Make sure the RenewIntermediate call returned successfully. + select { + case err := <-errCh: + require.NoError(t, err) + case <-time.After(CATestTimeout): + t.Fatal("never got result from errCh") + } + + require.EqualValues(t, CAStateReady, manager.state) +} diff --git a/agent/consul/leader_connect_test.go b/agent/consul/leader_connect_test.go index 63060d71ae..4dd59aa27f 100644 --- a/agent/consul/leader_connect_test.go +++ b/agent/consul/leader_connect_test.go @@ -81,11 +81,11 @@ func TestLeader_SecondaryCA_Initialize(t *testing.T) { s2.tokens.UpdateAgentToken(masterToken, token.TokenSourceConfig) s2.tokens.UpdateReplicationToken(masterToken, token.TokenSourceConfig) - testrpc.WaitForLeader(t, s2.RPC, "secondary") - // Create the WAN link joinWAN(t, s2, s1) + testrpc.WaitForLeader(t, s2.RPC, "secondary") + waitForNewACLs(t, s1) waitForNewACLs(t, s2) @@ -176,9 +176,7 @@ func waitForActiveCARoot(t *testing.T, srv *Server, expect *structs.CARoot) { } func getCAProviderWithLock(s *Server) (ca.Provider, *structs.CARoot) { - s.caProviderReconfigurationLock.Lock() - defer s.caProviderReconfigurationLock.Unlock() - return s.getCAProvider() + return s.caManager.getCAProvider() } func TestLeader_Vault_PrimaryCA_IntermediateRenew(t *testing.T) { @@ -703,22 +701,6 @@ func TestLeader_SecondaryCA_TransitionFromPrimary(t *testing.T) { require.NoError(t, s2.RPC("ConnectCA.Roots", &args, &dc2PrimaryRoots)) require.Len(t, dc2PrimaryRoots.Roots, 1) - // Set the ExternalTrustDomain to a blank string to simulate an old version (pre-1.4.0) - // it's fine to change the roots struct directly here because the RPC endpoint already - // makes a copy to return. - dc2PrimaryRoots.Roots[0].ExternalTrustDomain = "" - rootSetArgs := structs.CARequest{ - Op: structs.CAOpSetRoots, - Datacenter: "dc2", - Index: dc2PrimaryRoots.Index, - Roots: dc2PrimaryRoots.Roots, - } - resp, err := s2.raftApply(structs.ConnectCARequestType, rootSetArgs) - require.NoError(t, err) - if respErr, ok := resp.(error); ok { - t.Fatal(respErr) - } - // Shutdown s2 and restart it with the dc1 as the primary s2.Shutdown() dir3, s3 := testServerWithConfig(t, func(c *Config) { diff --git a/agent/consul/server.go b/agent/consul/server.go index 16d78f288c..a190dd740b 100644 --- a/agent/consul/server.go +++ b/agent/consul/server.go @@ -19,7 +19,6 @@ import ( metrics "github.com/armon/go-metrics" "github.com/hashicorp/consul/acl" - ca "github.com/hashicorp/consul/agent/connect/ca" "github.com/hashicorp/consul/agent/consul/authmethod" "github.com/hashicorp/consul/agent/consul/authmethod/ssoauth" "github.com/hashicorp/consul/agent/consul/autopilot" @@ -139,20 +138,8 @@ type Server struct { // autopilot is the Autopilot instance for this server. autopilot *autopilot.Autopilot - // autopilotWaitGroup is used to block until Autopilot shuts down. - autopilotWaitGroup sync.WaitGroup - - // caProviderReconfigurationLock guards the provider reconfiguration. - caProviderReconfigurationLock sync.Mutex - // caProvider is the current CA provider in use for Connect. This is - // only non-nil when we are the leader. - caProvider ca.Provider - // caProviderRoot is the CARoot that was stored along with the ca.Provider - // active. It's only updated in lock-step with the caProvider. This prevents - // races between state updates to active roots and the fetch of the provider - // instance. - caProviderRoot *structs.CARoot - caProviderLock sync.RWMutex + // caManager is used to synchronize CA operations across the leader and RPC functions. + caManager *CAManager // rate limiter to use when signing leaf certificates caLeafLimiter connectSignRateLimiter @@ -302,10 +289,6 @@ type Server struct { shutdownCh chan struct{} shutdownLock sync.Mutex - // State for whether this datacenter is acting as a secondary CA. - actingSecondaryCA bool - actingSecondaryLock sync.RWMutex - // Manager to handle starting/stopping go routines when establishing/revoking raft leadership leaderRoutineManager *LeaderRoutineManager @@ -497,6 +480,7 @@ func NewServer(config *Config, options ...ConsulOption) (*Server, error) { return nil, fmt.Errorf("Failed to start Raft: %v", err) } + s.caManager = NewCAManager(&caDelegateWithState{s}, s.loggers.Named(logging.Connect), s.config) if s.config.ConnectEnabled && (s.config.AutoEncryptAllowTLS || s.config.AutoConfigAuthzEnabled) { go s.connectCARootsMonitor(&lib.StopChannelContext{StopCh: s.shutdownCh}) } diff --git a/agent/consul/server_connect.go b/agent/consul/server_connect.go index ecc6481581..e854d813a6 100644 --- a/agent/consul/server_connect.go +++ b/agent/consul/server_connect.go @@ -132,7 +132,7 @@ func (s *Server) getCARoots(ws memdb.WatchSet, state *state.Store) (*structs.Ind } func (s *Server) SignCertificate(csr *x509.CertificateRequest, spiffeID connect.CertURI) (*structs.IssuedCert, error) { - provider, caRoot := s.getCAProvider() + provider, caRoot := s.caManager.getCAProvider() if provider == nil { return nil, fmt.Errorf("internal error: CA provider is nil") } else if caRoot == nil { diff --git a/agent/consul/server_test.go b/agent/consul/server_test.go index 6006d3e2c7..e6c675ced8 100644 --- a/agent/consul/server_test.go +++ b/agent/consul/server_test.go @@ -1504,7 +1504,7 @@ func TestServer_CALogging(t *testing.T) { defer s1.Shutdown() testrpc.WaitForLeader(t, s1.RPC, "dc1") - if _, ok := s1.caProvider.(ca.NeedsLogger); !ok { + if _, ok := s1.caManager.provider.(ca.NeedsLogger); !ok { t.Fatalf("provider does not implement NeedsLogger") } diff --git a/agent/structs/acl.go b/agent/structs/acl.go index de01c6e2f7..fef8d08750 100644 --- a/agent/structs/acl.go +++ b/agent/structs/acl.go @@ -151,7 +151,7 @@ type ACLServiceIdentity struct { func (s *ACLServiceIdentity) Clone() *ACLServiceIdentity { s2 := *s - s2.Datacenters = cloneStringSlice(s.Datacenters) + s2.Datacenters = CloneStringSlice(s.Datacenters) return &s2 } @@ -666,7 +666,7 @@ func (t *ACLPolicy) UnmarshalJSON(data []byte) error { func (p *ACLPolicy) Clone() *ACLPolicy { p2 := *p - p2.Datacenters = cloneStringSlice(p.Datacenters) + p2.Datacenters = CloneStringSlice(p.Datacenters) return &p2 } @@ -1460,7 +1460,7 @@ type ACLPolicyBatchDeleteRequest struct { PolicyIDs []string } -func cloneStringSlice(s []string) []string { +func CloneStringSlice(s []string) []string { if len(s) == 0 { return nil } diff --git a/agent/structs/connect_ca.go b/agent/structs/connect_ca.go index 6b57e9e398..54f156141d 100644 --- a/agent/structs/connect_ca.go +++ b/agent/structs/connect_ca.go @@ -122,6 +122,16 @@ type CARoot struct { RaftIndex } +func (c *CARoot) Clone() *CARoot { + if c == nil { + return nil + } + + newCopy := *c + newCopy.IntermediateCerts = CloneStringSlice(c.IntermediateCerts) + return &newCopy +} + // CARoots is a list of CARoot structures. type CARoots []*CARoot