From 16e95f1d7bfb638ee732badb275530967fe9434f Mon Sep 17 00:00:00 2001 From: Kyle Havlovitz Date: Fri, 13 Nov 2020 14:33:19 -0800 Subject: [PATCH] Reorganize some CA manager code for correctness/readability --- agent/consul/connect_ca_endpoint.go | 227 +----------- agent/consul/leader.go | 3 +- agent/consul/leader_connect_ca.go | 551 ++++++++++++++++++++-------- agent/consul/server.go | 3 - 4 files changed, 408 insertions(+), 376 deletions(-) diff --git a/agent/consul/connect_ca_endpoint.go b/agent/consul/connect_ca_endpoint.go index f75ac33261..46607e3b6f 100644 --- a/agent/consul/connect_ca_endpoint.go +++ b/agent/consul/connect_ca_endpoint.go @@ -3,14 +3,12 @@ package consul import ( "errors" "fmt" - "reflect" "time" "github.com/hashicorp/go-hclog" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/connect" - "github.com/hashicorp/consul/agent/connect/ca" "github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/go-memdb" @@ -103,230 +101,7 @@ func (s *ConnectCA) ConfigurationSet( return acl.ErrPermissionDenied } - // Exit early if it's a no-op change - state := s.srv.fsm.State() - confIdx, config, err := state.CAConfig(nil) - if err != nil { - return err - } - - // Don't allow state changes. Either it needs to be empty or the same to allow - // read-modify-write loops that don't touch the State field. - if len(args.Config.State) > 0 && - !reflect.DeepEqual(args.Config.State, config.State) { - return ErrStateReadOnly - } - - // Don't allow users to change the ClusterID. - args.Config.ClusterID = config.ClusterID - if args.Config.Provider == config.Provider && reflect.DeepEqual(args.Config.Config, config.Config) { - return nil - } - - // If the provider hasn't changed, we need to load the current Provider state - // so it can decide if it needs to change resources or not based on the config - // change. - if args.Config.Provider == config.Provider { - // Note this is a shallow copy since the State method doc requires the - // provider return a map that will not be further modified and should not - // modify the one we pass to Configure. - args.Config.State = config.State - } - - // Create a new instance of the provider described by the config - // and get the current active root CA. This acts as a good validation - // of the config and makes sure the provider is functioning correctly - // before we commit any changes to Raft. - newProvider, err := s.srv.createCAProvider(args.Config) - if err != nil { - return fmt.Errorf("could not initialize provider: %v", err) - } - pCfg := ca.ProviderConfig{ - ClusterID: args.Config.ClusterID, - Datacenter: s.srv.config.Datacenter, - // This endpoint can be called in a secondary DC too so set this correctly. - IsPrimary: s.srv.config.Datacenter == s.srv.config.PrimaryDatacenter, - RawConfig: args.Config.Config, - State: args.Config.State, - } - if err := newProvider.Configure(pCfg); err != nil { - return fmt.Errorf("error configuring provider: %v", err) - } - - // Set up a defer to clean up the new provider if we exit early due to an error. - cleanupNewProvider := true - defer func() { - if cleanupNewProvider { - if err := newProvider.Cleanup(); err != nil { - s.logger.Warn("failed to clean up CA provider while handling startup failure", "provider", newProvider, "error", err) - } - } - }() - - // If this is a secondary, check if the intermediate needs to be regenerated. - if s.srv.config.Datacenter != s.srv.config.PrimaryDatacenter { - // Attempt to take the CA lock in order to update the config. - if err := s.srv.caManager.setState(CAStateReconfig); err != nil { - return err - } - defer s.srv.caManager.setReady() - - if err := s.srv.caManager.initializeSecondaryCA(newProvider, args.Config); err != nil { - return fmt.Errorf("Error updating secondary datacenter CA config: %v", err) - } - cleanupNewProvider = false - s.logger.Info("Secondary CA provider config updated") - return nil - } - - if err := newProvider.GenerateRoot(); err != nil { - return fmt.Errorf("error generating CA root certificate: %v", err) - } - - newRootPEM, err := newProvider.ActiveRoot() - if err != nil { - return err - } - - newActiveRoot, err := parseCARoot(newRootPEM, args.Config.Provider, args.Config.ClusterID) - if err != nil { - return err - } - - // See if the provider needs to persist any state along with the config - pState, err := newProvider.State() - if err != nil { - return fmt.Errorf("error getting provider state: %v", err) - } - args.Config.State = pState - - // Compare the new provider's root CA ID to the current one. If they - // match, just update the existing provider with the new config. - // If they don't match, begin the root rotation process. - _, root, err := state.CARootActive(nil) - if err != nil { - return err - } - - // If the root didn't change, just update the config and return. - if root != nil && root.ID == newActiveRoot.ID { - args.Op = structs.CAOpSetConfig - resp, err := s.srv.raftApply(structs.ConnectCARequestType, args) - if err != nil { - return err - } - if respErr, ok := resp.(error); ok { - return respErr - } - - // If the config has been committed, update the local provider instance - cleanupNewProvider = false - s.srv.caManager.setCAProvider(newProvider, newActiveRoot) - - s.logger.Info("CA provider config updated") - - return nil - } - - // At this point, we know the config change has trigged a root rotation, - // either by swapping the provider type or changing the provider's config - // to use a different root certificate. - - // First up, sanity check that the current provider actually supports - // cross-signing. - oldProvider, _ := s.srv.caManager.getCAProvider() - if oldProvider == nil { - return fmt.Errorf("internal error: CA provider is nil") - } - canXSign, err := oldProvider.SupportsCrossSigning() - if err != nil { - return fmt.Errorf("CA provider error: %s", err) - } - if !canXSign && !args.Config.ForceWithoutCrossSigning { - return errors.New("The current CA Provider does not support cross-signing. " + - "You can try again with ForceWithoutCrossSigningSet but this may cause " + - "disruption - see documentation for more.") - } - if !canXSign && args.Config.ForceWithoutCrossSigning { - s.logger.Warn("current CA doesn't support cross signing but " + - "CA reconfiguration forced anyway with ForceWithoutCrossSigning") - } - - // If it's a config change that would trigger a rotation (different provider/root): - // 1. Get the root from the new provider. - // 2. Call CrossSignCA on the old provider to sign the new root with the old one to - // get a cross-signed certificate. - // 3. Take the active root for the new provider and append the intermediate from step 2 - // to its list of intermediates. - newRoot, err := connect.ParseCert(newRootPEM) - if err != nil { - return err - } - - if canXSign { - // Have the old provider cross-sign the new root - xcCert, err := oldProvider.CrossSignCA(newRoot) - if err != nil { - return err - } - - // Add the cross signed cert to the new CA's intermediates (to be attached - // to leaf certs). - newActiveRoot.IntermediateCerts = []string{xcCert} - } - - intermediate, err := newProvider.GenerateIntermediate() - if err != nil { - return err - } - if intermediate != newRootPEM { - newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediate) - } - - // Update the roots and CA config in the state store at the same time - idx, roots, err := state.CARoots(nil) - if err != nil { - return err - } - - var newRoots structs.CARoots - for _, r := range roots { - newRoot := *r - if newRoot.Active { - newRoot.Active = false - newRoot.RotatedOutAt = time.Now() - } - newRoots = append(newRoots, &newRoot) - } - newRoots = append(newRoots, newActiveRoot) - - args.Op = structs.CAOpSetRootsAndConfig - args.Index = idx - args.Config.ModifyIndex = confIdx - args.Roots = newRoots - resp, err := s.srv.raftApply(structs.ConnectCARequestType, args) - if err != nil { - return err - } - if respErr, ok := resp.(error); ok { - return respErr - } - if respOk, ok := resp.(bool); ok && !respOk { - return fmt.Errorf("could not atomically update roots and config") - } - - // If the config has been committed, update the local provider instance - // and call teardown on the old provider - cleanupNewProvider = false - s.srv.caManager.setCAProvider(newProvider, newActiveRoot) - - if err := oldProvider.Cleanup(); err != nil { - s.logger.Warn("failed to clean up old provider", "provider", config.Provider) - } - - s.logger.Info("CA rotated to new root under provider", "provider", args.Config.Provider) - - return nil + return s.srv.caManager.UpdateConfiguration(args) } // Roots returns the currently trusted root certificates. diff --git a/agent/consul/leader.go b/agent/consul/leader.go index a251d63b3b..b4b200c808 100644 --- a/agent/consul/leader.go +++ b/agent/consul/leader.go @@ -327,8 +327,7 @@ func (s *Server) establishLeadership(ctx context.Context) error { s.getOrCreateAutopilotConfig() s.autopilot.Start(ctx) - // todo(kyhavlov): start a goroutine here for handling periodic CA rotation - if err := s.caManager.initializeCA(); err != nil { + if err := s.caManager.InitializeCA(); err != nil { return err } diff --git a/agent/consul/leader_connect_ca.go b/agent/consul/leader_connect_ca.go index e1d7ffec1c..39103d8e17 100644 --- a/agent/consul/leader_connect_ca.go +++ b/agent/consul/leader_connect_ca.go @@ -2,6 +2,7 @@ package consul import ( "context" + "errors" "fmt" "reflect" "strings" @@ -33,25 +34,21 @@ type CAManager struct { srv *Server logger hclog.Logger + providerLock sync.RWMutex // provider is the current CA provider in use for Connect. This is // only non-nil when we are the leader. provider ca.Provider - // providerRoot is the CARoot that was stored along with the ca.Provider // active. It's only updated in lock-step with the provider. This prevents // races between state updates to active roots and the fetch of the provider // instance. providerRoot *structs.CARoot - providerLock sync.RWMutex - // primaryRoots is the most recently seen state of the root CAs from the primary datacenter. - // This is protected by the stateLock and updated by initializeCA and the root CA watch routine. - primaryRoots structs.IndexedCARoots - - // actingSecondaryCA is whether this datacenter has been initialized as a secondary CA. - actingSecondaryCA bool + // stateLock protects the internal state used for administrative CA tasks. + stateLock sync.Mutex state CAState - stateLock sync.RWMutex + primaryRoots structs.IndexedCARoots // The most recently seen state of the root CAs from the primary datacenter. + actingSecondaryCA bool // True if this datacenter has been initialized as a secondary CA. } func NewCAManager(srv *Server) *CAManager { @@ -65,27 +62,36 @@ func NewCAManager(srv *Server) *CAManager { // setState attempts to update the CA state to the given state. // If the current state is not READY, this will fail. The only exception is when // the current state is UNINITIALIZED, and the function is called with CAStateInitializing. -func (c *CAManager) setState(newState CAState) error { - c.stateLock.RLock() +func (c *CAManager) setState(newState CAState, validateState bool) error { + c.stateLock.Lock() + defer c.stateLock.Unlock() state := c.state - c.stateLock.RUnlock() - if state == CAStateReady || (state == CAStateUninitialized && newState == CAStateInitializing) { - c.stateLock.Lock() + if !validateState || state == CAStateReady || (state == CAStateUninitialized && newState == CAStateInitializing) { c.state = newState - c.stateLock.Unlock() } else { - return fmt.Errorf("CA is already in %s state", state) + return fmt.Errorf("CA is already in state %q", state) } return nil } -// setReady sets the CA state back to READY. This should only be called by a function -// that has successfully called setState beforehand. -func (c *CAManager) setReady() { +// setPrimaryRoots updates the most recently seen roots from the primary. +func (c *CAManager) setPrimaryRoots(newRoots structs.IndexedCARoots) error { c.stateLock.Lock() - c.state = CAStateReady - c.stateLock.Unlock() + defer c.stateLock.Unlock() + + if c.state == CAStateInitializing || c.state == CAStateReconfig { + c.primaryRoots = newRoots + } else { + return fmt.Errorf("Cannot update primary roots in state %q", c.state) + } + return nil +} + +func (c *CAManager) getPrimaryRoots() structs.IndexedCARoots { + c.stateLock.Lock() + defer c.stateLock.Unlock() + return c.primaryRoots } // initializeCAConfig is used to initialize the CA config if necessary @@ -196,20 +202,21 @@ func (c *CAManager) setCAProvider(newProvider ca.Provider, root *structs.CARoot) c.providerLock.Unlock() } -// initializeCA sets up the CA provider when gaining leadership, either bootstrapping +// InitializeCA sets up the CA provider when gaining leadership, either bootstrapping // the CA if this is the primary DC or making a remote RPC for intermediate signing // if this is a secondary DC. -func (c *CAManager) initializeCA() error { +func (c *CAManager) InitializeCA() error { // Bail if connect isn't enabled. if !c.srv.config.ConnectEnabled { return nil } - err := c.setState(CAStateInitializing) + // Update the state before doing anything else. + err := c.setState(CAStateInitializing, true) if err != nil { return err } - defer c.setReady() + defer c.setState(CAStateReady, false) // Initialize the provider based on the current config. conf, err := c.initializeCAConfig() @@ -246,7 +253,9 @@ func (c *CAManager) initializeCA() error { if err := c.srv.forwardDC("ConnectCA.Roots", c.srv.config.PrimaryDatacenter, &args, &roots); err != nil { return err } - c.primaryRoots = roots + if err := c.setPrimaryRoots(roots); err != nil { + return err + } // Configure the CA provider and initialize the intermediate certificate if necessary. if err := c.initializeSecondaryProvider(provider, roots); err != nil { @@ -428,8 +437,9 @@ func (c *CAManager) initializeSecondaryCA(provider ca.Provider, config *structs. // active one. We'll use this as a template to generate any new root // representations meant for this secondary. var newActiveRoot *structs.CARoot - for _, root := range c.primaryRoots.Roots { - if root.ID == c.primaryRoots.ActiveRootID && root.Active { + primaryRoots := c.getPrimaryRoots() + for _, root := range primaryRoots.Roots { + if root.ID == primaryRoots.ActiveRootID && root.Active { newActiveRoot = root break } @@ -441,7 +451,7 @@ func (c *CAManager) initializeSecondaryCA(provider ca.Provider, config *structs. // Get a signed intermediate from the primary DC if the provider // hasn't been initialized yet or if the primary's root has changed. needsNewIntermediate := false - if activeIntermediate == "" || storedRootID != c.primaryRoots.ActiveRootID { + if activeIntermediate == "" || storedRootID != primaryRoots.ActiveRootID { needsNewIntermediate = true } @@ -559,6 +569,233 @@ func (c *CAManager) persistNewRootAndConfig(provider ca.Provider, newActiveRoot return nil } +func (c *CAManager) UpdateConfiguration(args *structs.CARequest) error { + // Attempt to update the state first. + if err := c.setState(CAStateReconfig, true); err != nil { + return err + } + defer c.setState(CAStateReady, false) + + // Exit early if it's a no-op change + state := c.srv.fsm.State() + confIdx, config, err := state.CAConfig(nil) + if err != nil { + return err + } + + // Don't allow state changes. Either it needs to be empty or the same to allow + // read-modify-write loops that don't touch the State field. + if len(args.Config.State) > 0 && + !reflect.DeepEqual(args.Config.State, config.State) { + return ErrStateReadOnly + } + + // Don't allow users to change the ClusterID. + args.Config.ClusterID = config.ClusterID + if args.Config.Provider == config.Provider && reflect.DeepEqual(args.Config.Config, config.Config) { + return nil + } + + // If the provider hasn't changed, we need to load the current Provider state + // so it can decide if it needs to change resources or not based on the config + // change. + if args.Config.Provider == config.Provider { + // Note this is a shallow copy since the State method doc requires the + // provider return a map that will not be further modified and should not + // modify the one we pass to Configure. + args.Config.State = config.State + } + + // Create a new instance of the provider described by the config + // and get the current active root CA. This acts as a good validation + // of the config and makes sure the provider is functioning correctly + // before we commit any changes to Raft. + newProvider, err := c.srv.createCAProvider(args.Config) + if err != nil { + return fmt.Errorf("could not initialize provider: %v", err) + } + pCfg := ca.ProviderConfig{ + ClusterID: args.Config.ClusterID, + Datacenter: c.srv.config.Datacenter, + // This endpoint can be called in a secondary DC too so set this correctly. + IsPrimary: c.srv.config.Datacenter == c.srv.config.PrimaryDatacenter, + RawConfig: args.Config.Config, + State: args.Config.State, + } + if err := newProvider.Configure(pCfg); err != nil { + return fmt.Errorf("error configuring provider: %v", err) + } + + // Set up a defer to clean up the new provider if we exit early due to an error. + cleanupNewProvider := true + defer func() { + if cleanupNewProvider { + if err := newProvider.Cleanup(); err != nil { + c.logger.Warn("failed to clean up CA provider while handling startup failure", "provider", newProvider, "error", err) + } + } + }() + + // If this is a secondary, just check if the intermediate needs to be regenerated. + if c.srv.config.Datacenter != c.srv.config.PrimaryDatacenter { + if err := c.srv.caManager.initializeSecondaryCA(newProvider, args.Config); err != nil { + return fmt.Errorf("Error updating secondary datacenter CA config: %v", err) + } + cleanupNewProvider = false + c.logger.Info("Secondary CA provider config updated") + return nil + } + + if err := newProvider.GenerateRoot(); err != nil { + return fmt.Errorf("error generating CA root certificate: %v", err) + } + + newRootPEM, err := newProvider.ActiveRoot() + if err != nil { + return err + } + + newActiveRoot, err := parseCARoot(newRootPEM, args.Config.Provider, args.Config.ClusterID) + if err != nil { + return err + } + + // See if the provider needs to persist any state along with the config + pState, err := newProvider.State() + if err != nil { + return fmt.Errorf("error getting provider state: %v", err) + } + args.Config.State = pState + + // Compare the new provider's root CA ID to the current one. If they + // match, just update the existing provider with the new config. + // If they don't match, begin the root rotation process. + _, root, err := state.CARootActive(nil) + if err != nil { + return err + } + + // If the root didn't change, just update the config and return. + if root != nil && root.ID == newActiveRoot.ID { + args.Op = structs.CAOpSetConfig + resp, err := c.srv.raftApply(structs.ConnectCARequestType, args) + if err != nil { + return err + } + if respErr, ok := resp.(error); ok { + return respErr + } + + // If the config has been committed, update the local provider instance + cleanupNewProvider = false + c.setCAProvider(newProvider, newActiveRoot) + + c.logger.Info("CA provider config updated") + + return nil + } + + // At this point, we know the config change has trigged a root rotation, + // either by swapping the provider type or changing the provider's config + // to use a different root certificate. + + // First up, sanity check that the current provider actually supports + // cross-signing. + oldProvider, _ := c.getCAProvider() + if oldProvider == nil { + return fmt.Errorf("internal error: CA provider is nil") + } + canXSign, err := oldProvider.SupportsCrossSigning() + if err != nil { + return fmt.Errorf("CA provider error: %s", err) + } + if !canXSign && !args.Config.ForceWithoutCrossSigning { + return errors.New("The current CA Provider does not support cross-signing. " + + "You can try again with ForceWithoutCrossSigningSet but this may cause " + + "disruption - see documentation for more.") + } + if !canXSign && args.Config.ForceWithoutCrossSigning { + c.logger.Warn("current CA doesn't support cross signing but " + + "CA reconfiguration forced anyway with ForceWithoutCrossSigning") + } + + // If it's a config change that would trigger a rotation (different provider/root): + // 1. Get the root from the new provider. + // 2. Call CrossSignCA on the old provider to sign the new root with the old one to + // get a cross-signed certificate. + // 3. Take the active root for the new provider and append the intermediate from step 2 + // to its list of intermediates. + newRoot, err := connect.ParseCert(newRootPEM) + if err != nil { + return err + } + + if canXSign { + // Have the old provider cross-sign the new root + xcCert, err := oldProvider.CrossSignCA(newRoot) + if err != nil { + return err + } + + // Add the cross signed cert to the new CA's intermediates (to be attached + // to leaf certs). + newActiveRoot.IntermediateCerts = []string{xcCert} + } + + intermediate, err := newProvider.GenerateIntermediate() + if err != nil { + return err + } + if intermediate != newRootPEM { + newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediate) + } + + // Update the roots and CA config in the state store at the same time + idx, roots, err := state.CARoots(nil) + if err != nil { + return err + } + + var newRoots structs.CARoots + for _, r := range roots { + newRoot := *r + if newRoot.Active { + newRoot.Active = false + newRoot.RotatedOutAt = time.Now() + } + newRoots = append(newRoots, &newRoot) + } + newRoots = append(newRoots, newActiveRoot) + + args.Op = structs.CAOpSetRootsAndConfig + args.Index = idx + args.Config.ModifyIndex = confIdx + args.Roots = newRoots + resp, err := c.srv.raftApply(structs.ConnectCARequestType, args) + if err != nil { + return err + } + if respErr, ok := resp.(error); ok { + return respErr + } + if respOk, ok := resp.(bool); ok && !respOk { + return fmt.Errorf("could not atomically update roots and config") + } + + // If the config has been committed, update the local provider instance + // and call teardown on the old provider + cleanupNewProvider = false + c.setCAProvider(newProvider, newActiveRoot) + + if err := oldProvider.Cleanup(); err != nil { + c.logger.Warn("failed to clean up old provider", "provider", config.Provider) + } + + c.logger.Info("CA rotated to new root under provider", "provider", args.Config.Provider) + + return nil +} + // getIntermediateCAPrimary regenerates the intermediate cert in the primary datacenter. // This is only run for CAs that require an intermediary in the primary DC, such as Vault. // This function is being called while holding caProviderReconfigurationLock @@ -617,9 +854,7 @@ func (c *CAManager) getIntermediateCASigned(provider ca.Provider, newActiveRoot return nil } -// intermediateCertRenewalWatch checks the intermediate cert for -// expiration. As soon as more than half the time a cert is valid has passed, -// it will try to renew it. +// intermediateCertRenewalWatch periodically attempts to renew the intermediate cert. func (c *CAManager) intermediateCertRenewalWatch(ctx context.Context) error { isPrimary := c.srv.config.Datacenter == c.srv.config.PrimaryDatacenter @@ -629,81 +864,7 @@ func (c *CAManager) intermediateCertRenewalWatch(ctx context.Context) error { return nil case <-time.After(structs.IntermediateCertRenewInterval): retryLoopBackoffAbortOnSuccess(ctx, func() error { - if !isPrimary { - c.logger.Info("starting check for intermediate renewal") - } - // Grab the 'lock' right away so the provider/config can't be changed out while we check - // the intermediate. - if err := c.setState(CAStateRenewIntermediate); err != nil { - return err - } - defer c.setReady() - - provider, _ := c.getCAProvider() - if provider == nil { - // this happens when leadership is being revoked and this go routine will be stopped - return nil - } - // If this isn't the primary, make sure the CA has been initialized. - if !isPrimary && !c.configuredSecondaryCA() { - return fmt.Errorf("secondary CA is not yet configured.") - } - - state := c.srv.fsm.State() - _, activeRoot, err := state.CARootActive(nil) - if err != nil { - return err - } - - // If this is the primary, check if this is a provider that uses an intermediate cert. If - // it isn't, we don't need to check for a renewal. - if isPrimary { - _, config, err := state.CAConfig(nil) - if err != nil { - return err - } - - if _, ok := ca.PrimaryIntermediateProviders[config.Provider]; !ok { - return nil - } - } else { - c.logger.Info("Checking for intermediate renewal") - } - - activeIntermediate, err := provider.ActiveIntermediate() - if err != nil { - return err - } - - if activeIntermediate == "" { - return fmt.Errorf("datacenter doesn't have an active intermediate.") - } - - intermediateCert, err := connect.ParseCert(activeIntermediate) - if err != nil { - return fmt.Errorf("error parsing active intermediate cert: %v", err) - } - - if lessThanHalfTimePassed(time.Now(), intermediateCert.NotBefore.Add(ca.CertificateTimeDriftBuffer), - intermediateCert.NotAfter) { - return nil - } - - // Enough time has passed, go ahead with getting a new intermediate. - renewalFunc := c.getIntermediateCAPrimary - if !isPrimary { - renewalFunc = c.getIntermediateCASigned - } - if err := renewalFunc(provider, activeRoot); err != nil { - return err - } - - if err := c.persistNewRootAndConfig(provider, activeRoot, nil); err != nil { - return err - } - - c.setCAProvider(provider, activeRoot) - return nil + return c.RenewIntermediate(isPrimary) }, func(err error) { c.logger.Error("error renewing intermediate certs", "routine", intermediateCertRenewWatchRoutineName, @@ -714,6 +875,82 @@ func (c *CAManager) intermediateCertRenewalWatch(ctx context.Context) error { } } +// RenewIntermediate checks the intermediate cert for +// expiration. If more than half the time a cert is valid has passed, +// it will try to renew it. +func (c *CAManager) RenewIntermediate(isPrimary bool) error { + // Grab the 'lock' right away so the provider/config can't be changed out while we check + // the intermediate. + if err := c.setState(CAStateRenewIntermediate, true); err != nil { + return err + } + defer c.setState(CAStateReady, false) + + provider, _ := c.getCAProvider() + if provider == nil { + // this happens when leadership is being revoked and this go routine will be stopped + return nil + } + // If this isn't the primary, make sure the CA has been initialized. + if !isPrimary && !c.configuredSecondaryCA() { + return fmt.Errorf("secondary CA is not yet configured.") + } + + state := c.srv.fsm.State() + _, activeRoot, err := state.CARootActive(nil) + if err != nil { + return err + } + + // If this is the primary, check if this is a provider that uses an intermediate cert. If + // it isn't, we don't need to check for a renewal. + if isPrimary { + _, config, err := state.CAConfig(nil) + if err != nil { + return err + } + + if _, ok := ca.PrimaryIntermediateProviders[config.Provider]; !ok { + return nil + } + } + + activeIntermediate, err := provider.ActiveIntermediate() + if err != nil { + return err + } + + if activeIntermediate == "" { + return fmt.Errorf("datacenter doesn't have an active intermediate.") + } + + intermediateCert, err := connect.ParseCert(activeIntermediate) + if err != nil { + return fmt.Errorf("error parsing active intermediate cert: %v", err) + } + + if lessThanHalfTimePassed(time.Now(), intermediateCert.NotBefore.Add(ca.CertificateTimeDriftBuffer), + intermediateCert.NotAfter) { + return nil + } + + // Enough time has passed, go ahead with getting a new intermediate. + renewalFunc := c.getIntermediateCAPrimary + if !isPrimary { + renewalFunc = c.getIntermediateCASigned + } + if err := renewalFunc(provider, activeRoot); err != nil { + return err + } + + if err := c.persistNewRootAndConfig(provider, activeRoot, nil); err != nil { + return err + } + + c.setCAProvider(provider, activeRoot) + return nil +} + // secondaryCARootWatch maintains a blocking query to the primary datacenter's // ConnectCA.Roots endpoint to monitor when it needs to request a new signed // intermediate certificate. @@ -734,43 +971,10 @@ func (c *CAManager) secondaryCARootWatch(ctx context.Context) error { return fmt.Errorf("Error retrieving the primary datacenter's roots: %v", err) } - // Update the state first to claim the 'lock'. - if err := c.setState(CAStateReconfig); err != nil { + // Attempt to update the roots using the returned data. + if err := c.UpdateRoots(roots); err != nil { return err } - defer c.setReady() - - // Update the cached primary roots now that the lock is held. - c.primaryRoots = roots - - // Check to see if the primary has been upgraded in case we're waiting to switch to - // secondary mode. - provider, _ := c.getCAProvider() - if provider == nil { - // this happens when leadership is being revoked and this go routine will be stopped - return nil - } - if !c.configuredSecondaryCA() { - versionOk, primaryFound := ServersInDCMeetMinimumVersion(c.srv, c.srv.config.PrimaryDatacenter, minMultiDCConnectVersion) - if !primaryFound { - return fmt.Errorf("Primary datacenter is unreachable - deferring secondary CA initialization") - } - - if versionOk { - if err := c.initializeSecondaryProvider(provider, roots); err != nil { - return fmt.Errorf("Failed to initialize secondary CA provider: %v", err) - } - } - } - - // Run the secondary CA init routine to see if we need to request a new - // intermediate. - if c.configuredSecondaryCA() { - if err := c.initializeSecondaryCA(provider, nil); err != nil { - return fmt.Errorf("Failed to initialize the secondary CA: %v", err) - } - } - args.QueryOptions.MinQueryIndex = nextIndexVal(args.QueryOptions.MinQueryIndex, roots.QueryMeta.Index) return nil }, func(err error) { @@ -783,6 +987,51 @@ func (c *CAManager) secondaryCARootWatch(ctx context.Context) error { return nil } +// UpdateRoots updates the cached roots from the primary and regenerates the intermediate +// certificate if necessary. +func (c *CAManager) UpdateRoots(roots structs.IndexedCARoots) error { + // Update the state first to claim the 'lock'. + if err := c.setState(CAStateReconfig, true); err != nil { + return err + } + defer c.setState(CAStateReady, false) + + // Update the cached primary roots now that the lock is held. + if err := c.setPrimaryRoots(roots); err != nil { + return err + } + + // Check to see if the primary has been upgraded in case we're waiting to switch to + // secondary mode. + provider, _ := c.getCAProvider() + if provider == nil { + // this happens when leadership is being revoked and this go routine will be stopped + return nil + } + if !c.configuredSecondaryCA() { + versionOk, primaryFound := ServersInDCMeetMinimumVersion(c.srv, c.srv.config.PrimaryDatacenter, minMultiDCConnectVersion) + if !primaryFound { + return fmt.Errorf("Primary datacenter is unreachable - deferring secondary CA initialization") + } + + if versionOk { + if err := c.initializeSecondaryProvider(provider, roots); err != nil { + return fmt.Errorf("Failed to initialize secondary CA provider: %v", err) + } + } + } + + // Run the secondary CA init routine to see if we need to request a new + // intermediate. + if c.configuredSecondaryCA() { + if err := c.initializeSecondaryCA(provider, nil); err != nil { + return fmt.Errorf("Failed to initialize the secondary CA: %v", err) + } + } + + return nil +} + // initializeSecondaryProvider configures the given provider for a secondary, non-root datacenter. // It is being called while holding the stateLock in order to update actingSecondaryCA, which means // it must never take that lock itself or call anything that does. @@ -808,14 +1057,26 @@ func (c *CAManager) initializeSecondaryProvider(provider ca.Provider, roots stru return fmt.Errorf("error configuring provider: %v", err) } - c.actingSecondaryCA = true + return c.setSecondaryCA() +} + +// setSecondaryCA sets the flag for acting as a secondary CA to true. +func (c *CAManager) setSecondaryCA() error { + c.stateLock.Lock() + defer c.stateLock.Unlock() + + if c.state == CAStateInitializing || c.state == CAStateReconfig { + c.actingSecondaryCA = true + } else { + return fmt.Errorf("Cannot update secondary CA flag in state %q", c.state) + } return nil } // configuredSecondaryCA returns true if we have been initialized as a secondary datacenter's CA. func (c *CAManager) configuredSecondaryCA() bool { - c.stateLock.RLock() - defer c.stateLock.RUnlock() + c.stateLock.Lock() + defer c.stateLock.Unlock() return c.actingSecondaryCA } diff --git a/agent/consul/server.go b/agent/consul/server.go index 914eeb198e..0f34f25ba6 100644 --- a/agent/consul/server.go +++ b/agent/consul/server.go @@ -136,9 +136,6 @@ type Server struct { // autopilot is the Autopilot instance for this server. autopilot *autopilot.Autopilot - // autopilotWaitGroup is used to block until Autopilot shuts down. - autopilotWaitGroup sync.WaitGroup - // caManager is used to synchronize CA operations across the leader and RPC functions. caManager *CAManager