From 01bd3d118d303d223acfd1a5497447516d73dcf3 Mon Sep 17 00:00:00 2001 From: Daniel Nephin Date: Fri, 5 Nov 2021 16:37:10 -0400 Subject: [PATCH 1/4] ca: return an error when secondary fails to initialize Previously secondaryInitialize would return nil in this case, which prevented the deferred initialize from happening, and left the CA in an uninitialized state until a config update or root rotation. To fix this I extracted the common parts into the delegate implementation. However looking at this again, it seems like the handling in secondaryUpdateRoots is impossible, because that function should never be called before the secondary is initialzied. I beleive we can remove some of that logic in a follow up. --- agent/consul/leader_connect_ca.go | 39 ++++++++++++-------------- agent/consul/leader_connect_ca_test.go | 11 ++------ agent/consul/leader_connect_test.go | 6 ++-- 3 files changed, 24 insertions(+), 32 deletions(-) diff --git a/agent/consul/leader_connect_ca.go b/agent/consul/leader_connect_ca.go index 03b2acbc9d..3127218695 100644 --- a/agent/consul/leader_connect_ca.go +++ b/agent/consul/leader_connect_ca.go @@ -44,7 +44,7 @@ type caServerDelegate interface { forwardDC(method, dc string, args interface{}, reply interface{}) error generateCASignRequest(csr string) *structs.CASignRequest - checkServersProvider + ServersSupportMultiDCConnectCA() error } // CAManager is a wrapper around CA operations such as updating roots, an intermediate @@ -127,6 +127,17 @@ func (c *caDelegateWithState) generateCASignRequest(csr string) *structs.CASignR } } +func (c *caDelegateWithState) ServersSupportMultiDCConnectCA() error { + versionOk, primaryFound := ServersInDCMeetMinimumVersion(c.Server, c.Server.config.PrimaryDatacenter, minMultiDCConnectVersion) + if !primaryFound { + return fmt.Errorf("primary datacenter is unreachable") + } + if !versionOk { + return fmt.Errorf("all servers in the primary datacenter are not at the minimum version %v", minMultiDCConnectVersion) + } + return nil +} + func NewCAManager(delegate caServerDelegate, leaderRoutineManager *routine.Manager, logger hclog.Logger, config *Config) *CAManager { return &CAManager{ delegate: delegate, @@ -408,18 +419,8 @@ func (c *CAManager) InitializeCA() (reterr error) { } func (c *CAManager) secondaryInitialize(provider ca.Provider, conf *structs.CAConfiguration) error { - // If this isn't the primary DC, run the secondary DC routine if the primary has already been upgraded to at least 1.6.0 - versionOk, foundPrimary := ServersInDCMeetMinimumVersion(c.delegate, c.serverConf.PrimaryDatacenter, minMultiDCConnectVersion) - if !foundPrimary { - c.logger.Warn("primary datacenter is configured but unreachable - deferring initialization of the secondary datacenter CA") - // return nil because we will initialize the secondary CA later - return nil - } else if !versionOk { - // return nil because we will initialize the secondary CA later - c.logger.Warn("servers in the primary datacenter are not at least at the minimum version - deferring initialization of the secondary datacenter CA", - "min_version", minMultiDCConnectVersion.String(), - ) - return nil + if err := c.delegate.ServersSupportMultiDCConnectCA(); err != nil { + return fmt.Errorf("initialization will be deferred: %w", err) } // Get the root CA to see if we need to refresh our intermediate. @@ -1266,15 +1267,11 @@ func (c *CAManager) secondaryUpdateRoots(roots structs.IndexedCARoots) error { return nil } if !c.secondaryIsCAConfigured() { - versionOk, primaryFound := ServersInDCMeetMinimumVersion(c.delegate, c.serverConf.PrimaryDatacenter, minMultiDCConnectVersion) - if !primaryFound { - return fmt.Errorf("Primary datacenter is unreachable - deferring secondary CA initialization") + if err := c.delegate.ServersSupportMultiDCConnectCA(); err != nil { + return fmt.Errorf("failed to initialize while updating primary roots: %w", err) } - - if versionOk { - if err := c.secondaryInitializeProvider(provider, roots); err != nil { - return fmt.Errorf("Failed to initialize secondary CA provider: %v", err) - } + if err := c.secondaryInitializeProvider(provider, roots); err != nil { + return fmt.Errorf("Failed to initialize secondary CA provider: %v", err) } } diff --git a/agent/consul/leader_connect_ca_test.go b/agent/consul/leader_connect_ca_test.go index c69373e514..9ea55fd2d1 100644 --- a/agent/consul/leader_connect_ca_test.go +++ b/agent/consul/leader_connect_ca_test.go @@ -14,15 +14,12 @@ import ( "testing" "time" - "github.com/hashicorp/go-version" - "github.com/hashicorp/serf/serf" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/hashicorp/consul/agent/connect" ca "github.com/hashicorp/consul/agent/connect/ca" "github.com/hashicorp/consul/agent/consul/state" - "github.com/hashicorp/consul/agent/metadata" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/token" "github.com/hashicorp/consul/sdk/testutil" @@ -60,12 +57,8 @@ func (m *mockCAServerDelegate) IsLeader() bool { return true } -func (m *mockCAServerDelegate) CheckServers(datacenter string, fn func(*metadata.Server) bool) { - ver, _ := version.NewVersion("1.6.0") - fn(&metadata.Server{ - Status: serf.StatusAlive, - Build: *ver, - }) +func (m *mockCAServerDelegate) ServersSupportMultiDCConnectCA() error { + return nil } func (m *mockCAServerDelegate) ApplyCALeafRequest() (uint64, error) { diff --git a/agent/consul/leader_connect_test.go b/agent/consul/leader_connect_test.go index 0151d068c4..f4a65bf9e3 100644 --- a/agent/consul/leader_connect_test.go +++ b/agent/consul/leader_connect_test.go @@ -1094,7 +1094,6 @@ func TestLeader_SecondaryCA_UpgradeBeforePrimary(t *testing.T) { // Wait for the secondary transition to happen and then verify the secondary DC // has both roots present. - secondaryProvider, _ := getCAProviderWithLock(s2) retry.Run(t, func(r *retry.R) { state1 := s1.fsm.State() _, roots1, err := state1.CARoots(nil) @@ -1110,15 +1109,18 @@ func TestLeader_SecondaryCA_UpgradeBeforePrimary(t *testing.T) { require.Equal(r, roots1[0].ID, roots2[0].ID) require.Equal(r, roots1[0].RootCert, roots2[0].RootCert) + secondaryProvider, _ := getCAProviderWithLock(s2) inter, err := secondaryProvider.ActiveIntermediate() require.NoError(r, err) require.NotEmpty(r, inter, "should have valid intermediate") }) - _, caRoot := getCAProviderWithLock(s1) + secondaryProvider, _ := getCAProviderWithLock(s2) intermediatePEM, err := secondaryProvider.ActiveIntermediate() require.NoError(t, err) + _, caRoot := getCAProviderWithLock(s1) + // Have dc2 sign a leaf cert and make sure the chain is correct. spiffeService := &connect.SpiffeIDService{ Host: "node1", From d9110136f239b6d89471df028d0ce1093f398ab9 Mon Sep 17 00:00:00 2001 From: Daniel Nephin Date: Fri, 5 Nov 2021 18:08:44 -0400 Subject: [PATCH 2/4] ca: Only initialize clusterID in the primary The secondary must get the clusterID from the primary --- agent/consul/leader_connect_ca.go | 3 ++- agent/consul/state/connect_ca.go | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/agent/consul/leader_connect_ca.go b/agent/consul/leader_connect_ca.go index 3127218695..413f0d20ec 100644 --- a/agent/consul/leader_connect_ca.go +++ b/agent/consul/leader_connect_ca.go @@ -213,7 +213,8 @@ func (c *CAManager) initializeCAConfig() (*structs.CAConfiguration, error) { } if config == nil { config = c.serverConf.CAConfig - if config.ClusterID == "" { + + if c.serverConf.Datacenter == c.serverConf.PrimaryDatacenter && config.ClusterID == "" { id, err := uuid.GenerateUUID() if err != nil { return nil, err diff --git a/agent/consul/state/connect_ca.go b/agent/consul/state/connect_ca.go index ab54ae69a2..0b35d03934 100644 --- a/agent/consul/state/connect_ca.go +++ b/agent/consul/state/connect_ca.go @@ -180,8 +180,6 @@ func (s *Store) caSetConfigTxn(idx uint64, tx WriteTxn, config *structs.CAConfig if prev != nil { existing := prev.(*structs.CAConfiguration) config.CreateIndex = existing.CreateIndex - // Allow the ClusterID to change if it's provided by an internal operation, such - // as a primary datacenter being switched to secondary mode. if config.ClusterID == "" { config.ClusterID = existing.ClusterID } From cc5a7ed36c893622f3735977bdc3feb56335d70a Mon Sep 17 00:00:00 2001 From: freddygv Date: Mon, 8 Nov 2021 16:51:49 -0700 Subject: [PATCH 3/4] Avoid returning empty roots with uninitialized CA Currently getCARoots could return an empty object with an empty trust domain before the CA is initialized. This commit returns an error while there is no CA config or no trust domain. There could be a CA config and no trust domain because the CA config can be created in InitializeCA before initialization succeeds. --- agent/consul/server_connect.go | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/agent/consul/server_connect.go b/agent/consul/server_connect.go index 09453a5ee9..f424437996 100644 --- a/agent/consul/server_connect.go +++ b/agent/consul/server_connect.go @@ -16,19 +16,23 @@ func (s *Server) getCARoots(ws memdb.WatchSet, state *state.Store) (*structs.Ind if err != nil { return nil, err } + if config == nil { + return nil, fmt.Errorf("CA has not finished initializing") + } indexedRoots := &structs.IndexedCARoots{} - if config != nil { - // Build TrustDomain based on the ClusterID stored. - signingID := connect.SpiffeIDSigningForCluster(config) - if signingID == nil { - // If CA is bootstrapped at all then this should never happen but be - // defensive. - return nil, fmt.Errorf("no cluster trust domain setup") - } + // Build TrustDomain based on the ClusterID stored. + signingID := connect.SpiffeIDSigningForCluster(config) + if signingID == nil { + // If CA is bootstrapped at all then this should never happen but be + // defensive. + return nil, fmt.Errorf("no cluster trust domain setup") + } - indexedRoots.TrustDomain = signingID.Host() + indexedRoots.TrustDomain = signingID.Host() + if indexedRoots.TrustDomain == "" { + return nil, fmt.Errorf("CA has not finished initializing") } indexedRoots.Index, indexedRoots.Roots = index, roots From 44748bf234166aa1daad020f3144317e188493ca Mon Sep 17 00:00:00 2001 From: freddygv Date: Mon, 8 Nov 2021 17:14:27 -0700 Subject: [PATCH 4/4] Add changelog entry --- .changelog/11514.txt | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changelog/11514.txt diff --git a/.changelog/11514.txt b/.changelog/11514.txt new file mode 100644 index 0000000000..d869f46ab4 --- /dev/null +++ b/.changelog/11514.txt @@ -0,0 +1,6 @@ +```release-note:improvement +connect/ca: Return an error when querying roots from uninitialized CA. +``` +```release-note:bug +connect/ca: Allow secondary initialization to resume after being deferred due to unreachable or incompatible primary DC servers. +``` \ No newline at end of file