ca: fix flakes in RenewIntermediate tests

I suspect one problem was that we set structs.IntermediateCertRenewInterval to 1ms, which meant
that in some cases the intermediate could renew before we stored the original value.

Another problem was that the 'wait for intermediate' loop was calling the provider.ActiveIntermediate,
but the comparison needs to use the RPC endpoint to accurately represent a user request. So
changing the 'wait for' to use the state store ensures we don't race.

Also moves the patching into a separate function.

Removes the addition of ca.CertificateTimeDriftBuffer as part of calculating halfTime. This was added
in a previous commit to attempt to fix the flake, but it did not appear to fix the problem. Adding the
time here was making the tests fail when using the shared patch
function. It's not clear to me why, but there's no reason we should be
including this time in the halfTime calculation.
This commit is contained in:
Daniel Nephin 2021-12-08 12:21:52 -05:00
parent 2e4e8bd791
commit 1dec6bb815
3 changed files with 38 additions and 57 deletions

View File

@ -347,7 +347,7 @@ func (c *CAManager) startPostInitializeRoutines(ctx context.Context) {
c.leaderRoutineManager.Start(ctx, secondaryCARootWatchRoutineName, c.secondaryCARootWatch) c.leaderRoutineManager.Start(ctx, secondaryCARootWatchRoutineName, c.secondaryCARootWatch)
} }
c.leaderRoutineManager.Start(ctx, intermediateCertRenewWatchRoutineName, c.intermediateCertRenewalWatch) c.leaderRoutineManager.Start(ctx, intermediateCertRenewWatchRoutineName, c.runRenewIntermediate)
} }
func (c *CAManager) backgroundCAInitialization(ctx context.Context) error { func (c *CAManager) backgroundCAInitialization(ctx context.Context) error {
@ -1111,8 +1111,8 @@ func setLeafSigningCert(caRoot *structs.CARoot, pem string) error {
return nil return nil
} }
// intermediateCertRenewalWatch periodically attempts to renew the intermediate cert. // runRenewIntermediate periodically attempts to renew the intermediate cert.
func (c *CAManager) intermediateCertRenewalWatch(ctx context.Context) error { func (c *CAManager) runRenewIntermediate(ctx context.Context) error {
isPrimary := c.serverConf.Datacenter == c.serverConf.PrimaryDatacenter isPrimary := c.serverConf.Datacenter == c.serverConf.PrimaryDatacenter
for { for {
@ -1180,8 +1180,7 @@ func (c *CAManager) RenewIntermediate(ctx context.Context, isPrimary bool) error
return fmt.Errorf("error parsing active intermediate cert: %v", err) return fmt.Errorf("error parsing active intermediate cert: %v", err)
} }
if lessThanHalfTimePassed(c.timeNow(), intermediateCert.NotBefore.Add(ca.CertificateTimeDriftBuffer), if lessThanHalfTimePassed(c.timeNow(), intermediateCert.NotBefore, intermediateCert.NotAfter) {
intermediateCert.NotAfter) {
return nil return nil
} }

View File

@ -349,15 +349,7 @@ func TestCAManager_Initialize(t *testing.T) {
func TestCAManager_UpdateConfigWhileRenewIntermediate(t *testing.T) { func TestCAManager_UpdateConfigWhileRenewIntermediate(t *testing.T) {
// No parallel execution because we change globals // No parallel execution because we change globals
// Set the interval and drift buffer low for renewing the cert. patchIntermediateCertRenewInterval(t)
origInterval := structs.IntermediateCertRenewInterval
origDriftBuffer := ca.CertificateTimeDriftBuffer
defer func() {
structs.IntermediateCertRenewInterval = origInterval
ca.CertificateTimeDriftBuffer = origDriftBuffer
}()
structs.IntermediateCertRenewInterval = time.Millisecond
ca.CertificateTimeDriftBuffer = 0
conf := DefaultConfig() conf := DefaultConfig()
conf.ConnectEnabled = true conf.ConnectEnabled = true

View File

@ -338,19 +338,7 @@ func TestCAManager_RenewIntermediate_Vault_Primary(t *testing.T) {
ca.SkipIfVaultNotPresent(t) ca.SkipIfVaultNotPresent(t)
// no parallel execution because we change globals // no parallel execution because we change globals
origInterval := structs.IntermediateCertRenewInterval patchIntermediateCertRenewInterval(t)
origMinTTL := structs.MinLeafCertTTL
origDriftBuffer := ca.CertificateTimeDriftBuffer
defer func() {
structs.IntermediateCertRenewInterval = origInterval
structs.MinLeafCertTTL = origMinTTL
ca.CertificateTimeDriftBuffer = origDriftBuffer
}()
// Vault backdates certs by 30s by default.
ca.CertificateTimeDriftBuffer = 30 * time.Second
structs.IntermediateCertRenewInterval = time.Millisecond
structs.MinLeafCertTTL = time.Second
require := require.New(t) require := require.New(t)
testVault := ca.NewTestVaultServer(t) testVault := ca.NewTestVaultServer(t)
@ -380,7 +368,7 @@ func TestCAManager_RenewIntermediate_Vault_Primary(t *testing.T) {
s1.leaderRoutineManager.Wait() s1.leaderRoutineManager.Wait()
}() }()
testrpc.WaitForLeader(t, s1.RPC, "dc1") testrpc.WaitForActiveCARoot(t, s1.RPC, "dc1", nil)
// Capture the current root. // Capture the current root.
var originalRoot *structs.CARoot var originalRoot *structs.CARoot
@ -390,6 +378,7 @@ func TestCAManager_RenewIntermediate_Vault_Primary(t *testing.T) {
require.Len(rootList.Roots, 1) require.Len(rootList.Roots, 1)
originalRoot = activeRoot originalRoot = activeRoot
} }
t.Log("original SigningKeyID", originalRoot.SigningKeyID)
// Get the original intermediate. // Get the original intermediate.
waitForActiveCARoot(t, s1, originalRoot) waitForActiveCARoot(t, s1, originalRoot)
@ -403,15 +392,16 @@ func TestCAManager_RenewIntermediate_Vault_Primary(t *testing.T) {
store := s1.caManager.delegate.State() store := s1.caManager.delegate.State()
_, activeRoot, err := store.CARootActive(nil) _, activeRoot, err := store.CARootActive(nil)
require.NoError(err) require.NoError(err)
require.Equal(intermediatePEM, s1.caManager.getLeafSigningCertFromRoot(activeRoot))
require.Equal(connect.HexString(intermediateCert.SubjectKeyId), activeRoot.SigningKeyID) require.Equal(connect.HexString(intermediateCert.SubjectKeyId), activeRoot.SigningKeyID)
require.Equal(intermediatePEM, s1.caManager.getLeafSigningCertFromRoot(activeRoot))
// Wait for dc1's intermediate to be refreshed. // Wait for dc1's intermediate to be refreshed.
// It is possible that test fails when the blocking query doesn't return.
retry.Run(t, func(r *retry.R) { retry.Run(t, func(r *retry.R) {
provider, _ = getCAProviderWithLock(s1) store := s1.caManager.delegate.State()
newIntermediatePEM, err := provider.ActiveIntermediate() _, storedRoot, err := store.CARootActive(nil)
r.Check(err) r.Check(err)
newIntermediatePEM := s1.caManager.getLeafSigningCertFromRoot(storedRoot)
if newIntermediatePEM == intermediatePEM { if newIntermediatePEM == intermediatePEM {
r.Fatal("not a renewed intermediate") r.Fatal("not a renewed intermediate")
} }
@ -427,8 +417,8 @@ func TestCAManager_RenewIntermediate_Vault_Primary(t *testing.T) {
require.Len(roots.Roots, 1) require.Len(roots.Roots, 1)
activeRoot = roots.Active() activeRoot = roots.Active()
require.Equal(intermediatePEM, s1.caManager.getLeafSigningCertFromRoot(activeRoot))
require.Equal(connect.HexString(intermediateCert.SubjectKeyId), activeRoot.SigningKeyID) require.Equal(connect.HexString(intermediateCert.SubjectKeyId), activeRoot.SigningKeyID)
require.Equal(intermediatePEM, s1.caManager.getLeafSigningCertFromRoot(activeRoot))
// Get the root from dc1 and validate a chain of: // Get the root from dc1 and validate a chain of:
// dc1 leaf -> dc1 intermediate -> dc1 root // dc1 leaf -> dc1 intermediate -> dc1 root
@ -450,21 +440,26 @@ func TestCAManager_RenewIntermediate_Vault_Primary(t *testing.T) {
verifyLeafCert(t, caRoot, cert.CertPEM) verifyLeafCert(t, caRoot, cert.CertPEM)
} }
func patchIntermediateCertRenewInterval(t *testing.T) {
origInterval := structs.IntermediateCertRenewInterval
origMinTTL := structs.MinLeafCertTTL
structs.IntermediateCertRenewInterval = 200 * time.Millisecond
structs.MinLeafCertTTL = time.Second
t.Cleanup(func() {
structs.IntermediateCertRenewInterval = origInterval
structs.MinLeafCertTTL = origMinTTL
})
}
func TestCAManager_RenewIntermediate_Secondary(t *testing.T) { func TestCAManager_RenewIntermediate_Secondary(t *testing.T) {
if testing.Short() { if testing.Short() {
t.Skip("too slow for testing.Short") t.Skip("too slow for testing.Short")
} }
// no parallel execution because we change globals // no parallel execution because we change globals
origInterval := structs.IntermediateCertRenewInterval patchIntermediateCertRenewInterval(t)
origMinTTL := structs.MinLeafCertTTL
defer func() {
structs.IntermediateCertRenewInterval = origInterval
structs.MinLeafCertTTL = origMinTTL
}()
structs.IntermediateCertRenewInterval = time.Millisecond
structs.MinLeafCertTTL = time.Second
require := require.New(t) require := require.New(t)
dir1, s1 := testServerWithConfig(t, func(c *Config) { dir1, s1 := testServerWithConfig(t, func(c *Config) {
@ -517,8 +512,6 @@ func TestCAManager_RenewIntermediate_Secondary(t *testing.T) {
require.NoError(err) require.NoError(err)
intermediateCert, err := connect.ParseCert(intermediatePEM) intermediateCert, err := connect.ParseCert(intermediatePEM)
require.NoError(err) require.NoError(err)
currentCertSerialNumber := intermediateCert.SerialNumber
currentCertAuthorityKeyId := intermediateCert.AuthorityKeyId
// Capture the current root // Capture the current root
var originalRoot *structs.CARoot var originalRoot *structs.CARoot
@ -528,6 +521,7 @@ func TestCAManager_RenewIntermediate_Secondary(t *testing.T) {
require.Len(rootList.Roots, 1) require.Len(rootList.Roots, 1)
originalRoot = activeRoot originalRoot = activeRoot
} }
t.Log("original SigningKeyID", originalRoot.SigningKeyID)
waitForActiveCARoot(t, s1, originalRoot) waitForActiveCARoot(t, s1, originalRoot)
waitForActiveCARoot(t, s2, originalRoot) waitForActiveCARoot(t, s2, originalRoot)
@ -539,22 +533,18 @@ func TestCAManager_RenewIntermediate_Secondary(t *testing.T) {
require.Equal(connect.HexString(intermediateCert.SubjectKeyId), activeRoot.SigningKeyID) require.Equal(connect.HexString(intermediateCert.SubjectKeyId), activeRoot.SigningKeyID)
// Wait for dc2's intermediate to be refreshed. // Wait for dc2's intermediate to be refreshed.
// It is possible that test fails when the blocking query doesn't return.
// When https://github.com/hashicorp/consul/pull/3777 is merged
// however, defaultQueryTime will be configurable and we con lower it
// so that it returns for sure.
retry.Run(t, func(r *retry.R) { retry.Run(t, func(r *retry.R) {
secondaryProvider, _ = getCAProviderWithLock(s2) store := s2.caManager.delegate.State()
intermediatePEM, err = secondaryProvider.ActiveIntermediate() _, storedRoot, err := store.CARootActive(nil)
r.Check(err) r.Check(err)
cert, err := connect.ParseCert(intermediatePEM)
r.Check(err) newIntermediatePEM := s2.caManager.getLeafSigningCertFromRoot(storedRoot)
if cert.SerialNumber.Cmp(currentCertSerialNumber) == 0 || !reflect.DeepEqual(cert.AuthorityKeyId, currentCertAuthorityKeyId) { if newIntermediatePEM == intermediatePEM {
currentCertSerialNumber = cert.SerialNumber
currentCertAuthorityKeyId = cert.AuthorityKeyId
r.Fatal("not a renewed intermediate") r.Fatal("not a renewed intermediate")
} }
intermediateCert = cert intermediateCert, err = connect.ParseCert(newIntermediatePEM)
r.Check(err)
intermediatePEM = newIntermediatePEM
}) })
codec := rpcClient(t, s2) codec := rpcClient(t, s2)
@ -565,8 +555,8 @@ func TestCAManager_RenewIntermediate_Secondary(t *testing.T) {
_, activeRoot, err = store.CARootActive(nil) _, activeRoot, err = store.CARootActive(nil)
require.NoError(err) require.NoError(err)
require.Equal(intermediatePEM, s2.caManager.getLeafSigningCertFromRoot(activeRoot))
require.Equal(connect.HexString(intermediateCert.SubjectKeyId), activeRoot.SigningKeyID) require.Equal(connect.HexString(intermediateCert.SubjectKeyId), activeRoot.SigningKeyID)
require.Equal(intermediatePEM, s2.caManager.getLeafSigningCertFromRoot(activeRoot))
// Have dc2 sign a leaf cert and make sure the chain is correct. // Have dc2 sign a leaf cert and make sure the chain is correct.
spiffeService := &connect.SpiffeIDService{ spiffeService := &connect.SpiffeIDService{