From 804eb170948dd19920aca34e2fae7116055af1cf Mon Sep 17 00:00:00 2001 From: Hans Hasselberg Date: Fri, 17 Jan 2020 23:27:13 +0100 Subject: [PATCH] connect: check if intermediate cert needs to be renewed. (#6835) Currently when using the built-in CA provider for Connect, root certificates are valid for 10 years, however secondary DCs get intermediates that are valid for only 1 year. There is no mechanism currently short of rotating the root in the primary that will cause the secondary DCs to renew their intermediates. This PR adds a check that renews the cert if it is half way through its validity period. In order to be able to test these changes, a new configuration option was added: IntermediateCertTTL which is set extremely low in the tests. --- agent/config/builder.go | 7 +- agent/config/runtime_test.go | 11 +- agent/connect/ca/provider_consul.go | 2 +- agent/connect/ca/provider_consul_test.go | 3 +- agent/connect/testing_ca.go | 7 +- agent/connect_ca_endpoint_test.go | 23 +- agent/consul/config.go | 5 +- agent/consul/fsm/commands_oss_test.go | 10 +- agent/consul/leader_connect.go | 281 +++++++++++++----- agent/consul/leader_connect_test.go | 149 +++++++++- agent/consul/server.go | 23 +- agent/consul/server_test.go | 9 +- agent/structs/connect_ca.go | 7 +- api/connect_ca.go | 7 +- api/connect_ca_test.go | 7 +- command/connect/ca/set/connect_ca_set_test.go | 1 + .../ca/set/test-fixtures/ca_config.json | 5 +- sdk/freeport/freeport.go | 19 +- sdk/freeport/freeport_test.go | 4 +- sdk/freeport/systemlimit.go | 11 + sdk/freeport/systemlimit_windows.go | 7 + testrpc/wait.go | 2 +- website/source/api/connect/ca.html.md | 6 +- website/source/docs/connect/ca.html.md | 3 +- website/source/docs/connect/ca/consul.html.md | 9 +- 25 files changed, 480 insertions(+), 138 deletions(-) create mode 100644 sdk/freeport/systemlimit.go create mode 100644 sdk/freeport/systemlimit_windows.go diff --git a/agent/config/builder.go b/agent/config/builder.go index 13ec766986..917f27fca3 100644 --- a/agent/config/builder.go +++ b/agent/config/builder.go @@ -621,9 +621,10 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) { if connectCAConfig != nil { lib.TranslateKeys(connectCAConfig, map[string]string{ // Consul CA config - "private_key": "PrivateKey", - "root_cert": "RootCert", - "rotation_period": "RotationPeriod", + "private_key": "PrivateKey", + "root_cert": "RootCert", + "rotation_period": "RotationPeriod", + "intermediate_cert_ttl": "IntermediateCertTTL", // Vault CA config "address": "Address", diff --git a/agent/config/runtime_test.go b/agent/config/runtime_test.go index 8fec5ded56..c53e16b186 100644 --- a/agent/config/runtime_test.go +++ b/agent/config/runtime_test.go @@ -3766,6 +3766,7 @@ func TestFullConfig(t *testing.T) { "ca_provider": "consul", "ca_config": { "rotation_period": "90h", + "intermediate_cert_ttl": "8760h", "leaf_cert_ttl": "1h", "csr_max_per_second": 100, "csr_max_concurrent": 2 @@ -4367,6 +4368,7 @@ func TestFullConfig(t *testing.T) { ca_provider = "consul" ca_config { rotation_period = "90h" + intermediate_cert_ttl = "8760h" leaf_cert_ttl = "1h" # hack float since json parses numbers as float and we have to # assert against the same thing @@ -5079,10 +5081,11 @@ func TestFullConfig(t *testing.T) { ExposeMaxPort: 2222, ConnectCAProvider: "consul", ConnectCAConfig: map[string]interface{}{ - "RotationPeriod": "90h", - "LeafCertTTL": "1h", - "CSRMaxPerSecond": float64(100), - "CSRMaxConcurrent": float64(2), + "RotationPeriod": "90h", + "IntermediateCertTTL": "8760h", + "LeafCertTTL": "1h", + "CSRMaxPerSecond": float64(100), + "CSRMaxConcurrent": float64(2), }, DNSAddrs: []net.Addr{tcpAddr("93.95.95.81:7001"), udpAddr("93.95.95.81:7001")}, DNSARecordLimit: 29907, diff --git a/agent/connect/ca/provider_consul.go b/agent/connect/ca/provider_consul.go index d95ff6819d..782b7751f7 100644 --- a/agent/connect/ca/provider_consul.go +++ b/agent/connect/ca/provider_consul.go @@ -480,7 +480,7 @@ func (c *ConsulProvider) SignIntermediate(csr *x509.CertificateRequest) (string, x509.KeyUsageDigitalSignature, IsCA: true, MaxPathLenZero: true, - NotAfter: effectiveNow.AddDate(1, 0, 0), + NotAfter: effectiveNow.Add(c.config.IntermediateCertTTL), NotBefore: effectiveNow, SubjectKeyId: subjectKeyID, } diff --git a/agent/connect/ca/provider_consul_test.go b/agent/connect/ca/provider_consul_test.go index 76e0dd5c2a..6c757b7f7c 100644 --- a/agent/connect/ca/provider_consul_test.go +++ b/agent/connect/ca/provider_consul_test.go @@ -68,7 +68,8 @@ func testConsulCAConfig() *structs.CAConfiguration { Provider: "consul", Config: map[string]interface{}{ // Tests duration parsing after msgpack type mangling during raft apply. - "LeafCertTTL": []uint8("72h"), + "LeafCertTTL": []uint8("72h"), + "IntermediateCertTTL": []uint8("72h"), }, } } diff --git a/agent/connect/testing_ca.go b/agent/connect/testing_ca.go index 50dc3334c6..36d9909e7d 100644 --- a/agent/connect/testing_ca.go +++ b/agent/connect/testing_ca.go @@ -361,9 +361,10 @@ func testCAConfigSet(t testing.T, a TestAgentRPC, newConfig := &structs.CAConfiguration{ Provider: "consul", Config: map[string]interface{}{ - "PrivateKey": ca.SigningKey, - "RootCert": ca.RootCert, - "RotationPeriod": 180 * 24 * time.Hour, + "PrivateKey": ca.SigningKey, + "RootCert": ca.RootCert, + "RotationPeriod": 180 * 24 * time.Hour, + "IntermediateCertTTL": 72 * time.Hour, }, } args := &structs.CARequest{ diff --git a/agent/connect_ca_endpoint_test.go b/agent/connect_ca_endpoint_test.go index 09398ef87e..b5fb061421 100644 --- a/agent/connect_ca_endpoint_test.go +++ b/agent/connect_ca_endpoint_test.go @@ -89,6 +89,28 @@ func TestConnectCAConfig(t *testing.T) { }, }, }, + { + name: "basic with IntermediateCertTTL", + body: ` + { + "Provider": "consul", + "Config": { + "LeafCertTTL": "72h", + "RotationPeriod": "1h", + "IntermediateCertTTL": "2h" + } + }`, + wantErr: false, + wantCfg: structs.CAConfiguration{ + Provider: "consul", + ClusterID: connect.TestClusterID, + Config: map[string]interface{}{ + "LeafCertTTL": "72h", + "RotationPeriod": "1h", + "IntermediateCertTTL": "2h", + }, + }, + }, { name: "force without cross sign CamelCase", body: ` @@ -211,7 +233,6 @@ func TestConnectCAConfig(t *testing.T) { } require.NoError(err) } - // The config should be updated now. { req, _ := http.NewRequest("GET", "/v1/connect/ca/configuration", nil) diff --git a/agent/consul/config.go b/agent/consul/config.go index 6be6d3c66a..89c4bc5b85 100644 --- a/agent/consul/config.go +++ b/agent/consul/config.go @@ -549,8 +549,9 @@ func DefaultConfig() *Config { CAConfig: &structs.CAConfiguration{ Provider: "consul", Config: map[string]interface{}{ - "RotationPeriod": "2160h", - "LeafCertTTL": "72h", + "RotationPeriod": "2160h", + "LeafCertTTL": "72h", + "IntermediateCertTTL": "8760h", // 365 * 24h }, }, diff --git a/agent/consul/fsm/commands_oss_test.go b/agent/consul/fsm/commands_oss_test.go index c6ee4a06e2..b48e7f71f4 100644 --- a/agent/consul/fsm/commands_oss_test.go +++ b/agent/consul/fsm/commands_oss_test.go @@ -1280,9 +1280,10 @@ func TestFSM_CAConfig(t *testing.T) { Config: &structs.CAConfiguration{ Provider: "consul", Config: map[string]interface{}{ - "PrivateKey": "asdf", - "RootCert": "qwer", - "RotationPeriod": 90 * 24 * time.Hour, + "PrivateKey": "asdf", + "RootCert": "qwer", + "RotationPeriod": 90 * 24 * time.Hour, + "IntermediateCertTTL": 365 * 24 * time.Hour, }, }, } @@ -1314,6 +1315,9 @@ func TestFSM_CAConfig(t *testing.T) { if got, want := conf.RotationPeriod, 90*24*time.Hour; got != want { t.Fatalf("got %v, want %v", got, want) } + if got, want := conf.IntermediateCertTTL, 365*24*time.Hour; got != want { + t.Fatalf("got %v, want %v", got, want) + } // Now use CAS and provide an old index req.Config.Provider = "static" diff --git a/agent/consul/leader_connect.go b/agent/consul/leader_connect.go index 8455885923..79bc20f916 100644 --- a/agent/consul/leader_connect.go +++ b/agent/consul/leader_connect.go @@ -34,6 +34,10 @@ var ( // maxRetryBackoff is the maximum number of seconds to wait between failed blocking // queries when backing off. maxRetryBackoff = 256 + + // intermediateCertRenewInterval is the interval at which the expiration + // of the intermediate cert is checked and renewed if necessary. + intermediateCertRenewInterval = time.Hour ) // initializeCAConfig is used to initialize the CA config if necessary @@ -119,6 +123,8 @@ func (s *Server) createCAProvider(conf *structs.CAConfiguration) (ca.Provider, e return p, nil } +// getCAProvider is being called while holding caProviderReconfigurationLock +// which means it must never take that lock itself or call anything that does. func (s *Server) getCAProvider() (ca.Provider, *structs.CARoot) { retries := 0 var result ca.Provider @@ -144,6 +150,8 @@ func (s *Server) getCAProvider() (ca.Provider, *structs.CARoot) { return result, resultRoot } +// setCAProvider is being called while holding caProviderReconfigurationLock +// which means it must never take that lock itself or call anything that does. func (s *Server) setCAProvider(newProvider ca.Provider, root *structs.CARoot) { s.caProviderLock.Lock() defer s.caProviderLock.Unlock() @@ -169,6 +177,9 @@ func (s *Server) initializeCA() error { if err != nil { return err } + + s.caProviderReconfigurationLock.Lock() + defer s.caProviderReconfigurationLock.Unlock() s.setCAProvider(provider, nil) // If this isn't the primary DC, run the secondary DC routine if the primary has already been upgraded to at least 1.6.0 @@ -209,6 +220,8 @@ func (s *Server) initializeCA() error { } // initializeRootCA runs the initialization logic for a root CA. +// It is being called while holding caProviderReconfigurationLock +// which means it must never take that lock itself or call anything that does. func (s *Server) initializeRootCA(provider ca.Provider, conf *structs.CAConfiguration) error { pCfg := ca.ProviderConfig{ ClusterID: conf.ClusterID, @@ -315,6 +328,8 @@ func (s *Server) initializeRootCA(provider ca.Provider, conf *structs.CAConfigur // initializeSecondaryCA runs the routine for generating an intermediate CA CSR and getting // it signed by the primary DC if the root CA of the primary DC has changed since the last // intermediate. +// It is being called while holding caProviderReconfigurationLock +// which means it must never take that lock itself or call anything that does. func (s *Server) initializeSecondaryCA(provider ca.Provider, primaryRoots structs.IndexedCARoots) error { activeIntermediate, err := provider.ActiveIntermediate() if err != nil { @@ -344,7 +359,7 @@ func (s *Server) initializeSecondaryCA(provider ca.Provider, primaryRoots struct storedRootID, err = connect.CalculateCertFingerprint(storedRoot) if err != nil { - return fmt.Errorf("error parsing root fingerprint: %v, %#v", err, primaryRoots) + return fmt.Errorf("error parsing root fingerprint: %v, %#v", err, storedRoot) } intermediateCert, err := connect.ParseCert(activeIntermediate) @@ -394,34 +409,10 @@ func (s *Server) initializeSecondaryCA(provider ca.Provider, primaryRoots struct newIntermediate := false if needsNewIntermediate { - csr, err := provider.GenerateIntermediateCSR() - if err != nil { + if err := s.getIntermediateCASigned(provider, newActiveRoot); err != nil { return err } - - var intermediatePEM string - if err := s.forwardDC("ConnectCA.SignIntermediate", s.config.PrimaryDatacenter, s.generateCASignRequest(csr), &intermediatePEM); err != nil { - // this is a failure in the primary and shouldn't be capable of erroring out our establishing leadership - s.logger.Printf("[WARN] connect: Primary datacenter refused to sign our intermediate CA certificate: %v", err) - return nil - } - - if err := provider.SetIntermediate(intermediatePEM, newActiveRoot.RootCert); err != nil { - return fmt.Errorf("Failed to set the intermediate certificate with the CA provider: %v", err) - } - - intermediateCert, err := connect.ParseCert(intermediatePEM) - if err != nil { - return fmt.Errorf("error parsing intermediate cert: %v", err) - } - - // Append the new intermediate to our local active root entry. This is - // where the root representations start to diverge. - newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediatePEM) - newActiveRoot.SigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId) newIntermediate = true - - s.logger.Printf("[INFO] connect: received new intermediate certificate from primary datacenter") } else { // Discard the primary's representation since our local one is // sufficiently up to date. @@ -435,67 +426,110 @@ func (s *Server) initializeSecondaryCA(provider ca.Provider, primaryRoots struct return err } if activeRoot == nil || activeRoot.ID != newActiveRoot.ID || newIntermediate { - idx, oldRoots, err := state.CARoots(nil) - if err != nil { + if err := s.persistNewRoot(provider, newActiveRoot); err != nil { return err } - - _, config, err := state.CAConfig(nil) - if err != nil { - return err - } - if config == nil { - return fmt.Errorf("local CA not initialized yet") - } - newConf := *config - newConf.ClusterID = newActiveRoot.ExternalTrustDomain - - // Persist any state the provider needs us to - newConf.State, err = provider.State() - if err != nil { - return fmt.Errorf("error getting provider state: %v", err) - } - - // Copy the root list and append the new active root, updating the old root - // with the time it was rotated out. - var newRoots structs.CARoots - for _, r := range oldRoots { - newRoot := *r - if newRoot.Active { - newRoot.Active = false - newRoot.RotatedOutAt = time.Now() - } - if newRoot.ExternalTrustDomain == "" { - newRoot.ExternalTrustDomain = config.ClusterID - } - newRoots = append(newRoots, &newRoot) - } - newRoots = append(newRoots, newActiveRoot) - - args := &structs.CARequest{ - Op: structs.CAOpSetRootsAndConfig, - Index: idx, - Roots: newRoots, - Config: &newConf, - } - resp, err := s.raftApply(structs.ConnectCARequestType, &args) - if err != nil { - return err - } - if respErr, ok := resp.(error); ok { - return respErr - } - if respOk, ok := resp.(bool); ok && !respOk { - return fmt.Errorf("could not atomically update roots and config") - } - - s.logger.Printf("[INFO] connect: updated root certificates from primary datacenter") } s.setCAProvider(provider, newActiveRoot) return nil } +// persistNewRoot is being called while holding caProviderReconfigurationLock +// which means it must never take that lock itself or call anything that does. +func (s *Server) persistNewRoot(provider ca.Provider, newActiveRoot *structs.CARoot) error { + state := s.fsm.State() + idx, oldRoots, err := state.CARoots(nil) + if err != nil { + return err + } + + _, config, err := state.CAConfig(nil) + if err != nil { + return err + } + if config == nil { + return fmt.Errorf("local CA not initialized yet") + } + newConf := *config + newConf.ClusterID = newActiveRoot.ExternalTrustDomain + + // Persist any state the provider needs us to + newConf.State, err = provider.State() + if err != nil { + return fmt.Errorf("error getting provider state: %v", err) + } + + // Copy the root list and append the new active root, updating the old root + // with the time it was rotated out. + var newRoots structs.CARoots + for _, r := range oldRoots { + newRoot := *r + if newRoot.Active { + newRoot.Active = false + newRoot.RotatedOutAt = time.Now() + } + if newRoot.ExternalTrustDomain == "" { + newRoot.ExternalTrustDomain = config.ClusterID + } + newRoots = append(newRoots, &newRoot) + } + newRoots = append(newRoots, newActiveRoot) + + args := &structs.CARequest{ + Op: structs.CAOpSetRootsAndConfig, + Index: idx, + Roots: newRoots, + Config: &newConf, + } + resp, err := s.raftApply(structs.ConnectCARequestType, &args) + if err != nil { + return err + } + if respErr, ok := resp.(error); ok { + return respErr + } + if respOk, ok := resp.(bool); ok && !respOk { + return fmt.Errorf("could not atomically update roots and config") + } + + s.logger.Printf("[INFO] connect: updated root certificates from primary datacenter") + return nil +} + +// getIntermediateCASigned is being called while holding caProviderReconfigurationLock +// which means it must never take that lock itself or call anything that does. +func (s *Server) getIntermediateCASigned(provider ca.Provider, newActiveRoot *structs.CARoot) error { + csr, err := provider.GenerateIntermediateCSR() + if err != nil { + return err + } + + var intermediatePEM string + if err := s.forwardDC("ConnectCA.SignIntermediate", s.config.PrimaryDatacenter, s.generateCASignRequest(csr), &intermediatePEM); err != nil { + // this is a failure in the primary and shouldn't be capable of erroring out our establishing leadership + s.logger.Printf("[WARN] connect: Primary datacenter refused to sign our intermediate CA certificate: %v", err) + return nil + } + + if err := provider.SetIntermediate(intermediatePEM, newActiveRoot.RootCert); err != nil { + return fmt.Errorf("Failed to set the intermediate certificate with the CA provider: %v", err) + } + + intermediateCert, err := connect.ParseCert(intermediatePEM) + if err != nil { + return fmt.Errorf("error parsing intermediate cert: %v", err) + } + + // Append the new intermediate to our local active root entry. This is + // where the root representations start to diverge. + newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediatePEM) + newActiveRoot.SigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId) + + s.logger.Printf("[INFO] connect: received new intermediate certificate from primary datacenter") + return nil +} + func (s *Server) generateCASignRequest(csr string) *structs.CASignRequest { return &structs.CASignRequest{ Datacenter: s.config.PrimaryDatacenter, @@ -510,6 +544,7 @@ func (s *Server) startConnectLeader() { if s.config.ConnectEnabled && s.config.Datacenter != s.config.PrimaryDatacenter { s.leaderRoutineManager.Start(secondaryCARootWatchRoutineName, s.secondaryCARootWatch) s.leaderRoutineManager.Start(intentionReplicationRoutineName, s.replicateIntentions) + s.leaderRoutineManager.Start(secondaryCertRenewWatchRoutineName, s.secondaryIntermediateCertRenewalWatch) } s.leaderRoutineManager.Start(caRootPruningRoutineName, s.runCARootPruning) @@ -591,6 +626,70 @@ func (s *Server) pruneCARoots() error { return nil } +// secondaryIntermediateCertRenewalWatch checks the intermediate cert for +// expiration. As soon as more than half the time a cert is valid has passed, +// it will try to renew it. +func (s *Server) secondaryIntermediateCertRenewalWatch(ctx context.Context) error { + for { + select { + case <-ctx.Done(): + return nil + case <-time.After(intermediateCertRenewInterval): + retryLoopBackoff(ctx.Done(), func() error { + s.caProviderReconfigurationLock.Lock() + defer s.caProviderReconfigurationLock.Unlock() + + provider, _ := s.getCAProvider() + if provider == nil { + // this happens when leadership is being revoked and this go routine will be stopped + return nil + } + if !s.configuredSecondaryCA() { + return fmt.Errorf("secondary CA is not yet configured.") + } + + state := s.fsm.State() + _, activeRoot, err := state.CARootActive(nil) + if err != nil { + return err + } + + activeIntermediate, err := provider.ActiveIntermediate() + if err != nil { + return err + } + + if activeIntermediate == "" { + return fmt.Errorf("secondary datacenter doesn't have an active intermediate.") + } + + intermediateCert, err := connect.ParseCert(activeIntermediate) + if err != nil { + return fmt.Errorf("error parsing active intermediate cert: %v", err) + } + + if lessThanHalfTimePassed(time.Now(), intermediateCert.NotBefore, + intermediateCert.NotAfter) { + return nil + } + + if err := s.getIntermediateCASigned(provider, activeRoot); err != nil { + return err + } + + if err := s.persistNewRoot(provider, activeRoot); err != nil { + return err + } + + s.setCAProvider(provider, activeRoot) + return nil + }, func(err error) { + s.logger.Printf("[ERR] connect: %s: %v", secondaryCertRenewWatchRoutineName, err) + }) + } + } +} + // secondaryCARootWatch maintains a blocking query to the primary datacenter's // ConnectCA.Roots endpoint to monitor when it needs to request a new signed // intermediate certificate. @@ -642,7 +741,7 @@ func (s *Server) secondaryCARootWatch(ctx context.Context) error { args.QueryOptions.MinQueryIndex = nextIndexVal(args.QueryOptions.MinQueryIndex, roots.QueryMeta.Index) return nil }, func(err error) { - s.logger.Printf("[ERR] connect: %v", err) + s.logger.Printf("[ERR] connect: %s: %v", secondaryCARootWatchRoutineName, err) }) return nil @@ -699,7 +798,7 @@ func (s *Server) replicateIntentions(ctx context.Context) error { args.QueryOptions.MinQueryIndex = nextIndexVal(args.QueryOptions.MinQueryIndex, remote.QueryMeta.Index) return nil }, func(err error) { - s.logger.Printf("[ERR] connect: error replicating intentions: %v", err) + s.logger.Printf("[ERR] connect: %s: %v", intentionReplicationRoutineName, err) }) return nil } @@ -816,6 +915,8 @@ func nextIndexVal(prevIdx, idx uint64) uint64 { } // initializeSecondaryProvider configures the given provider for a secondary, non-root datacenter. +// It is being called while holding caProviderReconfigurationLock which means +// it must never take that lock itself or call anything that does. func (s *Server) initializeSecondaryProvider(provider ca.Provider, roots structs.IndexedCARoots) error { if roots.TrustDomain == "" { return fmt.Errorf("trust domain from primary datacenter is not initialized") @@ -845,8 +946,26 @@ func (s *Server) initializeSecondaryProvider(provider ca.Provider, roots structs return nil } +// configuredSecondaryCA is being called while holding caProviderReconfigurationLock +// which means it must never take that lock itself or call anything that does. func (s *Server) configuredSecondaryCA() bool { s.actingSecondaryLock.RLock() defer s.actingSecondaryLock.RUnlock() return s.actingSecondaryCA } + +// halfTime returns a duration that is half the time between notBefore and +// notAfter. +func halfTime(notBefore, notAfter time.Time) time.Duration { + interval := notAfter.Sub(notBefore) + return interval / 2 +} + +// lessThanHalfTimePassed decides if half the time between notBefore and +// notAfter has passed relative to now. +// lessThanHalfTimePassed is being called while holding caProviderReconfigurationLock +// which means it must never take that lock itself or call anything that does. +func lessThanHalfTimePassed(now, notBefore, notAfter time.Time) bool { + t := notBefore.Add(halfTime(notBefore, notAfter)) + return t.Sub(now) > 0 +} diff --git a/agent/consul/leader_connect_test.go b/agent/consul/leader_connect_test.go index 51bf09cdd1..16712841e8 100644 --- a/agent/consul/leader_connect_test.go +++ b/agent/consul/leader_connect_test.go @@ -162,6 +162,138 @@ func TestLeader_SecondaryCA_Initialize(t *testing.T) { } } +func waitForActiveCARoot(t *testing.T, srv *Server, expect *structs.CARoot) { + retry.Run(t, func(r *retry.R) { + _, root := srv.getCAProvider() + if root == nil { + r.Fatal("no root") + } + if root.ID != expect.ID { + r.Fatalf("current active root is %s; waiting for %s", root.ID, expect.ID) + } + }) +} + +func TestLeader_SecondaryCA_IntermediateRenew(t *testing.T) { + t.Parallel() + + intermediateCertRenewInterval = time.Millisecond + require := require.New(t) + + dir1, s1 := testServerWithConfig(t, func(c *Config) { + c.Build = "1.6.0" + c.CAConfig = &structs.CAConfiguration{ + Provider: "consul", + Config: map[string]interface{}{ + "PrivateKey": "", + "RootCert": "", + "RotationPeriod": "2160h", + "LeafCertTTL": "72h", + // The retry loop only retries for 7sec max and + // the ttl needs to be below so that it + // triggers definitely. + // Since certs are created so that they are + // valid from 1minute in the past, we need to + // account for that, otherwise it will be + // expired immediately. + "IntermediateCertTTL": time.Minute + (5 * time.Second), + }, + } + }) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + + testrpc.WaitForLeader(t, s1.RPC, "dc1") + + // dc2 as a secondary DC + dir2, s2 := testServerWithConfig(t, func(c *Config) { + c.Datacenter = "dc2" + c.PrimaryDatacenter = "dc1" + c.Build = "1.6.0" + }) + defer os.RemoveAll(dir2) + defer s2.Shutdown() + + // Create the WAN link + joinWAN(t, s2, s1) + testrpc.WaitForLeader(t, s2.RPC, "dc2") + + // Get the original intermediate + secondaryProvider, _ := s2.getCAProvider() + intermediatePEM, err := secondaryProvider.ActiveIntermediate() + require.NoError(err) + cert, err := connect.ParseCert(intermediatePEM) + require.NoError(err) + currentCertSerialNumber := cert.SerialNumber + currentCertAuthorityKeyId := cert.AuthorityKeyId + + // Capture the current root + var originalRoot *structs.CARoot + { + rootList, activeRoot, err := getTestRoots(s1, "dc1") + require.NoError(err) + require.Len(rootList.Roots, 1) + originalRoot = activeRoot + } + + waitForActiveCARoot(t, s1, originalRoot) + waitForActiveCARoot(t, s2, originalRoot) + + // Wait for dc2's intermediate to be refreshed. + // It is possible that test fails when the blocking query doesn't return. + // When https://github.com/hashicorp/consul/pull/3777 is merged + // however, defaultQueryTime will be configurable and we con lower it + // so that it returns for sure. + retry.Run(t, func(r *retry.R) { + secondaryProvider, _ := s2.getCAProvider() + intermediatePEM, err = secondaryProvider.ActiveIntermediate() + r.Check(err) + cert, err := connect.ParseCert(intermediatePEM) + r.Check(err) + if cert.SerialNumber.Cmp(currentCertSerialNumber) == 0 || !reflect.DeepEqual(cert.AuthorityKeyId, currentCertAuthorityKeyId) { + currentCertSerialNumber = cert.SerialNumber + currentCertAuthorityKeyId = cert.AuthorityKeyId + r.Fatal("not a renewed intermediate") + } + }) + require.NoError(err) + + // Get the new root from dc1 and validate a chain of: + // dc2 leaf -> dc2 intermediate -> dc1 root + _, caRoot := s1.getCAProvider() + + // Have dc2 sign a leaf cert and make sure the chain is correct. + spiffeService := &connect.SpiffeIDService{ + Host: "node1", + Namespace: "default", + Datacenter: "dc1", + Service: "foo", + } + raw, _ := connect.TestCSR(t, spiffeService) + + leafCsr, err := connect.ParseCSR(raw) + require.NoError(err) + + leafPEM, err := secondaryProvider.Sign(leafCsr) + require.NoError(err) + + cert, err = connect.ParseCert(leafPEM) + require.NoError(err) + + // Check that the leaf signed by the new intermediate can be verified using the + // returned cert chain (signed intermediate + remote root). + intermediatePool := x509.NewCertPool() + intermediatePool.AppendCertsFromPEM([]byte(intermediatePEM)) + rootPool := x509.NewCertPool() + rootPool.AppendCertsFromPEM([]byte(caRoot.RootCert)) + + _, err = cert.Verify(x509.VerifyOptions{ + Intermediates: intermediatePool, + Roots: rootPool, + }) + require.NoError(err) +} + func TestLeader_SecondaryCA_IntermediateRefresh(t *testing.T) { t.Parallel() @@ -214,9 +346,10 @@ func TestLeader_SecondaryCA_IntermediateRefresh(t *testing.T) { newConfig := &structs.CAConfiguration{ Provider: "consul", Config: map[string]interface{}{ - "PrivateKey": newKey, - "RootCert": "", - "RotationPeriod": 90 * 24 * time.Hour, + "PrivateKey": newKey, + "RootCert": "", + "RotationPeriod": 90 * 24 * time.Hour, + "IntermediateCertTTL": 72 * 24 * time.Hour, }, } { @@ -1292,3 +1425,13 @@ func readTestData(t *testing.T, name string) string { } return string(bs) } + +func TestLeader_lessThanHalfTimePassed(t *testing.T) { + now := time.Now() + require.False(t, lessThanHalfTimePassed(now, now.Add(-10*time.Second), now.Add(-5*time.Second))) + require.False(t, lessThanHalfTimePassed(now, now.Add(-10*time.Second), now)) + require.False(t, lessThanHalfTimePassed(now, now.Add(-10*time.Second), now.Add(5*time.Second))) + require.False(t, lessThanHalfTimePassed(now, now.Add(-10*time.Second), now.Add(10*time.Second))) + + require.True(t, lessThanHalfTimePassed(now, now.Add(-10*time.Second), now.Add(20*time.Second))) +} diff --git a/agent/consul/server.go b/agent/consul/server.go index 81aa3c5b71..834abc2bca 100644 --- a/agent/consul/server.go +++ b/agent/consul/server.go @@ -89,16 +89,17 @@ const ( ) const ( - legacyACLReplicationRoutineName = "legacy ACL replication" - aclPolicyReplicationRoutineName = "ACL policy replication" - aclRoleReplicationRoutineName = "ACL role replication" - aclTokenReplicationRoutineName = "ACL token replication" - aclTokenReapingRoutineName = "acl token reaping" - aclUpgradeRoutineName = "legacy ACL token upgrade" - caRootPruningRoutineName = "CA root pruning" - configReplicationRoutineName = "config entry replication" - intentionReplicationRoutineName = "intention replication" - secondaryCARootWatchRoutineName = "secondary CA roots watch" + legacyACLReplicationRoutineName = "legacy ACL replication" + aclPolicyReplicationRoutineName = "ACL policy replication" + aclRoleReplicationRoutineName = "ACL role replication" + aclTokenReplicationRoutineName = "ACL token replication" + aclTokenReapingRoutineName = "acl token reaping" + aclUpgradeRoutineName = "legacy ACL token upgrade" + caRootPruningRoutineName = "CA root pruning" + configReplicationRoutineName = "config entry replication" + intentionReplicationRoutineName = "intention replication" + secondaryCARootWatchRoutineName = "secondary CA roots watch" + secondaryCertRenewWatchRoutineName = "secondary cert renew watch" ) var ( @@ -126,6 +127,8 @@ type Server struct { // autopilotWaitGroup is used to block until Autopilot shuts down. autopilotWaitGroup sync.WaitGroup + // caProviderReconfigurationLock guards the provider reconfiguration. + caProviderReconfigurationLock sync.Mutex // caProvider is the current CA provider in use for Connect. This is // only non-nil when we are the leader. caProvider ca.Provider diff --git a/agent/consul/server_test.go b/agent/consul/server_test.go index 2867382f8e..ea3607eef1 100644 --- a/agent/consul/server_test.go +++ b/agent/consul/server_test.go @@ -155,10 +155,11 @@ func testServerConfig(t *testing.T) (string, *Config) { ClusterID: connect.TestClusterID, Provider: structs.ConsulCAProvider, Config: map[string]interface{}{ - "PrivateKey": "", - "RootCert": "", - "RotationPeriod": "2160h", - "LeafCertTTL": "72h", + "PrivateKey": "", + "RootCert": "", + "RotationPeriod": "2160h", + "LeafCertTTL": "72h", + "IntermediateCertTTL": "72h", }, } diff --git a/agent/structs/connect_ca.go b/agent/structs/connect_ca.go index ca79b59e6c..10f14f5e1c 100644 --- a/agent/structs/connect_ca.go +++ b/agent/structs/connect_ca.go @@ -424,9 +424,10 @@ func (c CommonCAProviderConfig) Validate() error { type ConsulCAProviderConfig struct { CommonCAProviderConfig `mapstructure:",squash"` - PrivateKey string - RootCert string - RotationPeriod time.Duration + PrivateKey string + RootCert string + RotationPeriod time.Duration + IntermediateCertTTL time.Duration // DisableCrossSigning is really only useful in test code to use the built in // provider while exercising logic that depends on the CA provider ability to diff --git a/api/connect_ca.go b/api/connect_ca.go index ea7f251e20..26a7bfb1df 100644 --- a/api/connect_ca.go +++ b/api/connect_ca.go @@ -39,9 +39,10 @@ type CommonCAProviderConfig struct { type ConsulCAProviderConfig struct { CommonCAProviderConfig `mapstructure:",squash"` - PrivateKey string - RootCert string - RotationPeriod time.Duration + PrivateKey string + RootCert string + RotationPeriod time.Duration + IntermediateCertTTL time.Duration } // ParseConsulCAConfig takes a raw config map and returns a parsed diff --git a/api/connect_ca_test.go b/api/connect_ca_test.go index 9ba7e29328..8ae494b60f 100644 --- a/api/connect_ca_test.go +++ b/api/connect_ca_test.go @@ -62,7 +62,8 @@ func TestAPI_ConnectCAConfig_get_set(t *testing.T) { s.WaitForSerfCheck(t) expected := &ConsulCAProviderConfig{ - RotationPeriod: 90 * 24 * time.Hour, + RotationPeriod: 90 * 24 * time.Hour, + IntermediateCertTTL: 365 * 24 * time.Hour, } expected.LeafCertTTL = 72 * time.Hour @@ -83,15 +84,19 @@ func TestAPI_ConnectCAConfig_get_set(t *testing.T) { // Change a config value and update conf.Config["PrivateKey"] = "" conf.Config["RotationPeriod"] = 120 * 24 * time.Hour + conf.Config["IntermediateCertTTL"] = 300 * 24 * time.Hour + // Pass through some state as if the provider stored it so we can make sure // we can read it again. conf.Config["test_state"] = map[string]string{"foo": "bar"} + _, err = connect.CASetConfig(conf, nil) r.Check(err) updated, _, err := connect.CAGetConfig(nil) r.Check(err) expected.RotationPeriod = 120 * 24 * time.Hour + expected.IntermediateCertTTL = 300 * 24 * time.Hour parsed, err = ParseConsulCAConfig(updated.Config) r.Check(err) require.Equal(r, expected, parsed) diff --git a/command/connect/ca/set/connect_ca_set_test.go b/command/connect/ca/set/connect_ca_set_test.go index 88389d46d2..3e31004041 100644 --- a/command/connect/ca/set/connect_ca_set_test.go +++ b/command/connect/ca/set/connect_ca_set_test.go @@ -50,4 +50,5 @@ func TestConnectCASetConfigCommand(t *testing.T) { parsed, err := ca.ParseConsulCAConfig(reply.Config) require.NoError(err) require.Equal(24*time.Hour, parsed.RotationPeriod) + require.Equal(36*time.Hour, parsed.IntermediateCertTTL) } diff --git a/command/connect/ca/set/test-fixtures/ca_config.json b/command/connect/ca/set/test-fixtures/ca_config.json index d29b25e8d2..b05f14bfad 100644 --- a/command/connect/ca/set/test-fixtures/ca_config.json +++ b/command/connect/ca/set/test-fixtures/ca_config.json @@ -3,6 +3,7 @@ "Config": { "PrivateKey": "", "RootCert": "", - "RotationPeriod": "24h" + "RotationPeriod": "24h", + "IntermediateCertTTL": "36h" } -} \ No newline at end of file +} diff --git a/sdk/freeport/freeport.go b/sdk/freeport/freeport.go index 576f39f2fa..4ddc937378 100644 --- a/sdk/freeport/freeport.go +++ b/sdk/freeport/freeport.go @@ -16,10 +16,6 @@ import ( ) const ( - // blockSize is the size of the allocated port block. ports are given out - // consecutively from that block and after that point in a LRU fashion. - blockSize = 1500 - // maxBlocks is the number of available port blocks before exclusions. maxBlocks = 30 @@ -32,6 +28,10 @@ const ( ) var ( + // blockSize is the size of the allocated port block. ports are given out + // consecutively from that block and after that point in a LRU fashion. + blockSize int + // effectiveMaxBlocks is the number of available port blocks. // lowPort + effectiveMaxBlocks * blockSize must be less than 65535. effectiveMaxBlocks int @@ -71,6 +71,17 @@ var ( // initialize is used to initialize freeport. func initialize() { var err error + + blockSize = 1500 + limit, err := systemLimit() + if err != nil { + panic("freeport: error getting system limit: " + err.Error()) + } + if limit > 0 && limit < blockSize { + logf("INFO", "blockSize %d too big for system limit %d. Adjusting...", blockSize, limit) + blockSize = limit - 3 + } + effectiveMaxBlocks, err = adjustMaxBlocks() if err != nil { panic("freeport: ephemeral port range detection failed: " + err.Error()) diff --git a/sdk/freeport/freeport_test.go b/sdk/freeport/freeport_test.go index 598ac789c1..58a5024ec4 100644 --- a/sdk/freeport/freeport_test.go +++ b/sdk/freeport/freeport_test.go @@ -185,10 +185,10 @@ func TestTakeReturn(t *testing.T) { c.Close() } }() - for _, port := range allPorts { + for i, port := range allPorts { ln, err := net.ListenTCP("tcp", tcpAddr("127.0.0.1", port)) if err != nil { - t.Fatalf("err: %v", err) + t.Fatalf("%d err: %v", i, err) } leaked = append(leaked, ln) } diff --git a/sdk/freeport/systemlimit.go b/sdk/freeport/systemlimit.go new file mode 100644 index 0000000000..fdd9021424 --- /dev/null +++ b/sdk/freeport/systemlimit.go @@ -0,0 +1,11 @@ +// +build !windows + +package freeport + +import "golang.org/x/sys/unix" + +func systemLimit() (int, error) { + var limit unix.Rlimit + err := unix.Getrlimit(unix.RLIMIT_NOFILE, &limit) + return int(limit.Cur), err +} diff --git a/sdk/freeport/systemlimit_windows.go b/sdk/freeport/systemlimit_windows.go new file mode 100644 index 0000000000..eec2119576 --- /dev/null +++ b/sdk/freeport/systemlimit_windows.go @@ -0,0 +1,7 @@ +// +build windows + +package freeport + +func systemLimit() (int, error) { + return 0, nil +} diff --git a/testrpc/wait.go b/testrpc/wait.go index 49811ba166..f4b859a163 100644 --- a/testrpc/wait.go +++ b/testrpc/wait.go @@ -24,7 +24,7 @@ func WaitForLeader(t *testing.T, rpc rpcFn, dc string) { r.Fatalf("No leader") } if out.Index < 2 { - r.Fatalf("Consul index should be at least 2") + r.Fatalf("Consul index should be at least 2 in %s", dc) } }) } diff --git a/website/source/api/connect/ca.html.md b/website/source/api/connect/ca.html.md index de137ad587..f89bed8e3c 100644 --- a/website/source/api/connect/ca.html.md +++ b/website/source/api/connect/ca.html.md @@ -97,7 +97,8 @@ $ curl \ "Provider": "consul", "Config": { "LeafCertTTL": "72h", - "RotationPeriod": "2160h" + "RotationPeriod": "2160h", + "IntermediateCertTTL": "8760h" }, "CreateIndex": 5, "ModifyIndex": 5 @@ -148,7 +149,8 @@ providers, see [Provider Config](/docs/connect/ca.html). "LeafCertTTL": "72h", "PrivateKey": "-----BEGIN RSA PRIVATE KEY-----...", "RootCert": "-----BEGIN CERTIFICATE-----...", - "RotationPeriod": "2160h" + "RotationPeriod": "2160h", + "IntermediateCertTTL": "8760h" }, "ForceWithoutCrossSigning": false } diff --git a/website/source/docs/connect/ca.html.md b/website/source/docs/connect/ca.html.md index 28cc869211..239c4cd4be 100644 --- a/website/source/docs/connect/ca.html.md +++ b/website/source/docs/connect/ca.html.md @@ -92,7 +92,8 @@ $ curl http://localhost:8500/v1/connect/ca/configuration "Provider": "consul", "Config": { "LeafCertTTL": "72h", - "RotationPeriod": "2160h" + "RotationPeriod": "2160h", + "IntermediateCertTTL": "8760h" }, "CreateIndex": 5, "ModifyIndex": 5 diff --git a/website/source/docs/connect/ca/consul.html.md b/website/source/docs/connect/ca/consul.html.md index 160e962033..02f00c7868 100644 --- a/website/source/docs/connect/ca/consul.html.md +++ b/website/source/docs/connect/ca/consul.html.md @@ -73,7 +73,8 @@ $ curl localhost:8500/v1/connect/ca/configuration "Provider": "consul", "Config": { "LeafCertTTL": "72h", - "RotationPeriod": "2160h" + "RotationPeriod": "2160h", + "IntermediateCertTTL": "8760h" }, "CreateIndex": 5, "ModifyIndex": 5 @@ -106,7 +107,8 @@ $ jq -n --arg key "$(cat root.key)" --arg cert "$(cat root.crt)" ' "LeafCertTTL": "72h", "PrivateKey": $key, "RootCert": $cert, - "RotationPeriod": "2160h" + "RotationPeriod": "2160h", + "IntermediateCertTTL": "8760h" } }' > ca_config.json ``` @@ -121,7 +123,8 @@ $ cat ca_config.json "LeafCertTTL": "72h", "PrivateKey": "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEArqiy1c3pbT3cSkjdEM1APALUareU...", "RootCert": "-----BEGIN CERTIFICATE-----\nMIIDijCCAnKgAwIBAgIJAOFZ66em1qC7MA0GCSqGSIb3...", - "RotationPeriod": "2160h" + "RotationPeriod": "2160h", + "IntermediateCertTTL": "8760h" } }