Merge pull request #9299 from hashicorp/1.8.x-update-secondary-ca

Backport #9009 to 1.8.x
This commit is contained in:
Kyle Havlovitz 2020-12-02 11:35:09 -08:00 committed by GitHub
commit 31199ca426
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 1675 additions and 1025 deletions

View File

@ -21,30 +21,7 @@ func (c *consulCAMockDelegate) State() *state.Store {
} }
func (c *consulCAMockDelegate) ApplyCARequest(req *structs.CARequest) (interface{}, error) { func (c *consulCAMockDelegate) ApplyCARequest(req *structs.CARequest) (interface{}, error) {
idx, _, err := c.state.CAConfig(nil) return ApplyCARequestToStore(c.state, req)
if err != nil {
return nil, err
}
switch req.Op {
case structs.CAOpSetProviderState:
_, err := c.state.CASetProviderState(idx+1, req.ProviderState)
if err != nil {
return nil, err
}
return true, nil
case structs.CAOpDeleteProviderState:
if err := c.state.CADeleteProviderState(req.ProviderState.ID); err != nil {
return nil, err
}
return true, nil
case structs.CAOpIncrementProviderSerialNumber:
return uint64(2), nil
default:
return nil, fmt.Errorf("Invalid CA operation '%s'", req.Op)
}
} }
func newMockDelegate(t *testing.T, conf *structs.CAConfiguration) *consulCAMockDelegate { func newMockDelegate(t *testing.T, conf *structs.CAConfiguration) *consulCAMockDelegate {

View File

@ -135,7 +135,7 @@ func (v *VaultProvider) renewToken(ctx context.Context, watcher *vaultapi.Lifeti
go watcher.Start() go watcher.Start()
case <-watcher.RenewCh(): case <-watcher.RenewCh():
v.logger.Error("Successfully renewed token for Vault provider") v.logger.Info("Successfully renewed token for Vault provider")
} }
} }
} }

View File

@ -8,6 +8,8 @@ import (
"sync" "sync"
"github.com/hashicorp/consul/agent/connect" "github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/sdk/freeport" "github.com/hashicorp/consul/sdk/freeport"
"github.com/hashicorp/consul/sdk/testutil/retry" "github.com/hashicorp/consul/sdk/testutil/retry"
"github.com/hashicorp/go-hclog" "github.com/hashicorp/go-hclog"
@ -234,3 +236,30 @@ func (v *TestVaultServer) Stop() error {
return nil return nil
} }
func ApplyCARequestToStore(store *state.Store, req *structs.CARequest) (interface{}, error) {
idx, _, err := store.CAConfig(nil)
if err != nil {
return nil, err
}
switch req.Op {
case structs.CAOpSetProviderState:
_, err := store.CASetProviderState(idx+1, req.ProviderState)
if err != nil {
return nil, err
}
return true, nil
case structs.CAOpDeleteProviderState:
if err := store.CADeleteProviderState(req.ProviderState.ID); err != nil {
return nil, err
}
return true, nil
case structs.CAOpIncrementProviderSerialNumber:
return uint64(2), nil
default:
return nil, fmt.Errorf("Invalid CA operation '%s'", req.Op)
}
}

View File

@ -56,7 +56,7 @@ func ValidateLeaf(caPEM string, leafPEM string, intermediatePEMs []string) error
return err return err
} }
func testCA(t testing.T, xc *structs.CARoot, keyType string, keyBits int) *structs.CARoot { func testCA(t testing.T, xc *structs.CARoot, keyType string, keyBits int, ttl time.Duration) *structs.CARoot {
var result structs.CARoot var result structs.CARoot
result.Active = true result.Active = true
result.Name = fmt.Sprintf("Test CA %d", atomic.AddUint64(&testCACounter, 1)) result.Name = fmt.Sprintf("Test CA %d", atomic.AddUint64(&testCACounter, 1))
@ -76,6 +76,14 @@ func testCA(t testing.T, xc *structs.CARoot, keyType string, keyBits int) *struc
id := &SpiffeIDSigning{ClusterID: TestClusterID, Domain: "consul"} id := &SpiffeIDSigning{ClusterID: TestClusterID, Domain: "consul"}
// Create the CA cert // Create the CA cert
now := time.Now()
before := now
after := now
if ttl != 0 {
after = after.Add(ttl)
} else {
after = after.AddDate(10, 0, 0)
}
template := x509.Certificate{ template := x509.Certificate{
SerialNumber: sn, SerialNumber: sn,
Subject: pkix.Name{CommonName: result.Name}, Subject: pkix.Name{CommonName: result.Name},
@ -85,8 +93,8 @@ func testCA(t testing.T, xc *structs.CARoot, keyType string, keyBits int) *struc
x509.KeyUsageCRLSign | x509.KeyUsageCRLSign |
x509.KeyUsageDigitalSignature, x509.KeyUsageDigitalSignature,
IsCA: true, IsCA: true,
NotAfter: time.Now().AddDate(10, 0, 0), NotAfter: after,
NotBefore: time.Now(), NotBefore: before,
AuthorityKeyId: testKeyID(t, signer.Public()), AuthorityKeyId: testKeyID(t, signer.Public()),
SubjectKeyId: testKeyID(t, signer.Public()), SubjectKeyId: testKeyID(t, signer.Public()),
} }
@ -159,13 +167,19 @@ func testCA(t testing.T, xc *structs.CARoot, keyType string, keyBits int) *struc
// that is cross-signed with the previous cert, and this will be set as // that is cross-signed with the previous cert, and this will be set as
// SigningCert. // SigningCert.
func TestCA(t testing.T, xc *structs.CARoot) *structs.CARoot { func TestCA(t testing.T, xc *structs.CARoot) *structs.CARoot {
return testCA(t, xc, DefaultPrivateKeyType, DefaultPrivateKeyBits) return testCA(t, xc, DefaultPrivateKeyType, DefaultPrivateKeyBits, 0)
}
// TestCAWithTTL is similar to TestCA, except that it
// takes a custom duration for the lifetime of the certificate.
func TestCAWithTTL(t testing.T, xc *structs.CARoot, ttl time.Duration) *structs.CARoot {
return testCA(t, xc, DefaultPrivateKeyType, DefaultPrivateKeyBits, ttl)
} }
// TestCAWithKeyType is similar to TestCA, except that it // TestCAWithKeyType is similar to TestCA, except that it
// takes two additional arguments to override the default private key type and size. // takes two additional arguments to override the default private key type and size.
func TestCAWithKeyType(t testing.T, xc *structs.CARoot, keyType string, keyBits int) *structs.CARoot { func TestCAWithKeyType(t testing.T, xc *structs.CARoot, keyType string, keyBits int) *structs.CARoot {
return testCA(t, xc, keyType, keyBits) return testCA(t, xc, keyType, keyBits, 0)
} }
// testCertID is an interface to be implemented the various spiffe ID / CertURI types // testCertID is an interface to be implemented the various spiffe ID / CertURI types

View File

@ -12,10 +12,16 @@ func TestSpiffeIDService(t testing.T, service string) *SpiffeIDService {
// TestSpiffeIDServiceWithHost returns a SPIFFE ID representing a service with // TestSpiffeIDServiceWithHost returns a SPIFFE ID representing a service with
// the specified trust domain. // the specified trust domain.
func TestSpiffeIDServiceWithHost(t testing.T, service, host string) *SpiffeIDService { func TestSpiffeIDServiceWithHost(t testing.T, service, host string) *SpiffeIDService {
return TestSpiffeIDServiceWithHostDC(t, service, host, "dc1")
}
// TestSpiffeIDServiceWithHostDC returns a SPIFFE ID representing a service with
// the specified trust domain for the given datacenter.
func TestSpiffeIDServiceWithHostDC(t testing.T, service, host, datacenter string) *SpiffeIDService {
return &SpiffeIDService{ return &SpiffeIDService{
Host: host, Host: host,
Namespace: "default", Namespace: "default",
Datacenter: "dc1", Datacenter: datacenter,
Service: service, Service: service,
} }
} }

View File

@ -3,14 +3,12 @@ package consul
import ( import (
"errors" "errors"
"fmt" "fmt"
"reflect"
"time" "time"
"github.com/hashicorp/go-hclog" "github.com/hashicorp/go-hclog"
"github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/connect" "github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/connect/ca"
"github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/go-memdb" "github.com/hashicorp/go-memdb"
@ -103,216 +101,7 @@ func (s *ConnectCA) ConfigurationSet(
return acl.ErrPermissionDenied return acl.ErrPermissionDenied
} }
// Exit early if it's a no-op change return s.srv.caManager.UpdateConfiguration(args)
state := s.srv.fsm.State()
confIdx, config, err := state.CAConfig(nil)
if err != nil {
return err
}
// Don't allow state changes. Either it needs to be empty or the same to allow
// read-modify-write loops that don't touch the State field.
if len(args.Config.State) > 0 &&
!reflect.DeepEqual(args.Config.State, config.State) {
return ErrStateReadOnly
}
// Don't allow users to change the ClusterID.
args.Config.ClusterID = config.ClusterID
if args.Config.Provider == config.Provider && reflect.DeepEqual(args.Config.Config, config.Config) {
return nil
}
// If the provider hasn't changed, we need to load the current Provider state
// so it can decide if it needs to change resources or not based on the config
// change.
if args.Config.Provider == config.Provider {
// Note this is a shallow copy since the State method doc requires the
// provider return a map that will not be further modified and should not
// modify the one we pass to Configure.
args.Config.State = config.State
}
// Create a new instance of the provider described by the config
// and get the current active root CA. This acts as a good validation
// of the config and makes sure the provider is functioning correctly
// before we commit any changes to Raft.
newProvider, err := s.srv.createCAProvider(args.Config)
if err != nil {
return fmt.Errorf("could not initialize provider: %v", err)
}
pCfg := ca.ProviderConfig{
ClusterID: args.Config.ClusterID,
Datacenter: s.srv.config.Datacenter,
// This endpoint can be called in a secondary DC too so set this correctly.
IsPrimary: s.srv.config.Datacenter == s.srv.config.PrimaryDatacenter,
RawConfig: args.Config.Config,
State: args.Config.State,
}
if err := newProvider.Configure(pCfg); err != nil {
return fmt.Errorf("error configuring provider: %v", err)
}
// Set up a defer to clean up the new provider if we exit early due to an error.
cleanupNewProvider := true
defer func() {
if cleanupNewProvider {
if err := newProvider.Cleanup(); err != nil {
s.logger.Warn("failed to clean up CA provider while handling startup failure", "provider", newProvider, "error", err)
}
}
}()
if err := newProvider.GenerateRoot(); err != nil {
return fmt.Errorf("error generating CA root certificate: %v", err)
}
newRootPEM, err := newProvider.ActiveRoot()
if err != nil {
return err
}
newActiveRoot, err := parseCARoot(newRootPEM, args.Config.Provider, args.Config.ClusterID)
if err != nil {
return err
}
// See if the provider needs to persist any state along with the config
pState, err := newProvider.State()
if err != nil {
return fmt.Errorf("error getting provider state: %v", err)
}
args.Config.State = pState
// Compare the new provider's root CA ID to the current one. If they
// match, just update the existing provider with the new config.
// If they don't match, begin the root rotation process.
_, root, err := state.CARootActive(nil)
if err != nil {
return err
}
// If the root didn't change or if this is a secondary DC, just update the
// config and return.
if (s.srv.config.Datacenter != s.srv.config.PrimaryDatacenter) ||
root != nil && root.ID == newActiveRoot.ID {
args.Op = structs.CAOpSetConfig
resp, err := s.srv.raftApply(structs.ConnectCARequestType, args)
if err != nil {
return err
}
if respErr, ok := resp.(error); ok {
return respErr
}
// If the config has been committed, update the local provider instance
cleanupNewProvider = false
s.srv.setCAProvider(newProvider, newActiveRoot)
s.logger.Info("CA provider config updated")
return nil
}
// At this point, we know the config change has trigged a root rotation,
// either by swapping the provider type or changing the provider's config
// to use a different root certificate.
// First up, sanity check that the current provider actually supports
// cross-signing.
oldProvider, _ := s.srv.getCAProvider()
if oldProvider == nil {
return fmt.Errorf("internal error: CA provider is nil")
}
canXSign, err := oldProvider.SupportsCrossSigning()
if err != nil {
return fmt.Errorf("CA provider error: %s", err)
}
if !canXSign && !args.Config.ForceWithoutCrossSigning {
return errors.New("The current CA Provider does not support cross-signing. " +
"You can try again with ForceWithoutCrossSigningSet but this may cause " +
"disruption - see documentation for more.")
}
if !canXSign && args.Config.ForceWithoutCrossSigning {
s.logger.Warn("current CA doesn't support cross signing but " +
"CA reconfiguration forced anyway with ForceWithoutCrossSigning")
}
// If it's a config change that would trigger a rotation (different provider/root):
// 1. Get the root from the new provider.
// 2. Call CrossSignCA on the old provider to sign the new root with the old one to
// get a cross-signed certificate.
// 3. Take the active root for the new provider and append the intermediate from step 2
// to its list of intermediates.
newRoot, err := connect.ParseCert(newRootPEM)
if err != nil {
return err
}
if canXSign {
// Have the old provider cross-sign the new root
xcCert, err := oldProvider.CrossSignCA(newRoot)
if err != nil {
return err
}
// Add the cross signed cert to the new CA's intermediates (to be attached
// to leaf certs).
newActiveRoot.IntermediateCerts = []string{xcCert}
}
intermediate, err := newProvider.GenerateIntermediate()
if err != nil {
return err
}
if intermediate != newRootPEM {
newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediate)
}
// Update the roots and CA config in the state store at the same time
idx, roots, err := state.CARoots(nil)
if err != nil {
return err
}
var newRoots structs.CARoots
for _, r := range roots {
newRoot := *r
if newRoot.Active {
newRoot.Active = false
newRoot.RotatedOutAt = time.Now()
}
newRoots = append(newRoots, &newRoot)
}
newRoots = append(newRoots, newActiveRoot)
args.Op = structs.CAOpSetRootsAndConfig
args.Index = idx
args.Config.ModifyIndex = confIdx
args.Roots = newRoots
resp, err := s.srv.raftApply(structs.ConnectCARequestType, args)
if err != nil {
return err
}
if respErr, ok := resp.(error); ok {
return respErr
}
if respOk, ok := resp.(bool); ok && !respOk {
return fmt.Errorf("could not atomically update roots and config")
}
// If the config has been committed, update the local provider instance
// and call teardown on the old provider
cleanupNewProvider = false
s.srv.setCAProvider(newProvider, newActiveRoot)
if err := oldProvider.Cleanup(); err != nil {
s.logger.Warn("failed to clean up old provider", "provider", config.Provider)
}
s.logger.Info("CA rotated to new root under provider", "provider", args.Config.Provider)
return nil
} }
// Roots returns the currently trusted root certificates. // Roots returns the currently trusted root certificates.
@ -438,7 +227,7 @@ func (s *ConnectCA) SignIntermediate(
return acl.ErrPermissionDenied return acl.ErrPermissionDenied
} }
provider, _ := s.srv.getCAProvider() provider, _ := s.srv.caManager.getCAProvider()
if provider == nil { if provider == nil {
return fmt.Errorf("internal error: CA provider is nil") return fmt.Errorf("internal error: CA provider is nil")
} }

View File

@ -492,6 +492,140 @@ func TestConnectCAConfig_TriggerRotation(t *testing.T) {
} }
} }
func TestConnectCAConfig_UpdateSecondary(t *testing.T) {
t.Parallel()
assert := assert.New(t)
require := require.New(t)
// Initialize primary as the primary DC
dir1, s1 := testServerWithConfig(t, func(c *Config) {
c.Datacenter = "primary"
})
defer os.RemoveAll(dir1)
defer s1.Shutdown()
testrpc.WaitForLeader(t, s1.RPC, "primary")
// secondary as a secondary DC
dir2, s2 := testServerWithConfig(t, func(c *Config) {
c.Datacenter = "secondary"
c.PrimaryDatacenter = "primary"
})
defer os.RemoveAll(dir2)
defer s2.Shutdown()
codec := rpcClient(t, s2)
defer codec.Close()
// Create the WAN link
joinWAN(t, s2, s1)
testrpc.WaitForLeader(t, s2.RPC, "secondary")
// Capture the current root
rootList, activeRoot, err := getTestRoots(s1, "primary")
require.NoError(err)
require.Len(rootList.Roots, 1)
rootCert := activeRoot
waitForActiveCARoot(t, s1, rootCert)
waitForActiveCARoot(t, s2, rootCert)
// Capture the current intermediate
rootList, activeRoot, err = getTestRoots(s2, "secondary")
require.NoError(err)
require.Len(rootList.Roots, 1)
require.Len(activeRoot.IntermediateCerts, 1)
oldIntermediatePEM := activeRoot.IntermediateCerts[0]
// Update the secondary CA config to use a new private key, which should
// cause a re-signing with a new intermediate.
_, newKey, err := connect.GeneratePrivateKey()
assert.NoError(err)
newConfig := &structs.CAConfiguration{
Provider: "consul",
Config: map[string]interface{}{
"PrivateKey": newKey,
"RootCert": "",
"RotationPeriod": 90 * 24 * time.Hour,
},
}
{
args := &structs.CARequest{
Datacenter: "secondary",
Config: newConfig,
}
var reply interface{}
require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.ConfigurationSet", args, &reply))
}
// Make sure the new intermediate has replaced the old one in the active root,
// and that the root itself hasn't changed.
var newIntermediatePEM string
{
args := &structs.DCSpecificRequest{
Datacenter: "secondary",
}
var reply structs.IndexedCARoots
require.Nil(msgpackrpc.CallWithCodec(codec, "ConnectCA.Roots", args, &reply))
require.Len(reply.Roots, 1)
require.Len(reply.Roots[0].IntermediateCerts, 1)
newIntermediatePEM = reply.Roots[0].IntermediateCerts[0]
require.NotEqual(oldIntermediatePEM, newIntermediatePEM)
require.Equal(reply.Roots[0].RootCert, rootCert.RootCert)
}
// Verify the new config was set.
{
args := &structs.DCSpecificRequest{
Datacenter: "secondary",
}
var reply structs.CAConfiguration
require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.ConfigurationGet", args, &reply))
actual, err := ca.ParseConsulCAConfig(reply.Config)
require.NoError(err)
expected, err := ca.ParseConsulCAConfig(newConfig.Config)
require.NoError(err)
assert.Equal(reply.Provider, newConfig.Provider)
assert.Equal(actual, expected)
}
// Verify that new leaf certs get the new intermediate bundled
{
// Generate a CSR and request signing
spiffeId := connect.TestSpiffeIDServiceWithHostDC(t, "web", connect.TestClusterID+".consul", "secondary")
csr, _ := connect.TestCSR(t, spiffeId)
args := &structs.CASignRequest{
Datacenter: "secondary",
CSR: csr,
}
var reply structs.IssuedCert
require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.Sign", args, &reply))
// Verify the leaf cert has the new intermediate.
{
roots := x509.NewCertPool()
assert.True(roots.AppendCertsFromPEM([]byte(rootCert.RootCert)))
leaf, err := connect.ParseCert(reply.CertPEM)
require.NoError(err)
intermediates := x509.NewCertPool()
require.True(intermediates.AppendCertsFromPEM([]byte(newIntermediatePEM)))
_, err = leaf.Verify(x509.VerifyOptions{
Roots: roots,
Intermediates: intermediates,
})
require.NoError(err)
}
// Verify other fields
assert.Equal("web", reply.Service)
assert.Equal(spiffeId.URI().String(), reply.ServiceURI)
}
}
// Test CA signing // Test CA signing
func TestConnectCASign(t *testing.T) { func TestConnectCASign(t *testing.T) {
t.Parallel() t.Parallel()

View File

@ -334,8 +334,7 @@ func (s *Server) establishLeadership() error {
s.getOrCreateAutopilotConfig() s.getOrCreateAutopilotConfig()
s.autopilot.Start() s.autopilot.Start()
// todo(kyhavlov): start a goroutine here for handling periodic CA rotation if err := s.caManager.InitializeCA(); err != nil {
if err := s.initializeCA(); err != nil {
return err return err
} }
@ -371,7 +370,8 @@ func (s *Server) revokeLeadership() {
s.stopConnectLeader() s.stopConnectLeader()
s.setCAProvider(nil, nil) s.caManager.setCAProvider(nil, nil)
s.caManager.setState(CAStateUninitialized, false)
s.stopACLTokenReaping() s.stopACLTokenReaping()

View File

@ -4,17 +4,13 @@ import (
"bytes" "bytes"
"context" "context"
"fmt" "fmt"
"reflect"
"strings"
"time" "time"
"golang.org/x/time/rate" "golang.org/x/time/rate"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/connect/ca" "github.com/hashicorp/consul/agent/connect/ca"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/logging" "github.com/hashicorp/consul/logging"
uuid "github.com/hashicorp/go-uuid"
) )
const ( const (
@ -37,76 +33,36 @@ var (
maxRetryBackoff = 256 maxRetryBackoff = 256
) )
// initializeCAConfig is used to initialize the CA config if necessary // startConnectLeader starts multi-dc connect leader routines.
// when setting up the CA during establishLeadership func (s *Server) startConnectLeader() {
func (s *Server) initializeCAConfig() (*structs.CAConfiguration, error) { if !s.config.ConnectEnabled {
state := s.fsm.State() return
_, config, err := state.CAConfig(nil)
if err != nil {
return nil, err
}
if config == nil {
config = s.config.CAConfig
if config.ClusterID == "" {
id, err := uuid.GenerateUUID()
if err != nil {
return nil, err
}
config.ClusterID = id
}
} else if _, ok := config.Config["IntermediateCertTTL"]; !ok {
dup := *config
copied := make(map[string]interface{})
for k, v := range dup.Config {
copied[k] = v
}
copied["IntermediateCertTTL"] = connect.DefaultIntermediateCertTTL.String()
dup.Config = copied
config = &dup
} else {
return config, nil
} }
req := structs.CARequest{ // Start the Connect secondary DC actions if enabled.
Op: structs.CAOpSetConfig, if s.config.Datacenter != s.config.PrimaryDatacenter {
Config: config, s.leaderRoutineManager.Start(secondaryCARootWatchRoutineName, s.caManager.secondaryCARootWatch)
} s.leaderRoutineManager.Start(intentionReplicationRoutineName, s.replicateIntentions)
if resp, err := s.raftApply(structs.ConnectCARequestType, req); err != nil {
return nil, err
} else if respErr, ok := resp.(error); ok {
return nil, respErr
} }
return config, nil s.leaderRoutineManager.Start(intermediateCertRenewWatchRoutineName, s.caManager.intermediateCertRenewalWatch)
s.leaderRoutineManager.Start(caRootPruningRoutineName, s.runCARootPruning)
} }
// parseCARoot returns a filled-in structs.CARoot from a raw PEM value. // stopConnectLeader stops connect specific leader functions.
func parseCARoot(pemValue, provider, clusterID string) (*structs.CARoot, error) { func (s *Server) stopConnectLeader() {
id, err := connect.CalculateCertFingerprint(pemValue) s.leaderRoutineManager.Stop(secondaryCARootWatchRoutineName)
if err != nil { s.leaderRoutineManager.Stop(intentionReplicationRoutineName)
return nil, fmt.Errorf("error parsing root fingerprint: %v", err) s.leaderRoutineManager.Stop(intermediateCertRenewWatchRoutineName)
s.leaderRoutineManager.Stop(caRootPruningRoutineName)
// If the provider implements NeedsStop, we call Stop to perform any shutdown actions.
provider, _ := s.caManager.getCAProvider()
if provider != nil {
if needsStop, ok := provider.(ca.NeedsStop); ok {
needsStop.Stop()
} }
rootCert, err := connect.ParseCert(pemValue)
if err != nil {
return nil, fmt.Errorf("error parsing root cert: %v", err)
} }
keyType, keyBits, err := connect.KeyInfoFromCert(rootCert)
if err != nil {
return nil, fmt.Errorf("error extracting root key info: %v", err)
}
return &structs.CARoot{
ID: id,
Name: fmt.Sprintf("%s CA Root Cert", strings.Title(provider)),
SerialNumber: rootCert.SerialNumber.Uint64(),
SigningKeyID: connect.EncodeSigningKeyID(rootCert.SubjectKeyId),
ExternalTrustDomain: clusterID,
NotBefore: rootCert.NotBefore,
NotAfter: rootCert.NotAfter,
RootCert: pemValue,
PrivateKeyType: keyType,
PrivateKeyBits: keyBits,
Active: true,
}, nil
} }
// createProvider returns a connect CA provider from the given config. // createProvider returns a connect CA provider from the given config.
@ -131,483 +87,6 @@ func (s *Server) createCAProvider(conf *structs.CAConfiguration) (ca.Provider, e
return p, nil return p, nil
} }
// getCAProvider is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
func (s *Server) getCAProvider() (ca.Provider, *structs.CARoot) {
retries := 0
var result ca.Provider
var resultRoot *structs.CARoot
for result == nil {
s.caProviderLock.RLock()
result = s.caProvider
resultRoot = s.caProviderRoot
s.caProviderLock.RUnlock()
// In cases where an agent is started with managed proxies, we may ask
// for the provider before establishLeadership completes. If we're the
// leader, then wait and get the provider again
if result == nil && s.IsLeader() && retries < 10 {
retries++
time.Sleep(50 * time.Millisecond)
continue
}
break
}
return result, resultRoot
}
// setCAProvider is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
func (s *Server) setCAProvider(newProvider ca.Provider, root *structs.CARoot) {
s.caProviderLock.Lock()
defer s.caProviderLock.Unlock()
s.caProvider = newProvider
s.caProviderRoot = root
}
// initializeCA sets up the CA provider when gaining leadership, either bootstrapping
// the CA if this is the primary DC or making a remote RPC for intermediate signing
// if this is a secondary DC.
func (s *Server) initializeCA() error {
connectLogger := s.loggers.Named(logging.Connect)
// Bail if connect isn't enabled.
if !s.config.ConnectEnabled {
return nil
}
// Initialize the provider based on the current config.
conf, err := s.initializeCAConfig()
if err != nil {
return err
}
provider, err := s.createCAProvider(conf)
if err != nil {
return err
}
s.caProviderReconfigurationLock.Lock()
defer s.caProviderReconfigurationLock.Unlock()
s.setCAProvider(provider, nil)
// If this isn't the primary DC, run the secondary DC routine if the primary has already been upgraded to at least 1.6.0
if s.config.PrimaryDatacenter != s.config.Datacenter {
versionOk, foundPrimary := ServersInDCMeetMinimumVersion(s, s.config.PrimaryDatacenter, minMultiDCConnectVersion)
if !foundPrimary {
connectLogger.Warn("primary datacenter is configured but unreachable - deferring initialization of the secondary datacenter CA")
// return nil because we will initialize the secondary CA later
return nil
} else if !versionOk {
// return nil because we will initialize the secondary CA later
connectLogger.Warn("servers in the primary datacenter are not at least at the minimum version - deferring initialization of the secondary datacenter CA",
"min_version", minMultiDCConnectVersion.String(),
)
return nil
}
// Get the root CA to see if we need to refresh our intermediate.
args := structs.DCSpecificRequest{
Datacenter: s.config.PrimaryDatacenter,
}
var roots structs.IndexedCARoots
if err := s.forwardDC("ConnectCA.Roots", s.config.PrimaryDatacenter, &args, &roots); err != nil {
return err
}
// Configure the CA provider and initialize the intermediate certificate if necessary.
if err := s.initializeSecondaryProvider(provider, roots); err != nil {
return fmt.Errorf("error configuring provider: %v", err)
}
if err := s.initializeSecondaryCA(provider, roots); err != nil {
return err
}
connectLogger.Info("initialized secondary datacenter CA with provider", "provider", conf.Provider)
return nil
}
return s.initializeRootCA(provider, conf)
}
// initializeRootCA runs the initialization logic for a root CA.
// It is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
func (s *Server) initializeRootCA(provider ca.Provider, conf *structs.CAConfiguration) error {
connectLogger := s.loggers.Named(logging.Connect)
pCfg := ca.ProviderConfig{
ClusterID: conf.ClusterID,
Datacenter: s.config.Datacenter,
IsPrimary: true,
RawConfig: conf.Config,
State: conf.State,
}
if err := provider.Configure(pCfg); err != nil {
return fmt.Errorf("error configuring provider: %v", err)
}
if err := provider.GenerateRoot(); err != nil {
return fmt.Errorf("error generating CA root certificate: %v", err)
}
// Get the active root cert from the CA
rootPEM, err := provider.ActiveRoot()
if err != nil {
return fmt.Errorf("error getting root cert: %v", err)
}
rootCA, err := parseCARoot(rootPEM, conf.Provider, conf.ClusterID)
if err != nil {
return err
}
// Also create the intermediate CA, which is the one that actually signs leaf certs
interPEM, err := provider.GenerateIntermediate()
if err != nil {
return fmt.Errorf("error generating intermediate cert: %v", err)
}
_, err = connect.ParseCert(interPEM)
if err != nil {
return fmt.Errorf("error getting intermediate cert: %v", err)
}
// If the provider has state to persist and it's changed or new then update
// CAConfig.
pState, err := provider.State()
if err != nil {
return fmt.Errorf("error getting provider state: %v", err)
}
if !reflect.DeepEqual(conf.State, pState) {
// Update the CAConfig in raft to persist the provider state
conf.State = pState
req := structs.CARequest{
Op: structs.CAOpSetConfig,
Config: conf,
}
if _, err = s.raftApply(structs.ConnectCARequestType, req); err != nil {
return fmt.Errorf("error persisting provider state: %v", err)
}
}
// Check if the CA root is already initialized and exit if it is,
// adding on any existing intermediate certs since they aren't directly
// tied to the provider.
// Every change to the CA after this initial bootstrapping should
// be done through the rotation process.
state := s.fsm.State()
_, activeRoot, err := state.CARootActive(nil)
if err != nil {
return err
}
if activeRoot != nil {
// This state shouldn't be possible to get into because we update the root and
// CA config in the same FSM operation.
if activeRoot.ID != rootCA.ID {
return fmt.Errorf("stored CA root %q is not the active root (%s)", rootCA.ID, activeRoot.ID)
}
rootCA.IntermediateCerts = activeRoot.IntermediateCerts
s.setCAProvider(provider, rootCA)
return nil
}
// Get the highest index
idx, _, err := state.CARoots(nil)
if err != nil {
return err
}
// Store the root cert in raft
resp, err := s.raftApply(structs.ConnectCARequestType, &structs.CARequest{
Op: structs.CAOpSetRoots,
Index: idx,
Roots: []*structs.CARoot{rootCA},
})
if err != nil {
connectLogger.Error("Raft apply failed", "error", err)
return err
}
if respErr, ok := resp.(error); ok {
return respErr
}
s.setCAProvider(provider, rootCA)
connectLogger.Info("initialized primary datacenter CA with provider", "provider", conf.Provider)
return nil
}
// initializeSecondaryCA runs the routine for generating an intermediate CA CSR and getting
// it signed by the primary DC if the root CA of the primary DC has changed since the last
// intermediate.
// It is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
func (s *Server) initializeSecondaryCA(provider ca.Provider, primaryRoots structs.IndexedCARoots) error {
activeIntermediate, err := provider.ActiveIntermediate()
if err != nil {
return err
}
var (
storedRootID string
expectedSigningKeyID string
currentSigningKeyID string
activeSecondaryRoot *structs.CARoot
)
if activeIntermediate != "" {
// In the event that we already have an intermediate, we must have
// already replicated some primary root information locally, so check
// to see if we're up to date by fetching the rootID and the
// signingKeyID used in the secondary.
//
// Note that for the same rootID the primary representation of the root
// will have a different SigningKeyID field than the secondary
// representation of the same root. This is because it's derived from
// the intermediate which is different in all datacenters.
storedRoot, err := provider.ActiveRoot()
if err != nil {
return err
}
storedRootID, err = connect.CalculateCertFingerprint(storedRoot)
if err != nil {
return fmt.Errorf("error parsing root fingerprint: %v, %#v", err, storedRoot)
}
intermediateCert, err := connect.ParseCert(activeIntermediate)
if err != nil {
return fmt.Errorf("error parsing active intermediate cert: %v", err)
}
expectedSigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId)
// This will fetch the secondary's exact current representation of the
// active root. Note that this data should only be used if the IDs
// match, otherwise it's out of date and should be regenerated.
_, activeSecondaryRoot, err = s.fsm.State().CARootActive(nil)
if err != nil {
return err
}
if activeSecondaryRoot != nil {
currentSigningKeyID = activeSecondaryRoot.SigningKeyID
}
}
// Determine which of the provided PRIMARY representations of roots is the
// active one. We'll use this as a template to generate any new root
// representations meant for this secondary.
var newActiveRoot *structs.CARoot
for _, root := range primaryRoots.Roots {
if root.ID == primaryRoots.ActiveRootID && root.Active {
newActiveRoot = root
break
}
}
if newActiveRoot == nil {
return fmt.Errorf("primary datacenter does not have an active root CA for Connect")
}
// Get a signed intermediate from the primary DC if the provider
// hasn't been initialized yet or if the primary's root has changed.
needsNewIntermediate := false
if activeIntermediate == "" || storedRootID != primaryRoots.ActiveRootID {
needsNewIntermediate = true
}
// Also we take this opportunity to correct an incorrectly persisted SigningKeyID
// in secondary datacenters (see PR-6513).
if expectedSigningKeyID != "" && currentSigningKeyID != expectedSigningKeyID {
needsNewIntermediate = true
}
newIntermediate := false
if needsNewIntermediate {
if err := s.getIntermediateCASigned(provider, newActiveRoot); err != nil {
return err
}
newIntermediate = true
} else {
// Discard the primary's representation since our local one is
// sufficiently up to date.
newActiveRoot = activeSecondaryRoot
}
// Update the roots list in the state store if there's a new active root.
state := s.fsm.State()
_, activeRoot, err := state.CARootActive(nil)
if err != nil {
return err
}
if activeRoot == nil || activeRoot.ID != newActiveRoot.ID || newIntermediate {
if err := s.persistNewRoot(provider, newActiveRoot); err != nil {
return err
}
}
s.setCAProvider(provider, newActiveRoot)
return nil
}
// persistNewRoot is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
func (s *Server) persistNewRoot(provider ca.Provider, newActiveRoot *structs.CARoot) error {
connectLogger := s.loggers.Named(logging.Connect)
state := s.fsm.State()
idx, oldRoots, err := state.CARoots(nil)
if err != nil {
return err
}
_, config, err := state.CAConfig(nil)
if err != nil {
return err
}
if config == nil {
return fmt.Errorf("local CA not initialized yet")
}
newConf := *config
newConf.ClusterID = newActiveRoot.ExternalTrustDomain
// Persist any state the provider needs us to
newConf.State, err = provider.State()
if err != nil {
return fmt.Errorf("error getting provider state: %v", err)
}
// Copy the root list and append the new active root, updating the old root
// with the time it was rotated out.
var newRoots structs.CARoots
for _, r := range oldRoots {
newRoot := *r
if newRoot.Active {
newRoot.Active = false
newRoot.RotatedOutAt = time.Now()
}
if newRoot.ExternalTrustDomain == "" {
newRoot.ExternalTrustDomain = config.ClusterID
}
newRoots = append(newRoots, &newRoot)
}
newRoots = append(newRoots, newActiveRoot)
args := &structs.CARequest{
Op: structs.CAOpSetRootsAndConfig,
Index: idx,
Roots: newRoots,
Config: &newConf,
}
resp, err := s.raftApply(structs.ConnectCARequestType, &args)
if err != nil {
return err
}
if respErr, ok := resp.(error); ok {
return respErr
}
if respOk, ok := resp.(bool); ok && !respOk {
return fmt.Errorf("could not atomically update roots and config")
}
connectLogger.Info("updated root certificates from primary datacenter")
return nil
}
// getIntermediateCAPrimary regenerates the intermediate cert in the primary datacenter.
// This is only run for CAs that require an intermediary in the primary DC, such as Vault.
// This function is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
func (s *Server) getIntermediateCAPrimary(provider ca.Provider, newActiveRoot *structs.CARoot) error {
connectLogger := s.loggers.Named(logging.Connect)
// Generate and sign an intermediate cert using the root CA.
intermediatePEM, err := provider.GenerateIntermediate()
if err != nil {
return fmt.Errorf("error generating new intermediate cert: %v", err)
}
intermediateCert, err := connect.ParseCert(intermediatePEM)
if err != nil {
return fmt.Errorf("error parsing intermediate cert: %v", err)
}
// Append the new intermediate to our local active root entry. This is
// where the root representations start to diverge.
newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediatePEM)
newActiveRoot.SigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId)
connectLogger.Info("generated new intermediate certificate for primary datacenter")
return nil
}
// getIntermediateCASigned is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
func (s *Server) getIntermediateCASigned(provider ca.Provider, newActiveRoot *structs.CARoot) error {
connectLogger := s.loggers.Named(logging.Connect)
csr, err := provider.GenerateIntermediateCSR()
if err != nil {
return err
}
var intermediatePEM string
if err := s.forwardDC("ConnectCA.SignIntermediate", s.config.PrimaryDatacenter, s.generateCASignRequest(csr), &intermediatePEM); err != nil {
// this is a failure in the primary and shouldn't be capable of erroring out our establishing leadership
connectLogger.Warn("Primary datacenter refused to sign our intermediate CA certificate", "error", err)
return nil
}
if err := provider.SetIntermediate(intermediatePEM, newActiveRoot.RootCert); err != nil {
return fmt.Errorf("Failed to set the intermediate certificate with the CA provider: %v", err)
}
intermediateCert, err := connect.ParseCert(intermediatePEM)
if err != nil {
return fmt.Errorf("error parsing intermediate cert: %v", err)
}
// Append the new intermediate to our local active root entry. This is
// where the root representations start to diverge.
newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediatePEM)
newActiveRoot.SigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId)
connectLogger.Info("received new intermediate certificate from primary datacenter")
return nil
}
func (s *Server) generateCASignRequest(csr string) *structs.CASignRequest {
return &structs.CASignRequest{
Datacenter: s.config.PrimaryDatacenter,
CSR: csr,
WriteRequest: structs.WriteRequest{Token: s.tokens.ReplicationToken()},
}
}
// startConnectLeader starts multi-dc connect leader routines.
func (s *Server) startConnectLeader() {
// Start the Connect secondary DC actions if enabled.
if s.config.ConnectEnabled && s.config.Datacenter != s.config.PrimaryDatacenter {
s.leaderRoutineManager.Start(secondaryCARootWatchRoutineName, s.secondaryCARootWatch)
s.leaderRoutineManager.Start(intentionReplicationRoutineName, s.replicateIntentions)
}
s.leaderRoutineManager.Start(intermediateCertRenewWatchRoutineName, s.intermediateCertRenewalWatch)
s.leaderRoutineManager.Start(caRootPruningRoutineName, s.runCARootPruning)
}
// stopConnectLeader stops connect specific leader functions.
func (s *Server) stopConnectLeader() {
s.leaderRoutineManager.Stop(secondaryCARootWatchRoutineName)
s.leaderRoutineManager.Stop(intentionReplicationRoutineName)
s.leaderRoutineManager.Stop(intermediateCertRenewWatchRoutineName)
s.leaderRoutineManager.Stop(caRootPruningRoutineName)
// If the provider implements NeedsStop, we call Stop to perform any shutdown actions.
s.caProviderReconfigurationLock.Lock()
defer s.caProviderReconfigurationLock.Unlock()
provider, _ := s.getCAProvider()
if provider != nil {
if needsStop, ok := provider.(ca.NeedsStop); ok {
needsStop.Stop()
}
}
}
func (s *Server) runCARootPruning(ctx context.Context) error { func (s *Server) runCARootPruning(ctx context.Context) error {
ticker := time.NewTicker(caRootPruneInterval) ticker := time.NewTicker(caRootPruneInterval)
defer ticker.Stop() defer ticker.Stop()
@ -677,155 +156,6 @@ func (s *Server) pruneCARoots() error {
return nil return nil
} }
// intermediateCertRenewalWatch checks the intermediate cert for
// expiration. As soon as more than half the time a cert is valid has passed,
// it will try to renew it.
func (s *Server) intermediateCertRenewalWatch(ctx context.Context) error {
connectLogger := s.loggers.Named(logging.Connect)
isPrimary := s.config.Datacenter == s.config.PrimaryDatacenter
for {
select {
case <-ctx.Done():
return nil
case <-time.After(structs.IntermediateCertRenewInterval):
retryLoopBackoffAbortOnSuccess(ctx, func() error {
s.caProviderReconfigurationLock.Lock()
defer s.caProviderReconfigurationLock.Unlock()
provider, _ := s.getCAProvider()
if provider == nil {
// this happens when leadership is being revoked and this go routine will be stopped
return nil
}
// If this isn't the primary, make sure the CA has been initialized.
if !isPrimary && !s.configuredSecondaryCA() {
return fmt.Errorf("secondary CA is not yet configured.")
}
state := s.fsm.State()
_, activeRoot, err := state.CARootActive(nil)
if err != nil {
return err
}
// If this is the primary, check if this is a provider that uses an intermediate cert. If
// it isn't, we don't need to check for a renewal.
if isPrimary {
_, config, err := state.CAConfig(nil)
if err != nil {
return err
}
if _, ok := ca.PrimaryIntermediateProviders[config.Provider]; !ok {
return nil
}
}
activeIntermediate, err := provider.ActiveIntermediate()
if err != nil {
return err
}
if activeIntermediate == "" {
return fmt.Errorf("datacenter doesn't have an active intermediate.")
}
intermediateCert, err := connect.ParseCert(activeIntermediate)
if err != nil {
return fmt.Errorf("error parsing active intermediate cert: %v", err)
}
if lessThanHalfTimePassed(time.Now(), intermediateCert.NotBefore.Add(ca.CertificateTimeDriftBuffer),
intermediateCert.NotAfter) {
return nil
}
renewalFunc := s.getIntermediateCAPrimary
if !isPrimary {
renewalFunc = s.getIntermediateCASigned
}
if err := renewalFunc(provider, activeRoot); err != nil {
return err
}
if err := s.persistNewRoot(provider, activeRoot); err != nil {
return err
}
s.setCAProvider(provider, activeRoot)
return nil
}, func(err error) {
connectLogger.Error("error renewing intermediate certs",
"routine", intermediateCertRenewWatchRoutineName,
"error", err,
)
})
}
}
}
// secondaryCARootWatch maintains a blocking query to the primary datacenter's
// ConnectCA.Roots endpoint to monitor when it needs to request a new signed
// intermediate certificate.
func (s *Server) secondaryCARootWatch(ctx context.Context) error {
connectLogger := s.loggers.Named(logging.Connect)
args := structs.DCSpecificRequest{
Datacenter: s.config.PrimaryDatacenter,
QueryOptions: structs.QueryOptions{
// the maximum time the primary roots watch query can block before returning
MaxQueryTime: s.config.MaxQueryTime,
},
}
connectLogger.Debug("starting Connect CA root replication from primary datacenter", "primary", s.config.PrimaryDatacenter)
retryLoopBackoff(ctx, func() error {
var roots structs.IndexedCARoots
if err := s.forwardDC("ConnectCA.Roots", s.config.PrimaryDatacenter, &args, &roots); err != nil {
return fmt.Errorf("Error retrieving the primary datacenter's roots: %v", err)
}
// Check to see if the primary has been upgraded in case we're waiting to switch to
// secondary mode.
provider, _ := s.getCAProvider()
if provider == nil {
// this happens when leadership is being revoked and this go routine will be stopped
return nil
}
if !s.configuredSecondaryCA() {
versionOk, primaryFound := ServersInDCMeetMinimumVersion(s, s.config.PrimaryDatacenter, minMultiDCConnectVersion)
if !primaryFound {
return fmt.Errorf("Primary datacenter is unreachable - deferring secondary CA initialization")
}
if versionOk {
if err := s.initializeSecondaryProvider(provider, roots); err != nil {
return fmt.Errorf("Failed to initialize secondary CA provider: %v", err)
}
}
}
// Run the secondary CA init routine to see if we need to request a new
// intermediate.
if s.configuredSecondaryCA() {
if err := s.initializeSecondaryCA(provider, roots); err != nil {
return fmt.Errorf("Failed to initialize the secondary CA: %v", err)
}
}
args.QueryOptions.MinQueryIndex = nextIndexVal(args.QueryOptions.MinQueryIndex, roots.QueryMeta.Index)
return nil
}, func(err error) {
connectLogger.Error("CA root replication failed, will retry",
"routine", secondaryCARootWatchRoutineName,
"error", err,
)
})
return nil
}
// replicateIntentions executes a blocking query to the primary datacenter to replicate // replicateIntentions executes a blocking query to the primary datacenter to replicate
// the intentions there to the local state. // the intentions there to the local state.
func (s *Server) replicateIntentions(ctx context.Context) error { func (s *Server) replicateIntentions(ctx context.Context) error {
@ -1014,46 +344,6 @@ func nextIndexVal(prevIdx, idx uint64) uint64 {
return idx return idx
} }
// initializeSecondaryProvider configures the given provider for a secondary, non-root datacenter.
// It is being called while holding caProviderReconfigurationLock which means
// it must never take that lock itself or call anything that does.
func (s *Server) initializeSecondaryProvider(provider ca.Provider, roots structs.IndexedCARoots) error {
if roots.TrustDomain == "" {
return fmt.Errorf("trust domain from primary datacenter is not initialized")
}
clusterID := strings.Split(roots.TrustDomain, ".")[0]
_, conf, err := s.fsm.State().CAConfig(nil)
if err != nil {
return err
}
pCfg := ca.ProviderConfig{
ClusterID: clusterID,
Datacenter: s.config.Datacenter,
IsPrimary: false,
RawConfig: conf.Config,
State: conf.State,
}
if err := provider.Configure(pCfg); err != nil {
return fmt.Errorf("error configuring provider: %v", err)
}
s.actingSecondaryLock.Lock()
s.actingSecondaryCA = true
s.actingSecondaryLock.Unlock()
return nil
}
// configuredSecondaryCA is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
func (s *Server) configuredSecondaryCA() bool {
s.actingSecondaryLock.RLock()
defer s.actingSecondaryLock.RUnlock()
return s.actingSecondaryCA
}
// halfTime returns a duration that is half the time between notBefore and // halfTime returns a duration that is half the time between notBefore and
// notAfter. // notAfter.
func halfTime(notBefore, notAfter time.Time) time.Duration { func halfTime(notBefore, notAfter time.Time) time.Duration {
@ -1069,3 +359,11 @@ func lessThanHalfTimePassed(now, notBefore, notAfter time.Time) bool {
t := notBefore.Add(halfTime(notBefore, notAfter)) t := notBefore.Add(halfTime(notBefore, notAfter))
return t.Sub(now) > 0 return t.Sub(now) > 0
} }
func (s *Server) generateCASignRequest(csr string) *structs.CASignRequest {
return &structs.CASignRequest{
Datacenter: s.config.PrimaryDatacenter,
CSR: csr,
WriteRequest: structs.WriteRequest{Token: s.tokens.ReplicationToken()},
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,285 @@
package consul
import (
"context"
"crypto/x509"
"errors"
"fmt"
"testing"
"time"
"github.com/hashicorp/consul/agent/connect"
ca "github.com/hashicorp/consul/agent/connect/ca"
"github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/metadata"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/sdk/testutil"
"github.com/hashicorp/go-version"
"github.com/hashicorp/serf/serf"
"github.com/stretchr/testify/require"
)
// TODO(kyhavlov): replace with t.Deadline()
const CATestTimeout = 7 * time.Second
type mockCAServerDelegate struct {
t *testing.T
config *Config
store *state.Store
primaryRoot *structs.CARoot
callbackCh chan string
}
func NewMockCAServerDelegate(t *testing.T, config *Config) *mockCAServerDelegate {
store, err := state.NewStateStore(nil)
require.NoError(t, err)
delegate := &mockCAServerDelegate{
t: t,
config: config,
store: store,
primaryRoot: connect.TestCAWithTTL(t, nil, 1*time.Second),
callbackCh: make(chan string, 0),
}
delegate.store.CASetConfig(1, testCAConfig())
return delegate
}
func (m *mockCAServerDelegate) State() *state.Store {
return m.store
}
func (m *mockCAServerDelegate) IsLeader() bool {
return true
}
func (m *mockCAServerDelegate) CheckServers(datacenter string, fn func(*metadata.Server) bool) {
ver, _ := version.NewVersion("1.6.0")
fn(&metadata.Server{
Status: serf.StatusAlive,
Build: *ver,
})
}
func (m *mockCAServerDelegate) ApplyCARequest(req *structs.CARequest) (interface{}, error) {
return ca.ApplyCARequestToStore(m.store, req)
}
func (m *mockCAServerDelegate) createCAProvider(conf *structs.CAConfiguration) (ca.Provider, error) {
return &mockCAProvider{
callbackCh: m.callbackCh,
rootPEM: m.primaryRoot.RootCert,
}, nil
}
func (m *mockCAServerDelegate) forwardDC(method, dc string, args interface{}, reply interface{}) error {
switch method {
case "ConnectCA.Roots":
roots := reply.(*structs.IndexedCARoots)
roots.TrustDomain = connect.TestClusterID
roots.Roots = []*structs.CARoot{m.primaryRoot}
roots.ActiveRootID = m.primaryRoot.ID
case "ConnectCA.SignIntermediate":
r := reply.(*string)
*r = m.primaryRoot.RootCert
default:
return fmt.Errorf("received call to unsupported method %q", method)
}
m.callbackCh <- fmt.Sprintf("forwardDC/%s", method)
return nil
}
func (m *mockCAServerDelegate) generateCASignRequest(csr string) *structs.CASignRequest {
return &structs.CASignRequest{
Datacenter: m.config.PrimaryDatacenter,
CSR: csr,
}
}
func (m *mockCAServerDelegate) raftApply(t structs.MessageType, msg interface{}) (interface{}, error) {
if t == structs.ConnectCARequestType {
req := msg.(*structs.CARequest)
act, err := m.store.CARootSetCAS(1, req.Index, req.Roots)
require.NoError(m.t, err)
require.True(m.t, act)
act, err = m.store.CACheckAndSetConfig(1, req.Config.ModifyIndex, req.Config)
require.NoError(m.t, err)
require.True(m.t, act)
} else {
return nil, fmt.Errorf("got invalid MessageType %v", t)
}
m.callbackCh <- fmt.Sprintf("raftApply/%v", t)
return nil, nil
}
// mockCAProvider mocks an empty provider implementation with a channel in order to coordinate
// waiting for certain methods to be called.
type mockCAProvider struct {
callbackCh chan string
rootPEM string
}
func (m *mockCAProvider) Configure(cfg ca.ProviderConfig) error { return nil }
func (m *mockCAProvider) State() (map[string]string, error) { return nil, nil }
func (m *mockCAProvider) GenerateRoot() error { return nil }
func (m *mockCAProvider) ActiveRoot() (string, error) { return m.rootPEM, nil }
func (m *mockCAProvider) GenerateIntermediateCSR() (string, error) {
m.callbackCh <- "provider/GenerateIntermediateCSR"
return "", nil
}
func (m *mockCAProvider) SetIntermediate(intermediatePEM, rootPEM string) error {
m.callbackCh <- "provider/SetIntermediate"
return nil
}
func (m *mockCAProvider) ActiveIntermediate() (string, error) { return m.rootPEM, nil }
func (m *mockCAProvider) GenerateIntermediate() (string, error) { return "", nil }
func (m *mockCAProvider) Sign(*x509.CertificateRequest) (string, error) { return "", nil }
func (m *mockCAProvider) SignIntermediate(*x509.CertificateRequest) (string, error) { return "", nil }
func (m *mockCAProvider) CrossSignCA(*x509.Certificate) (string, error) { return "", nil }
func (m *mockCAProvider) SupportsCrossSigning() (bool, error) { return false, nil }
func (m *mockCAProvider) Cleanup() error { return nil }
func waitForCh(t *testing.T, ch chan string, expected string) {
select {
case op := <-ch:
if op != expected {
t.Fatalf("got unexpected op %q, wanted %q", op, expected)
}
case <-time.After(CATestTimeout):
t.Fatalf("never got op %q", expected)
}
}
func waitForEmptyCh(t *testing.T, ch chan string) {
select {
case op := <-ch:
t.Fatalf("got unexpected op %q", op)
case <-time.After(1 * time.Second):
}
}
func testCAConfig() *structs.CAConfiguration {
return &structs.CAConfiguration{
ClusterID: connect.TestClusterID,
Provider: "mock",
Config: map[string]interface{}{
"LeafCertTTL": "72h",
"IntermediateCertTTL": "2160h",
},
}
}
// initTestManager initializes a CAManager with a mockCAServerDelegate, consuming
// the ops that come through the channels and returning when initialization has finished.
func initTestManager(t *testing.T, manager *CAManager, delegate *mockCAServerDelegate) {
initCh := make(chan struct{})
go func() {
require.NoError(t, manager.InitializeCA())
close(initCh)
}()
for i := 0; i < 5; i++ {
select {
case <-delegate.callbackCh:
case <-time.After(CATestTimeout):
t.Fatal("failed waiting for initialization events")
}
}
select {
case <-initCh:
case <-time.After(CATestTimeout):
t.Fatal("failed waiting for initialization")
}
}
func TestCAManager_Initialize(t *testing.T) {
conf := DefaultConfig()
conf.ConnectEnabled = true
conf.PrimaryDatacenter = "dc1"
conf.Datacenter = "dc2"
delegate := NewMockCAServerDelegate(t, conf)
manager := NewCAManager(delegate, testutil.Logger(t), conf)
// Call InitializeCA and then confirm the RPCs and provider calls
// happen in the expected order.
require.EqualValues(t, CAStateUninitialized, manager.state)
errCh := make(chan error)
go func() {
errCh <- manager.InitializeCA()
}()
waitForCh(t, delegate.callbackCh, "forwardDC/ConnectCA.Roots")
require.EqualValues(t, CAStateInitializing, manager.state)
waitForCh(t, delegate.callbackCh, "provider/GenerateIntermediateCSR")
waitForCh(t, delegate.callbackCh, "forwardDC/ConnectCA.SignIntermediate")
waitForCh(t, delegate.callbackCh, "provider/SetIntermediate")
waitForCh(t, delegate.callbackCh, fmt.Sprintf("raftApply/%v", structs.ConnectCARequestType))
waitForEmptyCh(t, delegate.callbackCh)
// Make sure the InitializeCA call returned successfully.
select {
case err := <-errCh:
require.NoError(t, err)
case <-time.After(CATestTimeout):
t.Fatal("never got result from errCh")
}
require.EqualValues(t, CAStateReady, manager.state)
}
func TestCAManager_UpdateConfigWhileRenewIntermediate(t *testing.T) {
// No parallel execution because we change globals
// Set the interval and drift buffer low for renewing the cert.
origInterval := structs.IntermediateCertRenewInterval
origDriftBuffer := ca.CertificateTimeDriftBuffer
defer func() {
structs.IntermediateCertRenewInterval = origInterval
ca.CertificateTimeDriftBuffer = origDriftBuffer
}()
structs.IntermediateCertRenewInterval = time.Millisecond
ca.CertificateTimeDriftBuffer = 0
conf := DefaultConfig()
conf.ConnectEnabled = true
conf.PrimaryDatacenter = "dc1"
conf.Datacenter = "dc2"
delegate := NewMockCAServerDelegate(t, conf)
manager := NewCAManager(delegate, testutil.Logger(t), conf)
initTestManager(t, manager, delegate)
// Wait half the TTL for the cert to need renewing.
time.Sleep(500 * time.Millisecond)
// Call RenewIntermediate and then confirm the RPCs and provider calls
// happen in the expected order.
errCh := make(chan error)
go func() {
errCh <- manager.RenewIntermediate(context.TODO(), false)
}()
waitForCh(t, delegate.callbackCh, "provider/GenerateIntermediateCSR")
// Call UpdateConfiguration while RenewIntermediate is still in-flight to
// make sure we get an error about the state being occupied.
go func() {
require.EqualValues(t, CAStateRenewIntermediate, manager.state)
require.Error(t, errors.New("already in state"), manager.UpdateConfiguration(&structs.CARequest{}))
}()
waitForCh(t, delegate.callbackCh, "forwardDC/ConnectCA.SignIntermediate")
waitForCh(t, delegate.callbackCh, "provider/SetIntermediate")
waitForCh(t, delegate.callbackCh, fmt.Sprintf("raftApply/%v", structs.ConnectCARequestType))
waitForEmptyCh(t, delegate.callbackCh)
// Make sure the RenewIntermediate call returned successfully.
select {
case err := <-errCh:
require.NoError(t, err)
case <-time.After(CATestTimeout):
t.Fatal("never got result from errCh")
}
require.EqualValues(t, CAStateReady, manager.state)
}

View File

@ -81,11 +81,11 @@ func TestLeader_SecondaryCA_Initialize(t *testing.T) {
s2.tokens.UpdateAgentToken(masterToken, token.TokenSourceConfig) s2.tokens.UpdateAgentToken(masterToken, token.TokenSourceConfig)
s2.tokens.UpdateReplicationToken(masterToken, token.TokenSourceConfig) s2.tokens.UpdateReplicationToken(masterToken, token.TokenSourceConfig)
testrpc.WaitForLeader(t, s2.RPC, "secondary")
// Create the WAN link // Create the WAN link
joinWAN(t, s2, s1) joinWAN(t, s2, s1)
testrpc.WaitForLeader(t, s2.RPC, "secondary")
waitForNewACLs(t, s1) waitForNewACLs(t, s1)
waitForNewACLs(t, s2) waitForNewACLs(t, s2)
@ -176,9 +176,7 @@ func waitForActiveCARoot(t *testing.T, srv *Server, expect *structs.CARoot) {
} }
func getCAProviderWithLock(s *Server) (ca.Provider, *structs.CARoot) { func getCAProviderWithLock(s *Server) (ca.Provider, *structs.CARoot) {
s.caProviderReconfigurationLock.Lock() return s.caManager.getCAProvider()
defer s.caProviderReconfigurationLock.Unlock()
return s.getCAProvider()
} }
func TestLeader_Vault_PrimaryCA_IntermediateRenew(t *testing.T) { func TestLeader_Vault_PrimaryCA_IntermediateRenew(t *testing.T) {
@ -703,22 +701,6 @@ func TestLeader_SecondaryCA_TransitionFromPrimary(t *testing.T) {
require.NoError(t, s2.RPC("ConnectCA.Roots", &args, &dc2PrimaryRoots)) require.NoError(t, s2.RPC("ConnectCA.Roots", &args, &dc2PrimaryRoots))
require.Len(t, dc2PrimaryRoots.Roots, 1) require.Len(t, dc2PrimaryRoots.Roots, 1)
// Set the ExternalTrustDomain to a blank string to simulate an old version (pre-1.4.0)
// it's fine to change the roots struct directly here because the RPC endpoint already
// makes a copy to return.
dc2PrimaryRoots.Roots[0].ExternalTrustDomain = ""
rootSetArgs := structs.CARequest{
Op: structs.CAOpSetRoots,
Datacenter: "dc2",
Index: dc2PrimaryRoots.Index,
Roots: dc2PrimaryRoots.Roots,
}
resp, err := s2.raftApply(structs.ConnectCARequestType, rootSetArgs)
require.NoError(t, err)
if respErr, ok := resp.(error); ok {
t.Fatal(respErr)
}
// Shutdown s2 and restart it with the dc1 as the primary // Shutdown s2 and restart it with the dc1 as the primary
s2.Shutdown() s2.Shutdown()
dir3, s3 := testServerWithConfig(t, func(c *Config) { dir3, s3 := testServerWithConfig(t, func(c *Config) {

View File

@ -19,7 +19,6 @@ import (
metrics "github.com/armon/go-metrics" metrics "github.com/armon/go-metrics"
"github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/acl"
ca "github.com/hashicorp/consul/agent/connect/ca"
"github.com/hashicorp/consul/agent/consul/authmethod" "github.com/hashicorp/consul/agent/consul/authmethod"
"github.com/hashicorp/consul/agent/consul/authmethod/ssoauth" "github.com/hashicorp/consul/agent/consul/authmethod/ssoauth"
"github.com/hashicorp/consul/agent/consul/autopilot" "github.com/hashicorp/consul/agent/consul/autopilot"
@ -139,20 +138,8 @@ type Server struct {
// autopilot is the Autopilot instance for this server. // autopilot is the Autopilot instance for this server.
autopilot *autopilot.Autopilot autopilot *autopilot.Autopilot
// autopilotWaitGroup is used to block until Autopilot shuts down. // caManager is used to synchronize CA operations across the leader and RPC functions.
autopilotWaitGroup sync.WaitGroup caManager *CAManager
// caProviderReconfigurationLock guards the provider reconfiguration.
caProviderReconfigurationLock sync.Mutex
// caProvider is the current CA provider in use for Connect. This is
// only non-nil when we are the leader.
caProvider ca.Provider
// caProviderRoot is the CARoot that was stored along with the ca.Provider
// active. It's only updated in lock-step with the caProvider. This prevents
// races between state updates to active roots and the fetch of the provider
// instance.
caProviderRoot *structs.CARoot
caProviderLock sync.RWMutex
// rate limiter to use when signing leaf certificates // rate limiter to use when signing leaf certificates
caLeafLimiter connectSignRateLimiter caLeafLimiter connectSignRateLimiter
@ -302,10 +289,6 @@ type Server struct {
shutdownCh chan struct{} shutdownCh chan struct{}
shutdownLock sync.Mutex shutdownLock sync.Mutex
// State for whether this datacenter is acting as a secondary CA.
actingSecondaryCA bool
actingSecondaryLock sync.RWMutex
// Manager to handle starting/stopping go routines when establishing/revoking raft leadership // Manager to handle starting/stopping go routines when establishing/revoking raft leadership
leaderRoutineManager *LeaderRoutineManager leaderRoutineManager *LeaderRoutineManager
@ -497,6 +480,7 @@ func NewServer(config *Config, options ...ConsulOption) (*Server, error) {
return nil, fmt.Errorf("Failed to start Raft: %v", err) return nil, fmt.Errorf("Failed to start Raft: %v", err)
} }
s.caManager = NewCAManager(&caDelegateWithState{s}, s.loggers.Named(logging.Connect), s.config)
if s.config.ConnectEnabled && (s.config.AutoEncryptAllowTLS || s.config.AutoConfigAuthzEnabled) { if s.config.ConnectEnabled && (s.config.AutoEncryptAllowTLS || s.config.AutoConfigAuthzEnabled) {
go s.connectCARootsMonitor(&lib.StopChannelContext{StopCh: s.shutdownCh}) go s.connectCARootsMonitor(&lib.StopChannelContext{StopCh: s.shutdownCh})
} }

View File

@ -132,7 +132,7 @@ func (s *Server) getCARoots(ws memdb.WatchSet, state *state.Store) (*structs.Ind
} }
func (s *Server) SignCertificate(csr *x509.CertificateRequest, spiffeID connect.CertURI) (*structs.IssuedCert, error) { func (s *Server) SignCertificate(csr *x509.CertificateRequest, spiffeID connect.CertURI) (*structs.IssuedCert, error) {
provider, caRoot := s.getCAProvider() provider, caRoot := s.caManager.getCAProvider()
if provider == nil { if provider == nil {
return nil, fmt.Errorf("internal error: CA provider is nil") return nil, fmt.Errorf("internal error: CA provider is nil")
} else if caRoot == nil { } else if caRoot == nil {

View File

@ -1504,7 +1504,7 @@ func TestServer_CALogging(t *testing.T) {
defer s1.Shutdown() defer s1.Shutdown()
testrpc.WaitForLeader(t, s1.RPC, "dc1") testrpc.WaitForLeader(t, s1.RPC, "dc1")
if _, ok := s1.caProvider.(ca.NeedsLogger); !ok { if _, ok := s1.caManager.provider.(ca.NeedsLogger); !ok {
t.Fatalf("provider does not implement NeedsLogger") t.Fatalf("provider does not implement NeedsLogger")
} }

View File

@ -151,7 +151,7 @@ type ACLServiceIdentity struct {
func (s *ACLServiceIdentity) Clone() *ACLServiceIdentity { func (s *ACLServiceIdentity) Clone() *ACLServiceIdentity {
s2 := *s s2 := *s
s2.Datacenters = cloneStringSlice(s.Datacenters) s2.Datacenters = CloneStringSlice(s.Datacenters)
return &s2 return &s2
} }
@ -666,7 +666,7 @@ func (t *ACLPolicy) UnmarshalJSON(data []byte) error {
func (p *ACLPolicy) Clone() *ACLPolicy { func (p *ACLPolicy) Clone() *ACLPolicy {
p2 := *p p2 := *p
p2.Datacenters = cloneStringSlice(p.Datacenters) p2.Datacenters = CloneStringSlice(p.Datacenters)
return &p2 return &p2
} }
@ -1460,7 +1460,7 @@ type ACLPolicyBatchDeleteRequest struct {
PolicyIDs []string PolicyIDs []string
} }
func cloneStringSlice(s []string) []string { func CloneStringSlice(s []string) []string {
if len(s) == 0 { if len(s) == 0 {
return nil return nil
} }

View File

@ -122,6 +122,16 @@ type CARoot struct {
RaftIndex RaftIndex
} }
func (c *CARoot) Clone() *CARoot {
if c == nil {
return nil
}
newCopy := *c
newCopy.IntermediateCerts = CloneStringSlice(c.IntermediateCerts)
return &newCopy
}
// CARoots is a list of CARoot structures. // CARoots is a list of CARoot structures.
type CARoots []*CARoot type CARoots []*CARoot