connect: connect CA Roots in secondary datacenters should use a SigningKeyID derived from their local intermediate (#6513)

This fixes an issue where leaf certificates issued in secondary
datacenters would be reissued very frequently (every ~20 seconds)
because the logic meant to detect root rotation was errantly triggering
because a hash of the ultimate root (in the primary) was being compared
against a hash of the local intermediate root (in the secondary) and
always failing.
This commit is contained in:
R.B. Boyer 2019-09-26 11:54:14 -05:00 committed by GitHub
parent 2274f64cab
commit c4b92d5534
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 456 additions and 19 deletions

View File

@ -14,6 +14,7 @@ import (
"os"
"reflect"
"sort"
"strconv"
"strings"
"testing"
"time"
@ -4868,15 +4869,209 @@ func TestAgentConnectCALeafCert_goodNotLocal(t *testing.T) {
}
}
func requireLeafValidUnderCA(t *testing.T, issued *structs.IssuedCert,
ca *structs.CARoot) {
func TestAgentConnectCALeafCert_secondaryDC_good(t *testing.T) {
t.Parallel()
assert := assert.New(t)
require := require.New(t)
a1 := NewTestAgent(t, t.Name()+"-dc1", `
datacenter = "dc1"
primary_datacenter = "dc1"
`)
defer a1.Shutdown()
testrpc.WaitForTestAgent(t, a1.RPC, "dc1")
a2 := NewTestAgent(t, t.Name()+"-dc2", `
datacenter = "dc2"
primary_datacenter = "dc1"
`)
defer a2.Shutdown()
testrpc.WaitForTestAgent(t, a2.RPC, "dc2")
// Wait for the WAN join.
addr := fmt.Sprintf("127.0.0.1:%d", a1.Config.SerfPortWAN)
_, err := a2.JoinWAN([]string{addr})
require.NoError(err)
testrpc.WaitForLeader(t, a1.RPC, "dc1")
testrpc.WaitForLeader(t, a2.RPC, "dc2")
retry.Run(t, func(r *retry.R) {
if got, want := len(a1.WANMembers()), 2; got < want {
r.Fatalf("got %d WAN members want at least %d", got, want)
}
})
// CA already setup by default by NewTestAgent but force a new one so we can
// verify it was signed easily.
dc1_ca1 := connect.TestCAConfigSet(t, a1, nil)
// Wait until root is updated in both dcs.
waitForActiveCARoot(t, a1.srv, dc1_ca1)
waitForActiveCARoot(t, a2.srv, dc1_ca1)
{
// Register a local service in the SECONDARY
args := &structs.ServiceDefinition{
ID: "foo",
Name: "test",
Address: "127.0.0.1",
Port: 8000,
Check: structs.CheckType{
TTL: 15 * time.Second,
},
}
req, _ := http.NewRequest("PUT", "/v1/agent/service/register", jsonReader(args))
resp := httptest.NewRecorder()
_, err := a2.srv.AgentRegisterService(resp, req)
require.NoError(err)
if !assert.Equal(200, resp.Code) {
t.Log("Body: ", resp.Body.String())
}
}
// List
req, _ := http.NewRequest("GET", "/v1/agent/connect/ca/leaf/test", nil)
resp := httptest.NewRecorder()
obj, err := a2.srv.AgentConnectCALeafCert(resp, req)
require.NoError(err)
require.Equal("MISS", resp.Header().Get("X-Cache"))
// Get the issued cert
issued, ok := obj.(*structs.IssuedCert)
assert.True(ok)
// Verify that the cert is signed by the CA
requireLeafValidUnderCA(t, issued, dc1_ca1)
// Verify blocking index
assert.True(issued.ModifyIndex > 0)
assert.Equal(fmt.Sprintf("%d", issued.ModifyIndex),
resp.Header().Get("X-Consul-Index"))
// Test caching
{
// Fetch it again
resp := httptest.NewRecorder()
obj2, err := a2.srv.AgentConnectCALeafCert(resp, req)
require.NoError(err)
require.Equal(obj, obj2)
// Should cache hit this time and not make request
require.Equal("HIT", resp.Header().Get("X-Cache"))
}
// Test that we aren't churning leaves for no reason at idle.
{
ch := make(chan error, 1)
go func() {
req, _ := http.NewRequest("GET", "/v1/agent/connect/ca/leaf/test?index="+strconv.Itoa(int(issued.ModifyIndex)), nil)
resp := httptest.NewRecorder()
obj, err := a2.srv.AgentConnectCALeafCert(resp, req)
if err != nil {
ch <- err
} else {
issued2 := obj.(*structs.IssuedCert)
if issued.CertPEM == issued2.CertPEM {
ch <- fmt.Errorf("leaf woke up unexpectedly with same cert")
} else {
ch <- fmt.Errorf("leaf woke up unexpectedly with new cert")
}
}
}()
start := time.Now()
// Before applying the fix from PR-6513 this would reliably wake up
// after ~20ms with a new cert. Since this test is necessarily a bit
// timing dependent we'll chill out for 5 seconds which should be enough
// time to disprove the original bug.
select {
case <-time.After(5 * time.Second):
case err := <-ch:
dur := time.Since(start)
t.Fatalf("unexpected return from blocking query; leaf churned during idle period, took %s: %v", dur, err)
}
}
// Set a new CA
dc1_ca2 := connect.TestCAConfigSet(t, a2, nil)
// Wait until root is updated in both dcs.
waitForActiveCARoot(t, a1.srv, dc1_ca2)
waitForActiveCARoot(t, a2.srv, dc1_ca2)
// Test that caching is updated in the background
retry.Run(t, func(r *retry.R) {
resp := httptest.NewRecorder()
// Try and sign again (note no index/wait arg since cache should update in
// background even if we aren't actively blocking)
obj, err := a2.srv.AgentConnectCALeafCert(resp, req)
r.Check(err)
issued2 := obj.(*structs.IssuedCert)
if issued.CertPEM == issued2.CertPEM {
r.Fatalf("leaf has not updated")
}
// Got a new leaf. Sanity check it's a whole new key as well as different
// cert.
if issued.PrivateKeyPEM == issued2.PrivateKeyPEM {
r.Fatalf("new leaf has same private key as before")
}
// Verify that the cert is signed by the new CA
requireLeafValidUnderCA(t, issued2, dc1_ca2)
// Should be a cache hit! The data should've updated in the cache
// in the background so this should've been fetched directly from
// the cache.
if resp.Header().Get("X-Cache") != "HIT" {
r.Fatalf("should be a cache hit")
}
})
}
func waitForActiveCARoot(t *testing.T, srv *HTTPServer, expect *structs.CARoot) {
retry.Run(t, func(r *retry.R) {
req, _ := http.NewRequest("GET", "/v1/agent/connect/ca/roots", nil)
resp := httptest.NewRecorder()
obj, err := srv.AgentConnectCARoots(resp, req)
if err != nil {
r.Fatalf("err: %v", err)
}
roots, ok := obj.(structs.IndexedCARoots)
if !ok {
r.Fatalf("response is wrong type %T", obj)
}
var root *structs.CARoot
for _, r := range roots.Roots {
if r.ID == roots.ActiveRootID {
root = r
break
}
}
if root == nil {
r.Fatal("no active root")
}
if root.ID != expect.ID {
r.Fatalf("current active root is %s; waiting for %s", root.ID, expect.ID)
}
})
}
func requireLeafValidUnderCA(t *testing.T, issued *structs.IssuedCert, ca *structs.CARoot) {
leaf, intermediates, err := connect.ParseLeafCerts(issued.CertPEM)
require.NoError(t, err)
roots := x509.NewCertPool()
require.True(t, roots.AppendCertsFromPEM([]byte(ca.RootCert)))
leaf, err := connect.ParseCert(issued.CertPEM)
require.NoError(t, err)
_, err = leaf.Verify(x509.VerifyOptions{
Roots: roots,
Roots: roots,
Intermediates: intermediates,
})
require.NoError(t, err)

View File

@ -96,9 +96,9 @@ type ConnectCALeaf struct {
// since all times we get from our wall clock should point to the same Location
// anyway.
type fetchState struct {
// authorityKeyID is the key ID of the CA root that signed the current cert.
// This is just to save parsing the whole cert everytime we have to check if
// the root changed.
// authorityKeyId is the ID of the CA key (whether root or intermediate) that signed
// the current cert. This is just to save parsing the whole cert everytime
// we have to check if the root changed.
authorityKeyID string
// forceExpireAfter is used to coordinate renewing certs after a CA rotation
@ -362,7 +362,7 @@ func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache
expiresAt = state.forceExpireAfter
}
if expiresAt == now || expiresAt.Before(now) {
if expiresAt.Equal(now) || expiresAt.Before(now) {
// Already expired, just make a new one right away
return c.generateNewLeaf(reqReal, lastResultWithNewState())
}

View File

@ -29,6 +29,67 @@ func ParseCert(pemValue string) (*x509.Certificate, error) {
return x509.ParseCertificate(block.Bytes)
}
// ParseLeafCerts parses all of the x509 certificates from a PEM-encoded value
// under the assumption that the first cert is a leaf (non-CA) cert and the
// rest are intermediate CA certs.
//
// If no certificates are found this returns an error.
func ParseLeafCerts(pemValue string) (*x509.Certificate, *x509.CertPool, error) {
certs, err := parseCerts(pemValue)
if err != nil {
return nil, nil, err
}
leaf := certs[0]
if leaf.IsCA {
return nil, nil, fmt.Errorf("first PEM-block should be a leaf cert")
}
intermediates := x509.NewCertPool()
for _, cert := range certs[1:] {
if !cert.IsCA {
return nil, nil, fmt.Errorf("found an unexpected leaf cert after the first PEM-block")
}
intermediates.AddCert(cert)
}
return leaf, intermediates, nil
}
// ParseCerts parses the all x509 certificates from a PEM-encoded value.
// The first returned cert is a leaf cert and any other ones are intermediates.
//
// If no certificates are found this returns an error.
func parseCerts(pemValue string) ([]*x509.Certificate, error) {
var out []*x509.Certificate
rest := []byte(pemValue)
for {
// The _ result below is not an error but the remaining PEM bytes.
block, remaining := pem.Decode(rest)
if block == nil {
break
}
rest = remaining
if block.Type != "CERTIFICATE" {
return nil, fmt.Errorf("PEM-block should be CERTIFICATE type")
}
cert, err := x509.ParseCertificate(block.Bytes)
if err != nil {
return nil, err
}
out = append(out, cert)
}
if len(out) == 0 {
return nil, fmt.Errorf("no PEM-encoded data found")
}
return out, nil
}
// CalculateCertFingerprint parses the x509 certificate from a PEM-encoded value
// and calculates the SHA-1 fingerprint.
func CalculateCertFingerprint(pemValue string) (string, error) {

View File

@ -290,7 +290,10 @@ func (s *Server) initializeSecondaryCA(provider ca.Provider, roots structs.Index
return err
}
var storedRootID string
var (
storedRootID string
expectedSigningKeyID string
)
if activeIntermediate != "" {
storedRoot, err := provider.ActiveRoot()
if err != nil {
@ -301,6 +304,12 @@ func (s *Server) initializeSecondaryCA(provider ca.Provider, roots structs.Index
if err != nil {
return fmt.Errorf("error parsing root fingerprint: %v, %#v", err, roots)
}
intermediateCert, err := connect.ParseCert(activeIntermediate)
if err != nil {
return fmt.Errorf("error parsing active intermediate cert: %v", err)
}
expectedSigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId)
}
var newActiveRoot *structs.CARoot
@ -314,10 +323,21 @@ func (s *Server) initializeSecondaryCA(provider ca.Provider, roots structs.Index
return fmt.Errorf("primary datacenter does not have an active root CA for Connect")
}
newIntermediate := false
// Get a signed intermediate from the primary DC if the provider
// hasn't been initialized yet or if the primary's root has changed.
needsNewIntermediate := false
if activeIntermediate == "" || storedRootID != roots.ActiveRootID {
needsNewIntermediate = true
}
// Also we take this opportunity to correct an incorrectly persisted SigningKeyID
// in secondary datacenters (see PR-6513).
if expectedSigningKeyID != "" && newActiveRoot.SigningKeyID != expectedSigningKeyID {
needsNewIntermediate = true
}
newIntermediate := false
if needsNewIntermediate {
csr, err := provider.GenerateIntermediateCSR()
if err != nil {
return err
@ -334,8 +354,14 @@ func (s *Server) initializeSecondaryCA(provider ca.Provider, roots structs.Index
return fmt.Errorf("Failed to set the intermediate certificate with the CA provider: %v", err)
}
intermediateCert, err := connect.ParseCert(intermediatePEM)
if err != nil {
return fmt.Errorf("error parsing intermediate cert: %v", err)
}
// Append the new intermediate to our local active root entry.
newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediatePEM)
newActiveRoot.SigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId)
newIntermediate = true
s.logger.Printf("[INFO] connect: received new intermediate certificate from primary datacenter")

View File

@ -145,13 +145,18 @@ func TestLeader_SecondaryCA_IntermediateRefresh(t *testing.T) {
require.NoError(err)
require.NotEmpty(oldIntermediatePEM)
// Store the current root
rootReq := &structs.DCSpecificRequest{
Datacenter: "dc1",
// Capture the current root
var originalRoot *structs.CARoot
{
rootList, activeRoot, err := getTestRoots(s1, "dc1")
require.NoError(err)
require.Len(rootList.Roots, 1)
originalRoot = activeRoot
}
var rootList structs.IndexedCARoots
require.NoError(s1.RPC("ConnectCA.Roots", rootReq, &rootList))
require.Len(rootList.Roots, 1)
// Wait for current state to be reflected in both datacenters.
waitForActiveCARoot(t, s1, "dc1", originalRoot)
waitForActiveCARoot(t, s2, "dc2", originalRoot)
// Update the provider config to use a new private key, which should
// cause a rotation.
@ -175,6 +180,14 @@ func TestLeader_SecondaryCA_IntermediateRefresh(t *testing.T) {
require.NoError(s1.RPC("ConnectCA.ConfigurationSet", args, &reply))
}
var updatedRoot *structs.CARoot
{
rootList, activeRoot, err := getTestRoots(s1, "dc1")
require.NoError(err)
require.Len(rootList.Roots, 2)
updatedRoot = activeRoot
}
// Wait for dc2's intermediate to be refreshed.
var intermediatePEM string
retry.Run(t, func(r *retry.R) {
@ -186,6 +199,9 @@ func TestLeader_SecondaryCA_IntermediateRefresh(t *testing.T) {
})
require.NoError(err)
waitForActiveCARoot(t, s1, "dc1", updatedRoot)
waitForActiveCARoot(t, s2, "dc2", updatedRoot)
// Verify the root lists have been rotated in each DC's state store.
state1 := s1.fsm.State()
_, primaryRoot, err := state1.CARootActive(nil)
@ -243,6 +259,99 @@ func TestLeader_SecondaryCA_IntermediateRefresh(t *testing.T) {
require.NoError(err)
}
func TestLeader_SecondaryCA_FixSigningKeyID_via_IntermediateRefresh(t *testing.T) {
t.Parallel()
require := require.New(t)
dir1, s1 := testServerWithConfig(t, func(c *Config) {
c.Build = "1.6.0"
})
defer os.RemoveAll(dir1)
defer s1.Shutdown()
testrpc.WaitForLeader(t, s1.RPC, "dc1")
// dc2 as a secondary DC
dir2pre, s2pre := testServerWithConfig(t, func(c *Config) {
c.Datacenter = "dc2"
c.PrimaryDatacenter = "dc1"
c.Build = "1.6.0"
})
defer os.RemoveAll(dir2pre)
defer s2pre.Shutdown()
// Create the WAN link
joinWAN(t, s2pre, s1)
testrpc.WaitForLeader(t, s2pre.RPC, "dc2")
// Restore the pre-1.6.1 behavior of the SigningKeyID not being derived
// from the intermediates.
{
state := s2pre.fsm.State()
// Get the highest index
idx, roots, err := state.CARoots(nil)
require.NoError(err)
var activeRoot *structs.CARoot
for _, root := range roots {
if root.Active {
activeRoot = root
}
}
require.NotNil(activeRoot)
rootCert, err := connect.ParseCert(activeRoot.RootCert)
require.NoError(err)
// Force this to be derived just from the root, not the intermediate.
activeRoot.SigningKeyID = connect.EncodeSigningKeyID(rootCert.SubjectKeyId)
// Store the root cert in raft
resp, err := s2pre.raftApply(structs.ConnectCARequestType, &structs.CARequest{
Op: structs.CAOpSetRoots,
Index: idx,
Roots: []*structs.CARoot{activeRoot},
})
require.NoError(err)
if respErr, ok := resp.(error); ok {
t.Fatalf("respErr: %v", respErr)
}
}
// Shutdown s2pre and restart it to trigger the secondary CA init to correct
// the SigningKeyID.
s2pre.Shutdown()
dir2, s2 := testServerWithConfig(t, func(c *Config) {
c.DataDir = s2pre.config.DataDir
c.Datacenter = "dc2"
c.PrimaryDatacenter = "dc1"
c.NodeName = s2pre.config.NodeName
c.NodeID = s2pre.config.NodeID
})
defer os.RemoveAll(dir2)
defer s2.Shutdown()
testrpc.WaitForLeader(t, s2.RPC, "dc2")
{ // verify that the root is now corrected
provider, activeRoot := s2.getCAProvider()
require.NotNil(provider)
require.NotNil(activeRoot)
activeIntermediate, err := provider.ActiveIntermediate()
require.NoError(err)
intermediateCert, err := connect.ParseCert(activeIntermediate)
require.NoError(err)
// Force this to be derived just from the root, not the intermediate.
expect := connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId)
require.Equal(expect, activeRoot.SigningKeyID)
}
}
func TestLeader_SecondaryCA_TransitionFromPrimary(t *testing.T) {
t.Parallel()
@ -461,6 +570,52 @@ func TestLeader_SecondaryCA_UpgradeBeforePrimary(t *testing.T) {
require.NoError(t, err)
}
func waitForActiveCARoot(t *testing.T, s *Server, datacenter string, expect *structs.CARoot) {
retry.Run(t, func(r *retry.R) {
args := &structs.DCSpecificRequest{
Datacenter: datacenter,
}
var reply structs.IndexedCARoots
if err := s.RPC("ConnectCA.Roots", args, &reply); err != nil {
r.Fatalf("err: %v", err)
}
var root *structs.CARoot
for _, r := range reply.Roots {
if r.ID == reply.ActiveRootID {
root = r
break
}
}
if root == nil {
r.Fatal("no active root")
}
if root.ID != expect.ID {
r.Fatalf("current active root is %s; waiting for %s", root.ID, expect.ID)
}
})
}
func getTestRoots(s *Server, datacenter string) (*structs.IndexedCARoots, *structs.CARoot, error) {
rootReq := &structs.DCSpecificRequest{
Datacenter: datacenter,
}
var rootList structs.IndexedCARoots
if err := s.RPC("ConnectCA.Roots", rootReq, &rootList); err != nil {
return nil, nil, err
}
var active *structs.CARoot
for _, root := range rootList.Roots {
if root.Active {
active = root
break
}
}
return &rootList, active, nil
}
func TestLeader_ReplicateIntentions(t *testing.T) {
t.Parallel()

View File

@ -60,8 +60,8 @@ type CARoot struct {
SerialNumber uint64
// SigningKeyID is the ID of the public key that corresponds to the private
// key used to sign the certificate. Is is the HexString format of the raw
// AuthorityKeyID bytes.
// key used to sign leaf certificates. Is is the HexString format of the
// raw AuthorityKeyID bytes.
SigningKeyID string
// ExternalTrustDomain is the trust domain this root was generated under. It