mirror of https://github.com/status-im/consul.git
Merge pull request #4325 from hashicorp/ca-pruning
connect/ca: add logic for pruning old stale RootCA entries
This commit is contained in:
commit
f92e5b1121
|
@ -5,6 +5,7 @@ import (
|
|||
"fmt"
|
||||
"reflect"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/consul/acl"
|
||||
"github.com/hashicorp/consul/agent/connect"
|
||||
|
@ -177,6 +178,7 @@ func (s *ConnectCA) ConfigurationSet(
|
|||
newRoot := *r
|
||||
if newRoot.Active {
|
||||
newRoot.Active = false
|
||||
newRoot.RotatedOutAt = time.Now()
|
||||
}
|
||||
newRoots = append(newRoots, &newRoot)
|
||||
}
|
||||
|
|
|
@ -28,7 +28,18 @@ const (
|
|||
barrierWriteTimeout = 2 * time.Minute
|
||||
)
|
||||
|
||||
var minAutopilotVersion = version.Must(version.NewVersion("0.8.0"))
|
||||
var (
|
||||
// caRootPruneInterval is how often we check for stale CARoots to remove.
|
||||
caRootPruneInterval = time.Hour
|
||||
|
||||
// caRootExpireDuration is the duration after which an inactive root is considered
|
||||
// "expired". Currently this is based on the default leaf cert TTL of 3 days.
|
||||
caRootExpireDuration = 7 * 24 * time.Hour
|
||||
|
||||
// minAutopilotVersion is the minimum Consul version in which Autopilot features
|
||||
// are supported.
|
||||
minAutopilotVersion = version.Must(version.NewVersion("0.8.0"))
|
||||
)
|
||||
|
||||
// monitorLeadership is used to monitor if we acquire or lose our role
|
||||
// as the leader in the Raft cluster. There is some work the leader is
|
||||
|
@ -220,6 +231,8 @@ func (s *Server) establishLeadership() error {
|
|||
return err
|
||||
}
|
||||
|
||||
s.startCARootPruning()
|
||||
|
||||
s.setConsistentReadReady()
|
||||
return nil
|
||||
}
|
||||
|
@ -236,6 +249,8 @@ func (s *Server) revokeLeadership() error {
|
|||
return err
|
||||
}
|
||||
|
||||
s.stopCARootPruning()
|
||||
|
||||
s.setCAProvider(nil, nil)
|
||||
|
||||
s.resetConsistentReadReady()
|
||||
|
@ -550,6 +565,92 @@ func (s *Server) setCAProvider(newProvider ca.Provider, root *structs.CARoot) {
|
|||
s.caProviderRoot = root
|
||||
}
|
||||
|
||||
// startCARootPruning starts a goroutine that looks for stale CARoots
|
||||
// and removes them from the state store.
|
||||
func (s *Server) startCARootPruning() {
|
||||
s.caPruningLock.Lock()
|
||||
defer s.caPruningLock.Unlock()
|
||||
|
||||
if s.caPruningEnabled {
|
||||
return
|
||||
}
|
||||
|
||||
s.caPruningCh = make(chan struct{})
|
||||
|
||||
go func() {
|
||||
ticker := time.NewTicker(caRootPruneInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-s.caPruningCh:
|
||||
return
|
||||
case <-ticker.C:
|
||||
if err := s.pruneCARoots(); err != nil {
|
||||
s.logger.Printf("[ERR] connect: error pruning CA roots: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
s.caPruningEnabled = true
|
||||
}
|
||||
|
||||
// pruneCARoots looks for any CARoots that have been rotated out and expired.
|
||||
func (s *Server) pruneCARoots() error {
|
||||
if !s.config.ConnectEnabled {
|
||||
return nil
|
||||
}
|
||||
|
||||
idx, roots, err := s.fsm.State().CARoots(nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var newRoots structs.CARoots
|
||||
for _, r := range roots {
|
||||
if !r.Active && !r.RotatedOutAt.IsZero() && time.Now().Sub(r.RotatedOutAt) > caRootExpireDuration {
|
||||
s.logger.Printf("[INFO] connect: pruning old unused root CA (ID: %s)", r.ID)
|
||||
continue
|
||||
}
|
||||
newRoot := *r
|
||||
newRoots = append(newRoots, &newRoot)
|
||||
}
|
||||
|
||||
// Return early if there's nothing to remove.
|
||||
if len(newRoots) == len(roots) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Commit the new root state.
|
||||
var args structs.CARequest
|
||||
args.Op = structs.CAOpSetRoots
|
||||
args.Index = idx
|
||||
args.Roots = newRoots
|
||||
resp, err := s.raftApply(structs.ConnectCARequestType, args)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if respErr, ok := resp.(error); ok {
|
||||
return respErr
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// stopCARootPruning stops the CARoot pruning process.
|
||||
func (s *Server) stopCARootPruning() {
|
||||
s.caPruningLock.Lock()
|
||||
defer s.caPruningLock.Unlock()
|
||||
|
||||
if !s.caPruningEnabled {
|
||||
return
|
||||
}
|
||||
|
||||
close(s.caPruningCh)
|
||||
s.caPruningEnabled = false
|
||||
}
|
||||
|
||||
// reconcileReaped is used to reconcile nodes that have failed and been reaped
|
||||
// from Serf but remain in the catalog. This is done by looking for unknown nodes with serfHealth checks registered.
|
||||
// We generate a "reap" event to cause the node to be cleaned up.
|
||||
|
|
|
@ -5,12 +5,14 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/consul/agent/connect"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
"github.com/hashicorp/consul/api"
|
||||
"github.com/hashicorp/consul/testrpc"
|
||||
"github.com/hashicorp/consul/testutil/retry"
|
||||
"github.com/hashicorp/net-rpc-msgpackrpc"
|
||||
"github.com/hashicorp/serf/serf"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestLeader_RegisterMember(t *testing.T) {
|
||||
|
@ -1001,3 +1003,64 @@ func TestLeader_ACL_Initialization(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestLeader_CARootPruning(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
caRootExpireDuration = 500 * time.Millisecond
|
||||
caRootPruneInterval = 200 * time.Millisecond
|
||||
|
||||
require := require.New(t)
|
||||
dir1, s1 := testServer(t)
|
||||
defer os.RemoveAll(dir1)
|
||||
defer s1.Shutdown()
|
||||
codec := rpcClient(t, s1)
|
||||
defer codec.Close()
|
||||
|
||||
testrpc.WaitForLeader(t, s1.RPC, "dc1")
|
||||
|
||||
// Get the current root
|
||||
rootReq := &structs.DCSpecificRequest{
|
||||
Datacenter: "dc1",
|
||||
}
|
||||
var rootList structs.IndexedCARoots
|
||||
require.Nil(msgpackrpc.CallWithCodec(codec, "ConnectCA.Roots", rootReq, &rootList))
|
||||
require.Len(rootList.Roots, 1)
|
||||
oldRoot := rootList.Roots[0]
|
||||
|
||||
// Update the provider config to use a new private key, which should
|
||||
// cause a rotation.
|
||||
_, newKey, err := connect.GeneratePrivateKey()
|
||||
require.NoError(err)
|
||||
newConfig := &structs.CAConfiguration{
|
||||
Provider: "consul",
|
||||
Config: map[string]interface{}{
|
||||
"PrivateKey": newKey,
|
||||
"RootCert": "",
|
||||
"RotationPeriod": 90 * 24 * time.Hour,
|
||||
},
|
||||
}
|
||||
{
|
||||
args := &structs.CARequest{
|
||||
Datacenter: "dc1",
|
||||
Config: newConfig,
|
||||
}
|
||||
var reply interface{}
|
||||
|
||||
require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.ConfigurationSet", args, &reply))
|
||||
}
|
||||
|
||||
// Should have 2 roots now.
|
||||
_, roots, err := s1.fsm.State().CARoots(nil)
|
||||
require.NoError(err)
|
||||
require.Len(roots, 2)
|
||||
|
||||
time.Sleep(caRootExpireDuration * 2)
|
||||
|
||||
// Now the old root should be pruned.
|
||||
_, roots, err = s1.fsm.State().CARoots(nil)
|
||||
require.NoError(err)
|
||||
require.Len(roots, 1)
|
||||
require.True(roots[0].Active)
|
||||
require.NotEqual(roots[0].ID, oldRoot.ID)
|
||||
}
|
||||
|
|
|
@ -107,6 +107,12 @@ type Server struct {
|
|||
caProviderRoot *structs.CARoot
|
||||
caProviderLock sync.RWMutex
|
||||
|
||||
// caPruningCh is used to shut down the CA root pruning goroutine when we
|
||||
// lose leadership.
|
||||
caPruningCh chan struct{}
|
||||
caPruningLock sync.RWMutex
|
||||
caPruningEnabled bool
|
||||
|
||||
// Consul configuration
|
||||
config *Config
|
||||
|
||||
|
|
|
@ -73,6 +73,11 @@ type CARoot struct {
|
|||
// cannot be active.
|
||||
Active bool
|
||||
|
||||
// RotatedOutAt is the time at which this CA was removed from the state.
|
||||
// This will only be set on roots that have been rotated out from being the
|
||||
// active root.
|
||||
RotatedOutAt time.Time `json:"-"`
|
||||
|
||||
RaftIndex
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue