consul/agent/connect/ca/provider_consul.go
Paul Banks 45d57ca601
connect: Allow CA Providers to store small amount of state (#6751)
* pass logger through to provider

* test for proper operation of NeedsLogger

* remove public testServer function

* Ooops actually set the logger in all the places we need it - CA config set wasn't and causing segfault

* Fix all the other places in tests where we set the logger

* Allow CA Providers to persist some state

* Update CA provider plugin interface

* Fix plugin stubs to match provider changes

* Update agent/connect/ca/provider.go

Co-Authored-By: R.B. Boyer <rb@hashicorp.com>

* Cleanup review comments
2019-11-11 20:57:16 +00:00

689 lines
19 KiB
Go

package ca
import (
"bytes"
"crypto/rand"
"crypto/sha256"
"crypto/x509"
"crypto/x509/pkix"
"encoding/pem"
"errors"
"fmt"
"log"
"math/big"
"net/url"
"sync"
"time"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs"
)
var ErrNotInitialized = errors.New("provider not initialized")
type ConsulProvider struct {
Delegate ConsulProviderStateDelegate
config *structs.ConsulCAProviderConfig
id string
clusterID string
isRoot bool
spiffeID *connect.SpiffeIDSigning
logger *log.Logger
// testState is only used to test Consul leader's handling of providers that
// need to persist state. Consul provider actually manages it's state directly
// in the FSM since it is highly sensitive not (root private keys) not just
// metadata for lookups. We could make a whole mock provider to keep this out
// of Consul but that would still need to be configurable through real config
// and is a lot more boilerplate to test this for equivalent functionality.
testState map[string]string
sync.RWMutex
}
type ConsulProviderStateDelegate interface {
State() *state.Store
ApplyCARequest(*structs.CARequest) error
}
// Configure sets up the provider using the given configuration.
func (c *ConsulProvider) Configure(clusterID string, isRoot bool, rawConfig map[string]interface{}, state map[string]string) error {
// Parse the raw config and update our ID.
config, err := ParseConsulCAConfig(rawConfig)
if err != nil {
return err
}
c.config = config
hash := sha256.Sum256([]byte(fmt.Sprintf("%s,%s,%v", config.PrivateKey, config.RootCert, isRoot)))
c.id = connect.HexString(hash[:])
c.clusterID = clusterID
c.isRoot = isRoot
c.spiffeID = connect.SpiffeIDSigningForCluster(&structs.CAConfiguration{ClusterID: clusterID})
// Passthrough test state for state handling tests. See testState doc.
c.parseTestState(rawConfig)
// Exit early if the state store has an entry for this provider's config.
_, providerState, err := c.Delegate.State().CAProviderState(c.id)
if err != nil {
return err
}
if providerState != nil {
return nil
}
// Check if there's an entry with the old ID scheme.
oldID := fmt.Sprintf("%s,%s", config.PrivateKey, config.RootCert)
_, providerState, err = c.Delegate.State().CAProviderState(oldID)
if err != nil {
return err
}
// Found an entry with the old ID, so update it to the new ID and
// delete the old entry.
if providerState != nil {
newState := *providerState
newState.ID = c.id
createReq := &structs.CARequest{
Op: structs.CAOpSetProviderState,
ProviderState: &newState,
}
if err := c.Delegate.ApplyCARequest(createReq); err != nil {
return err
}
deleteReq := &structs.CARequest{
Op: structs.CAOpDeleteProviderState,
ProviderState: providerState,
}
if err := c.Delegate.ApplyCARequest(deleteReq); err != nil {
return err
}
return nil
}
// Write the provider state to the state store.
newState := structs.CAConsulProviderState{
ID: c.id,
}
args := &structs.CARequest{
Op: structs.CAOpSetProviderState,
ProviderState: &newState,
}
if err := c.Delegate.ApplyCARequest(args); err != nil {
return err
}
c.logger.Printf("[DEBUG] consul CA provider configured ID=%s isRoot=%v",
c.id, c.isRoot)
return nil
}
// State implements Provider. Consul actually does store all it's state in raft
// but it manages it independently through a separate table already so this is a
// no-op. This method just passes through testState which allows tests to verify
// state handling behavior without needing to plumb a full test mock provider
// right through Consul server code.
func (c *ConsulProvider) State() (map[string]string, error) {
return c.testState, nil
}
// ActiveRoot returns the active root CA certificate.
func (c *ConsulProvider) ActiveRoot() (string, error) {
_, providerState, err := c.getState()
if err != nil {
return "", err
}
return providerState.RootCert, nil
}
// GenerateRoot initializes a new root certificate and private key
// if needed.
func (c *ConsulProvider) GenerateRoot() error {
idx, providerState, err := c.getState()
if err != nil {
return err
}
if !c.isRoot {
return fmt.Errorf("provider is not the root certificate authority")
}
if providerState.RootCert != "" {
return nil
}
// Generate a private key if needed
newState := *providerState
if c.config.PrivateKey == "" {
_, pk, err := connect.GeneratePrivateKeyWithConfig(c.config.PrivateKeyType, c.config.PrivateKeyBits)
if err != nil {
return err
}
newState.PrivateKey = pk
} else {
newState.PrivateKey = c.config.PrivateKey
}
// Generate the root CA if necessary
if c.config.RootCert == "" {
ca, err := c.generateCA(newState.PrivateKey, idx+1)
if err != nil {
return fmt.Errorf("error generating CA: %v", err)
}
newState.RootCert = ca
} else {
newState.RootCert = c.config.RootCert
}
// Write the provider state
args := &structs.CARequest{
Op: structs.CAOpSetProviderState,
ProviderState: &newState,
}
if err := c.Delegate.ApplyCARequest(args); err != nil {
return err
}
return nil
}
// GenerateIntermediateCSR creates a private key and generates a CSR
// for another datacenter's root to sign.
func (c *ConsulProvider) GenerateIntermediateCSR() (string, error) {
_, providerState, err := c.getState()
if err != nil {
return "", err
}
if c.isRoot {
return "", fmt.Errorf("provider is the root certificate authority, " +
"cannot generate an intermediate CSR")
}
// Create a new private key and CSR.
signer, pk, err := connect.GeneratePrivateKeyWithConfig(c.config.PrivateKeyType, c.config.PrivateKeyBits)
if err != nil {
return "", err
}
uid, err := connect.CompactUID()
if err != nil {
return "", err
}
cn := connect.CACN("consul", uid, c.clusterID, c.isRoot)
csr, err := connect.CreateCACSR(c.spiffeID, cn, signer)
if err != nil {
return "", err
}
// Write the new provider state to the store.
newState := *providerState
newState.PrivateKey = pk
args := &structs.CARequest{
Op: structs.CAOpSetProviderState,
ProviderState: &newState,
}
if err := c.Delegate.ApplyCARequest(args); err != nil {
return "", err
}
return csr, nil
}
// SetIntermediate validates that the given intermediate is for the right private key
// and writes the given intermediate and root certificates to the state.
func (c *ConsulProvider) SetIntermediate(intermediatePEM, rootPEM string) error {
_, providerState, err := c.getState()
if err != nil {
return err
}
if c.isRoot {
return fmt.Errorf("cannot set an intermediate using another root in the primary datacenter")
}
err = validateSetIntermediate(
intermediatePEM, rootPEM,
providerState.PrivateKey,
c.spiffeID,
)
if err != nil {
return err
}
// Update the state
newState := *providerState
newState.IntermediateCert = intermediatePEM
newState.RootCert = rootPEM
args := &structs.CARequest{
Op: structs.CAOpSetProviderState,
ProviderState: &newState,
}
if err := c.Delegate.ApplyCARequest(args); err != nil {
return err
}
return nil
}
// We aren't maintaining separate root/intermediate CAs for the builtin
// provider, so just return the root.
func (c *ConsulProvider) ActiveIntermediate() (string, error) {
if c.isRoot {
return c.ActiveRoot()
}
_, providerState, err := c.getState()
if err != nil {
return "", err
}
return providerState.IntermediateCert, nil
}
// We aren't maintaining separate root/intermediate CAs for the builtin
// provider, so just return the root.
func (c *ConsulProvider) GenerateIntermediate() (string, error) {
return c.ActiveIntermediate()
}
// Remove the state store entry for this provider instance.
func (c *ConsulProvider) Cleanup() error {
args := &structs.CARequest{
Op: structs.CAOpDeleteProviderState,
ProviderState: &structs.CAConsulProviderState{ID: c.id},
}
if err := c.Delegate.ApplyCARequest(args); err != nil {
return err
}
return nil
}
// Sign returns a new certificate valid for the given SpiffeIDService
// using the current CA.
func (c *ConsulProvider) Sign(csr *x509.CertificateRequest) (string, error) {
// Lock during the signing so we don't use the same index twice
// for different cert serial numbers.
c.Lock()
defer c.Unlock()
// Get the provider state
idx, providerState, err := c.getState()
if err != nil {
return "", err
}
if providerState.PrivateKey == "" {
return "", ErrNotInitialized
}
// Create the keyId for the cert from the signing private key.
signer, err := connect.ParseSigner(providerState.PrivateKey)
if err != nil {
return "", err
}
if signer == nil {
return "", ErrNotInitialized
}
keyId, err := connect.KeyId(signer.Public())
if err != nil {
return "", err
}
// Parse the SPIFFE ID
spiffeId, err := connect.ParseCertURI(csr.URIs[0])
if err != nil {
return "", err
}
// Even though leafs should be from our own CSRs which should have the same CN
// logic as here, override anyway to account for older version clients that
// didn't include the Common Name in the CSR.
subject, err := connect.CNForCertURI(spiffeId)
if err != nil {
return "", err
}
// Parse the CA cert
certPEM, err := c.ActiveIntermediate()
if err != nil {
return "", err
}
caCert, err := connect.ParseCert(certPEM)
if err != nil {
return "", fmt.Errorf("error parsing CA cert: %s", err)
}
// Cert template for generation
sn := &big.Int{}
sn.SetUint64(idx + 1)
// Sign the certificate valid from 1 minute in the past, this helps it be
// accepted right away even when nodes are not in close time sync across the
// cluster. A minute is more than enough for typical DC clock drift.
effectiveNow := time.Now().Add(-1 * time.Minute)
template := x509.Certificate{
SerialNumber: sn,
Subject: pkix.Name{CommonName: subject},
URIs: csr.URIs,
Signature: csr.Signature,
// We use the correct signature algorithm for the CA key we are signing with
// regardless of the algorithm used to sign the CSR signature above since
// the leaf might use a different key type.
SignatureAlgorithm: connect.SigAlgoForKey(signer),
PublicKeyAlgorithm: csr.PublicKeyAlgorithm,
PublicKey: csr.PublicKey,
BasicConstraintsValid: true,
KeyUsage: x509.KeyUsageDataEncipherment |
x509.KeyUsageKeyAgreement |
x509.KeyUsageDigitalSignature |
x509.KeyUsageKeyEncipherment,
ExtKeyUsage: []x509.ExtKeyUsage{
x509.ExtKeyUsageClientAuth,
x509.ExtKeyUsageServerAuth,
},
NotAfter: effectiveNow.Add(c.config.LeafCertTTL),
NotBefore: effectiveNow,
AuthorityKeyId: keyId,
SubjectKeyId: keyId,
}
// Create the certificate, PEM encode it and return that value.
var buf bytes.Buffer
bs, err := x509.CreateCertificate(
rand.Reader, &template, caCert, csr.PublicKey, signer)
if err != nil {
return "", fmt.Errorf("error generating certificate: %s", err)
}
err = pem.Encode(&buf, &pem.Block{Type: "CERTIFICATE", Bytes: bs})
if err != nil {
return "", fmt.Errorf("error encoding certificate: %s", err)
}
err = c.incrementProviderIndex(providerState)
if err != nil {
return "", err
}
// Set the response
return buf.String(), nil
}
// SignIntermediate will validate the CSR to ensure the trust domain in the
// URI SAN matches the local one and that basic constraints for a CA certificate
// are met. It should return a signed CA certificate with a path length constraint
// of 0 to ensure that the certificate cannot be used to generate further CA certs.
func (c *ConsulProvider) SignIntermediate(csr *x509.CertificateRequest) (string, error) {
idx, providerState, err := c.getState()
if err != nil {
return "", err
}
if uriCount := len(csr.URIs); uriCount != 1 {
return "", fmt.Errorf("incoming CSR has unexpected number of URIs: %d", uriCount)
}
certURI, err := connect.ParseCertURI(csr.URIs[0])
if err != nil {
return "", err
}
// Verify that the trust domain is valid.
if !c.spiffeID.CanSign(certURI) {
return "", fmt.Errorf("incoming CSR domain %q is not valid for our domain %q",
certURI.URI().String(), c.spiffeID.URI().String())
}
// Get the signing private key.
signer, err := connect.ParseSigner(providerState.PrivateKey)
if err != nil {
return "", err
}
subjectKeyID, err := connect.KeyId(csr.PublicKey)
if err != nil {
return "", err
}
// Parse the CA cert
caCert, err := connect.ParseCert(providerState.RootCert)
if err != nil {
return "", fmt.Errorf("error parsing CA cert: %s", err)
}
// Cert template for generation
sn := &big.Int{}
sn.SetUint64(idx + 1)
// Sign the certificate valid from 1 minute in the past, this helps it be
// accepted right away even when nodes are not in close time sync across the
// cluster. A minute is more than enough for typical DC clock drift.
effectiveNow := time.Now().Add(-1 * time.Minute)
template := x509.Certificate{
SerialNumber: sn,
Subject: csr.Subject,
URIs: csr.URIs,
Signature: csr.Signature,
SignatureAlgorithm: connect.SigAlgoForKey(signer),
PublicKeyAlgorithm: csr.PublicKeyAlgorithm,
PublicKey: csr.PublicKey,
BasicConstraintsValid: true,
KeyUsage: x509.KeyUsageCertSign |
x509.KeyUsageCRLSign |
x509.KeyUsageDigitalSignature,
IsCA: true,
MaxPathLenZero: true,
NotAfter: effectiveNow.AddDate(1, 0, 0),
NotBefore: effectiveNow,
SubjectKeyId: subjectKeyID,
}
// Create the certificate, PEM encode it and return that value.
var buf bytes.Buffer
bs, err := x509.CreateCertificate(
rand.Reader, &template, caCert, csr.PublicKey, signer)
if err != nil {
return "", fmt.Errorf("error generating certificate: %s", err)
}
err = pem.Encode(&buf, &pem.Block{Type: "CERTIFICATE", Bytes: bs})
if err != nil {
return "", fmt.Errorf("error encoding certificate: %s", err)
}
err = c.incrementProviderIndex(providerState)
if err != nil {
return "", err
}
// Set the response
return buf.String(), nil
}
// CrossSignCA returns the given CA cert signed by the current active root.
func (c *ConsulProvider) CrossSignCA(cert *x509.Certificate) (string, error) {
c.Lock()
defer c.Unlock()
// Get the provider state
idx, providerState, err := c.getState()
if err != nil {
return "", err
}
privKey, err := connect.ParseSigner(providerState.PrivateKey)
if err != nil {
return "", fmt.Errorf("error parsing private key %q: %s", providerState.PrivateKey, err)
}
rootCA, err := connect.ParseCert(providerState.RootCert)
if err != nil {
return "", err
}
keyId, err := connect.KeyId(privKey.Public())
if err != nil {
return "", err
}
// Create the cross-signing template from the existing root CA
serialNum := &big.Int{}
serialNum.SetUint64(idx + 1)
template := *cert
template.SerialNumber = serialNum
template.SignatureAlgorithm = rootCA.SignatureAlgorithm
template.AuthorityKeyId = keyId
// Sign the certificate valid from 1 minute in the past, this helps it be
// accepted right away even when nodes are not in close time sync across the
// cluster. A minute is more than enough for typical DC clock drift.
effectiveNow := time.Now().Add(-1 * time.Minute)
template.NotBefore = effectiveNow
// This cross-signed cert is only needed during rotation, and only while old
// leaf certs are still in use. They expire within 3 days currently so 7 is
// safe. TODO(banks): make this be based on leaf expiry time when that is
// configurable.
template.NotAfter = effectiveNow.AddDate(0, 0, 7)
bs, err := x509.CreateCertificate(
rand.Reader, &template, rootCA, cert.PublicKey, privKey)
if err != nil {
return "", fmt.Errorf("error generating CA certificate: %s", err)
}
var buf bytes.Buffer
err = pem.Encode(&buf, &pem.Block{Type: "CERTIFICATE", Bytes: bs})
if err != nil {
return "", fmt.Errorf("error encoding private key: %s", err)
}
err = c.incrementProviderIndex(providerState)
if err != nil {
return "", err
}
return buf.String(), nil
}
// getState returns the current provider state from the state delegate, and returns
// ErrNotInitialized if no entry is found.
func (c *ConsulProvider) getState() (uint64, *structs.CAConsulProviderState, error) {
stateStore := c.Delegate.State()
idx, providerState, err := stateStore.CAProviderState(c.id)
if err != nil {
return 0, nil, err
}
if providerState == nil {
return 0, nil, ErrNotInitialized
}
return idx, providerState, nil
}
// incrementProviderIndex does a write to increment the provider state store table index
// used for serial numbers when generating certificates.
func (c *ConsulProvider) incrementProviderIndex(providerState *structs.CAConsulProviderState) error {
newState := *providerState
args := &structs.CARequest{
Op: structs.CAOpSetProviderState,
ProviderState: &newState,
}
if err := c.Delegate.ApplyCARequest(args); err != nil {
return err
}
return nil
}
// generateCA makes a new root CA using the current private key
func (c *ConsulProvider) generateCA(privateKey string, sn uint64) (string, error) {
stateStore := c.Delegate.State()
_, config, err := stateStore.CAConfig(nil)
if err != nil {
return "", err
}
privKey, err := connect.ParseSigner(privateKey)
if err != nil {
return "", fmt.Errorf("error parsing private key %q: %s", privateKey, err)
}
// The URI (SPIFFE compatible) for the cert
id := connect.SpiffeIDSigningForCluster(config)
keyId, err := connect.KeyId(privKey.Public())
if err != nil {
return "", err
}
// Create the CA cert
uid, err := connect.CompactUID()
if err != nil {
return "", err
}
cn := connect.CACN("consul", uid, c.clusterID, c.isRoot)
serialNum := &big.Int{}
serialNum.SetUint64(sn)
template := x509.Certificate{
SerialNumber: serialNum,
Subject: pkix.Name{CommonName: cn},
URIs: []*url.URL{id.URI()},
BasicConstraintsValid: true,
KeyUsage: x509.KeyUsageCertSign |
x509.KeyUsageCRLSign |
x509.KeyUsageDigitalSignature,
IsCA: true,
NotAfter: time.Now().AddDate(10, 0, 0),
NotBefore: time.Now(),
AuthorityKeyId: keyId,
SubjectKeyId: keyId,
}
bs, err := x509.CreateCertificate(
rand.Reader, &template, &template, privKey.Public(), privKey)
if err != nil {
return "", fmt.Errorf("error generating CA certificate: %s", err)
}
var buf bytes.Buffer
err = pem.Encode(&buf, &pem.Block{Type: "CERTIFICATE", Bytes: bs})
if err != nil {
return "", fmt.Errorf("error encoding private key: %s", err)
}
return buf.String(), nil
}
// SetLogger implements the NeedsLogger interface so the provider can log important messages.
func (c *ConsulProvider) SetLogger(logger *log.Logger) {
c.logger = logger
}
func (c *ConsulProvider) parseTestState(rawConfig map[string]interface{}) {
c.testState = nil
if rawTestState, ok := rawConfig["test_state"]; ok {
if ts, ok := rawTestState.(map[string]string); ok {
c.testState = ts
return
}
// Secondary's config takes a trip through the state store before Configure
// is called unlike primaries. This means we end up with map[string]string
// encoded as map[string]interface{}. Just handle that case and ignore the
// rest as this is test-only code and we'd rather not leave a way to error
// CA setup and leave cluster unavailable in prod by accidentally setting a
// bad test_state config.
if ts, ok := rawTestState.(map[string]interface{}); ok {
c.testState = make(map[string]string)
for k, v := range ts {
if s, ok := v.(string); ok {
c.testState[k] = s
}
}
}
}
}