consul/agent/connect/ca/provider_consul.go
Chris S. Kim 747a4c73c1
Fix bug with Vault CA provider (#18112)
Updating RootPKIPath but not IntermediatePKIPath would not update 
leaf signing certs with the new root. Unsure if this happens in practice 
but manual testing showed it is a bug that would break mesh and agent 
connections once the old root is pruned.
2023-07-14 15:58:33 -04:00

683 lines
20 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package ca
import (
"bytes"
"crypto/rand"
"crypto/sha256"
"crypto/x509"
"crypto/x509/pkix"
"encoding/pem"
"errors"
"fmt"
"math/big"
"net/url"
"sync"
"time"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/structs"
)
var (
// NotBefore will be CertificateTimeDriftBuffer in the past to account for
// time drift between different servers.
CertificateTimeDriftBuffer = time.Minute
ErrNotInitialized = errors.New("provider not initialized")
)
type ConsulProvider struct {
Delegate ConsulProviderStateDelegate
config *structs.ConsulCAProviderConfig
id string
clusterID string
isPrimary bool
spiffeID *connect.SpiffeIDSigning
logger hclog.Logger
// testState is only used to test Consul leader's handling of providers that
// need to persist state. Consul provider actually manages it's state directly
// in the FSM since it is highly sensitive not (root private keys) not just
// metadata for lookups. We could make a whole mock provider to keep this out
// of Consul but that would still need to be configurable through real config
// and is a lot more boilerplate to test this for equivalent functionality.
testState map[string]string
sync.RWMutex
}
var _ Provider = (*ConsulProvider)(nil)
// NewConsulProvider returns a new ConsulProvider that is ready to be used.
func NewConsulProvider(delegate ConsulProviderStateDelegate, logger hclog.Logger) *ConsulProvider {
return &ConsulProvider{Delegate: delegate, logger: logger}
}
type ConsulProviderStateDelegate interface {
ProviderState(id string) (*structs.CAConsulProviderState, error)
ApplyCARequest(*structs.CARequest) (interface{}, error)
}
func hexStringHash(input string) string {
hash := sha256.Sum256([]byte(input))
return connect.HexString(hash[:])
}
// Configure sets up the provider using the given configuration.
func (c *ConsulProvider) Configure(cfg ProviderConfig) error {
// Parse the raw config and update our ID.
config, err := ParseConsulCAConfig(cfg.RawConfig)
if err != nil {
return err
}
c.config = config
c.id = hexStringHash(fmt.Sprintf("%s,%s,%s,%d,%v", config.PrivateKey, config.RootCert, config.PrivateKeyType, config.PrivateKeyBits, cfg.IsPrimary))
c.clusterID = cfg.ClusterID
c.isPrimary = cfg.IsPrimary
c.spiffeID = connect.SpiffeIDSigningForCluster(c.clusterID)
// Passthrough test state for state handling tests. See testState doc.
c.parseTestState(cfg.RawConfig, cfg.State)
// Exit early if the state store has an entry for this provider's config.
providerState, err := c.Delegate.ProviderState(c.id)
if err != nil {
return err
}
if providerState != nil {
return nil
}
oldIDs := []string{
hexStringHash(fmt.Sprintf("%s,%s,%v", config.PrivateKey, config.RootCert, cfg.IsPrimary)),
fmt.Sprintf("%s,%s", config.PrivateKey, config.RootCert),
}
// Check if there are any entries with old ID schemes.
for _, oldID := range oldIDs {
providerState, err = c.Delegate.ProviderState(oldID)
if err != nil {
return err
}
// Found an entry with the old ID, so update it to the new ID and
// delete the old entry.
if providerState != nil {
newState := *providerState
newState.ID = c.id
createReq := &structs.CARequest{
Op: structs.CAOpSetProviderState,
ProviderState: &newState,
}
if _, err := c.Delegate.ApplyCARequest(createReq); err != nil {
return err
}
deleteReq := &structs.CARequest{
Op: structs.CAOpDeleteProviderState,
ProviderState: providerState,
}
if _, err := c.Delegate.ApplyCARequest(deleteReq); err != nil {
return err
}
return nil
}
}
args := &structs.CARequest{
Op: structs.CAOpSetProviderState,
ProviderState: &structs.CAConsulProviderState{ID: c.id},
}
if _, err := c.Delegate.ApplyCARequest(args); err != nil {
return err
}
c.logger.Debug("consul CA provider configured", "id", c.id, "is_primary", c.isPrimary)
return nil
}
// State implements Provider. Consul actually does store all it's state in raft
// but it manages it independently through a separate table already so this is a
// no-op. This method just passes through testState which allows tests to verify
// state handling behavior without needing to plumb a full test mock provider
// right through Consul server code.
func (c *ConsulProvider) State() (map[string]string, error) {
return c.testState, nil
}
// GenerateCAChain initializes a new root certificate and private key if needed.
func (c *ConsulProvider) GenerateCAChain() (string, error) {
providerState, err := c.getState()
if err != nil {
return "", err
}
if !c.isPrimary {
return "", fmt.Errorf("provider is not the root certificate authority")
}
if providerState.RootCert != "" {
return providerState.RootCert, nil
}
// Generate a private key if needed
newState := *providerState
if c.config.PrivateKey == "" {
_, pk, err := connect.GeneratePrivateKeyWithConfig(c.config.PrivateKeyType, c.config.PrivateKeyBits)
if err != nil {
return "", err
}
newState.PrivateKey = pk
} else {
newState.PrivateKey = c.config.PrivateKey
}
// Generate the root CA if necessary
if c.config.RootCert == "" {
nextSerial, err := c.incrementAndGetNextSerialNumber()
if err != nil {
return "", fmt.Errorf("error computing next serial number: %v", err)
}
ca, err := c.generateCA(newState.PrivateKey, nextSerial, c.config.RootCertTTL)
if err != nil {
return "", fmt.Errorf("error generating CA: %v", err)
}
newState.RootCert = ca
} else {
newState.RootCert = c.config.RootCert
}
// Write the provider state
args := &structs.CARequest{
Op: structs.CAOpSetProviderState,
ProviderState: &newState,
}
if _, err := c.Delegate.ApplyCARequest(args); err != nil {
return "", err
}
return newState.RootCert, nil
}
// GenerateIntermediateCSR creates a private key and generates a CSR
// for another datacenter's root to sign.
func (c *ConsulProvider) GenerateIntermediateCSR() (string, string, error) {
providerState, err := c.getState()
if err != nil {
return "", "", err
}
if c.isPrimary {
return "", "", fmt.Errorf("provider is the root certificate authority, " +
"cannot generate an intermediate CSR")
}
// Create a new private key and CSR.
signer, pk, err := connect.GeneratePrivateKeyWithConfig(c.config.PrivateKeyType, c.config.PrivateKeyBits)
if err != nil {
return "", "", err
}
csr, err := connect.CreateCACSR(c.spiffeID, signer)
if err != nil {
return "", "", err
}
// Write the new provider state to the store.
newState := *providerState
newState.PrivateKey = pk
args := &structs.CARequest{
Op: structs.CAOpSetProviderState,
ProviderState: &newState,
}
if _, err := c.Delegate.ApplyCARequest(args); err != nil {
return "", "", err
}
return csr, "", nil
}
// SetIntermediate validates that the given intermediate is for the right private key
// and writes the given intermediate and root certificates to the state.
func (c *ConsulProvider) SetIntermediate(intermediatePEM, rootPEM, _ string) error {
providerState, err := c.getState()
if err != nil {
return err
}
if c.isPrimary {
return fmt.Errorf("cannot set an intermediate using another root in the primary datacenter")
}
if err = validateSetIntermediate(intermediatePEM, rootPEM, c.spiffeID); err != nil {
return err
}
if err := validateIntermediateSignedByPrivateKey(intermediatePEM, providerState.PrivateKey); err != nil {
return err
}
// Update the state
newState := *providerState
newState.IntermediateCert = intermediatePEM
newState.RootCert = rootPEM
args := &structs.CARequest{
Op: structs.CAOpSetProviderState,
ProviderState: &newState,
}
if _, err := c.Delegate.ApplyCARequest(args); err != nil {
return err
}
return nil
}
func (c *ConsulProvider) ActiveLeafSigningCert() (string, error) {
providerState, err := c.getState()
if err != nil {
return "", err
}
if c.isPrimary {
return providerState.RootCert, nil
}
return providerState.IntermediateCert, nil
}
// Remove the state store entry for this provider instance.
func (c *ConsulProvider) Cleanup(_ bool, _ map[string]interface{}) error {
// This method only gets called for final cleanup. Therefore we don't
// need to worry about the case where a ca config update is made to
// change the cert ttls but leaving the private key and root cert the
// same. Changing those would change the id field on the provider.
args := &structs.CARequest{
Op: structs.CAOpDeleteProviderState,
ProviderState: &structs.CAConsulProviderState{ID: c.id},
}
if _, err := c.Delegate.ApplyCARequest(args); err != nil {
return err
}
return nil
}
// Sign returns a new certificate valid for the given SpiffeIDService
// using the current CA.
func (c *ConsulProvider) Sign(csr *x509.CertificateRequest) (string, error) {
connect.HackSANExtensionForCSR(csr)
// Lock during the signing so we don't use the same index twice
// for different cert serial numbers.
c.Lock()
defer c.Unlock()
// Get the provider state
providerState, err := c.getState()
if err != nil {
return "", err
}
if providerState.PrivateKey == "" {
return "", ErrNotInitialized
}
// Create the keyId for the cert from the signing private key.
signer, err := connect.ParseSigner(providerState.PrivateKey)
if err != nil {
return "", err
}
if signer == nil {
return "", ErrNotInitialized
}
keyId, err := connect.KeyId(signer.Public())
if err != nil {
return "", err
}
// Create the subjectKeyId for the cert from the csr public key.
subjectKeyID, err := connect.KeyId(csr.PublicKey)
if err != nil {
return "", err
}
// Parse the CA cert
certPEM, err := c.ActiveLeafSigningCert()
if err != nil {
return "", err
}
caCert, err := connect.ParseCert(certPEM)
if err != nil {
return "", fmt.Errorf("error parsing CA cert: %s", err)
}
nextSerial, err := c.incrementAndGetNextSerialNumber()
if err != nil {
return "", fmt.Errorf("error computing next serial number: %v", err)
}
// Cert template for generation
sn := &big.Int{}
sn.SetUint64(nextSerial)
// Sign the certificate valid from 1 minute in the past, this helps it be
// accepted right away even when nodes are not in close time sync across the
// cluster. A minute is more than enough for typical DC clock drift.
effectiveNow := time.Now().Add(-1 * time.Minute)
template := x509.Certificate{
SerialNumber: sn,
URIs: csr.URIs,
Signature: csr.Signature,
// We use the correct signature algorithm for the CA key we are signing with
// regardless of the algorithm used to sign the CSR signature above since
// the leaf might use a different key type.
SignatureAlgorithm: connect.SigAlgoForKey(signer),
PublicKeyAlgorithm: csr.PublicKeyAlgorithm,
PublicKey: csr.PublicKey,
BasicConstraintsValid: true,
KeyUsage: x509.KeyUsageDataEncipherment |
x509.KeyUsageKeyAgreement |
x509.KeyUsageDigitalSignature |
x509.KeyUsageKeyEncipherment,
ExtKeyUsage: []x509.ExtKeyUsage{
x509.ExtKeyUsageClientAuth,
x509.ExtKeyUsageServerAuth,
},
NotAfter: effectiveNow.Add(c.config.LeafCertTTL),
NotBefore: effectiveNow,
AuthorityKeyId: keyId,
SubjectKeyId: subjectKeyID,
DNSNames: csr.DNSNames,
IPAddresses: csr.IPAddresses,
}
// Create the certificate, PEM encode it and return that value.
var buf bytes.Buffer
bs, err := x509.CreateCertificate(
rand.Reader, &template, caCert, csr.PublicKey, signer)
if err != nil {
return "", fmt.Errorf("error generating certificate: %s", err)
}
err = pem.Encode(&buf, &pem.Block{Type: "CERTIFICATE", Bytes: bs})
if err != nil {
return "", fmt.Errorf("error encoding certificate: %s", err)
}
// Set the response
return buf.String(), nil
}
// SignIntermediate will validate the CSR to ensure the trust domain in the
// URI SAN matches the local one and that basic constraints for a CA certificate
// are met. It should return a signed CA certificate with a path length constraint
// of 0 to ensure that the certificate cannot be used to generate further CA certs.
func (c *ConsulProvider) SignIntermediate(csr *x509.CertificateRequest) (string, error) {
providerState, err := c.getState()
if err != nil {
return "", err
}
err = validateSignIntermediate(csr, c.spiffeID)
if err != nil {
return "", err
}
// Get the signing private key.
signer, err := connect.ParseSigner(providerState.PrivateKey)
if err != nil {
return "", err
}
subjectKeyID, err := connect.KeyId(csr.PublicKey)
if err != nil {
return "", err
}
// Parse the CA cert
caCert, err := connect.ParseCert(providerState.RootCert)
if err != nil {
return "", fmt.Errorf("error parsing CA cert: %s", err)
}
nextSerial, err := c.incrementAndGetNextSerialNumber()
if err != nil {
return "", fmt.Errorf("error computing next serial number: %v", err)
}
// Cert template for generation
sn := &big.Int{}
sn.SetUint64(nextSerial)
// Sign the certificate valid from 1 minute in the past, this helps it be
// accepted right away even when nodes are not in close time sync across the
// cluster. A minute is more than enough for typical DC clock drift.
effectiveNow := time.Now().Add(-1 * CertificateTimeDriftBuffer)
template := x509.Certificate{
SerialNumber: sn,
DNSNames: csr.DNSNames,
EmailAddresses: csr.EmailAddresses,
IPAddresses: csr.IPAddresses,
URIs: csr.URIs,
ExtraExtensions: csr.ExtraExtensions,
Subject: csr.Subject,
Signature: csr.Signature,
SignatureAlgorithm: connect.SigAlgoForKey(signer),
PublicKeyAlgorithm: csr.PublicKeyAlgorithm,
PublicKey: csr.PublicKey,
BasicConstraintsValid: true,
KeyUsage: x509.KeyUsageCertSign |
x509.KeyUsageCRLSign |
x509.KeyUsageDigitalSignature,
IsCA: true,
MaxPathLenZero: true,
NotAfter: effectiveNow.Add(c.config.IntermediateCertTTL),
NotBefore: effectiveNow,
SubjectKeyId: subjectKeyID,
}
// Create the certificate, PEM encode it and return that value.
var buf bytes.Buffer
bs, err := x509.CreateCertificate(
rand.Reader, &template, caCert, csr.PublicKey, signer)
if err != nil {
return "", fmt.Errorf("error generating certificate: %s", err)
}
err = pem.Encode(&buf, &pem.Block{Type: "CERTIFICATE", Bytes: bs})
if err != nil {
return "", fmt.Errorf("error encoding certificate: %s", err)
}
// Set the response
return buf.String(), nil
}
// CrossSignCA returns the given CA cert signed by the current active root.
func (c *ConsulProvider) CrossSignCA(cert *x509.Certificate) (string, error) {
c.Lock()
defer c.Unlock()
if c.config.DisableCrossSigning {
return "", errors.New("cross-signing disabled")
}
// Get the provider state
providerState, err := c.getState()
if err != nil {
return "", err
}
privKey, err := connect.ParseSigner(providerState.PrivateKey)
if err != nil {
return "", fmt.Errorf("error parsing private key %q: %s", providerState.PrivateKey, err)
}
rootCA, err := connect.ParseCert(providerState.RootCert)
if err != nil {
return "", err
}
keyId, err := connect.KeyId(privKey.Public())
if err != nil {
return "", err
}
nextSerial, err := c.incrementAndGetNextSerialNumber()
if err != nil {
return "", fmt.Errorf("error computing next serial number: %v", err)
}
// Create the cross-signing template from the existing root CA
serialNum := &big.Int{}
serialNum.SetUint64(nextSerial)
template := *cert
template.SerialNumber = serialNum
template.SignatureAlgorithm = rootCA.SignatureAlgorithm
template.AuthorityKeyId = keyId
// Sign the certificate valid from 1 minute in the past, this helps it be
// accepted right away even when nodes are not in close time sync across the
// cluster. A minute is more than enough for typical DC clock drift.
effectiveNow := time.Now().Add(-1 * time.Minute)
template.NotBefore = effectiveNow
// This cross-signed cert is only needed during rotation, and only while old
// leaf certs are still in use. They expire within 3 days currently so 7 is
// safe. TODO(banks): make this be based on leaf expiry time when that is
// configurable.
template.NotAfter = effectiveNow.AddDate(0, 0, 7)
bs, err := x509.CreateCertificate(
rand.Reader, &template, rootCA, cert.PublicKey, privKey)
if err != nil {
return "", fmt.Errorf("error generating CA certificate: %s", err)
}
var buf bytes.Buffer
err = pem.Encode(&buf, &pem.Block{Type: "CERTIFICATE", Bytes: bs})
if err != nil {
return "", fmt.Errorf("error encoding private key: %s", err)
}
return buf.String(), nil
}
// SupportsCrossSigning implements Provider
func (c *ConsulProvider) SupportsCrossSigning() (bool, error) {
return !c.config.DisableCrossSigning, nil
}
// getState returns the current provider state from the state delegate, and returns
// ErrNotInitialized if no entry is found.
func (c *ConsulProvider) getState() (*structs.CAConsulProviderState, error) {
providerState, err := c.Delegate.ProviderState(c.id)
if err != nil {
return nil, err
}
if providerState == nil {
return nil, ErrNotInitialized
}
return providerState, nil
}
func (c *ConsulProvider) incrementAndGetNextSerialNumber() (uint64, error) {
args := &structs.CARequest{
Op: structs.CAOpIncrementProviderSerialNumber,
}
raw, err := c.Delegate.ApplyCARequest(args)
if err != nil {
return 0, err
}
return raw.(uint64), nil
}
// generateCA makes a new root CA using the current private key
func (c *ConsulProvider) generateCA(privateKey string, sn uint64, rootCertTTL time.Duration) (string, error) {
privKey, err := connect.ParseSigner(privateKey)
if err != nil {
return "", fmt.Errorf("error parsing private key %q: %s", privateKey, err)
}
// The URI (SPIFFE compatible) for the cert
id := connect.SpiffeIDSigningForCluster(c.clusterID)
keyId, err := connect.KeyId(privKey.Public())
if err != nil {
return "", err
}
// Create the CA cert
uid, err := connect.CompactUID()
if err != nil {
return "", err
}
cn := connect.CACN("consul", uid, c.clusterID, c.isPrimary)
serialNum := &big.Int{}
serialNum.SetUint64(sn)
template := x509.Certificate{
SerialNumber: serialNum,
Subject: pkix.Name{CommonName: cn},
URIs: []*url.URL{id.URI()},
BasicConstraintsValid: true,
KeyUsage: x509.KeyUsageCertSign |
x509.KeyUsageCRLSign |
x509.KeyUsageDigitalSignature,
IsCA: true,
NotAfter: time.Now().Add(rootCertTTL),
NotBefore: time.Now(),
AuthorityKeyId: keyId,
SubjectKeyId: keyId,
}
bs, err := x509.CreateCertificate(
rand.Reader, &template, &template, privKey.Public(), privKey)
if err != nil {
return "", fmt.Errorf("error generating CA certificate: %s", err)
}
var buf bytes.Buffer
err = pem.Encode(&buf, &pem.Block{Type: "CERTIFICATE", Bytes: bs})
if err != nil {
return "", fmt.Errorf("error encoding private key: %s", err)
}
return buf.String(), nil
}
func (c *ConsulProvider) parseTestState(rawConfig map[string]interface{}, state map[string]string) {
c.testState = nil
if rawTestState, ok := rawConfig["test_state"]; ok {
if ts, ok := rawTestState.(map[string]string); ok {
c.testState = ts
return
}
// Secondary's config takes a trip through the state store before Configure
// is called and RPC calls that msgpack encode also have the same effect. It
// means we end up with map[string]string encoded as map[string]interface{}.
// We just handle that case. There is no struct error handling because this
// is test-only code (undocumented config key) and we'd rather not leave a
// way to error CA setup and leave cluster unavailable in prod by
// accidentally setting a bad test_state config.
if ts, ok := rawTestState.(map[string]interface{}); ok {
c.testState = make(map[string]string)
for k, v := range ts {
if s, ok := v.(string); ok {
c.testState[k] = s
}
}
}
}
// If config didn't explicitly specify test_state to return, but there is some
// actual state from a previous provider. Just use that since that is expected
// behavior that providers with state would preserve the state they are passed
// in the common case.
if len(state) > 0 && c.testState == nil {
c.testState = state
}
}