consul/agent/hcp/bootstrap/bootstrap.go
hashicorp-copywrite[bot] 5fb9df1640
[COMPLIANCE] License changes (#18443)
* Adding explicit MPL license for sub-package

This directory and its subdirectories (packages) contain files licensed with the MPLv2 `LICENSE` file in this directory and are intentionally licensed separately from the BSL `LICENSE` file at the root of this repository.

* Adding explicit MPL license for sub-package

This directory and its subdirectories (packages) contain files licensed with the MPLv2 `LICENSE` file in this directory and are intentionally licensed separately from the BSL `LICENSE` file at the root of this repository.

* Updating the license from MPL to Business Source License

Going forward, this project will be licensed under the Business Source License v1.1. Please see our blog post for more details at <Blog URL>, FAQ at www.hashicorp.com/licensing-faq, and details of the license at www.hashicorp.com/bsl.

* add missing license headers

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

---------

Co-authored-by: hashicorp-copywrite[bot] <110428419+hashicorp-copywrite[bot]@users.noreply.github.com>
2023-08-11 09:12:13 -04:00

585 lines
19 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
// Package bootstrap handles bootstrapping an agent's config from HCP. It must be a
// separate package from other HCP components because it has a dependency on
// agent/config while other components need to be imported and run within the
// server process in agent/consul and that would create a dependency cycle.
package bootstrap
import (
"bufio"
"context"
"crypto/tls"
"crypto/x509"
"encoding/json"
"encoding/pem"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"time"
"github.com/hashicorp/consul/agent/config"
"github.com/hashicorp/consul/agent/connect"
hcpclient "github.com/hashicorp/consul/agent/hcp/client"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/consul/lib/retry"
"github.com/hashicorp/go-uuid"
)
const (
subDir = "hcp-config"
caFileName = "server-tls-cas.pem"
certFileName = "server-tls-cert.pem"
configFileName = "server-config.json"
keyFileName = "server-tls-key.pem"
tokenFileName = "hcp-management-token"
successFileName = "successful-bootstrap"
)
type ConfigLoader func(source config.Source) (config.LoadResult, error)
// UI is a shim to allow the agent command to pass in it's mitchelh/cli.UI so we
// can output useful messages to the user during bootstrapping. For example if
// we have to retry several times to bootstrap we don't want the agent to just
// stall with no output which is the case if we just returned all intermediate
// warnings or errors.
type UI interface {
Output(string)
Warn(string)
Info(string)
Error(string)
}
// RawBootstrapConfig contains the Consul config as a raw JSON string and the management token
// which either was retrieved from persisted files or from the bootstrap endpoint
type RawBootstrapConfig struct {
ConfigJSON string
ManagementToken string
}
// LoadConfig will attempt to load previously-fetched config from disk and fall back to
// fetch from HCP servers if the local data is incomplete.
// It must be passed a (CLI) UI implementation so it can deliver progress
// updates to the user, for example if it is waiting to retry for a long period.
func LoadConfig(ctx context.Context, client hcpclient.Client, dataDir string, loader ConfigLoader, ui UI) (ConfigLoader, error) {
ui.Output("Loading configuration from HCP")
// See if we have existing config on disk
//
// OPTIMIZE: We could probably be more intelligent about config loading.
// The currently implemented approach is:
// 1. Attempt to load data from disk
// 2. If that fails or the data is incomplete, block indefinitely fetching remote config.
//
// What if instead we had the following flow:
// 1. Attempt to fetch config from HCP.
// 2. If that fails, fall back to data on disk from last fetch.
// 3. If that fails, go into blocking loop to fetch remote config.
//
// This should allow us to more gracefully transition cases like when
// an existing cluster is linked, but then wants to receive TLS materials
// at a later time. Currently, if we observe the existing-cluster marker we
// don't attempt to fetch any additional configuration from HCP.
cfg, ok := loadPersistedBootstrapConfig(dataDir, ui)
if !ok {
ui.Info("Fetching configuration from HCP servers")
var err error
cfg, err = fetchBootstrapConfig(ctx, client, dataDir, ui)
if err != nil {
return nil, fmt.Errorf("failed to bootstrap from HCP: %w", err)
}
ui.Info("Configuration fetched from HCP and saved on local disk")
} else {
ui.Info("Loaded HCP configuration from local disk")
}
// Create a new loader func to return
newLoader := bootstrapConfigLoader(loader, cfg)
return newLoader, nil
}
// bootstrapConfigLoader is a ConfigLoader for passing bootstrap JSON config received from HCP
// to the config.builder. ConfigLoaders are functions used to build an agent's RuntimeConfig
// from various sources like files and flags. This config is contained in the config.LoadResult.
//
// The flow to include bootstrap config from HCP as a loader's data source is as follows:
//
// 1. A base ConfigLoader function (baseLoader) is created on agent start, and it sets the input
// source argument as the DefaultConfig.
//
// 2. When a server agent can be configured by HCP that baseLoader is wrapped in this bootstrapConfigLoader.
//
// 3. The bootstrapConfigLoader calls that base loader with the bootstrap JSON config as the
// default source. This data will be merged with other valid sources in the config.builder.
//
// 4. The result of the call to baseLoader() below contains the resulting RuntimeConfig, and we do some
// additional modifications to attach data that doesn't get populated during the build in the config pkg.
//
// Note that since the ConfigJSON is stored as the baseLoader's DefaultConfig, its data is the first
// to be merged by the config.builder and could be overwritten by user-provided values in config files or
// CLI flags. However, values set to RuntimeConfig after the baseLoader call are final.
func bootstrapConfigLoader(baseLoader ConfigLoader, cfg *RawBootstrapConfig) ConfigLoader {
return func(source config.Source) (config.LoadResult, error) {
// Don't allow any further attempts to provide a DefaultSource. This should
// only ever be needed later in client agent AutoConfig code but that should
// be mutually exclusive from this bootstrapping mechanism since this is
// only for servers. If we ever try to change that, this clear failure
// should alert future developers that the assumptions are changing rather
// than quietly not applying the config they expect!
if source != nil {
return config.LoadResult{},
fmt.Errorf("non-nil config source provided to a loader after HCP bootstrap already provided a DefaultSource")
}
// Otherwise, just call to the loader we were passed with our own additional
// JSON as the source.
//
// OPTIMIZE: We could check/log whether any fields set by the remote config were overwritten by a user-provided flag.
res, err := baseLoader(config.FileSource{
Name: "HCP Bootstrap",
Format: "json",
Data: cfg.ConfigJSON,
})
if err != nil {
return res, fmt.Errorf("failed to load HCP Bootstrap config: %w", err)
}
finalizeRuntimeConfig(res.RuntimeConfig, cfg)
return res, nil
}
}
const (
accessControlHeaderName = "Access-Control-Expose-Headers"
accessControlHeaderValue = "x-consul-default-acl-policy"
)
// finalizeRuntimeConfig will set additional HCP-specific values that are not
// handled by the config.builder.
func finalizeRuntimeConfig(rc *config.RuntimeConfig, cfg *RawBootstrapConfig) {
rc.Cloud.ManagementToken = cfg.ManagementToken
// HTTP response headers are modified for the HCP UI to work.
if rc.HTTPResponseHeaders == nil {
rc.HTTPResponseHeaders = make(map[string]string)
}
prevValue, ok := rc.HTTPResponseHeaders[accessControlHeaderName]
if !ok {
rc.HTTPResponseHeaders[accessControlHeaderName] = accessControlHeaderValue
} else {
rc.HTTPResponseHeaders[accessControlHeaderName] = prevValue + "," + accessControlHeaderValue
}
}
// fetchBootstrapConfig will fetch boostrap configuration from remote servers and persist it to disk.
// It will retry until successful or a terminal error condition is found (e.g. permission denied).
func fetchBootstrapConfig(ctx context.Context, client hcpclient.Client, dataDir string, ui UI) (*RawBootstrapConfig, error) {
w := retry.Waiter{
MinWait: 1 * time.Second,
MaxWait: 5 * time.Minute,
Jitter: retry.NewJitter(50),
}
var bsCfg *hcpclient.BootstrapConfig
for {
// Note we don't want to shadow `ctx` here since we need that for the Wait
// below.
reqCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
defer cancel()
resp, err := client.FetchBootstrap(reqCtx)
if err != nil {
ui.Error(fmt.Sprintf("Error: failed to fetch bootstrap config from HCP, will retry in %s: %s",
w.NextWait().Round(time.Second), err))
if err := w.Wait(ctx); err != nil {
return nil, err
}
// Finished waiting, restart loop
continue
}
bsCfg = resp
break
}
devMode := dataDir == ""
cfgJSON, err := persistAndProcessConfig(dataDir, devMode, bsCfg)
if err != nil {
return nil, fmt.Errorf("failed to persist config for existing cluster: %w", err)
}
return &RawBootstrapConfig{
ConfigJSON: cfgJSON,
ManagementToken: bsCfg.ManagementToken,
}, nil
}
// persistAndProcessConfig is called when we receive data from CCM.
// We validate and persist everything that was received, then also update
// the JSON config as needed.
func persistAndProcessConfig(dataDir string, devMode bool, bsCfg *hcpclient.BootstrapConfig) (string, error) {
if devMode {
// Agent in dev mode, we still need somewhere to persist the certs
// temporarily though to be able to start up at all since we don't support
// inline certs right now. Use temp dir
tmp, err := os.MkdirTemp(os.TempDir(), "consul-dev-")
if err != nil {
return "", fmt.Errorf("failed to create temp dir for certificates: %w", err)
}
dataDir = tmp
}
// Create subdir if it's not already there.
dir := filepath.Join(dataDir, subDir)
if err := lib.EnsurePath(dir, true); err != nil {
return "", fmt.Errorf("failed to ensure directory %q: %w", dir, err)
}
// Parse just to a map for now as we only have to inject to a specific place
// and parsing whole Config struct is complicated...
var cfg map[string]any
if err := json.Unmarshal([]byte(bsCfg.ConsulConfig), &cfg); err != nil {
return "", fmt.Errorf("failed to unmarshal bootstrap config: %w", err)
}
// Avoid ever setting an initial_management token from HCP now that we can
// separately bootstrap an HCP management token with a distinct accessor ID.
//
// CCM will continue to return an initial_management token because previous versions of Consul
// cannot bootstrap an HCP management token distinct from the initial management token.
// This block can be deleted once CCM supports tailoring bootstrap config responses
// based on the version of Consul that requested it.
acls, aclsOK := cfg["acl"].(map[string]any)
if aclsOK {
tokens, tokensOK := acls["tokens"].(map[string]interface{})
if tokensOK {
delete(tokens, "initial_management")
}
}
var cfgJSON string
if bsCfg.TLSCert != "" {
if err := validateTLSCerts(bsCfg.TLSCert, bsCfg.TLSCertKey, bsCfg.TLSCAs); err != nil {
return "", fmt.Errorf("invalid certificates: %w", err)
}
// Persist the TLS cert files from the response since we need to refer to them
// as disk files either way.
if err := persistTLSCerts(dir, bsCfg.TLSCert, bsCfg.TLSCertKey, bsCfg.TLSCAs); err != nil {
return "", fmt.Errorf("failed to persist TLS certificates to dir %q: %w", dataDir, err)
}
// Store paths to the persisted TLS cert files.
cfg["ca_file"] = filepath.Join(dir, caFileName)
cfg["cert_file"] = filepath.Join(dir, certFileName)
cfg["key_file"] = filepath.Join(dir, keyFileName)
// Convert the bootstrap config map back into a string
cfgJSONBytes, err := json.Marshal(cfg)
if err != nil {
return "", err
}
cfgJSON = string(cfgJSONBytes)
}
if !devMode {
// Persist the final config we need to add so that it is available locally after a restart.
// Assuming the configured data dir wasn't a tmp dir to start with.
if err := persistBootstrapConfig(dir, cfgJSON); err != nil {
return "", fmt.Errorf("failed to persist bootstrap config: %w", err)
}
// HCP only returns the management token if it requires Consul to
// initialize it
if bsCfg.ManagementToken != "" {
if err := validateManagementToken(bsCfg.ManagementToken); err != nil {
return "", fmt.Errorf("invalid management token: %w", err)
}
if err := persistManagementToken(dir, bsCfg.ManagementToken); err != nil {
return "", fmt.Errorf("failed to persist HCP management token: %w", err)
}
}
if err := persistSuccessMarker(dir); err != nil {
return "", fmt.Errorf("failed to persist success marker: %w", err)
}
}
return cfgJSON, nil
}
func persistSuccessMarker(dir string) error {
name := filepath.Join(dir, successFileName)
return os.WriteFile(name, []byte(""), 0600)
}
func persistTLSCerts(dir string, serverCert, serverKey string, caCerts []string) error {
if serverCert == "" || serverKey == "" {
return fmt.Errorf("unexpected bootstrap response from HCP: missing TLS information")
}
// Write out CA cert(s). We write them all to one file because Go's x509
// machinery will read as many certs as it finds from each PEM file provided
// and add them separaetly to the CertPool for validation
f, err := os.OpenFile(filepath.Join(dir, caFileName), os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0600)
if err != nil {
return err
}
bf := bufio.NewWriter(f)
for _, caPEM := range caCerts {
bf.WriteString(caPEM + "\n")
}
if err := bf.Flush(); err != nil {
return err
}
if err := f.Close(); err != nil {
return err
}
if err := os.WriteFile(filepath.Join(dir, certFileName), []byte(serverCert), 0600); err != nil {
return err
}
if err := os.WriteFile(filepath.Join(dir, keyFileName), []byte(serverKey), 0600); err != nil {
return err
}
return nil
}
// Basic validation to ensure a UUID was loaded and assumes the token is non-empty
func validateManagementToken(token string) error {
// note: we assume that the token is not an empty string
if _, err := uuid.ParseUUID(token); err != nil {
return errors.New("management token is not a valid UUID")
}
return nil
}
func persistManagementToken(dir, token string) error {
name := filepath.Join(dir, tokenFileName)
return os.WriteFile(name, []byte(token), 0600)
}
func persistBootstrapConfig(dir, cfgJSON string) error {
// Persist the important bits we got from bootstrapping. The TLS certs are
// already persisted, just need to persist the config we are going to add.
name := filepath.Join(dir, configFileName)
return os.WriteFile(name, []byte(cfgJSON), 0600)
}
func loadPersistedBootstrapConfig(dataDir string, ui UI) (*RawBootstrapConfig, bool) {
if dataDir == "" {
// There's no files to load when in dev mode.
return nil, false
}
dir := filepath.Join(dataDir, subDir)
_, err := os.Stat(filepath.Join(dir, successFileName))
if os.IsNotExist(err) {
// Haven't bootstrapped from HCP.
return nil, false
}
if err != nil {
ui.Warn("failed to check for config on disk, re-fetching from HCP: " + err.Error())
return nil, false
}
if err := checkCerts(dir); err != nil {
ui.Warn("failed to validate certs on disk, re-fetching from HCP: " + err.Error())
return nil, false
}
configJSON, err := loadBootstrapConfigJSON(dataDir)
if err != nil {
ui.Warn("failed to load bootstrap config from disk, re-fetching from HCP: " + err.Error())
return nil, false
}
mgmtToken, err := loadManagementToken(dir)
if err != nil {
ui.Warn("failed to load HCP management token from disk, re-fetching from HCP: " + err.Error())
return nil, false
}
return &RawBootstrapConfig{
ConfigJSON: configJSON,
ManagementToken: mgmtToken,
}, true
}
func loadBootstrapConfigJSON(dataDir string) (string, error) {
filename := filepath.Join(dataDir, subDir, configFileName)
_, err := os.Stat(filename)
if os.IsNotExist(err) {
return "", nil
}
if err != nil {
return "", fmt.Errorf("failed to check for bootstrap config: %w", err)
}
// Attempt to load persisted config to check for errors and basic validity.
// Errors here will raise issues like referencing unsupported config fields.
_, err = config.Load(config.LoadOpts{
ConfigFiles: []string{filename},
HCL: []string{
"server = true",
`bind_addr = "127.0.0.1"`,
fmt.Sprintf("data_dir = %q", dataDir),
},
ConfigFormat: "json",
})
if err != nil {
return "", fmt.Errorf("failed to parse local bootstrap config: %w", err)
}
jsonBs, err := os.ReadFile(filename)
if err != nil {
return "", fmt.Errorf(fmt.Sprintf("failed to read local bootstrap config file: %s", err))
}
return strings.TrimSpace(string(jsonBs)), nil
}
func loadManagementToken(dir string) (string, error) {
name := filepath.Join(dir, tokenFileName)
bytes, err := os.ReadFile(name)
if os.IsNotExist(err) {
return "", errors.New("configuration files on disk are incomplete, missing: " + name)
}
if err != nil {
return "", fmt.Errorf("failed to read: %w", err)
}
token := string(bytes)
if err := validateManagementToken(token); err != nil {
return "", fmt.Errorf("invalid management token: %w", err)
}
return token, nil
}
func checkCerts(dir string) error {
files := []string{
filepath.Join(dir, caFileName),
filepath.Join(dir, certFileName),
filepath.Join(dir, keyFileName),
}
missing := make([]string, 0)
for _, file := range files {
_, err := os.Stat(file)
if os.IsNotExist(err) {
missing = append(missing, file)
continue
}
if err != nil {
return err
}
}
// If all the TLS files are missing, assume this is intentional.
// Existing clusters do not receive any TLS certs.
if len(missing) == len(files) {
return nil
}
// If only some of the files are missing, something went wrong.
if len(missing) > 0 {
return fmt.Errorf("configuration files on disk are incomplete, missing: %v", missing)
}
cert, key, caCerts, err := loadCerts(dir)
if err != nil {
return fmt.Errorf("failed to load certs from disk: %w", err)
}
if err = validateTLSCerts(cert, key, caCerts); err != nil {
return fmt.Errorf("invalid certs on disk: %w", err)
}
return nil
}
func loadCerts(dir string) (cert, key string, caCerts []string, err error) {
certPEMBlock, err := os.ReadFile(filepath.Join(dir, certFileName))
if err != nil {
return "", "", nil, err
}
keyPEMBlock, err := os.ReadFile(filepath.Join(dir, keyFileName))
if err != nil {
return "", "", nil, err
}
caPEMs, err := os.ReadFile(filepath.Join(dir, caFileName))
if err != nil {
return "", "", nil, err
}
caCerts, err = splitCACerts(caPEMs)
if err != nil {
return "", "", nil, fmt.Errorf("failed to parse CA certs: %w", err)
}
return string(certPEMBlock), string(keyPEMBlock), caCerts, nil
}
// splitCACerts takes a list of concatenated PEM blocks and splits
// them back up into strings. This is used because CACerts are written
// into a single file, but validated individually.
func splitCACerts(caPEMs []byte) ([]string, error) {
var out []string
for {
nextBlock, remaining := pem.Decode(caPEMs)
if nextBlock == nil {
break
}
if nextBlock.Type != "CERTIFICATE" {
return nil, fmt.Errorf("PEM-block should be CERTIFICATE type")
}
// Collect up to the start of the remaining bytes.
// We don't grab nextBlock.Bytes because it's not PEM encoded.
out = append(out, string(caPEMs[:len(caPEMs)-len(remaining)]))
caPEMs = remaining
}
if len(out) == 0 {
return nil, errors.New("invalid CA certificate")
}
return out, nil
}
// validateTLSCerts checks that the CA cert, server cert, and key on disk are structurally valid.
//
// OPTIMIZE: This could be improved by returning an error if certs are expired or close to expiration.
// However, that requires issuing new certs on bootstrap requests, since returning an error
// would trigger a re-fetch from HCP.
func validateTLSCerts(cert, key string, caCerts []string) error {
leaf, err := tls.X509KeyPair([]byte(cert), []byte(key))
if err != nil {
return errors.New("invalid server certificate or key")
}
_, err = x509.ParseCertificate(leaf.Certificate[0])
if err != nil {
return errors.New("invalid server certificate")
}
for _, caCert := range caCerts {
_, err = connect.ParseCert(caCert)
if err != nil {
return errors.New("invalid CA certificate")
}
}
return nil
}