mirror of
https://github.com/status-im/consul.git
synced 2025-01-11 22:34:55 +00:00
d6e05482ab
Co-authored-by: Daniel Nephin <dnephin@hashicorp.com>
487 lines
14 KiB
Go
487 lines
14 KiB
Go
package autoconf
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"net"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/hashicorp/consul/agent/agentpb"
|
|
"github.com/hashicorp/consul/agent/config"
|
|
"github.com/hashicorp/consul/lib"
|
|
"github.com/hashicorp/consul/logging"
|
|
"github.com/hashicorp/consul/tlsutil"
|
|
"github.com/hashicorp/go-discover"
|
|
discoverk8s "github.com/hashicorp/go-discover/provider/k8s"
|
|
"github.com/hashicorp/go-hclog"
|
|
)
|
|
|
|
const (
|
|
// autoConfigFileName is the name of the file that the agent auto-config settings are
|
|
// stored in within the data directory
|
|
autoConfigFileName = "auto-config.json"
|
|
)
|
|
|
|
// DirectRPC is the interface that needs to be satisifed for AutoConfig to be able to perform
|
|
// direct RPCs against individual servers. This should not use
|
|
type DirectRPC interface {
|
|
RPC(dc string, node string, addr net.Addr, method string, args interface{}, reply interface{}) error
|
|
}
|
|
|
|
type options struct {
|
|
logger hclog.Logger
|
|
directRPC DirectRPC
|
|
tlsConfigurator *tlsutil.Configurator
|
|
builderOpts config.BuilderOpts
|
|
waiter *lib.RetryWaiter
|
|
overrides []config.Source
|
|
}
|
|
|
|
// Option represents one point of configurability for the New function
|
|
// when creating a new AutoConfig object
|
|
type Option func(*options)
|
|
|
|
// WithLogger will cause the created AutoConfig type to use the provided logger
|
|
func WithLogger(logger hclog.Logger) Option {
|
|
return func(opt *options) {
|
|
opt.logger = logger
|
|
}
|
|
}
|
|
|
|
// WithTLSConfigurator will cause the created AutoConfig type to use the provided configurator
|
|
func WithTLSConfigurator(tlsConfigurator *tlsutil.Configurator) Option {
|
|
return func(opt *options) {
|
|
opt.tlsConfigurator = tlsConfigurator
|
|
}
|
|
}
|
|
|
|
// WithConnectionPool will cause the created AutoConfig type to use the provided connection pool
|
|
func WithDirectRPC(directRPC DirectRPC) Option {
|
|
return func(opt *options) {
|
|
opt.directRPC = directRPC
|
|
}
|
|
}
|
|
|
|
// WithBuilderOpts will cause the created AutoConfig type to use the provided CLI builderOpts
|
|
func WithBuilderOpts(builderOpts config.BuilderOpts) Option {
|
|
return func(opt *options) {
|
|
opt.builderOpts = builderOpts
|
|
}
|
|
}
|
|
|
|
// WithRetryWaiter will cause the created AutoConfig type to use the provided retry waiter
|
|
func WithRetryWaiter(waiter *lib.RetryWaiter) Option {
|
|
return func(opt *options) {
|
|
opt.waiter = waiter
|
|
}
|
|
}
|
|
|
|
// WithOverrides is used to provide a config source to append to the tail sources
|
|
// during config building. It is really only useful for testing to tune non-user
|
|
// configurable tunables to make various tests converge more quickly than they
|
|
// could otherwise.
|
|
func WithOverrides(overrides ...config.Source) Option {
|
|
return func(opt *options) {
|
|
opt.overrides = overrides
|
|
}
|
|
}
|
|
|
|
// AutoConfig is all the state necessary for being able to parse a configuration
|
|
// as well as perform the necessary RPCs to perform Agent Auto Configuration.
|
|
//
|
|
// NOTE: This struct and methods on it are not currently thread/goroutine safe.
|
|
// However it doesn't spawn any of its own go routines yet and is used in a
|
|
// synchronous fashion. In the future if either of those two conditions change
|
|
// then we will need to add some locking here. I am deferring that for now
|
|
// to help ease the review of this already large PR.
|
|
type AutoConfig struct {
|
|
config *config.RuntimeConfig
|
|
builderOpts config.BuilderOpts
|
|
logger hclog.Logger
|
|
directRPC DirectRPC
|
|
tlsConfigurator *tlsutil.Configurator
|
|
autoConfigData string
|
|
waiter *lib.RetryWaiter
|
|
overrides []config.Source
|
|
}
|
|
|
|
func flattenOptions(opts []Option) options {
|
|
var flat options
|
|
for _, opt := range opts {
|
|
opt(&flat)
|
|
}
|
|
return flat
|
|
}
|
|
|
|
// New creates a new AutoConfig object for providing automatic
|
|
// Consul configuration.
|
|
func New(options ...Option) (*AutoConfig, error) {
|
|
flat := flattenOptions(options)
|
|
|
|
if flat.directRPC == nil {
|
|
return nil, fmt.Errorf("must provide a direct RPC delegate")
|
|
}
|
|
|
|
if flat.tlsConfigurator == nil {
|
|
return nil, fmt.Errorf("must provide a TLS configurator")
|
|
}
|
|
|
|
logger := flat.logger
|
|
if logger == nil {
|
|
logger = hclog.NewNullLogger()
|
|
} else {
|
|
logger = logger.Named(logging.AutoConfig)
|
|
}
|
|
|
|
waiter := flat.waiter
|
|
if waiter == nil {
|
|
waiter = lib.NewRetryWaiter(1, 0, 10*time.Minute, lib.NewJitterRandomStagger(25))
|
|
}
|
|
|
|
ac := &AutoConfig{
|
|
builderOpts: flat.builderOpts,
|
|
logger: logger,
|
|
directRPC: flat.directRPC,
|
|
tlsConfigurator: flat.tlsConfigurator,
|
|
waiter: waiter,
|
|
overrides: flat.overrides,
|
|
}
|
|
|
|
return ac, nil
|
|
}
|
|
|
|
// LoadConfig will build the configuration including the extraHead source injected
|
|
// after all other defaults but before any user supplied configuration and the overrides
|
|
// source injected as the final source in the configuration parsing chain.
|
|
func LoadConfig(builderOpts config.BuilderOpts, extraHead config.Source, overrides ...config.Source) (*config.RuntimeConfig, []string, error) {
|
|
b, err := config.NewBuilder(builderOpts)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
|
|
if extraHead.Data != "" {
|
|
b.Head = append(b.Head, extraHead)
|
|
}
|
|
|
|
if len(overrides) != 0 {
|
|
b.Tail = append(b.Tail, overrides...)
|
|
}
|
|
|
|
cfg, err := b.BuildAndValidate()
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
|
|
return &cfg, b.Warnings, nil
|
|
}
|
|
|
|
// ReadConfig will parse the current configuration and inject any
|
|
// auto-config sources if present into the correct place in the parsing chain.
|
|
func (ac *AutoConfig) ReadConfig() (*config.RuntimeConfig, error) {
|
|
src := config.Source{
|
|
Name: autoConfigFileName,
|
|
Format: "json",
|
|
Data: ac.autoConfigData,
|
|
}
|
|
|
|
cfg, warnings, err := LoadConfig(ac.builderOpts, src, ac.overrides...)
|
|
if err != nil {
|
|
return cfg, err
|
|
}
|
|
|
|
for _, w := range warnings {
|
|
ac.logger.Warn(w)
|
|
}
|
|
|
|
ac.config = cfg
|
|
return cfg, nil
|
|
}
|
|
|
|
// restorePersistedAutoConfig will attempt to load the persisted auto-config
|
|
// settings from the data directory. It returns true either when there was an
|
|
// unrecoverable error or when the configuration was successfully loaded from
|
|
// disk. Recoverable errors, such as "file not found" are suppressed and this
|
|
// method will return false for the first boolean.
|
|
func (ac *AutoConfig) restorePersistedAutoConfig() (bool, error) {
|
|
if ac.config.DataDir == "" {
|
|
// no data directory means we don't have anything to potentially load
|
|
return false, nil
|
|
}
|
|
|
|
path := filepath.Join(ac.config.DataDir, autoConfigFileName)
|
|
ac.logger.Debug("attempting to restore any persisted configuration", "path", path)
|
|
|
|
content, err := ioutil.ReadFile(path)
|
|
if err == nil {
|
|
ac.logger.Info("restored persisted configuration", "path", path)
|
|
ac.autoConfigData = string(content)
|
|
return true, nil
|
|
}
|
|
|
|
if !os.IsNotExist(err) {
|
|
return true, fmt.Errorf("failed to load %s: %w", path, err)
|
|
}
|
|
|
|
// ignore non-existence errors as that is an indicator that we haven't
|
|
// performed the auto configuration before
|
|
return false, nil
|
|
}
|
|
|
|
// InitialConfiguration will perform a one-time RPC request to the configured servers
|
|
// to retrieve various cluster wide configurations. See the agent/agentpb/auto_config.proto
|
|
// file for a complete reference of what configurations can be applied in this manner.
|
|
// The returned configuration will be the new configuration with any auto-config settings
|
|
// already applied. If AutoConfig is not enabled this method will just parse any
|
|
// local configuration and return the built runtime configuration.
|
|
//
|
|
// The context passed in can be used to cancel the retrieval of the initial configuration
|
|
// like when receiving a signal during startup.
|
|
func (ac *AutoConfig) InitialConfiguration(ctx context.Context) (*config.RuntimeConfig, error) {
|
|
if ac.config == nil {
|
|
config, err := ac.ReadConfig()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
ac.config = config
|
|
}
|
|
|
|
if !ac.config.AutoConfig.Enabled {
|
|
return ac.config, nil
|
|
}
|
|
|
|
ready, err := ac.restorePersistedAutoConfig()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if !ready {
|
|
ac.logger.Info("retrieving initial agent auto configuration remotely")
|
|
if err := ac.getInitialConfiguration(ctx); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
// re-read the configuration now that we have our initial auto-config
|
|
config, err := ac.ReadConfig()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
ac.config = config
|
|
return ac.config, nil
|
|
}
|
|
|
|
// introToken is responsible for determining the correct intro token to use
|
|
// when making the initial Cluster.AutoConfig RPC request.
|
|
func (ac *AutoConfig) introToken() (string, error) {
|
|
conf := ac.config.AutoConfig
|
|
// without an intro token or intro token file we cannot do anything
|
|
if conf.IntroToken == "" && conf.IntroTokenFile == "" {
|
|
return "", fmt.Errorf("neither intro_token or intro_token_file settings are not configured")
|
|
}
|
|
|
|
token := conf.IntroToken
|
|
if token == "" {
|
|
// load the intro token from the file
|
|
content, err := ioutil.ReadFile(conf.IntroTokenFile)
|
|
if err != nil {
|
|
return "", fmt.Errorf("Failed to read intro token from file: %w", err)
|
|
}
|
|
|
|
token = string(content)
|
|
|
|
if token == "" {
|
|
return "", fmt.Errorf("intro_token_file did not contain any token")
|
|
}
|
|
}
|
|
|
|
return token, nil
|
|
}
|
|
|
|
// autoConfigHosts is responsible for taking the list of server addresses and
|
|
// resolving any go-discover provider invocations. It will then return a list
|
|
// of hosts. These might be hostnames and is expected that DNS resolution may
|
|
// be performed after this function runs. Additionally these may contain ports
|
|
// so SplitHostPort could also be necessary.
|
|
func (ac *AutoConfig) autoConfigHosts() ([]string, error) {
|
|
servers := ac.config.AutoConfig.ServerAddresses
|
|
|
|
providers := make(map[string]discover.Provider)
|
|
for k, v := range discover.Providers {
|
|
providers[k] = v
|
|
}
|
|
providers["k8s"] = &discoverk8s.Provider{}
|
|
|
|
disco, err := discover.New(
|
|
discover.WithUserAgent(lib.UserAgent()),
|
|
discover.WithProviders(providers),
|
|
)
|
|
|
|
if err != nil {
|
|
return nil, fmt.Errorf("Failed to create go-discover resolver: %w", err)
|
|
}
|
|
|
|
var addrs []string
|
|
for _, addr := range servers {
|
|
switch {
|
|
case strings.Contains(addr, "provider="):
|
|
resolved, err := disco.Addrs(addr, ac.logger.StandardLogger(&hclog.StandardLoggerOptions{InferLevels: true}))
|
|
if err != nil {
|
|
ac.logger.Error("failed to resolve go-discover auto-config servers", "configuration", addr, "err", err)
|
|
continue
|
|
}
|
|
|
|
addrs = append(addrs, resolved...)
|
|
ac.logger.Debug("discovered auto-config servers", "servers", resolved)
|
|
default:
|
|
addrs = append(addrs, addr)
|
|
}
|
|
}
|
|
|
|
if len(addrs) == 0 {
|
|
return nil, fmt.Errorf("no auto-config server addresses available for use")
|
|
}
|
|
|
|
return addrs, nil
|
|
}
|
|
|
|
// resolveHost will take a single host string and convert it to a list of TCPAddrs
|
|
// This will process any port in the input as well as looking up the hostname using
|
|
// normal DNS resolution.
|
|
func (ac *AutoConfig) resolveHost(hostPort string) []net.TCPAddr {
|
|
port := ac.config.ServerPort
|
|
host, portStr, err := net.SplitHostPort(hostPort)
|
|
if err != nil {
|
|
if strings.Contains(err.Error(), "missing port in address") {
|
|
host = hostPort
|
|
} else {
|
|
ac.logger.Warn("error splitting host address into IP and port", "address", hostPort, "error", err)
|
|
return nil
|
|
}
|
|
} else {
|
|
port, err = strconv.Atoi(portStr)
|
|
if err != nil {
|
|
ac.logger.Warn("Parsed port is not an integer", "port", portStr, "error", err)
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// resolve the host to a list of IPs
|
|
ips, err := net.LookupIP(host)
|
|
if err != nil {
|
|
ac.logger.Warn("IP resolution failed", "host", host, "error", err)
|
|
return nil
|
|
}
|
|
|
|
var addrs []net.TCPAddr
|
|
for _, ip := range ips {
|
|
addrs = append(addrs, net.TCPAddr{IP: ip, Port: port})
|
|
}
|
|
|
|
return addrs
|
|
}
|
|
|
|
// recordAutoConfigReply takes an AutoConfig RPC reply records it with the agent
|
|
// This will persist the configuration to disk (unless in dev mode running without
|
|
// a data dir) and will reload the configuration.
|
|
func (ac *AutoConfig) recordAutoConfigReply(reply *agentpb.AutoConfigResponse) error {
|
|
conf, err := json.Marshal(translateConfig(reply.Config))
|
|
if err != nil {
|
|
return fmt.Errorf("failed to encode auto-config configuration as JSON: %w", err)
|
|
}
|
|
|
|
ac.autoConfigData = string(conf)
|
|
|
|
if ac.config.DataDir == "" {
|
|
ac.logger.Debug("not persisting auto-config settings because there is no data directory")
|
|
return nil
|
|
}
|
|
|
|
path := filepath.Join(ac.config.DataDir, autoConfigFileName)
|
|
|
|
err = ioutil.WriteFile(path, conf, 0660)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to write auto-config configurations: %w", err)
|
|
}
|
|
|
|
ac.logger.Debug("auto-config settings were persisted to disk")
|
|
|
|
return nil
|
|
}
|
|
|
|
// getInitialConfigurationOnce will perform full server to TCPAddr resolution and
|
|
// loop through each host trying to make the Cluster.AutoConfig RPC call. When
|
|
// successful the bool return will be true and the err value will indicate whether we
|
|
// successfully recorded the auto config settings (persisted to disk and stored internally
|
|
// on the AutoConfig object)
|
|
func (ac *AutoConfig) getInitialConfigurationOnce(ctx context.Context) (bool, error) {
|
|
token, err := ac.introToken()
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
request := agentpb.AutoConfigRequest{
|
|
Datacenter: ac.config.Datacenter,
|
|
Node: ac.config.NodeName,
|
|
Segment: ac.config.SegmentName,
|
|
JWT: token,
|
|
}
|
|
|
|
var reply agentpb.AutoConfigResponse
|
|
|
|
servers, err := ac.autoConfigHosts()
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
for _, s := range servers {
|
|
// try each IP to see if we can successfully make the request
|
|
for _, addr := range ac.resolveHost(s) {
|
|
if ctx.Err() != nil {
|
|
return false, ctx.Err()
|
|
}
|
|
|
|
ac.logger.Debug("making Cluster.AutoConfig RPC", "addr", addr.String())
|
|
if err = ac.directRPC.RPC(ac.config.Datacenter, ac.config.NodeName, &addr, "Cluster.AutoConfig", &request, &reply); err != nil {
|
|
ac.logger.Error("AutoConfig RPC failed", "addr", addr.String(), "error", err)
|
|
continue
|
|
}
|
|
|
|
return true, ac.recordAutoConfigReply(&reply)
|
|
}
|
|
}
|
|
|
|
return false, ctx.Err()
|
|
}
|
|
|
|
// getInitialConfiguration implements a loop to retry calls to getInitialConfigurationOnce.
|
|
// It uses the RetryWaiter on the AutoConfig object to control how often to attempt
|
|
// the initial configuration process. It is also canceallable by cancelling the provided context.
|
|
func (ac *AutoConfig) getInitialConfiguration(ctx context.Context) error {
|
|
// this resets the failures so that we will perform immediate request
|
|
wait := ac.waiter.Success()
|
|
for {
|
|
select {
|
|
case <-wait:
|
|
done, err := ac.getInitialConfigurationOnce(ctx)
|
|
if done {
|
|
return err
|
|
}
|
|
if err != nil {
|
|
ac.logger.Error(err.Error())
|
|
}
|
|
wait = ac.waiter.Failed()
|
|
case <-ctx.Done():
|
|
ac.logger.Info("interrupted during initial auto configuration", "err", ctx.Err())
|
|
return ctx.Err()
|
|
}
|
|
}
|
|
}
|