mirror of https://github.com/status-im/consul.git
Add DNS recursor strategy option (#10611)
This change adds a new `dns_config.recursor_strategy` option which controls how Consul queries DNS resolvers listed in the `recursors` config option. The supported options are `sequential` (default), and `random`. Closes #8807 Co-authored-by: Blake Covarrubias <blake@covarrubi.as> Co-authored-by: Priyanka Sengupta <psengupta@flatiron.com>
This commit is contained in:
parent
832896ed11
commit
a0cd3dd88e
|
@ -0,0 +1,3 @@
|
|||
```release-note:improvement
|
||||
config: add `dns_config.recursor_strategy` flag to control the order in which DNS recursors are queried
|
||||
```
|
|
@ -908,6 +908,7 @@ func (b *builder) build() (rt RuntimeConfig, err error) {
|
|||
DNSNodeTTL: b.durationVal("dns_config.node_ttl", c.DNS.NodeTTL),
|
||||
DNSOnlyPassing: boolVal(c.DNS.OnlyPassing),
|
||||
DNSPort: dnsPort,
|
||||
DNSRecursorStrategy: b.dnsRecursorStrategyVal(stringVal(c.DNS.RecursorStrategy)),
|
||||
DNSRecursorTimeout: b.durationVal("recursor_timeout", c.DNS.RecursorTimeout),
|
||||
DNSRecursors: dnsRecursors,
|
||||
DNSServiceTTL: dnsServiceTTL,
|
||||
|
@ -1745,6 +1746,20 @@ func (b *builder) meshGatewayConfVal(mgConf *MeshGatewayConfig) structs.MeshGate
|
|||
return cfg
|
||||
}
|
||||
|
||||
func (b *builder) dnsRecursorStrategyVal(v string) dns.RecursorStrategy {
|
||||
var out dns.RecursorStrategy
|
||||
|
||||
switch dns.RecursorStrategy(v) {
|
||||
case dns.RecursorStrategyRandom:
|
||||
out = dns.RecursorStrategyRandom
|
||||
case dns.RecursorStrategySequential, "":
|
||||
out = dns.RecursorStrategySequential
|
||||
default:
|
||||
b.err = multierror.Append(b.err, fmt.Errorf("dns_config.recursor_strategy: invalid strategy: %q", v))
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (b *builder) exposeConfVal(v *ExposeConfig) structs.ExposeConfig {
|
||||
var out structs.ExposeConfig
|
||||
if v == nil {
|
||||
|
|
|
@ -634,6 +634,7 @@ type DNS struct {
|
|||
MaxStale *string `mapstructure:"max_stale"`
|
||||
NodeTTL *string `mapstructure:"node_ttl"`
|
||||
OnlyPassing *bool `mapstructure:"only_passing"`
|
||||
RecursorStrategy *string `mapstructure:"recursor_strategy"`
|
||||
RecursorTimeout *string `mapstructure:"recursor_timeout"`
|
||||
ServiceTTL map[string]string `mapstructure:"service_ttl"`
|
||||
UDPAnswerLimit *int `mapstructure:"udp_answer_limit"`
|
||||
|
|
|
@ -12,6 +12,7 @@ import (
|
|||
|
||||
"github.com/hashicorp/consul/agent/cache"
|
||||
"github.com/hashicorp/consul/agent/consul"
|
||||
"github.com/hashicorp/consul/agent/dns"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
"github.com/hashicorp/consul/agent/token"
|
||||
"github.com/hashicorp/consul/api"
|
||||
|
@ -270,6 +271,15 @@ type RuntimeConfig struct {
|
|||
// hcl: dns_config { only_passing = (true|false) }
|
||||
DNSOnlyPassing bool
|
||||
|
||||
// DNSRecursorStrategy controls the order in which DNS recursors are queried.
|
||||
// 'sequential' queries recursors in the order they are listed under `recursors`.
|
||||
// 'random' causes random selection of recursors which has the effect of
|
||||
// spreading the query load among all listed servers, rather than having
|
||||
// client agents try the first server in the list every time.
|
||||
//
|
||||
// hcl: dns_config { recursor_strategy = "(random|sequential)" }
|
||||
DNSRecursorStrategy dns.RecursorStrategy
|
||||
|
||||
// DNSRecursorTimeout specifies the timeout in seconds
|
||||
// for Consul's internal dns client used for recursion.
|
||||
// This value is used for the connection, read and write timeout.
|
||||
|
|
|
@ -5425,6 +5425,7 @@ func TestLoad_FullConfig(t *testing.T) {
|
|||
DNSNodeTTL: 7084 * time.Second,
|
||||
DNSOnlyPassing: true,
|
||||
DNSPort: 7001,
|
||||
DNSRecursorStrategy: "sequential",
|
||||
DNSRecursorTimeout: 4427 * time.Second,
|
||||
DNSRecursors: []string{"63.38.39.58", "92.49.18.18"},
|
||||
DNSSOA: RuntimeSOAConfig{Refresh: 3600, Retry: 600, Expire: 86400, Minttl: 0},
|
||||
|
|
|
@ -147,6 +147,7 @@
|
|||
"DNSNodeTTL": "0s",
|
||||
"DNSOnlyPassing": false,
|
||||
"DNSPort": 0,
|
||||
"DNSRecursorStrategy": "",
|
||||
"DNSRecursorTimeout": "0s",
|
||||
"DNSRecursors": [],
|
||||
"DNSSOA": {
|
||||
|
|
40
agent/dns.go
40
agent/dns.go
|
@ -70,22 +70,23 @@ type dnsSOAConfig struct {
|
|||
}
|
||||
|
||||
type dnsConfig struct {
|
||||
AllowStale bool
|
||||
Datacenter string
|
||||
EnableTruncate bool
|
||||
MaxStale time.Duration
|
||||
UseCache bool
|
||||
CacheMaxAge time.Duration
|
||||
NodeName string
|
||||
NodeTTL time.Duration
|
||||
OnlyPassing bool
|
||||
RecursorTimeout time.Duration
|
||||
Recursors []string
|
||||
SegmentName string
|
||||
UDPAnswerLimit int
|
||||
ARecordLimit int
|
||||
NodeMetaTXT bool
|
||||
SOAConfig dnsSOAConfig
|
||||
AllowStale bool
|
||||
Datacenter string
|
||||
EnableTruncate bool
|
||||
MaxStale time.Duration
|
||||
UseCache bool
|
||||
CacheMaxAge time.Duration
|
||||
NodeName string
|
||||
NodeTTL time.Duration
|
||||
OnlyPassing bool
|
||||
RecursorStrategy agentdns.RecursorStrategy
|
||||
RecursorTimeout time.Duration
|
||||
Recursors []string
|
||||
SegmentName string
|
||||
UDPAnswerLimit int
|
||||
ARecordLimit int
|
||||
NodeMetaTXT bool
|
||||
SOAConfig dnsSOAConfig
|
||||
// TTLRadix sets service TTLs by prefix, eg: "database-*"
|
||||
TTLRadix *radix.Tree
|
||||
// TTLStrict sets TTLs to service by full name match. It has higher priority than TTLRadix
|
||||
|
@ -154,6 +155,7 @@ func GetDNSConfig(conf *config.RuntimeConfig) (*dnsConfig, error) {
|
|||
NodeName: conf.NodeName,
|
||||
NodeTTL: conf.DNSNodeTTL,
|
||||
OnlyPassing: conf.DNSOnlyPassing,
|
||||
RecursorStrategy: conf.DNSRecursorStrategy,
|
||||
RecursorTimeout: conf.DNSRecursorTimeout,
|
||||
SegmentName: conf.SegmentName,
|
||||
UDPAnswerLimit: conf.DNSUDPAnswerLimit,
|
||||
|
@ -1851,7 +1853,8 @@ func (d *DNSServer) handleRecurse(resp dns.ResponseWriter, req *dns.Msg) {
|
|||
var r *dns.Msg
|
||||
var rtt time.Duration
|
||||
var err error
|
||||
for _, recursor := range cfg.Recursors {
|
||||
for _, idx := range cfg.RecursorStrategy.Indexes(len(cfg.Recursors)) {
|
||||
recursor := cfg.Recursors[idx]
|
||||
r, rtt, err = c.Exchange(req, recursor)
|
||||
// Check if the response is valid and has the desired Response code
|
||||
if r != nil && (r.Rcode != dns.RcodeSuccess && r.Rcode != dns.RcodeNameError) {
|
||||
|
@ -1936,7 +1939,8 @@ func (d *DNSServer) resolveCNAME(cfg *dnsConfig, name string, maxRecursionLevel
|
|||
var r *dns.Msg
|
||||
var rtt time.Duration
|
||||
var err error
|
||||
for _, recursor := range cfg.Recursors {
|
||||
for _, idx := range cfg.RecursorStrategy.Indexes(len(cfg.Recursors)) {
|
||||
recursor := cfg.Recursors[idx]
|
||||
r, rtt, err = c.Exchange(m, recursor)
|
||||
if err == nil {
|
||||
d.logger.Debug("cname recurse RTT for name",
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
package dns
|
||||
|
||||
import "regexp"
|
||||
import (
|
||||
"math/rand"
|
||||
"regexp"
|
||||
)
|
||||
|
||||
// MaxLabelLength is the maximum length for a name that can be used in DNS.
|
||||
const MaxLabelLength = 63
|
||||
|
@ -8,3 +11,24 @@ const MaxLabelLength = 63
|
|||
// InvalidNameRe is a regex that matches characters which can not be included in
|
||||
// a DNS name.
|
||||
var InvalidNameRe = regexp.MustCompile(`[^A-Za-z0-9\\-]+`)
|
||||
|
||||
// RecursorStrategy names the order in which a list of DNS recursors is
// traversed.
type RecursorStrategy string

const (
	// RecursorStrategySequential walks the recursors in their listed order.
	RecursorStrategySequential RecursorStrategy = "sequential"
	// RecursorStrategyRandom walks the recursors in a pseudo-random order.
	RecursorStrategyRandom RecursorStrategy = "random"
)

// Indexes returns the integers [0, max) in the order dictated by the
// strategy: a pseudo-random permutation for RecursorStrategyRandom, and
// ascending order for every other value (including the empty string).
func (s RecursorStrategy) Indexes(max int) []int {
	if s == RecursorStrategyRandom {
		return rand.Perm(max)
	}

	// Every non-random strategy is treated as sequential.
	order := make([]int, max)
	for i := 0; i < max; i++ {
		order[i] = i
	}
	return order
}
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
package dns
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/hashicorp/consul/sdk/testutil/retry"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestDNS_Recursor_StrategyRandom(t *testing.T) {
|
||||
configuredRecursors := []string{"1.1.1.1", "8.8.4.4", "8.8.8.8"}
|
||||
recursorStrategy := RecursorStrategy("random")
|
||||
|
||||
retry.RunWith(&retry.Counter{Count: 5}, t, func(r *retry.R) {
|
||||
recursorsToQuery := make([]string, 0)
|
||||
for _, idx := range recursorStrategy.Indexes(len(configuredRecursors)) {
|
||||
recursorsToQuery = append(recursorsToQuery, configuredRecursors[idx])
|
||||
}
|
||||
|
||||
// Ensure the slices contain the same elements
|
||||
require.ElementsMatch(t, configuredRecursors, recursorsToQuery)
|
||||
|
||||
// Ensure the elements are not in the same order
|
||||
require.NotEqual(r, configuredRecursors, recursorsToQuery)
|
||||
})
|
||||
}
|
||||
|
||||
func TestDNS_Recursor_StrategySequential(t *testing.T) {
|
||||
expectedRecursors := []string{"1.1.1.1", "8.8.4.4", "8.8.8.8"}
|
||||
recursorStrategy := RecursorStrategy("sequential")
|
||||
|
||||
recursorsToQuery := make([]string, 0)
|
||||
for _, idx := range recursorStrategy.Indexes(len(expectedRecursors)) {
|
||||
recursorsToQuery = append(recursorsToQuery, expectedRecursors[idx])
|
||||
}
|
||||
|
||||
// The list of recursors should match the order in which they were defined
|
||||
// in the configuration
|
||||
require.Equal(t, recursorsToQuery, expectedRecursors)
|
||||
}
|
|
@ -7610,6 +7610,7 @@ func TestDNS_ConfigReload(t *testing.T) {
|
|||
}
|
||||
enable_truncate = false
|
||||
only_passing = false
|
||||
recursor_strategy = "sequential"
|
||||
recursor_timeout = "15s"
|
||||
disable_compression = false
|
||||
a_record_limit = 1
|
||||
|
@ -7628,6 +7629,7 @@ func TestDNS_ConfigReload(t *testing.T) {
|
|||
for _, s := range a.dnsServers {
|
||||
cfg := s.config.Load().(*dnsConfig)
|
||||
require.Equal(t, []string{"8.8.8.8:53"}, cfg.Recursors)
|
||||
require.Equal(t, agentdns.RecursorStrategy("sequential"), cfg.RecursorStrategy)
|
||||
require.False(t, cfg.AllowStale)
|
||||
require.Equal(t, 20*time.Second, cfg.MaxStale)
|
||||
require.Equal(t, 10*time.Second, cfg.NodeTTL)
|
||||
|
@ -7658,6 +7660,7 @@ func TestDNS_ConfigReload(t *testing.T) {
|
|||
}
|
||||
newCfg.DNSEnableTruncate = true
|
||||
newCfg.DNSOnlyPassing = true
|
||||
newCfg.DNSRecursorStrategy = "random"
|
||||
newCfg.DNSRecursorTimeout = 16 * time.Second
|
||||
newCfg.DNSDisableCompression = true
|
||||
newCfg.DNSARecordLimit = 2
|
||||
|
@ -7673,6 +7676,7 @@ func TestDNS_ConfigReload(t *testing.T) {
|
|||
for _, s := range a.dnsServers {
|
||||
cfg := s.config.Load().(*dnsConfig)
|
||||
require.Equal(t, []string{"1.1.1.1:53"}, cfg.Recursors)
|
||||
require.Equal(t, agentdns.RecursorStrategy("random"), cfg.RecursorStrategy)
|
||||
require.True(t, cfg.AllowStale)
|
||||
require.Equal(t, 21*time.Second, cfg.MaxStale)
|
||||
require.Equal(t, 11*time.Second, cfg.NodeTTL)
|
||||
|
|
|
@ -1357,6 +1357,11 @@ bind_addr = "{{ GetPrivateInterfaces | include \"network\" \"10.0.0.0/8\" | attr
|
|||
then all services on that node will be excluded because they are also considered
|
||||
critical.
|
||||
|
||||
- `recursor_strategy` - If set to `sequential`, Consul will query recursors in the
|
||||
order listed in the [`recursors`](#recursors) option. If set to `random`,
|
||||
Consul will query upstream DNS resolvers in a random order. Defaults to
|
||||
`sequential`.
|
||||
|
||||
- `recursor_timeout` - Timeout used by Consul when
|
||||
recursively querying an upstream DNS server. See [`recursors`](#recursors) for more details. Default is 2s. This is available in Consul 0.7 and later.
|
||||
|
||||
|
|
Loading…
Reference in New Issue