Add DNS recursor strategy option (#10611)

This change adds a new `dns_config.recursor_strategy` option which
controls how Consul queries DNS resolvers listed in the `recursors`
config option. The supported options are `sequential` (default), and
`random`.

Closes #8807

Co-authored-by: Blake Covarrubias <blake@covarrubi.as>
Co-authored-by: Priyanka Sengupta <psengupta@flatiron.com>
This commit is contained in:
Blake Covarrubias 2021-07-19 15:22:51 -07:00 committed by GitHub
parent 832896ed11
commit a0cd3dd88e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 127 additions and 19 deletions

3
.changelog/10611.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
config: add `dns_config.recursor_strategy` flag to control the order which DNS recursors are queried
```

View File

@ -908,6 +908,7 @@ func (b *builder) build() (rt RuntimeConfig, err error) {
DNSNodeTTL: b.durationVal("dns_config.node_ttl", c.DNS.NodeTTL),
DNSOnlyPassing: boolVal(c.DNS.OnlyPassing),
DNSPort: dnsPort,
DNSRecursorStrategy: b.dnsRecursorStrategyVal(stringVal(c.DNS.RecursorStrategy)),
DNSRecursorTimeout: b.durationVal("recursor_timeout", c.DNS.RecursorTimeout),
DNSRecursors: dnsRecursors,
DNSServiceTTL: dnsServiceTTL,
@ -1745,6 +1746,20 @@ func (b *builder) meshGatewayConfVal(mgConf *MeshGatewayConfig) structs.MeshGate
return cfg
}
func (b *builder) dnsRecursorStrategyVal(v string) dns.RecursorStrategy {
var out dns.RecursorStrategy
switch dns.RecursorStrategy(v) {
case dns.RecursorStrategyRandom:
out = dns.RecursorStrategyRandom
case dns.RecursorStrategySequential, "":
out = dns.RecursorStrategySequential
default:
b.err = multierror.Append(b.err, fmt.Errorf("dns_config.recursor_strategy: invalid strategy: %q", v))
}
return out
}
func (b *builder) exposeConfVal(v *ExposeConfig) structs.ExposeConfig {
var out structs.ExposeConfig
if v == nil {

View File

@ -634,6 +634,7 @@ type DNS struct {
MaxStale *string `mapstructure:"max_stale"`
NodeTTL *string `mapstructure:"node_ttl"`
OnlyPassing *bool `mapstructure:"only_passing"`
RecursorStrategy *string `mapstructure:"recursor_strategy"`
RecursorTimeout *string `mapstructure:"recursor_timeout"`
ServiceTTL map[string]string `mapstructure:"service_ttl"`
UDPAnswerLimit *int `mapstructure:"udp_answer_limit"`

View File

@ -12,6 +12,7 @@ import (
"github.com/hashicorp/consul/agent/cache"
"github.com/hashicorp/consul/agent/consul"
"github.com/hashicorp/consul/agent/dns"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/api"
@ -270,6 +271,15 @@ type RuntimeConfig struct {
// hcl: dns_config { only_passing = (true|false) }
DNSOnlyPassing bool
// DNSRecursorStrategy controls the order in which DNS recursors are queried.
// 'sequential' queries recursors in the order they are listed under `recursors`.
// 'random' causes random selection of recursors which has the effect of
// spreading the query load among all listed servers, rather than having
// client agents try the first server in the list every time.
//
// hcl: dns_config { recursor_strategy = "(random|sequential)" }
DNSRecursorStrategy dns.RecursorStrategy
// DNSRecursorTimeout specifies the timeout in seconds
// for Consul's internal dns client used for recursion.
// This value is used for the connection, read and write timeout.

View File

@ -5425,6 +5425,7 @@ func TestLoad_FullConfig(t *testing.T) {
DNSNodeTTL: 7084 * time.Second,
DNSOnlyPassing: true,
DNSPort: 7001,
DNSRecursorStrategy: "sequential",
DNSRecursorTimeout: 4427 * time.Second,
DNSRecursors: []string{"63.38.39.58", "92.49.18.18"},
DNSSOA: RuntimeSOAConfig{Refresh: 3600, Retry: 600, Expire: 86400, Minttl: 0},

View File

@ -147,6 +147,7 @@
"DNSNodeTTL": "0s",
"DNSOnlyPassing": false,
"DNSPort": 0,
"DNSRecursorStrategy": "",
"DNSRecursorTimeout": "0s",
"DNSRecursors": [],
"DNSSOA": {

View File

@ -70,22 +70,23 @@ type dnsSOAConfig struct {
}
type dnsConfig struct {
AllowStale bool
Datacenter string
EnableTruncate bool
MaxStale time.Duration
UseCache bool
CacheMaxAge time.Duration
NodeName string
NodeTTL time.Duration
OnlyPassing bool
RecursorTimeout time.Duration
Recursors []string
SegmentName string
UDPAnswerLimit int
ARecordLimit int
NodeMetaTXT bool
SOAConfig dnsSOAConfig
AllowStale bool
Datacenter string
EnableTruncate bool
MaxStale time.Duration
UseCache bool
CacheMaxAge time.Duration
NodeName string
NodeTTL time.Duration
OnlyPassing bool
RecursorStrategy agentdns.RecursorStrategy
RecursorTimeout time.Duration
Recursors []string
SegmentName string
UDPAnswerLimit int
ARecordLimit int
NodeMetaTXT bool
SOAConfig dnsSOAConfig
// TTLRadix sets service TTLs by prefix, eg: "database-*"
TTLRadix *radix.Tree
// TTLStict sets TTLs to service by full name match. It Has higher priority than TTLRadix
@ -154,6 +155,7 @@ func GetDNSConfig(conf *config.RuntimeConfig) (*dnsConfig, error) {
NodeName: conf.NodeName,
NodeTTL: conf.DNSNodeTTL,
OnlyPassing: conf.DNSOnlyPassing,
RecursorStrategy: conf.DNSRecursorStrategy,
RecursorTimeout: conf.DNSRecursorTimeout,
SegmentName: conf.SegmentName,
UDPAnswerLimit: conf.DNSUDPAnswerLimit,
@ -1851,7 +1853,8 @@ func (d *DNSServer) handleRecurse(resp dns.ResponseWriter, req *dns.Msg) {
var r *dns.Msg
var rtt time.Duration
var err error
for _, recursor := range cfg.Recursors {
for _, idx := range cfg.RecursorStrategy.Indexes(len(cfg.Recursors)) {
recursor := cfg.Recursors[idx]
r, rtt, err = c.Exchange(req, recursor)
// Check if the response is valid and has the desired Response code
if r != nil && (r.Rcode != dns.RcodeSuccess && r.Rcode != dns.RcodeNameError) {
@ -1936,7 +1939,8 @@ func (d *DNSServer) resolveCNAME(cfg *dnsConfig, name string, maxRecursionLevel
var r *dns.Msg
var rtt time.Duration
var err error
for _, recursor := range cfg.Recursors {
for _, idx := range cfg.RecursorStrategy.Indexes(len(cfg.Recursors)) {
recursor := cfg.Recursors[idx]
r, rtt, err = c.Exchange(m, recursor)
if err == nil {
d.logger.Debug("cname recurse RTT for name",

View File

@ -1,6 +1,9 @@
package dns
import "regexp"
import (
"math/rand"
"regexp"
)
// MaxLabelLength is the maximum length for a name that can be used in DNS.
const MaxLabelLength = 63
@ -8,3 +11,24 @@ const MaxLabelLength = 63
// InvalidNameRe is a regex that matches characters which can not be included in
// a DNS name.
var InvalidNameRe = regexp.MustCompile(`[^A-Za-z0-9\\-]+`)
type RecursorStrategy string
const (
RecursorStrategySequential RecursorStrategy = "sequential"
RecursorStrategyRandom RecursorStrategy = "random"
)
func (s RecursorStrategy) Indexes(max int) []int {
switch s {
case RecursorStrategyRandom:
return rand.Perm(max)
default:
idxs := make([]int, max)
for i := range idxs {
idxs[i] = i
}
return idxs
}
}

40
agent/dns/dns_test.go Normal file
View File

@ -0,0 +1,40 @@
package dns
import (
"testing"
"github.com/hashicorp/consul/sdk/testutil/retry"
"github.com/stretchr/testify/require"
)
func TestDNS_Recursor_StrategyRandom(t *testing.T) {
configuredRecursors := []string{"1.1.1.1", "8.8.4.4", "8.8.8.8"}
recursorStrategy := RecursorStrategy("random")
retry.RunWith(&retry.Counter{Count: 5}, t, func(r *retry.R) {
recursorsToQuery := make([]string, 0)
for _, idx := range recursorStrategy.Indexes(len(configuredRecursors)) {
recursorsToQuery = append(recursorsToQuery, configuredRecursors[idx])
}
// Ensure the slices contain the same elements
require.ElementsMatch(t, configuredRecursors, recursorsToQuery)
// Ensure the elements are not in the same order
require.NotEqual(r, configuredRecursors, recursorsToQuery)
})
}
func TestDNS_Recursor_StrategySequential(t *testing.T) {
expectedRecursors := []string{"1.1.1.1", "8.8.4.4", "8.8.8.8"}
recursorStrategy := RecursorStrategy("sequential")
recursorsToQuery := make([]string, 0)
for _, idx := range recursorStrategy.Indexes(len(expectedRecursors)) {
recursorsToQuery = append(recursorsToQuery, expectedRecursors[idx])
}
// The list of recursors should match the order in which they were defined
// in the configuration
require.Equal(t, recursorsToQuery, expectedRecursors)
}

View File

@ -7610,6 +7610,7 @@ func TestDNS_ConfigReload(t *testing.T) {
}
enable_truncate = false
only_passing = false
recursor_strategy = "sequential"
recursor_timeout = "15s"
disable_compression = false
a_record_limit = 1
@ -7628,6 +7629,7 @@ func TestDNS_ConfigReload(t *testing.T) {
for _, s := range a.dnsServers {
cfg := s.config.Load().(*dnsConfig)
require.Equal(t, []string{"8.8.8.8:53"}, cfg.Recursors)
require.Equal(t, agentdns.RecursorStrategy("sequential"), cfg.RecursorStrategy)
require.False(t, cfg.AllowStale)
require.Equal(t, 20*time.Second, cfg.MaxStale)
require.Equal(t, 10*time.Second, cfg.NodeTTL)
@ -7658,6 +7660,7 @@ func TestDNS_ConfigReload(t *testing.T) {
}
newCfg.DNSEnableTruncate = true
newCfg.DNSOnlyPassing = true
newCfg.DNSRecursorStrategy = "random"
newCfg.DNSRecursorTimeout = 16 * time.Second
newCfg.DNSDisableCompression = true
newCfg.DNSARecordLimit = 2
@ -7673,6 +7676,7 @@ func TestDNS_ConfigReload(t *testing.T) {
for _, s := range a.dnsServers {
cfg := s.config.Load().(*dnsConfig)
require.Equal(t, []string{"1.1.1.1:53"}, cfg.Recursors)
require.Equal(t, agentdns.RecursorStrategy("random"), cfg.RecursorStrategy)
require.True(t, cfg.AllowStale)
require.Equal(t, 21*time.Second, cfg.MaxStale)
require.Equal(t, 11*time.Second, cfg.NodeTTL)

View File

@ -1357,6 +1357,11 @@ bind_addr = "{{ GetPrivateInterfaces | include \"network\" \"10.0.0.0/8\" | attr
then all services on that node will be excluded because they are also considered
critical.
- `recursor_strategy` - If set to `sequential`, Consul will query recursors in the
order listed in the [`recursors`](#recursors) option. If set to `random`,
Consul will query an upstream DNS resolvers in a random order. Defaults to
`sequential`.
- `recursor_timeout` - Timeout used by Consul when
recursively querying an upstream DNS server. See [`recursors`](#recursors) for more details. Default is 2s. This is available in Consul 0.7 and later.