diff --git a/command/agent/command.go b/command/agent/command.go
index 3f21ae2730..73acf307e2 100644
--- a/command/agent/command.go
+++ b/command/agent/command.go
@@ -189,6 +189,11 @@ func (c *Command) readConfig() *Config {
}
}
+ // Verify DNS settings
+ if config.DNSConfig.UDPAnswerLimit < 1 {
+ c.Ui.Error(fmt.Sprintf("dns_config.udp_answer_limit %d too low, must always be greater than zero", config.DNSConfig.UDPAnswerLimit))
+ }
+
if config.EncryptKey != "" {
if _, err := config.EncryptBytes(); err != nil {
c.Ui.Error(fmt.Sprintf("Invalid encryption key: %s", err))
diff --git a/command/agent/config.go b/command/agent/config.go
index 6d4ebf2bd8..eb3e659b24 100644
--- a/command/agent/config.go
+++ b/command/agent/config.go
@@ -77,6 +77,21 @@ type DNSConfig struct {
// returned by default for UDP.
EnableTruncate bool `mapstructure:"enable_truncate"`
+ // UDPAnswerLimit is used to limit the maximum number of DNS Resource
+ // Records returned in the ANSWER section of a DNS response. This is
+ // not normally useful and will be limited based on the querying
+ // protocol, however systems that implemented §6 Rule 9 in RFC3484
+ // may want to set this to `1` in order to subvert §6 Rule 9 and
+ // re-obtain the effect of randomized resource records (i.e. each
+ // answer contains only one IP, but the IP changes every request).
+ // RFC3484 sorts answers in a deterministic order, which defeats the
+ // purpose of randomized DNS responses. This RFC has been obsoleted
+ // by RFC6724 and restores the desired behavior of randomized
+ // responses, however a large number of Linux hosts using glibc(3)
+ // implemented §6 Rule 9 and may need this option (e.g. CentOS 5-6,
+ // Debian Squeeze, etc).
+ UDPAnswerLimit int `mapstructure:"udp_answer_limit"`
+
// MaxStale is used to bound how stale of a result is
// accepted for a DNS lookup. This can be used with
// AllowStale to limit how old of a value is served up.
@@ -527,7 +542,8 @@ func DefaultConfig() *Config {
Server: 8300,
},
DNSConfig: DNSConfig{
- MaxStale: 5 * time.Second,
+ UDPAnswerLimit: 3,
+ MaxStale: 5 * time.Second,
},
Telemetry: Telemetry{
StatsitePrefix: "consul",
@@ -1127,6 +1143,9 @@ func MergeConfig(a, b *Config) *Config {
if b.DNSConfig.AllowStale {
result.DNSConfig.AllowStale = true
}
+ if b.DNSConfig.UDPAnswerLimit != 0 {
+ result.DNSConfig.UDPAnswerLimit = b.DNSConfig.UDPAnswerLimit
+ }
if b.DNSConfig.EnableTruncate {
result.DNSConfig.EnableTruncate = true
}
diff --git a/command/agent/config_test.go b/command/agent/config_test.go
index 6e0c446b2b..e78736702e 100644
--- a/command/agent/config_test.go
+++ b/command/agent/config_test.go
@@ -521,19 +521,28 @@ func TestDecodeConfig(t *testing.T) {
}
// DNS node ttl, max stale
- input = `{"dns_config": {"node_ttl": "5s", "max_stale": "15s", "allow_stale": true}}`
+ input = `{"dns_config": {"allow_stale": true, "enable_truncate": false, "max_stale": "15s", "node_ttl": "5s", "only_passing": true, "udp_answer_limit": 6}}`
config, err = DecodeConfig(bytes.NewReader([]byte(input)))
if err != nil {
t.Fatalf("err: %s", err)
}
- if config.DNSConfig.NodeTTL != 5*time.Second {
+ if !config.DNSConfig.AllowStale {
+ t.Fatalf("bad: %#v", config)
+ }
+ if config.DNSConfig.EnableTruncate {
t.Fatalf("bad: %#v", config)
}
if config.DNSConfig.MaxStale != 15*time.Second {
t.Fatalf("bad: %#v", config)
}
- if !config.DNSConfig.AllowStale {
+ if config.DNSConfig.NodeTTL != 5*time.Second {
+ t.Fatalf("bad: %#v", config)
+ }
+ if !config.DNSConfig.OnlyPassing {
+ t.Fatalf("bad: %#v", config)
+ }
+ if config.DNSConfig.UDPAnswerLimit != 6 {
t.Fatalf("bad: %#v", config)
}
@@ -1252,13 +1261,14 @@ func TestMergeConfig(t *testing.T) {
DataDir: "/tmp/bar",
DNSRecursors: []string{"127.0.0.2:1001"},
DNSConfig: DNSConfig{
- NodeTTL: 10 * time.Second,
+ AllowStale: false,
+ EnableTruncate: true,
+ MaxStale: 30 * time.Second,
+ NodeTTL: 10 * time.Second,
ServiceTTL: map[string]time.Duration{
"api": 10 * time.Second,
},
- AllowStale: true,
- MaxStale: 30 * time.Second,
- EnableTruncate: true,
+ UDPAnswerLimit: 4,
},
Domain: "other",
LogLevel: "info",
diff --git a/command/agent/dns.go b/command/agent/dns.go
index 409fca710c..2a8d8dd5c3 100644
--- a/command/agent/dns.go
+++ b/command/agent/dns.go
@@ -12,12 +12,17 @@ import (
"github.com/armon/go-metrics"
"github.com/hashicorp/consul/consul"
"github.com/hashicorp/consul/consul/structs"
+ "github.com/hashicorp/consul/lib"
"github.com/miekg/dns"
)
const (
- maxServiceResponses = 3 // For UDP only
- maxRecurseRecords = 5
+ // UDP can fit ~25 A records in a 512B response, and ~14 AAAA
+ // records. Limit further to prevent unintentional configuration
+ // abuse that would have a negative effect on application response
+ // times.
+ maxUDPAnswerLimit = 8
+ maxRecurseRecords = 5
)
// DNSServer is used to wrap an Agent and expose various
@@ -487,15 +492,16 @@ func (d *DNSServer) formatNodeRecord(node *structs.Node, addr, qName string, qTy
return records
}
-// trimAnswers makes sure a UDP response is not longer than allowed by RFC 1035.
-// We first enforce an arbitrary limit, and then make sure the response doesn't
-// exceed 512 bytes.
-func trimAnswers(resp *dns.Msg) (trimmed bool) {
+// trimUDPAnswers makes sure a UDP response is not longer than allowed by RFC
+// 1035. Enforce an arbitrary limit that can be further ratcheted down by
+// config, and then make sure the response doesn't exceed 512 bytes.
+func trimUDPAnswers(config *DNSConfig, resp *dns.Msg) (trimmed bool) {
numAnswers := len(resp.Answer)
// This cuts UDP responses to a useful but limited number of responses.
- if numAnswers > maxServiceResponses {
- resp.Answer = resp.Answer[:maxServiceResponses]
+ maxAnswers := lib.MinInt(maxUDPAnswerLimit, config.UDPAnswerLimit)
+ if numAnswers > maxAnswers {
+ resp.Answer = resp.Answer[:maxAnswers]
}
// This enforces the hard limit of 512 bytes per the RFC.
@@ -567,7 +573,7 @@ RPC:
// If the network is not TCP, restrict the number of responses
if network != "tcp" {
- wasTrimmed := trimAnswers(resp)
+ wasTrimmed := trimUDPAnswers(d.config, resp)
// Flag that there are more records to return in the UDP response
if wasTrimmed && d.config.EnableTruncate {
@@ -599,7 +605,7 @@ func (d *DNSServer) preparedQueryLookup(network, datacenter, query string, req,
// match the previous behavior. We can optimize by pushing more filtering
// into the query execution, but for now I think we need to get the full
// response. We could also choose a large arbitrary number that will
- // likely work in practice, like 10*maxServiceResponses which should help
+ // likely work in practice, like 10*maxUDPAnswerLimit which should help
// reduce bandwidth if there are thousands of nodes available.
endpoint := d.agent.getEndpoint(preparedQueryEndpoint)
@@ -662,7 +668,7 @@ RPC:
// If the network is not TCP, restrict the number of responses.
if network != "tcp" {
- wasTrimmed := trimAnswers(resp)
+ wasTrimmed := trimUDPAnswers(d.config, resp)
// Flag that there are more records to return in the UDP response
if wasTrimmed && d.config.EnableTruncate {
@@ -682,6 +688,7 @@ func (d *DNSServer) serviceNodeRecords(dc string, nodes structs.CheckServiceNode
qName := req.Question[0].Name
qType := req.Question[0].Qtype
handled := make(map[string]struct{})
+
for _, node := range nodes {
// Start with the translated address but use the service address,
// if specified.
diff --git a/command/agent/dns_test.go b/command/agent/dns_test.go
index 94cc1e14d9..658b26b5c2 100644
--- a/command/agent/dns_test.go
+++ b/command/agent/dns_test.go
@@ -2,6 +2,7 @@ package agent
import (
"fmt"
+ "math/rand"
"net"
"os"
"strings"
@@ -13,6 +14,18 @@ import (
"github.com/miekg/dns"
)
+const (
+ configUDPAnswerLimit = 4
+ defaultNumUDPResponses = 3
+ testUDPTruncateLimit = 8
+
+ pctNodesWithIPv6 = 0.5
+
+ // generateNumNodes is the upper bounds for the number of hosts used
+ // in testing below. Generate an arbitrarily large number of hosts.
+ generateNumNodes = testUDPTruncateLimit * defaultNumUDPResponses * configUDPAnswerLimit
+)
+
func makeDNSServer(t *testing.T) (string, *DNSServer) {
return makeDNSServerConfig(t, nil, nil)
}
@@ -26,7 +39,7 @@ func makeDNSServerConfig(
if agentFn != nil {
agentFn(agentConf)
}
- dnsConf := &DNSConfig{}
+ dnsConf := &DefaultConfig().DNSConfig
if dnsFn != nil {
dnsFn(dnsConf)
}
@@ -1808,8 +1821,8 @@ func TestDNS_ServiceLookup_Randomize(t *testing.T) {
testutil.WaitForLeader(t, srv.agent.RPC, "dc1")
- // Register a large set of nodes.
- for i := 0; i < 3*maxServiceResponses; i++ {
+ // Register a large number of nodes.
+ for i := 0; i < generateNumNodes; i++ {
args := &structs.RegisterRequest{
Datacenter: "dc1",
Node: fmt.Sprintf("foo%d", i),
@@ -1856,7 +1869,7 @@ func TestDNS_ServiceLookup_Randomize(t *testing.T) {
m := new(dns.Msg)
m.SetQuestion(question, dns.TypeANY)
- c := new(dns.Client)
+ c := &dns.Client{Net: "udp"}
in, _, err := c.Exchange(m, addr.String())
if err != nil {
t.Fatalf("err: %v", err)
@@ -1864,7 +1877,7 @@ func TestDNS_ServiceLookup_Randomize(t *testing.T) {
// Response length should be truncated and we should get
// an A record for each response.
- if len(in.Answer) != maxServiceResponses {
+ if len(in.Answer) != defaultNumUDPResponses {
t.Fatalf("Bad: %#v", len(in.Answer))
}
@@ -1902,8 +1915,8 @@ func TestDNS_ServiceLookup_Truncate(t *testing.T) {
testutil.WaitForLeader(t, srv.agent.RPC, "dc1")
- // Register nodes a large number of nodes.
- for i := 0; i < 3*maxServiceResponses; i++ {
+ // Register a large number of nodes.
+ for i := 0; i < generateNumNodes; i++ {
args := &structs.RegisterRequest{
Datacenter: "dc1",
Node: fmt.Sprintf("foo%d", i),
@@ -2043,17 +2056,19 @@ func TestDNS_ServiceLookup_LargeResponses(t *testing.T) {
}
}
-func TestDNS_ServiceLookup_MaxResponses(t *testing.T) {
- dir, srv := makeDNSServer(t)
+func testDNS_ServiceLookup_responseLimits(t *testing.T, answerLimit int, qType uint16,
+ expectedService, expectedQuery, expectedQueryID int) (bool, error) {
+ dir, srv := makeDNSServerConfig(t, nil, func(c *DNSConfig) {
+ c.UDPAnswerLimit = answerLimit
+ })
defer os.RemoveAll(dir)
defer srv.agent.Shutdown()
testutil.WaitForLeader(t, srv.agent.RPC, "dc1")
- // Register a large number of nodes.
- for i := 0; i < 6*maxServiceResponses; i++ {
+ for i := 0; i < generateNumNodes; i++ {
nodeAddress := fmt.Sprintf("127.0.0.%d", i+1)
- if i > 3 {
+ if rand.Float64() < pctNodesWithIPv6 {
nodeAddress = fmt.Sprintf("fe80::%d", i+1)
}
args := &structs.RegisterRequest{
@@ -2061,72 +2076,115 @@ func TestDNS_ServiceLookup_MaxResponses(t *testing.T) {
Node: fmt.Sprintf("foo%d", i),
Address: nodeAddress,
Service: &structs.NodeService{
- Service: "web",
- Port: 8000,
+ Service: "api-tier",
+ Port: 8080,
},
}
var out struct{}
if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil {
- t.Fatalf("err: %v", err)
+ return false, fmt.Errorf("err: %v", err)
}
}
-
- // Register an equivalent prepared query.
var id string
{
args := &structs.PreparedQueryRequest{
Datacenter: "dc1",
Op: structs.PreparedQueryCreate,
Query: &structs.PreparedQuery{
+ Name: "api-tier",
Service: structs.ServiceQuery{
- Service: "web",
+ Service: "api-tier",
},
},
}
+
if err := srv.agent.RPC("PreparedQuery.Apply", args, &id); err != nil {
- t.Fatalf("err: %v", err)
+ return false, fmt.Errorf("err: %v", err)
}
}
// Look up the service directly and via prepared query.
questions := []string{
- "web.service.consul.",
+ "api-tier.service.consul.",
+ "api-tier.query.consul.",
id + ".query.consul.",
}
- for _, question := range questions {
+ for idx, question := range questions {
m := new(dns.Msg)
- m.SetQuestion(question, dns.TypeANY)
+ m.SetQuestion(question, qType)
addr, _ := srv.agent.config.ClientListener("", srv.agent.config.Ports.DNS)
- c := new(dns.Client)
+ c := &dns.Client{Net: "udp"}
in, _, err := c.Exchange(m, addr.String())
if err != nil {
- t.Fatalf("err: %v", err)
+ return false, fmt.Errorf("err: %v", err)
}
- if len(in.Answer) != 3 {
- t.Fatalf("should receive 3 answers for ANY")
+ switch idx {
+ case 0:
+ if len(in.Answer) != expectedService {
+ return false, fmt.Errorf("%d/%d answers received for type %v for %s", len(in.Answer), answerLimit, question)
+ }
+ case 1:
+ if len(in.Answer) != expectedQuery {
+ return false, fmt.Errorf("%d/%d answers received for type %v for %s", len(in.Answer), answerLimit, question)
+ }
+ case 2:
+ if len(in.Answer) != expectedQueryID {
+ return false, fmt.Errorf("%d/%d answers received for type %v for %s", len(in.Answer), answerLimit, question)
+ }
+ default:
+ panic("abort")
+ }
+ }
+
+ return true, nil
+}
+
+func TestDNS_ServiceLookup_AnswerLimits(t *testing.T) {
+ // Build a matrix of config parameters (udpAnswerLimit), and the
+ // length of the response per query type and question.
+ tests := []struct {
+ name string
+ udpAnswerLimit int
+ expectedAService int
+ expectedAQuery int
+ expectedAQueryID int
+ expectedAAAAService int
+ expectedAAAAQuery int
+ expectedAAAAQueryID int
+ expectedANYService int
+ expectedANYQuery int
+ expectedANYQueryID int
+ }{
+ {"0", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {"1", 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {"2", 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+ {"3", 3, 3, 3, 3, 3, 3, 3, 3, 3, 3},
+ {"4", 4, 4, 4, 4, 4, 4, 4, 4, 4, 4},
+ {"5", 5, 5, 5, 5, 5, 5, 5, 5, 5, 5},
+ {"6", 6, 6, 6, 6, 6, 6, 5, 6, 6, 6},
+ {"7", 7, 7, 7, 6, 7, 7, 5, 7, 7, 6},
+ {"8", 8, 8, 8, 6, 8, 8, 5, 8, 8, 6},
+ {"9", 9, 8, 8, 6, 8, 8, 5, 8, 8, 6},
+ {"20", 20, 8, 8, 6, 8, 8, 5, 8, 8, 6},
+ {"30", 30, 8, 8, 6, 8, 8, 5, 8, 8, 6},
+ }
+ for _, test := range tests {
+ ok, err := testDNS_ServiceLookup_responseLimits(t, test.udpAnswerLimit, dns.TypeA, test.expectedAService, test.expectedAQuery, test.expectedAQueryID)
+ if !ok {
+ t.Errorf("Expected service A lookup %d to pass: %v", test.name, err)
}
- m.SetQuestion(question, dns.TypeA)
- in, _, err = c.Exchange(m, addr.String())
- if err != nil {
- t.Fatalf("err: %v", err)
+ ok, err = testDNS_ServiceLookup_responseLimits(t, test.udpAnswerLimit, dns.TypeAAAA, test.expectedAAAAService, test.expectedAAAAQuery, test.expectedAAAAQueryID)
+ if !ok {
+ t.Errorf("Expected service AAAA lookup %d to pass: %v", test.name, err)
}
- if len(in.Answer) != 3 {
- t.Fatalf("should receive 3 answers for A")
- }
-
- m.SetQuestion(question, dns.TypeAAAA)
- in, _, err = c.Exchange(m, addr.String())
- if err != nil {
- t.Fatalf("err: %v", err)
- }
-
- if len(in.Answer) != 3 {
- t.Fatalf("should receive 3 answers for AAAA")
+ ok, err = testDNS_ServiceLookup_responseLimits(t, test.udpAnswerLimit, dns.TypeANY, test.expectedANYService, test.expectedANYQuery, test.expectedANYQueryID)
+ if !ok {
+ t.Errorf("Expected service ANY lookup %d to %v: %v", test.name, err)
}
}
}
diff --git a/lib/math.go b/lib/math.go
new file mode 100644
index 0000000000..36b483c64e
--- /dev/null
+++ b/lib/math.go
@@ -0,0 +1,15 @@
+package lib
+
+func MaxInt(a, b int) int {
+ if a > b {
+ return a
+ }
+ return b
+}
+
+func MinInt(a, b int) int {
+ if a > b {
+ return b
+ }
+ return a
+}
diff --git a/lib/math_test.go b/lib/math_test.go
new file mode 100644
index 0000000000..fdb963ecde
--- /dev/null
+++ b/lib/math_test.go
@@ -0,0 +1,29 @@
+package lib_test
+
+import (
+ "testing"
+
+ "github.com/hashicorp/consul/lib"
+)
+
+func TestMathMaxInt(t *testing.T) {
+ tests := [][3]int{{1, 2, 2}, {-1, 1, 1}, {2, 0, 2}}
+ for i, _ := range tests {
+ expected := tests[i][2]
+ actual := lib.MaxInt(tests[i][0], tests[i][1])
+ if expected != actual {
+ t.Fatalf("in iteration %d expected %d, got %d", i, expected, actual)
+ }
+ }
+}
+
+func TestMathMinInt(t *testing.T) {
+ tests := [][3]int{{1, 2, 1}, {-1, 1, -1}, {2, 0, 0}}
+ for i, _ := range tests {
+ expected := tests[i][2]
+ actual := lib.MinInt(tests[i][0], tests[i][1])
+ if expected != actual {
+ t.Fatalf("in iteration %d expected %d, got %d", i, expected, actual)
+ }
+ }
+}
diff --git a/website/source/docs/agent/options.html.markdown b/website/source/docs/agent/options.html.markdown
index ddd19b20db..2b8c6b81b5 100644
--- a/website/source/docs/agent/options.html.markdown
+++ b/website/source/docs/agent/options.html.markdown
@@ -499,12 +499,27 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass
using TCP to get the full set of records.
* `only_passing`
- When set to true, the default, the querying agent will only receive node
- or service addresses for healthy services. A healthy service is defined
- as a service with one or more healthchecks and all defined healthchecks
- for the service are in a passing or warning state (i.e. not
- critical). Set to false to have the querying agent include all node and
- service addresses regardless of the health of the service.
+ When set to true, the default, the querying agent will only receive node
+ or service addresses for healthy services. A healthy service is defined
+ as a service with one or more healthchecks and all defined healthchecks
+ for the service are in a passing or warning state (i.e. not
+ critical). Set to false to have the querying agent include all node and
+ service addresses regardless of the health of the service.
+
+ * `udp_answer_limit` - Limit the number of
+ resource records contained in the answer section of a UDP-based DNS
+ response. When answering a question, Consul will use the complete list of
+ matching hosts, shuffle the list randomly, and then limit the number of
+ answers to `udp_answer_limit` (default `3`). In environments where
+ [RFC 3484 Section 6](https://tools.ietf.org/html/rfc3484#section-6) Rule 9
+ is implemented and enforced (i.e. DNS answers are always sorted and
+ therefore never random), clients may need to set this value to `1` to
+ preserve the expected randomized distribution behavior (note:
+ [https://tools.ietf.org/html/rfc3484](RFC 3484) has been obsoleted by
+ [RFC 6724](https://tools.ietf.org/html/rfc6724) and as a result it should
+ be increasingly uncommon to need to change this value with modern
+ resolvers).
* `domain` Equivalent to the
[`-domain` command-line flag](#_domain).