Allow adjusting the number of DNS records in a response...

Based on work done by @fusiondog in #1583, extend the concept to use an integer instead of a boolean.

Fixes: #1583 && #1481
This commit is contained in:
Sean Chittenden 2016-02-11 23:58:48 -08:00
parent 74623c372a
commit 9fb64ab114
8 changed files with 186 additions and 34 deletions

View File

@ -189,6 +189,11 @@ func (c *Command) readConfig() *Config {
} }
} }
// Verify DNS settings
if config.DNSConfig.UDPAnswerLimit < 1 {
c.Ui.Error(fmt.Sprintf("dns_config.udp_answer_limit %d too low, must always be greater than zero", config.DNSConfig.UDPAnswerLimit))
}
if config.EncryptKey != "" { if config.EncryptKey != "" {
if _, err := config.EncryptBytes(); err != nil { if _, err := config.EncryptBytes(); err != nil {
c.Ui.Error(fmt.Sprintf("Invalid encryption key: %s", err)) c.Ui.Error(fmt.Sprintf("Invalid encryption key: %s", err))

View File

@ -77,13 +77,16 @@ type DNSConfig struct {
// returned by default for UDP. // returned by default for UDP.
EnableTruncate bool `mapstructure:"enable_truncate"` EnableTruncate bool `mapstructure:"enable_truncate"`
// EnableSingleton is used to override default behavior // UDPAnswerLimit is used to limit the maximum number of DNS Resource
// of DNS and return only a single host in a round robin // Records returned in the ANSWER section of a DNS response. This is
// DNS service request rather than all healthy service // not normally useful and will be limited based on the querying
// members. This is a work around for systems using // protocol, however systems that implemented §6 Rule 9 in RFC3484
// getaddrinfo using rule 9 sorting from RFC3484 which // may want to set this to 1 in order to achieve round-robin DNS.
// breaks round robin DNS. // RFC3484 sorts answers in a deterministic order, which defeats the
EnableSingleton bool `mapstructure:"enable_singleton"` // purpose of round-robin DNS. This RFC has been obsoleted by
// RFC6724, however a large number of Linux hosts using glibc(3)
// implemented §6 Rule 9 (e.g. CentOS 5-6, Debian Squeeze, etc).
UDPAnswerLimit int `mapstructure:"udp_answer_limit"`
// MaxStale is used to bound how stale of a result is // MaxStale is used to bound how stale of a result is
// accepted for a DNS lookup. This can be used with // accepted for a DNS lookup. This can be used with
@ -535,6 +538,7 @@ func DefaultConfig() *Config {
Server: 8300, Server: 8300,
}, },
DNSConfig: DNSConfig{ DNSConfig: DNSConfig{
UDPAnswerLimit: 3,
MaxStale: 5 * time.Second, MaxStale: 5 * time.Second,
}, },
Telemetry: Telemetry{ Telemetry: Telemetry{
@ -1135,12 +1139,12 @@ func MergeConfig(a, b *Config) *Config {
if b.DNSConfig.AllowStale { if b.DNSConfig.AllowStale {
result.DNSConfig.AllowStale = true result.DNSConfig.AllowStale = true
} }
if b.DNSConfig.UDPAnswerLimit != 0 {
result.DNSConfig.UDPAnswerLimit = b.DNSConfig.UDPAnswerLimit
}
if b.DNSConfig.EnableTruncate { if b.DNSConfig.EnableTruncate {
result.DNSConfig.EnableTruncate = true result.DNSConfig.EnableTruncate = true
} }
if b.DNSConfig.EnableSingleton {
result.DNSConfig.EnableSingleton = true
}
if b.DNSConfig.MaxStale != 0 { if b.DNSConfig.MaxStale != 0 {
result.DNSConfig.MaxStale = b.DNSConfig.MaxStale result.DNSConfig.MaxStale = b.DNSConfig.MaxStale
} }

View File

@ -1252,13 +1252,14 @@ func TestMergeConfig(t *testing.T) {
DataDir: "/tmp/bar", DataDir: "/tmp/bar",
DNSRecursors: []string{"127.0.0.2:1001"}, DNSRecursors: []string{"127.0.0.2:1001"},
DNSConfig: DNSConfig{ DNSConfig: DNSConfig{
AllowStale: false,
EnableTruncate: true,
MaxStale: 30 * time.Second,
NodeTTL: 10 * time.Second, NodeTTL: 10 * time.Second,
ServiceTTL: map[string]time.Duration{ ServiceTTL: map[string]time.Duration{
"api": 10 * time.Second, "api": 10 * time.Second,
}, },
AllowStale: true, UDPAnswerLimit: 3,
MaxStale: 30 * time.Second,
EnableTruncate: true,
}, },
Domain: "other", Domain: "other",
LogLevel: "info", LogLevel: "info",

View File

@ -12,11 +12,12 @@ import (
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/hashicorp/consul/consul" "github.com/hashicorp/consul/consul"
"github.com/hashicorp/consul/consul/structs" "github.com/hashicorp/consul/consul/structs"
"github.com/hashicorp/consul/lib"
"github.com/miekg/dns" "github.com/miekg/dns"
) )
const ( const (
maxServiceResponses = 3 // For UDP only maxUDPAnswerLimit = 8 // For UDP only
maxRecurseRecords = 5 maxRecurseRecords = 5
) )
@ -599,7 +600,7 @@ func (d *DNSServer) preparedQueryLookup(network, datacenter, query string, req,
// match the previous behavior. We can optimize by pushing more filtering // match the previous behavior. We can optimize by pushing more filtering
// into the query execution, but for now I think we need to get the full // into the query execution, but for now I think we need to get the full
// response. We could also choose a large arbitrary number that will // response. We could also choose a large arbitrary number that will
// likely work in practice, like 10*maxServiceResponses which should help // likely work in practice, like 10*maxUDPAnswerLimit which should help
// reduce bandwidth if there are thousands of nodes available. // reduce bandwidth if there are thousands of nodes available.
endpoint := d.agent.getEndpoint(preparedQueryEndpoint) endpoint := d.agent.getEndpoint(preparedQueryEndpoint)
@ -682,6 +683,9 @@ func (d *DNSServer) serviceNodeRecords(dc string, nodes structs.CheckServiceNode
qName := req.Question[0].Name qName := req.Question[0].Name
qType := req.Question[0].Qtype qType := req.Question[0].Qtype
handled := make(map[string]struct{}) handled := make(map[string]struct{})
// Post-shuffle: limit the number of nodes we return to the client
effectiveRecordLimit := lib.MinInt(d.config.UDPAnswerLimit, maxUDPAnswerLimit)
for _, node := range nodes { for _, node := range nodes {
// Start with the translated address but use the service address, // Start with the translated address but use the service address,
// if specified. // if specified.
@ -702,7 +706,11 @@ func (d *DNSServer) serviceNodeRecords(dc string, nodes structs.CheckServiceNode
if records != nil { if records != nil {
resp.Answer = append(resp.Answer, records...) resp.Answer = append(resp.Answer, records...)
} }
if d.config.EnableSingleton {
if len(resp.Answer) >= effectiveRecordLimit {
if d.config.EnableTruncate {
resp.Truncated = true
}
break break
} }
} }

View File

@ -13,6 +13,13 @@ import (
"github.com/miekg/dns" "github.com/miekg/dns"
) )
const (
testUDPAnswerLimit = 4
testMaxUDPDNSResponses = 3
testGenerateNumNodes = 12
testUDPTruncateLimit = 8
)
func makeDNSServer(t *testing.T) (string, *DNSServer) { func makeDNSServer(t *testing.T) (string, *DNSServer) {
return makeDNSServerConfig(t, nil, nil) return makeDNSServerConfig(t, nil, nil)
} }
@ -26,7 +33,7 @@ func makeDNSServerConfig(
if agentFn != nil { if agentFn != nil {
agentFn(agentConf) agentFn(agentConf)
} }
dnsConf := &DNSConfig{} dnsConf := &DefaultConfig().DNSConfig
if dnsFn != nil { if dnsFn != nil {
dnsFn(dnsConf) dnsFn(dnsConf)
} }
@ -1809,7 +1816,7 @@ func TestDNS_ServiceLookup_Randomize(t *testing.T) {
testutil.WaitForLeader(t, srv.agent.RPC, "dc1") testutil.WaitForLeader(t, srv.agent.RPC, "dc1")
// Register a large set of nodes. // Register a large set of nodes.
for i := 0; i < 3*maxServiceResponses; i++ { for i := 0; i < 2*testMaxUDPDNSResponses; i++ {
args := &structs.RegisterRequest{ args := &structs.RegisterRequest{
Datacenter: "dc1", Datacenter: "dc1",
Node: fmt.Sprintf("foo%d", i), Node: fmt.Sprintf("foo%d", i),
@ -1856,7 +1863,7 @@ func TestDNS_ServiceLookup_Randomize(t *testing.T) {
m := new(dns.Msg) m := new(dns.Msg)
m.SetQuestion(question, dns.TypeANY) m.SetQuestion(question, dns.TypeANY)
c := new(dns.Client) c := &dns.Client{Net: "udp"}
in, _, err := c.Exchange(m, addr.String()) in, _, err := c.Exchange(m, addr.String())
if err != nil { if err != nil {
t.Fatalf("err: %v", err) t.Fatalf("err: %v", err)
@ -1864,7 +1871,7 @@ func TestDNS_ServiceLookup_Randomize(t *testing.T) {
// Response length should be truncated and we should get // Response length should be truncated and we should get
// an A record for each response. // an A record for each response.
if len(in.Answer) != maxServiceResponses { if len(in.Answer) != testMaxUDPDNSResponses {
t.Fatalf("Bad: %#v", len(in.Answer)) t.Fatalf("Bad: %#v", len(in.Answer))
} }
@ -1903,7 +1910,7 @@ func TestDNS_ServiceLookup_Truncate(t *testing.T) {
testutil.WaitForLeader(t, srv.agent.RPC, "dc1") testutil.WaitForLeader(t, srv.agent.RPC, "dc1")
// Register nodes a large number of nodes. // Register nodes a large number of nodes.
for i := 0; i < 3*maxServiceResponses; i++ { for i := 0; i < 2*testUDPTruncateLimit; i++ {
args := &structs.RegisterRequest{ args := &structs.RegisterRequest{
Datacenter: "dc1", Datacenter: "dc1",
Node: fmt.Sprintf("foo%d", i), Node: fmt.Sprintf("foo%d", i),
@ -2044,16 +2051,18 @@ func TestDNS_ServiceLookup_LargeResponses(t *testing.T) {
} }
func TestDNS_ServiceLookup_MaxResponses(t *testing.T) { func TestDNS_ServiceLookup_MaxResponses(t *testing.T) {
dir, srv := makeDNSServer(t) dir, srv := makeDNSServerConfig(t, nil, func(c *DNSConfig) {
c.UDPAnswerLimit = testUDPAnswerLimit
})
defer os.RemoveAll(dir) defer os.RemoveAll(dir)
defer srv.agent.Shutdown() defer srv.agent.Shutdown()
testutil.WaitForLeader(t, srv.agent.RPC, "dc1") testutil.WaitForLeader(t, srv.agent.RPC, "dc1")
// Register a large number of nodes. // Register a large number of nodes.
for i := 0; i < 6*maxServiceResponses; i++ { for i := 0; i < testUDPAnswerLimit*testMaxUDPDNSResponses; i++ {
nodeAddress := fmt.Sprintf("127.0.0.%d", i+1) nodeAddress := fmt.Sprintf("127.0.0.%d", i+1)
if i > 3 { if i > (testUDPAnswerLimit * testMaxUDPDNSResponses / 2) {
nodeAddress = fmt.Sprintf("fe80::%d", i+1) nodeAddress = fmt.Sprintf("fe80::%d", i+1)
} }
args := &structs.RegisterRequest{ args := &structs.RegisterRequest{
@ -2094,7 +2103,7 @@ func TestDNS_ServiceLookup_MaxResponses(t *testing.T) {
"web.service.consul.", "web.service.consul.",
id + ".query.consul.", id + ".query.consul.",
} }
for _, question := range questions { for i, question := range questions {
m := new(dns.Msg) m := new(dns.Msg)
m.SetQuestion(question, dns.TypeANY) m.SetQuestion(question, dns.TypeANY)
@ -2105,8 +2114,8 @@ func TestDNS_ServiceLookup_MaxResponses(t *testing.T) {
t.Fatalf("err: %v", err) t.Fatalf("err: %v", err)
} }
if len(in.Answer) != 3 { if len(in.Answer) != testUDPAnswerLimit {
t.Fatalf("should receive 3 answers for ANY") t.Fatalf("should receive %d answers for ANY, received: %d", testUDPAnswerLimit, len(in.Answer))
} }
m.SetQuestion(question, dns.TypeA) m.SetQuestion(question, dns.TypeA)
@ -2115,8 +2124,8 @@ func TestDNS_ServiceLookup_MaxResponses(t *testing.T) {
t.Fatalf("err: %v", err) t.Fatalf("err: %v", err)
} }
if len(in.Answer) != 3 { if len(in.Answer) != testUDPAnswerLimit {
t.Fatalf("should receive 3 answers for A") t.Fatalf("%d: should receive %d answers for A, received: %d", i, testUDPAnswerLimit, len(in.Answer))
} }
m.SetQuestion(question, dns.TypeAAAA) m.SetQuestion(question, dns.TypeAAAA)
@ -2125,8 +2134,79 @@ func TestDNS_ServiceLookup_MaxResponses(t *testing.T) {
t.Fatalf("err: %v", err) t.Fatalf("err: %v", err)
} }
if len(in.Answer) != 3 { if len(in.Answer) != testUDPAnswerLimit {
t.Fatalf("should receive 3 answers for AAAA") t.Fatalf("should receive %d answers for AAAA, received: %d", testUDPAnswerLimit, len(in.Answer))
}
}
}
func TestDNS_ServiceLookup_UDPAnswerLimit(t *testing.T) {
dir, srv := makeDNSServer(t)
defer os.RemoveAll(dir)
defer srv.agent.Shutdown()
testutil.WaitForLeader(t, srv.agent.RPC, "dc1")
// Register a large number of nodes.
for i := 0; i < testGenerateNumNodes*testMaxUDPDNSResponses; i++ {
nodeAddress := fmt.Sprintf("127.0.0.%d", i+1)
// Mix-in a few IPv6 addresses
if i > testGenerateNumNodes/2 {
nodeAddress = fmt.Sprintf("fe80::%d", i+1)
}
args := &structs.RegisterRequest{
Datacenter: "dc1",
Node: fmt.Sprintf("foo%d", i),
Address: nodeAddress,
Service: &structs.NodeService{
Service: "web",
Port: 8000,
},
}
var out struct{}
if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
}
// Look up the service directly and via prepared query.
questions := []string{
"web.service.consul.",
}
for _, question := range questions {
m := new(dns.Msg)
m.SetQuestion(question, dns.TypeANY)
addr, _ := srv.agent.config.ClientListener("", srv.agent.config.Ports.DNS)
c := &dns.Client{Net: "udp"}
in, _, err := c.Exchange(m, addr.String())
if err != nil {
t.Fatalf("err: %v", err)
}
if len(in.Answer) != srv.agent.config.DNSConfig.UDPAnswerLimit {
t.Fatalf("%d/%d answers received for ANY", len(in.Answer), srv.agent.config.DNSConfig.UDPAnswerLimit)
}
m.SetQuestion(question, dns.TypeA)
in, _, err = c.Exchange(m, addr.String())
if err != nil {
t.Fatalf("err: %v", err)
}
if len(in.Answer) != srv.agent.config.DNSConfig.UDPAnswerLimit {
t.Fatalf("%d/%d answers for A", len(in.Answer), srv.agent.config.DNSConfig.UDPAnswerLimit)
}
m.SetQuestion(question, dns.TypeAAAA)
in, _, err = c.Exchange(m, addr.String())
if err != nil {
t.Fatalf("err: %v", err)
}
if len(in.Answer) != srv.agent.config.DNSConfig.UDPAnswerLimit {
t.Fatalf("%d/%d answers for AAAA", len(in.Answer), srv.agent.config.DNSConfig.UDPAnswerLimit)
} }
} }
} }

15
lib/math.go Normal file
View File

@ -0,0 +1,15 @@
package lib
func MaxInt(a, b int) int {
if a > b {
return a
}
return b
}
func MinInt(a, b int) int {
if a > b {
return b
}
return a
}

27
lib/math_test.go Normal file
View File

@ -0,0 +1,27 @@
package lib
import (
"testing"
)
func TestMathMaxInt(t *testing.T) {
tests := [][3]int{{1, 2, 2}, {-1, 1, 1}, {2, 0, 2}}
for i, _ := range tests {
expected := tests[i][2]
actual := MaxInt(tests[i][0], tests[i][1])
if expected != actual {
t.Fatalf("in iteration %d expected %d, got %d", i, expected, actual)
}
}
}
func TestMathMinInt(t *testing.T) {
tests := [][3]int{{1, 2, 1}, {-1, 1, -1}, {2, 0, 0}}
for i, _ := range tests {
expected := tests[i][2]
actual := MinInt(tests[i][0], tests[i][1])
if expected != actual {
t.Fatalf("in iteration %d expected %d, got %d", i, expected, actual)
}
}
}

View File

@ -506,6 +506,18 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass
critical). Set to false to have the querying agent include all node and critical). Set to false to have the querying agent include all node and
service addresses regardless of the health of the service. service addresses regardless of the health of the service.
* <a name="udp_answer_limit"></a><a
href="#udp_answer_limit">`udp_answer_limit`</a> - Artificially limit the
number of resource records contained in the answer section of a UDP-based
DNS response. The default number of resource records returned is `3`. In
environments where
[RFC 3484 Section 6](https://tools.ietf.org/html/rfc3484#section-6) Rule 9
is implemented and enforced, clients may need to set this value to `1` to
preserve randomized DNS round-robin (note:
[https://tools.ietf.org/html/rfc3484](RFC 3484) has been obsoleted by
[RFC 6724](https://tools.ietf.org/html/rfc6724) so it should be uncommon to
need to change this value).
* <a name="domain"></a><a href="#domain">`domain`</a> Equivalent to the * <a name="domain"></a><a href="#domain">`domain`</a> Equivalent to the
[`-domain` command-line flag](#_domain). [`-domain` command-line flag](#_domain).