Merge pull request #1712 from hashicorp/b-dns-single-record

Expose a tunable to adjust the number of DNS records in an answer (useful for defeating RFC3484 §6 Rule 9).
This commit is contained in:
Sean Chittenden 2016-03-30 01:17:16 -07:00
commit f44f411b70
8 changed files with 225 additions and 67 deletions

View File

@ -189,6 +189,11 @@ func (c *Command) readConfig() *Config {
}
}
// Verify DNS settings
if config.DNSConfig.UDPAnswerLimit < 1 {
c.Ui.Error(fmt.Sprintf("dns_config.udp_answer_limit %d too low, must always be greater than zero", config.DNSConfig.UDPAnswerLimit))
}
if config.EncryptKey != "" {
if _, err := config.EncryptBytes(); err != nil {
c.Ui.Error(fmt.Sprintf("Invalid encryption key: %s", err))

View File

@ -77,6 +77,21 @@ type DNSConfig struct {
// returned by default for UDP.
EnableTruncate bool `mapstructure:"enable_truncate"`
// UDPAnswerLimit is used to limit the maximum number of DNS Resource
// Records returned in the ANSWER section of a DNS response. This is
// not normally useful and will be limited based on the querying
// protocol, however systems that implemented §6 Rule 9 in RFC3484
// may want to set this to `1` in order to subvert §6 Rule 9 and
// re-obtain the effect of randomized resource records (i.e. each
// answer contains only one IP, but the IP changes every request).
// RFC3484 sorts answers in a deterministic order, which defeats the
// purpose of randomized DNS responses. This RFC has been obsoleted
// by RFC6724 and restores the desired behavior of randomized
// responses, however a large number of Linux hosts using glibc(3)
// implemented §6 Rule 9 and may need this option (e.g. CentOS 5-6,
// Debian Squeeze, etc).
UDPAnswerLimit int `mapstructure:"udp_answer_limit"`
// MaxStale is used to bound how stale of a result is
// accepted for a DNS lookup. This can be used with
// AllowStale to limit how old of a value is served up.
@ -527,7 +542,8 @@ func DefaultConfig() *Config {
Server: 8300,
},
DNSConfig: DNSConfig{
MaxStale: 5 * time.Second,
UDPAnswerLimit: 3,
MaxStale: 5 * time.Second,
},
Telemetry: Telemetry{
StatsitePrefix: "consul",
@ -1127,6 +1143,9 @@ func MergeConfig(a, b *Config) *Config {
if b.DNSConfig.AllowStale {
result.DNSConfig.AllowStale = true
}
if b.DNSConfig.UDPAnswerLimit != 0 {
result.DNSConfig.UDPAnswerLimit = b.DNSConfig.UDPAnswerLimit
}
if b.DNSConfig.EnableTruncate {
result.DNSConfig.EnableTruncate = true
}

View File

@ -521,19 +521,28 @@ func TestDecodeConfig(t *testing.T) {
}
// DNS node ttl, max stale
input = `{"dns_config": {"node_ttl": "5s", "max_stale": "15s", "allow_stale": true}}`
input = `{"dns_config": {"allow_stale": true, "enable_truncate": false, "max_stale": "15s", "node_ttl": "5s", "only_passing": true, "udp_answer_limit": 6}}`
config, err = DecodeConfig(bytes.NewReader([]byte(input)))
if err != nil {
t.Fatalf("err: %s", err)
}
if config.DNSConfig.NodeTTL != 5*time.Second {
if !config.DNSConfig.AllowStale {
t.Fatalf("bad: %#v", config)
}
if config.DNSConfig.EnableTruncate {
t.Fatalf("bad: %#v", config)
}
if config.DNSConfig.MaxStale != 15*time.Second {
t.Fatalf("bad: %#v", config)
}
if !config.DNSConfig.AllowStale {
if config.DNSConfig.NodeTTL != 5*time.Second {
t.Fatalf("bad: %#v", config)
}
if !config.DNSConfig.OnlyPassing {
t.Fatalf("bad: %#v", config)
}
if config.DNSConfig.UDPAnswerLimit != 6 {
t.Fatalf("bad: %#v", config)
}
@ -1252,13 +1261,14 @@ func TestMergeConfig(t *testing.T) {
DataDir: "/tmp/bar",
DNSRecursors: []string{"127.0.0.2:1001"},
DNSConfig: DNSConfig{
NodeTTL: 10 * time.Second,
AllowStale: false,
EnableTruncate: true,
MaxStale: 30 * time.Second,
NodeTTL: 10 * time.Second,
ServiceTTL: map[string]time.Duration{
"api": 10 * time.Second,
},
AllowStale: true,
MaxStale: 30 * time.Second,
EnableTruncate: true,
UDPAnswerLimit: 4,
},
Domain: "other",
LogLevel: "info",

View File

@ -12,12 +12,17 @@ import (
"github.com/armon/go-metrics"
"github.com/hashicorp/consul/consul"
"github.com/hashicorp/consul/consul/structs"
"github.com/hashicorp/consul/lib"
"github.com/miekg/dns"
)
const (
maxServiceResponses = 3 // For UDP only
maxRecurseRecords = 5
// UDP can fit ~25 A records in a 512B response, and ~14 AAAA
// records. Limit further to prevent unintentional configuration
// abuse that would have a negative effect on application response
// times.
maxUDPAnswerLimit = 8
maxRecurseRecords = 5
)
// DNSServer is used to wrap an Agent and expose various
@ -487,15 +492,16 @@ func (d *DNSServer) formatNodeRecord(node *structs.Node, addr, qName string, qTy
return records
}
// trimAnswers makes sure a UDP response is not longer than allowed by RFC 1035.
// We first enforce an arbitrary limit, and then make sure the response doesn't
// exceed 512 bytes.
func trimAnswers(resp *dns.Msg) (trimmed bool) {
// trimUDPAnswers makes sure a UDP response is not longer than allowed by RFC
// 1035. Enforce an arbitrary limit that can be further ratcheted down by
// config, and then make sure the response doesn't exceed 512 bytes.
func trimUDPAnswers(config *DNSConfig, resp *dns.Msg) (trimmed bool) {
numAnswers := len(resp.Answer)
// This cuts UDP responses to a useful but limited number of responses.
if numAnswers > maxServiceResponses {
resp.Answer = resp.Answer[:maxServiceResponses]
maxAnswers := lib.MinInt(maxUDPAnswerLimit, config.UDPAnswerLimit)
if numAnswers > maxAnswers {
resp.Answer = resp.Answer[:maxAnswers]
}
// This enforces the hard limit of 512 bytes per the RFC.
@ -567,7 +573,7 @@ RPC:
// If the network is not TCP, restrict the number of responses
if network != "tcp" {
wasTrimmed := trimAnswers(resp)
wasTrimmed := trimUDPAnswers(d.config, resp)
// Flag that there are more records to return in the UDP response
if wasTrimmed && d.config.EnableTruncate {
@ -599,7 +605,7 @@ func (d *DNSServer) preparedQueryLookup(network, datacenter, query string, req,
// match the previous behavior. We can optimize by pushing more filtering
// into the query execution, but for now I think we need to get the full
// response. We could also choose a large arbitrary number that will
// likely work in practice, like 10*maxServiceResponses which should help
// likely work in practice, like 10*maxUDPAnswerLimit which should help
// reduce bandwidth if there are thousands of nodes available.
endpoint := d.agent.getEndpoint(preparedQueryEndpoint)
@ -662,7 +668,7 @@ RPC:
// If the network is not TCP, restrict the number of responses.
if network != "tcp" {
wasTrimmed := trimAnswers(resp)
wasTrimmed := trimUDPAnswers(d.config, resp)
// Flag that there are more records to return in the UDP response
if wasTrimmed && d.config.EnableTruncate {
@ -682,6 +688,7 @@ func (d *DNSServer) serviceNodeRecords(dc string, nodes structs.CheckServiceNode
qName := req.Question[0].Name
qType := req.Question[0].Qtype
handled := make(map[string]struct{})
for _, node := range nodes {
// Start with the translated address but use the service address,
// if specified.

View File

@ -2,6 +2,7 @@ package agent
import (
"fmt"
"math/rand"
"net"
"os"
"strings"
@ -13,6 +14,18 @@ import (
"github.com/miekg/dns"
)
const (
configUDPAnswerLimit = 4
defaultNumUDPResponses = 3
testUDPTruncateLimit = 8
pctNodesWithIPv6 = 0.5
// generateNumNodes is the upper bounds for the number of hosts used
// in testing below. Generate an arbitrarily large number of hosts.
generateNumNodes = testUDPTruncateLimit * defaultNumUDPResponses * configUDPAnswerLimit
)
func makeDNSServer(t *testing.T) (string, *DNSServer) {
return makeDNSServerConfig(t, nil, nil)
}
@ -26,7 +39,7 @@ func makeDNSServerConfig(
if agentFn != nil {
agentFn(agentConf)
}
dnsConf := &DNSConfig{}
dnsConf := &DefaultConfig().DNSConfig
if dnsFn != nil {
dnsFn(dnsConf)
}
@ -1808,8 +1821,8 @@ func TestDNS_ServiceLookup_Randomize(t *testing.T) {
testutil.WaitForLeader(t, srv.agent.RPC, "dc1")
// Register a large set of nodes.
for i := 0; i < 3*maxServiceResponses; i++ {
// Register a large number of nodes.
for i := 0; i < generateNumNodes; i++ {
args := &structs.RegisterRequest{
Datacenter: "dc1",
Node: fmt.Sprintf("foo%d", i),
@ -1856,7 +1869,7 @@ func TestDNS_ServiceLookup_Randomize(t *testing.T) {
m := new(dns.Msg)
m.SetQuestion(question, dns.TypeANY)
c := new(dns.Client)
c := &dns.Client{Net: "udp"}
in, _, err := c.Exchange(m, addr.String())
if err != nil {
t.Fatalf("err: %v", err)
@ -1864,7 +1877,7 @@ func TestDNS_ServiceLookup_Randomize(t *testing.T) {
// Response length should be truncated and we should get
// an A record for each response.
if len(in.Answer) != maxServiceResponses {
if len(in.Answer) != defaultNumUDPResponses {
t.Fatalf("Bad: %#v", len(in.Answer))
}
@ -1902,8 +1915,8 @@ func TestDNS_ServiceLookup_Truncate(t *testing.T) {
testutil.WaitForLeader(t, srv.agent.RPC, "dc1")
// Register nodes a large number of nodes.
for i := 0; i < 3*maxServiceResponses; i++ {
// Register a large number of nodes.
for i := 0; i < generateNumNodes; i++ {
args := &structs.RegisterRequest{
Datacenter: "dc1",
Node: fmt.Sprintf("foo%d", i),
@ -2043,17 +2056,19 @@ func TestDNS_ServiceLookup_LargeResponses(t *testing.T) {
}
}
func TestDNS_ServiceLookup_MaxResponses(t *testing.T) {
dir, srv := makeDNSServer(t)
func testDNS_ServiceLookup_responseLimits(t *testing.T, answerLimit int, qType uint16,
expectedService, expectedQuery, expectedQueryID int) (bool, error) {
dir, srv := makeDNSServerConfig(t, nil, func(c *DNSConfig) {
c.UDPAnswerLimit = answerLimit
})
defer os.RemoveAll(dir)
defer srv.agent.Shutdown()
testutil.WaitForLeader(t, srv.agent.RPC, "dc1")
// Register a large number of nodes.
for i := 0; i < 6*maxServiceResponses; i++ {
for i := 0; i < generateNumNodes; i++ {
nodeAddress := fmt.Sprintf("127.0.0.%d", i+1)
if i > 3 {
if rand.Float64() < pctNodesWithIPv6 {
nodeAddress = fmt.Sprintf("fe80::%d", i+1)
}
args := &structs.RegisterRequest{
@ -2061,72 +2076,115 @@ func TestDNS_ServiceLookup_MaxResponses(t *testing.T) {
Node: fmt.Sprintf("foo%d", i),
Address: nodeAddress,
Service: &structs.NodeService{
Service: "web",
Port: 8000,
Service: "api-tier",
Port: 8080,
},
}
var out struct{}
if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
return false, fmt.Errorf("err: %v", err)
}
}
// Register an equivalent prepared query.
var id string
{
args := &structs.PreparedQueryRequest{
Datacenter: "dc1",
Op: structs.PreparedQueryCreate,
Query: &structs.PreparedQuery{
Name: "api-tier",
Service: structs.ServiceQuery{
Service: "web",
Service: "api-tier",
},
},
}
if err := srv.agent.RPC("PreparedQuery.Apply", args, &id); err != nil {
t.Fatalf("err: %v", err)
return false, fmt.Errorf("err: %v", err)
}
}
// Look up the service directly and via prepared query.
questions := []string{
"web.service.consul.",
"api-tier.service.consul.",
"api-tier.query.consul.",
id + ".query.consul.",
}
for _, question := range questions {
for idx, question := range questions {
m := new(dns.Msg)
m.SetQuestion(question, dns.TypeANY)
m.SetQuestion(question, qType)
addr, _ := srv.agent.config.ClientListener("", srv.agent.config.Ports.DNS)
c := new(dns.Client)
c := &dns.Client{Net: "udp"}
in, _, err := c.Exchange(m, addr.String())
if err != nil {
t.Fatalf("err: %v", err)
return false, fmt.Errorf("err: %v", err)
}
if len(in.Answer) != 3 {
t.Fatalf("should receive 3 answers for ANY")
switch idx {
case 0:
if len(in.Answer) != expectedService {
return false, fmt.Errorf("%d/%d answers received for type %v for %s", len(in.Answer), answerLimit, question)
}
case 1:
if len(in.Answer) != expectedQuery {
return false, fmt.Errorf("%d/%d answers received for type %v for %s", len(in.Answer), answerLimit, question)
}
case 2:
if len(in.Answer) != expectedQueryID {
return false, fmt.Errorf("%d/%d answers received for type %v for %s", len(in.Answer), answerLimit, question)
}
default:
panic("abort")
}
}
return true, nil
}
func TestDNS_ServiceLookup_AnswerLimits(t *testing.T) {
// Build a matrix of config parameters (udpAnswerLimit), and the
// length of the response per query type and question.
tests := []struct {
name string
udpAnswerLimit int
expectedAService int
expectedAQuery int
expectedAQueryID int
expectedAAAAService int
expectedAAAAQuery int
expectedAAAAQueryID int
expectedANYService int
expectedANYQuery int
expectedANYQueryID int
}{
{"0", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{"1", 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
{"2", 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
{"3", 3, 3, 3, 3, 3, 3, 3, 3, 3, 3},
{"4", 4, 4, 4, 4, 4, 4, 4, 4, 4, 4},
{"5", 5, 5, 5, 5, 5, 5, 5, 5, 5, 5},
{"6", 6, 6, 6, 6, 6, 6, 5, 6, 6, 6},
{"7", 7, 7, 7, 6, 7, 7, 5, 7, 7, 6},
{"8", 8, 8, 8, 6, 8, 8, 5, 8, 8, 6},
{"9", 9, 8, 8, 6, 8, 8, 5, 8, 8, 6},
{"20", 20, 8, 8, 6, 8, 8, 5, 8, 8, 6},
{"30", 30, 8, 8, 6, 8, 8, 5, 8, 8, 6},
}
for _, test := range tests {
ok, err := testDNS_ServiceLookup_responseLimits(t, test.udpAnswerLimit, dns.TypeA, test.expectedAService, test.expectedAQuery, test.expectedAQueryID)
if !ok {
t.Errorf("Expected service A lookup %d to pass: %v", test.name, err)
}
m.SetQuestion(question, dns.TypeA)
in, _, err = c.Exchange(m, addr.String())
if err != nil {
t.Fatalf("err: %v", err)
ok, err = testDNS_ServiceLookup_responseLimits(t, test.udpAnswerLimit, dns.TypeAAAA, test.expectedAAAAService, test.expectedAAAAQuery, test.expectedAAAAQueryID)
if !ok {
t.Errorf("Expected service AAAA lookup %d to pass: %v", test.name, err)
}
if len(in.Answer) != 3 {
t.Fatalf("should receive 3 answers for A")
}
m.SetQuestion(question, dns.TypeAAAA)
in, _, err = c.Exchange(m, addr.String())
if err != nil {
t.Fatalf("err: %v", err)
}
if len(in.Answer) != 3 {
t.Fatalf("should receive 3 answers for AAAA")
ok, err = testDNS_ServiceLookup_responseLimits(t, test.udpAnswerLimit, dns.TypeANY, test.expectedANYService, test.expectedANYQuery, test.expectedANYQueryID)
if !ok {
t.Errorf("Expected service ANY lookup %d to %v: %v", test.name, err)
}
}
}

15
lib/math.go Normal file
View File

@ -0,0 +1,15 @@
package lib
func MaxInt(a, b int) int {
if a > b {
return a
}
return b
}
func MinInt(a, b int) int {
if a > b {
return b
}
return a
}

29
lib/math_test.go Normal file
View File

@ -0,0 +1,29 @@
package lib_test
import (
"testing"
"github.com/hashicorp/consul/lib"
)
func TestMathMaxInt(t *testing.T) {
tests := [][3]int{{1, 2, 2}, {-1, 1, 1}, {2, 0, 2}}
for i, _ := range tests {
expected := tests[i][2]
actual := lib.MaxInt(tests[i][0], tests[i][1])
if expected != actual {
t.Fatalf("in iteration %d expected %d, got %d", i, expected, actual)
}
}
}
func TestMathMinInt(t *testing.T) {
tests := [][3]int{{1, 2, 1}, {-1, 1, -1}, {2, 0, 0}}
for i, _ := range tests {
expected := tests[i][2]
actual := lib.MinInt(tests[i][0], tests[i][1])
if expected != actual {
t.Fatalf("in iteration %d expected %d, got %d", i, expected, actual)
}
}
}

View File

@ -499,12 +499,27 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass
using TCP to get the full set of records.
* <a name="only_passing"></a><a href="#only_passing">`only_passing`</a>
When set to true, the default, the querying agent will only receive node
or service addresses for healthy services. A healthy service is defined
as a service with one or more healthchecks and all defined healthchecks
for the service are in a passing or warning state (i.e. not
critical). Set to false to have the querying agent include all node and
service addresses regardless of the health of the service.
When set to true, the default, the querying agent will only receive node
or service addresses for healthy services. A healthy service is defined
as a service with one or more healthchecks and all defined healthchecks
for the service are in a passing or warning state (i.e. not
critical). Set to false to have the querying agent include all node and
service addresses regardless of the health of the service.
* <a name="udp_answer_limit"></a><a
href="#udp_answer_limit">`udp_answer_limit`</a> - Limit the number of
resource records contained in the answer section of a UDP-based DNS
response. When answering a question, Consul will use the complete list of
matching hosts, shuffle the list randomly, and then limit the number of
answers to `udp_answer_limit` (default `3`). In environments where
[RFC 3484 Section 6](https://tools.ietf.org/html/rfc3484#section-6) Rule 9
is implemented and enforced (i.e. DNS answers are always sorted and
therefore never random), clients may need to set this value to `1` to
preserve the expected randomized distribution behavior (note:
[https://tools.ietf.org/html/rfc3484](RFC 3484) has been obsoleted by
[RFC 6724](https://tools.ietf.org/html/rfc6724) and as a result it should
be increasingly uncommon to need to change this value with modern
resolvers).
* <a name="domain"></a><a href="#domain">`domain`</a> Equivalent to the
[`-domain` command-line flag](#_domain).