Merge pull request #200 from hashicorp/f-dns

Adding support for DNS TTLs and stale reads
This commit is contained in:
Armon Dadgar 2014-06-09 11:13:47 -07:00
commit 93fb12ecdf
9 changed files with 545 additions and 34 deletions

View File

@ -227,8 +227,8 @@ func (c *Command) setupAgent(config *Config, logOutput io.Writer, logWriter *log
return err
}
server, err := NewDNSServer(agent, logOutput, config.Domain,
dnsAddr.String(), config.DNSRecursor)
server, err := NewDNSServer(agent, &config.DNSConfig, logOutput,
config.Domain, dnsAddr.String(), config.DNSRecursor)
if err != nil {
agent.Shutdown()
c.Ui.Error(fmt.Sprintf("Error starting dns server: %s", err))

View File

@ -27,6 +27,35 @@ type PortConfig struct {
Server int // Server internal RPC
}
// DNSConfig is used to fine tune the DNS sub-system.
// It can be used to control cache values (TTLs) and stale
// reads. Duration-valued settings are decoded as strings into
// the *Raw fields and then parsed with time.ParseDuration by
// DecodeConfig into their typed counterparts.
type DNSConfig struct {
// NodeTTL provides the TTL value for a node query
NodeTTL time.Duration `mapstructure:"-"`
// NodeTTLRaw is the string form read from the "node_ttl" key
// (e.g. "5s"); it is parsed into NodeTTL.
NodeTTLRaw string `mapstructure:"node_ttl" json:"-"`
// ServiceTTL provides the TTL value for a service
// query for given service. The "*" wildcard can be used
// to set a default for all services.
ServiceTTL map[string]time.Duration `mapstructure:"-"`
// ServiceTTLRaw is the string form read from the "service_ttl"
// key, mapping a service name to a duration string; each entry
// is parsed into ServiceTTL.
ServiceTTLRaw map[string]string `mapstructure:"service_ttl" json:"-"`
// AllowStale is used to enable lookups with stale
// data. This gives horizontal read scalability since
// any Consul server can service the query instead of
// only the leader.
AllowStale bool `mapstructure:"allow_stale"`
// MaxStale is used to bound how stale of a result is
// accepted for a DNS lookup. This can be used with
// AllowStale to limit how old of a value is served up.
// If the stale result exceeds this, another non-stale
// read is performed.
MaxStale time.Duration `mapstructure:"-"`
// MaxStaleRaw is the string form read from the "max_stale" key;
// it is parsed into MaxStale.
MaxStaleRaw string `mapstructure:"max_stale" json:"-"`
}
// Config is the configuration that can be set for an Agent.
// Some of this is configurable as CLI flags, but most must
// be set using a configuration file.
@ -50,6 +79,9 @@ type Config struct {
// resolve non-consul domains
DNSRecursor string `mapstructure:"recursor"`
// DNS configuration
DNSConfig DNSConfig `mapstructure:"dns_config"`
// Domain is the DNS domain for the records. Defaults to "consul."
Domain string `mapstructure:"domain"`
@ -185,6 +217,9 @@ func DefaultConfig() *Config {
SerfWan: consul.DefaultWANSerfPort,
Server: 8300,
},
DNSConfig: DNSConfig{
MaxStale: 5 * time.Second,
},
Protocol: consul.ProtocolVersionMax,
AEInterval: time.Minute,
}
@ -244,6 +279,36 @@ func DecodeConfig(r io.Reader) (*Config, error) {
return nil, err
}
// Handle time conversions
if raw := result.DNSConfig.NodeTTLRaw; raw != "" {
dur, err := time.ParseDuration(raw)
if err != nil {
return nil, fmt.Errorf("NodeTTL invalid: %v", err)
}
result.DNSConfig.NodeTTL = dur
}
if raw := result.DNSConfig.MaxStaleRaw; raw != "" {
dur, err := time.ParseDuration(raw)
if err != nil {
return nil, fmt.Errorf("MaxStale invalid: %v", err)
}
result.DNSConfig.MaxStale = dur
}
if len(result.DNSConfig.ServiceTTLRaw) != 0 {
if result.DNSConfig.ServiceTTL == nil {
result.DNSConfig.ServiceTTL = make(map[string]time.Duration)
}
for service, raw := range result.DNSConfig.ServiceTTLRaw {
dur, err := time.ParseDuration(raw)
if err != nil {
return nil, fmt.Errorf("ServiceTTL %s invalid: %v", service, err)
}
result.DNSConfig.ServiceTTL[service] = dur
}
}
return &result, nil
}
@ -454,6 +519,23 @@ func MergeConfig(a, b *Config) *Config {
if b.RejoinAfterLeave {
result.RejoinAfterLeave = true
}
if b.DNSConfig.NodeTTL != 0 {
result.DNSConfig.NodeTTL = b.DNSConfig.NodeTTL
}
if len(b.DNSConfig.ServiceTTL) != 0 {
if result.DNSConfig.ServiceTTL == nil {
result.DNSConfig.ServiceTTL = make(map[string]time.Duration)
}
for service, dur := range b.DNSConfig.ServiceTTL {
result.DNSConfig.ServiceTTL[service] = dur
}
}
if b.DNSConfig.AllowStale {
result.DNSConfig.AllowStale = true
}
if b.DNSConfig.MaxStale != 0 {
result.DNSConfig.MaxStale = b.DNSConfig.MaxStale
}
// Copy the start join addresses
result.StartJoin = make([]string, 0, len(a.StartJoin)+len(b.StartJoin))

View File

@ -290,6 +290,40 @@ func TestDecodeConfig(t *testing.T) {
if !config.RejoinAfterLeave {
t.Fatalf("bad: %#v", config)
}
// DNS node ttl, max stale
input = `{"dns_config": {"node_ttl": "5s", "max_stale": "15s", "allow_stale": true}}`
config, err = DecodeConfig(bytes.NewReader([]byte(input)))
if err != nil {
t.Fatalf("err: %s", err)
}
if config.DNSConfig.NodeTTL != 5*time.Second {
t.Fatalf("bad: %#v", config)
}
if config.DNSConfig.MaxStale != 15*time.Second {
t.Fatalf("bad: %#v", config)
}
if !config.DNSConfig.AllowStale {
t.Fatalf("bad: %#v", config)
}
// DNS service ttl
input = `{"dns_config": {"service_ttl": {"*": "1s", "api": "10s", "web": "30s"}}}`
config, err = DecodeConfig(bytes.NewReader([]byte(input)))
if err != nil {
t.Fatalf("err: %s", err)
}
if config.DNSConfig.ServiceTTL["*"] != time.Second {
t.Fatalf("bad: %#v", config)
}
if config.DNSConfig.ServiceTTL["api"] != 10*time.Second {
t.Fatalf("bad: %#v", config)
}
if config.DNSConfig.ServiceTTL["web"] != 30*time.Second {
t.Fatalf("bad: %#v", config)
}
}
func TestDecodeConfig_Service(t *testing.T) {
@ -391,10 +425,18 @@ func TestMergeConfig(t *testing.T) {
}
b := &Config{
Bootstrap: true,
Datacenter: "dc2",
DataDir: "/tmp/bar",
DNSRecursor: "127.0.0.2:1001",
Bootstrap: true,
Datacenter: "dc2",
DataDir: "/tmp/bar",
DNSRecursor: "127.0.0.2:1001",
DNSConfig: DNSConfig{
NodeTTL: 10 * time.Second,
ServiceTTL: map[string]time.Duration{
"api": 10 * time.Second,
},
AllowStale: true,
MaxStale: 30 * time.Second,
},
Domain: "other",
LogLevel: "info",
NodeName: "baz",

View File

@ -23,6 +23,7 @@ const (
// service discovery endpoints using a DNS interface.
type DNSServer struct {
agent *Agent
config *DNSConfig
dnsHandler *dns.ServeMux
dnsServer *dns.Server
dnsServerTCP *dns.Server
@ -32,7 +33,7 @@ type DNSServer struct {
}
// NewDNSServer starts a new DNS server to provide an agent interface
func NewDNSServer(agent *Agent, logOutput io.Writer, domain, bind, recursor string) (*DNSServer, error) {
func NewDNSServer(agent *Agent, config *DNSConfig, logOutput io.Writer, domain, bind, recursor string) (*DNSServer, error) {
// Make sure domain is FQDN
domain = dns.Fqdn(domain)
@ -55,6 +56,7 @@ func NewDNSServer(agent *Agent, logOutput io.Writer, domain, bind, recursor stri
// Create the server
srv := &DNSServer{
agent: agent,
config: config,
dnsHandler: mux,
dnsServer: server,
dnsServerTCP: serverTCP,
@ -306,16 +308,25 @@ func (d *DNSServer) nodeLookup(network, datacenter, node string, req, resp *dns.
// Make an RPC request
args := structs.NodeSpecificRequest{
Datacenter: datacenter,
Node: node,
Datacenter: datacenter,
Node: node,
QueryOptions: structs.QueryOptions{AllowStale: d.config.AllowStale},
}
var out structs.IndexedNodeServices
RPC:
if err := d.agent.RPC("Catalog.NodeServices", &args, &out); err != nil {
d.logger.Printf("[ERR] dns: rpc error: %v", err)
resp.SetRcode(req, dns.RcodeServerFailure)
return
}
// Verify that request is not too stale, redo the request
if args.AllowStale && out.LastContact > d.config.MaxStale {
args.AllowStale = false
d.logger.Printf("[WARN] dns: Query results too stale, re-requesting")
goto RPC
}
// If we have no address, return not found!
if out.NodeServices == nil {
resp.SetRcode(req, dns.RcodeNameError)
@ -323,14 +334,15 @@ func (d *DNSServer) nodeLookup(network, datacenter, node string, req, resp *dns.
}
// Add the node record
records := d.formatNodeRecord(&out.NodeServices.Node, req.Question[0].Name, qType)
records := d.formatNodeRecord(&out.NodeServices.Node, req.Question[0].Name,
qType, d.config.NodeTTL)
if records != nil {
resp.Answer = append(resp.Answer, records...)
}
}
// formatNodeRecord takes a Node and returns an A, AAAA, or CNAME record
func (d *DNSServer) formatNodeRecord(node *structs.Node, qName string, qType uint16) (records []dns.RR) {
func (d *DNSServer) formatNodeRecord(node *structs.Node, qName string, qType uint16, ttl time.Duration) (records []dns.RR) {
// Parse the IP
ip := net.ParseIP(node.Address)
var ipv4 net.IP
@ -344,7 +356,7 @@ func (d *DNSServer) formatNodeRecord(node *structs.Node, qName string, qType uin
Name: qName,
Rrtype: dns.TypeA,
Class: dns.ClassINET,
Ttl: 0,
Ttl: uint32(ttl / time.Second),
},
A: ip,
}}
@ -355,7 +367,7 @@ func (d *DNSServer) formatNodeRecord(node *structs.Node, qName string, qType uin
Name: qName,
Rrtype: dns.TypeAAAA,
Class: dns.ClassINET,
Ttl: 0,
Ttl: uint32(ttl / time.Second),
},
AAAA: ip,
}}
@ -368,7 +380,7 @@ func (d *DNSServer) formatNodeRecord(node *structs.Node, qName string, qType uin
Name: qName,
Rrtype: dns.TypeCNAME,
Class: dns.ClassINET,
Ttl: 0,
Ttl: uint32(ttl / time.Second),
},
Target: dns.Fqdn(node.Address),
}
@ -398,24 +410,43 @@ func (d *DNSServer) formatNodeRecord(node *structs.Node, qName string, qType uin
func (d *DNSServer) serviceLookup(network, datacenter, service, tag string, req, resp *dns.Msg) {
// Make an RPC request
args := structs.ServiceSpecificRequest{
Datacenter: datacenter,
ServiceName: service,
ServiceTag: tag,
TagFilter: tag != "",
Datacenter: datacenter,
ServiceName: service,
ServiceTag: tag,
TagFilter: tag != "",
QueryOptions: structs.QueryOptions{AllowStale: d.config.AllowStale},
}
var out structs.IndexedCheckServiceNodes
RPC:
if err := d.agent.RPC("Health.ServiceNodes", &args, &out); err != nil {
d.logger.Printf("[ERR] dns: rpc error: %v", err)
resp.SetRcode(req, dns.RcodeServerFailure)
return
}
// Verify that request is not too stale, redo the request
if args.AllowStale && out.LastContact > d.config.MaxStale {
args.AllowStale = false
d.logger.Printf("[WARN] dns: Query results too stale, re-requesting")
goto RPC
}
// If we have no nodes, return not found!
if len(out.Nodes) == 0 {
resp.SetRcode(req, dns.RcodeNameError)
return
}
// Determine the TTL
var ttl time.Duration
if d.config.ServiceTTL != nil {
var ok bool
ttl, ok = d.config.ServiceTTL[service]
if !ok {
ttl = d.config.ServiceTTL["*"]
}
}
// Filter out any service nodes due to health checks
out.Nodes = d.filterServiceNodes(out.Nodes)
@ -429,10 +460,10 @@ func (d *DNSServer) serviceLookup(network, datacenter, service, tag string, req,
// Add various responses depending on the request
qType := req.Question[0].Qtype
d.serviceNodeRecords(out.Nodes, req, resp)
d.serviceNodeRecords(out.Nodes, req, resp, ttl)
if qType == dns.TypeSRV {
d.serviceSRVRecords(datacenter, out.Nodes, req, resp)
d.serviceSRVRecords(datacenter, out.Nodes, req, resp, ttl)
}
}
@ -464,7 +495,7 @@ func shuffleServiceNodes(nodes structs.CheckServiceNodes) {
}
// serviceNodeRecords is used to add the node records for a service lookup
func (d *DNSServer) serviceNodeRecords(nodes structs.CheckServiceNodes, req, resp *dns.Msg) {
func (d *DNSServer) serviceNodeRecords(nodes structs.CheckServiceNodes, req, resp *dns.Msg, ttl time.Duration) {
qName := req.Question[0].Name
qType := req.Question[0].Qtype
handled := make(map[string]struct{})
@ -478,7 +509,7 @@ func (d *DNSServer) serviceNodeRecords(nodes structs.CheckServiceNodes, req, res
handled[addr] = struct{}{}
// Add the node record
records := d.formatNodeRecord(&node.Node, qName, qType)
records := d.formatNodeRecord(&node.Node, qName, qType, ttl)
if records != nil {
resp.Answer = append(resp.Answer, records...)
}
@ -486,7 +517,7 @@ func (d *DNSServer) serviceNodeRecords(nodes structs.CheckServiceNodes, req, res
}
// serviceSRVRecords is used to add the SRV records for a service lookup
func (d *DNSServer) serviceSRVRecords(dc string, nodes structs.CheckServiceNodes, req, resp *dns.Msg) {
func (d *DNSServer) serviceSRVRecords(dc string, nodes structs.CheckServiceNodes, req, resp *dns.Msg, ttl time.Duration) {
handled := make(map[string]struct{})
for _, node := range nodes {
// Avoid duplicate entries, possible if a node has
@ -503,7 +534,7 @@ func (d *DNSServer) serviceSRVRecords(dc string, nodes structs.CheckServiceNodes
Name: req.Question[0].Name,
Rrtype: dns.TypeSRV,
Class: dns.ClassINET,
Ttl: 0,
Ttl: uint32(ttl / time.Second),
},
Priority: 1,
Weight: 1,
@ -513,7 +544,7 @@ func (d *DNSServer) serviceSRVRecords(dc string, nodes structs.CheckServiceNodes
resp.Answer = append(resp.Answer, srvRec)
// Add the extra record
records := d.formatNodeRecord(&node.Node, srvRec.Target, dns.TypeANY)
records := d.formatNodeRecord(&node.Node, srvRec.Target, dns.TypeANY, ttl)
if records != nil {
resp.Extra = append(resp.Extra, records...)
}

View File

@ -8,14 +8,20 @@ import (
"os"
"strings"
"testing"
"time"
)
// makeDNSServer starts a test DNS server backed by a zero-value
// DNSConfig (no TTLs, stale reads disabled). It is a thin convenience
// wrapper around makeDNSServerConfig and returns that function's
// results unchanged.
func makeDNSServer(t *testing.T) (string, *DNSServer) {
	return makeDNSServerConfig(t, &DNSConfig{})
}
func makeDNSServerConfig(t *testing.T, config *DNSConfig) (string, *DNSServer) {
conf := nextConfig()
addr, _ := conf.ClientListener(conf.Ports.DNS)
dir, agent := makeAgent(t, conf)
server, err := NewDNSServer(agent, agent.logOutput, conf.Domain,
addr.String(), "8.8.8.8:53")
server, err := NewDNSServer(agent, config, agent.logOutput,
conf.Domain, addr.String(), "8.8.8.8:53")
if err != nil {
t.Fatalf("err: %v", err)
}
@ -100,6 +106,9 @@ func TestDNS_NodeLookup(t *testing.T) {
if aRec.A.String() != "127.0.0.1" {
t.Fatalf("Bad: %#v", in.Answer[0])
}
if aRec.Hdr.Ttl != 0 {
t.Fatalf("Bad: %#v", in.Answer[0])
}
// Re-do the query, but specify the DC
m = new(dns.Msg)
@ -122,6 +131,9 @@ func TestDNS_NodeLookup(t *testing.T) {
if aRec.A.String() != "127.0.0.1" {
t.Fatalf("Bad: %#v", in.Answer[0])
}
if aRec.Hdr.Ttl != 0 {
t.Fatalf("Bad: %#v", in.Answer[0])
}
}
func TestDNS_NodeLookup_PeriodName(t *testing.T) {
@ -206,6 +218,9 @@ func TestDNS_NodeLookup_AAAA(t *testing.T) {
if aRec.AAAA.String() != "::4242:4242" {
t.Fatalf("Bad: %#v", in.Answer[0])
}
if aRec.Hdr.Ttl != 0 {
t.Fatalf("Bad: %#v", in.Answer[0])
}
}
func TestDNS_NodeLookup_CNAME(t *testing.T) {
@ -249,6 +264,9 @@ func TestDNS_NodeLookup_CNAME(t *testing.T) {
if cnRec.Target != "www.google.com." {
t.Fatalf("Bad: %#v", in.Answer[0])
}
if cnRec.Hdr.Ttl != 0 {
t.Fatalf("Bad: %#v", in.Answer[0])
}
}
func TestDNS_ServiceLookup(t *testing.T) {
@ -299,6 +317,9 @@ func TestDNS_ServiceLookup(t *testing.T) {
if srvRec.Target != "foo.node.dc1.consul." {
t.Fatalf("Bad: %#v", srvRec)
}
if srvRec.Hdr.Ttl != 0 {
t.Fatalf("Bad: %#v", in.Answer[0])
}
aRec, ok := in.Extra[0].(*dns.A)
if !ok {
@ -310,6 +331,9 @@ func TestDNS_ServiceLookup(t *testing.T) {
if aRec.A.String() != "127.0.0.1" {
t.Fatalf("Bad: %#v", in.Extra[0])
}
if aRec.Hdr.Ttl != 0 {
t.Fatalf("Bad: %#v", in.Extra[0])
}
}
func TestDNS_ServiceLookup_TagPeriod(t *testing.T) {
@ -760,3 +784,226 @@ func TestDNS_ServiceLookup_CNAME(t *testing.T) {
}
}
}
// TestDNS_NodeLookup_TTL verifies that node lookups are answered with
// the configured NodeTTL (10 seconds here) for A, AAAA and CNAME
// records, with stale reads enabled.
func TestDNS_NodeLookup_TTL(t *testing.T) {
// Configure a 10s node TTL; stale reads are allowed up to 1s
config := &DNSConfig{
NodeTTL: 10 * time.Second,
AllowStale: true,
MaxStale: time.Second,
}
dir, srv := makeDNSServerConfig(t, config)
defer os.RemoveAll(dir)
defer srv.agent.Shutdown()
testutil.WaitForLeader(t, srv.agent.RPC, "dc1")
// Register node
args := &structs.RegisterRequest{
Datacenter: "dc1",
Node: "foo",
Address: "127.0.0.1",
}
var out struct{}
if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
// Check the IPv4 record
m := new(dns.Msg)
m.SetQuestion("foo.node.consul.", dns.TypeANY)
c := new(dns.Client)
addr, _ := srv.agent.config.ClientListener(srv.agent.config.Ports.DNS)
in, _, err := c.Exchange(m, addr.String())
if err != nil {
t.Fatalf("err: %v", err)
}
if len(in.Answer) != 1 {
t.Fatalf("Bad: %#v", in)
}
aRec, ok := in.Answer[0].(*dns.A)
if !ok {
t.Fatalf("Bad: %#v", in.Answer[0])
}
if aRec.A.String() != "127.0.0.1" {
t.Fatalf("Bad: %#v", in.Answer[0])
}
// The A record must carry the configured node TTL in seconds
if aRec.Hdr.Ttl != 10 {
t.Fatalf("Bad: %#v", in.Answer[0])
}
// Register node with IPv6
args = &structs.RegisterRequest{
Datacenter: "dc1",
Node: "bar",
Address: "::4242:4242",
}
if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
// Check an IPv6 record
m = new(dns.Msg)
m.SetQuestion("bar.node.consul.", dns.TypeANY)
in, _, err = c.Exchange(m, addr.String())
if err != nil {
t.Fatalf("err: %v", err)
}
if len(in.Answer) != 1 {
t.Fatalf("Bad: %#v", in)
}
aaaaRec, ok := in.Answer[0].(*dns.AAAA)
if !ok {
t.Fatalf("Bad: %#v", in.Answer[0])
}
if aaaaRec.AAAA.String() != "::4242:4242" {
t.Fatalf("Bad: %#v", in.Answer[0])
}
if aaaaRec.Hdr.Ttl != 10 {
t.Fatalf("Bad: %#v", in.Answer[0])
}
// Register node with CNAME
args = &structs.RegisterRequest{
Datacenter: "dc1",
Node: "google",
Address: "www.google.com",
}
if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
m = new(dns.Msg)
m.SetQuestion("google.node.consul.", dns.TypeANY)
in, _, err = c.Exchange(m, addr.String())
if err != nil {
t.Fatalf("err: %v", err)
}
// Should have the CNAME record + a few A records
// NOTE(review): the extra A records presumably come from resolving
// the CNAME target through the configured recursor, which would make
// this step depend on external network access - confirm.
if len(in.Answer) < 2 {
t.Fatalf("Bad: %#v", in)
}
cnRec, ok := in.Answer[0].(*dns.CNAME)
if !ok {
t.Fatalf("Bad: %#v", in.Answer[0])
}
if cnRec.Target != "www.google.com." {
t.Fatalf("Bad: %#v", in.Answer[0])
}
if cnRec.Hdr.Ttl != 10 {
t.Fatalf("Bad: %#v", in.Answer[0])
}
}
// TestDNS_ServiceLookup_TTL verifies per-service TTL selection for SRV
// lookups: a service with an explicit ServiceTTL entry ("db") uses that
// value, while a service without one ("api") uses the "*" wildcard
// entry. Both the SRV answer and the extra A record are checked.
func TestDNS_ServiceLookup_TTL(t *testing.T) {
// "db" gets an explicit 10s TTL; everything else uses the 5s wildcard
config := &DNSConfig{
ServiceTTL: map[string]time.Duration{
"db": 10 * time.Second,
"*": 5 * time.Second,
},
AllowStale: true,
MaxStale: time.Second,
}
dir, srv := makeDNSServerConfig(t, config)
defer os.RemoveAll(dir)
defer srv.agent.Shutdown()
testutil.WaitForLeader(t, srv.agent.RPC, "dc1")
// Register node with 2 services
args := &structs.RegisterRequest{
Datacenter: "dc1",
Node: "foo",
Address: "127.0.0.1",
Service: &structs.NodeService{
Service: "db",
Tags: []string{"master"},
Port: 12345,
},
}
var out struct{}
if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
args = &structs.RegisterRequest{
Datacenter: "dc1",
Node: "foo",
Address: "127.0.0.1",
Service: &structs.NodeService{
Service: "api",
Port: 2222,
},
}
if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
// Look up "db": expect the service-specific 10s TTL
m := new(dns.Msg)
m.SetQuestion("db.service.consul.", dns.TypeSRV)
c := new(dns.Client)
addr, _ := srv.agent.config.ClientListener(srv.agent.config.Ports.DNS)
in, _, err := c.Exchange(m, addr.String())
if err != nil {
t.Fatalf("err: %v", err)
}
if len(in.Answer) != 1 {
t.Fatalf("Bad: %#v", in)
}
srvRec, ok := in.Answer[0].(*dns.SRV)
if !ok {
t.Fatalf("Bad: %#v", in.Answer[0])
}
if srvRec.Hdr.Ttl != 10 {
t.Fatalf("Bad: %#v", in.Answer[0])
}
aRec, ok := in.Extra[0].(*dns.A)
if !ok {
t.Fatalf("Bad: %#v", in.Extra[0])
}
if aRec.Hdr.Ttl != 10 {
t.Fatalf("Bad: %#v", in.Extra[0])
}
// Look up "api": no explicit entry, so expect the 5s wildcard TTL
m = new(dns.Msg)
m.SetQuestion("api.service.consul.", dns.TypeSRV)
in, _, err = c.Exchange(m, addr.String())
if err != nil {
t.Fatalf("err: %v", err)
}
if len(in.Answer) != 1 {
t.Fatalf("Bad: %#v", in)
}
srvRec, ok = in.Answer[0].(*dns.SRV)
if !ok {
t.Fatalf("Bad: %#v", in.Answer[0])
}
if srvRec.Hdr.Ttl != 5 {
t.Fatalf("Bad: %#v", in.Answer[0])
}
aRec, ok = in.Extra[0].(*dns.A)
if !ok {
t.Fatalf("Bad: %#v", in.Extra[0])
}
if aRec.Hdr.Ttl != 5 {
t.Fatalf("Bad: %#v", in.Extra[0])
}
}

View File

@ -19,7 +19,7 @@ with no failing health checks. It's that simple!
There are a number of [configuration options](/docs/agent/options.html) that
are important for the DNS interface. They are `client_addr`, `ports.dns`, `recursor`,
and `domain`. By default Consul will listen on 127.0.0.1:8600 for DNS queries
`domain`, and `dns_config`. By default Consul will listen on 127.0.0.1:8600 for DNS queries
in the "consul." domain, without support for DNS recursion.
There are a few ways to use the DNS interface. One option is to use a custom
@ -118,3 +118,10 @@ without setting the truncate bit. This is to prevent a redundant lookup over
TCP, which generates additional load. If the lookup is done over TCP, the results
are not truncated.
## Caching
By default, all DNS results served by Consul set a 0 TTL value. This disables
caching of DNS results. However, there are many situations in which caching is
desirable for performance and scalability. This is discussed more in the guide
for [DNS Caching](/docs/guides/dns-cache.html).

View File

@ -183,6 +183,30 @@ definitions support being updated during a reload.
This flag can be used to change that domain. All queries in this domain are assumed
to be handled by Consul, and will not be recursively resolved.
* `dns_config` - This object allows a number of sub-keys to be set which can tune
how DNS queries are performed. See this guide on [DNS caching](/docs/guides/dns-cache.html).
The following sub-keys are available:
* `node_ttl` - By default, this is "0s", which means all node lookups are served with
a 0 TTL value. This can be set to allow node lookups to set a TTL value, which enables
DNS caching. This should be specified with the "s" suffix for second, or "m" for minute.
* `service_ttl` - This is a sub-object, which allows for setting a TTL on service lookups
with a per-service policy. The "*" wildcard service can be specified and is used when
there is no specific policy available for a service. By default, all services are served
with a 0 TTL value. Setting this enables DNS caching.
* `allow_stale` - Enables a stale query for DNS information. This allows any Consul
server to service the request, instead of only the leader. The advantage of this is
you get linear read scalability with Consul servers. By default, this is false, meaning
all requests are serviced by the leader. This provides stronger consistency but
with less throughput and higher latency.
* `max_stale` - When `allow_stale` is specified, this is used to limit how
stale of a result will be used. By default, this is set to "5s", which means
if a Consul server is more than 5 seconds behind the leader, the query will be
re-evaluated on the leader to get more up-to-date results.
* `enable_debug` - When set, enables some additional debugging features. Currently,
only used to set the runtime profiling HTTP endpoints.
@ -201,12 +225,12 @@ definitions support being updated during a reload.
* `ports` - This is a nested object that allows setting the bind ports
for the following keys:
* dns - The DNS server, -1 to disable. Default 8600.
* http - The HTTP api, -1 to disable. Default 8500.
* rpc - The RPC endpoint. Default 8400.
* serf_lan - The Serf LAN port. Default 8301.
* serf_wan - The Serf WAN port. Default 8302.
* server - Server RPC address. Default 8300.
* `dns` - The DNS server, -1 to disable. Default 8600.
* `http` - The HTTP api, -1 to disable. Default 8500.
* `rpc` - The RPC endpoint. Default 8400.
* `serf_lan` - The Serf LAN port. Default 8301.
* `serf_wan` - The Serf WAN port. Default 8302.
* `server` - Server RPC address. Default 8300.
* `recursor` - This flag provides an address of an upstream DNS server that is used to
recursively resolve queries if they are not inside the service domain for consul. For example,

View File

@ -0,0 +1,74 @@
---
layout: "docs"
page_title: "DNS Caching"
sidebar_current: "docs-guides-dns-cache"
---
# DNS Caching
One of the main interfaces to Consul is DNS. Using DNS is a simple way to
integrate Consul into an existing infrastructure without any high-touch
integration.
By default, Consul serves all DNS results with a 0 TTL value. This prevents
any caching. The advantage of this is that each DNS lookup is always re-evaluated
and the most timely information is served. However this adds a latency hit
for each lookup and can potentially exhaust the query throughput of a cluster.
For this reason, Consul provides a number of tuning parameters that can
be used to customize how DNS queries are handled.
## Stale Reads
Stale reads can be used to reduce latency and increase the throughput
of DNS queries. By default, all reads are serviced by a [single leader node](/docs/internals/consensus.html).
These reads are strongly consistent but are limited by the throughput
of a single node. Doing a stale read allows any Consul server to
service a query, but non-leader nodes may return data that is potentially
out-of-date. By allowing data to be slightly stale, we get horizontal
read scalability. Now any Consul server can service the request, so we
increase throughput by the number of servers in a cluster.
The [settings](/docs/agent/options.html) used to control stale reads
are `dns_config.allow_stale` which must be set to enable stale reads,
and `dns_config.max_stale` which limits how stale results are allowed to
be.
By default, `allow_stale` is disabled meaning no stale results may be served.
The default for `max_stale` is 5 seconds. This means that if `allow_stale` is
enabled, we will use data from any Consul server that is within 5 seconds
of the leader.
## TTL Values
TTL values can be set to allow DNS results to be cached upstream
of Consul, which can reduce the number of lookups and amortize
the latency of doing a DNS lookup. By default, all TTLs are zero,
preventing any caching.
To enable caching of node lookups (e.g. "foo.node.consul"), we can set
the `dns_config.node_ttl` value. This can be set to "10s" for example,
and all node lookups will serve results with a 10 second TTL.
Service TTLs can be specified at a more fine-grained level. You can set
a TTL on a per-service level, and additionally a wildcard can be specified
that matches if there is no specific service TTL provided.
This is specified using the `dns_config.service_ttl` map. The "*" service
is the wildcard service. For example, if we specify:
```
{
"dns_config": {
"service_ttl": {
"*": "5s",
"web": "30s"
}
}
}
```
This sets all lookups to "web.service.consul" to use a 30 second TTL,
while lookups to "db.service.consul" or "api.service.consul" will use the
5 second TTL from the wildcard.

View File

@ -136,6 +136,10 @@
<a href="/docs/guides/bootstrapping.html">Bootstrapping</a>
</li>
<li<%= sidebar_current("docs-guides-dns-cache") %>>
<a href="/docs/guides/dns-cache.html">DNS Caching</a>
</li>
<li<%= sidebar_current("docs-guides-forwarding") %>>
<a href="/docs/guides/forwarding.html">DNS Forwarding</a>
</li>