dns: implements prefix lookups for DNS TTL (#4605)

This will fix https://github.com/hashicorp/consul/issues/4509 and allow forinstance lb-* to match services lb-001 or lb-service-007.
This commit is contained in:
Pierre Souchay 2018-10-19 10:53:19 +02:00 committed by Jack Pearkes
parent c4d6d19d8e
commit fab55bee2b
6 changed files with 188 additions and 247 deletions

View File

@ -410,6 +410,7 @@ func TestCatalogServices(t *testing.T) {
t.Parallel() t.Parallel()
a := NewTestAgent(t.Name(), "") a := NewTestAgent(t.Name(), "")
defer a.Shutdown() defer a.Shutdown()
testrpc.WaitForTestAgent(t, a.RPC, "dc1")
// Register node // Register node
args := &structs.RegisterRequest{ args := &structs.RegisterRequest{

View File

@ -1340,7 +1340,7 @@ func TestCatalog_ListServices_NodeMetaFilter(t *testing.T) {
codec := rpcClient(t, s1) codec := rpcClient(t, s1)
defer codec.Close() defer codec.Close()
testrpc.WaitForLeader(t, s1.RPC, "dc1") testrpc.WaitForTestAgent(t, s1.RPC, "dc1")
// Add a new node with the right meta k/v pair // Add a new node with the right meta k/v pair
node := &structs.Node{Node: "foo", Address: "127.0.0.1", Meta: map[string]string{"somekey": "somevalue"}} node := &structs.Node{Node: "foo", Address: "127.0.0.1", Meta: map[string]string{"somekey": "somevalue"}}

View File

@ -450,6 +450,7 @@ func TestClient_SnapshotRPC(t *testing.T) {
// Try to join. // Try to join.
joinLAN(t, c1, s1) joinLAN(t, c1, s1)
testrpc.WaitForLeader(t, c1.RPC, "dc1")
// Wait until we've got a healthy server. // Wait until we've got a healthy server.
retry.Run(t, func(r *retry.R) { retry.Run(t, func(r *retry.R) {

View File

@ -12,6 +12,7 @@ import (
"regexp" "regexp"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-radix"
"github.com/coredns/coredns/plugin/pkg/dnsutil" "github.com/coredns/coredns/plugin/pkg/dnsutil"
"github.com/hashicorp/consul/agent/config" "github.com/hashicorp/consul/agent/config"
"github.com/hashicorp/consul/agent/consul" "github.com/hashicorp/consul/agent/consul"
@ -72,6 +73,9 @@ type DNSServer struct {
domain string domain string
recursors []string recursors []string
logger *log.Logger logger *log.Logger
// Those are handling prefix lookups
ttlRadix *radix.Tree
ttlStrict map[string]time.Duration
// disableCompression is the config.DisableCompression flag that can // disableCompression is the config.DisableCompression flag that can
// be safely changed at runtime. It always contains a bool and is // be safely changed at runtime. It always contains a bool and is
@ -99,7 +103,21 @@ func NewDNSServer(a *Agent) (*DNSServer, error) {
domain: domain, domain: domain,
logger: a.logger, logger: a.logger,
recursors: recursors, recursors: recursors,
ttlRadix: radix.New(),
ttlStrict: make(map[string]time.Duration),
} }
if dnscfg.ServiceTTL != nil {
for key, ttl := range dnscfg.ServiceTTL {
// All suffix with '*' are put in radix
// This include '*' that will match anything
if strings.HasSuffix(key, "*") {
srv.ttlRadix.Insert(key[:len(key)-1], ttl)
} else {
srv.ttlStrict[key] = ttl
}
}
}
srv.disableCompression.Store(a.config.DNSDisableCompression) srv.disableCompression.Store(a.config.DNSDisableCompression)
return srv, nil return srv, nil
@ -130,6 +148,22 @@ func GetDNSConfig(conf *config.RuntimeConfig) *dnsConfig {
} }
} }
// GetTTLForService Find the TTL for a given service.
// return ttl, true if found, 0, false otherwise
func (d *DNSServer) GetTTLForService(service string) (time.Duration, bool) {
if d.config.ServiceTTL != nil {
ttl, ok := d.ttlStrict[service]
if ok {
return ttl, true
}
_, ttlRaw, ok := d.ttlRadix.LongestPrefix(service)
if ok {
return ttlRaw.(time.Duration), true
}
}
return time.Duration(0), false
}
func (d *DNSServer) ListenAndServe(network, addr string, notif func()) error { func (d *DNSServer) ListenAndServe(network, addr string, notif func()) error {
mux := dns.NewServeMux() mux := dns.NewServeMux()
mux.HandleFunc("arpa.", d.handlePtr) mux.HandleFunc("arpa.", d.handlePtr)
@ -1027,14 +1061,7 @@ func (d *DNSServer) serviceLookup(network, datacenter, service, tag string, conn
out.Nodes.Shuffle() out.Nodes.Shuffle()
// Determine the TTL // Determine the TTL
var ttl time.Duration ttl, _ := d.GetTTLForService(service)
if d.config.ServiceTTL != nil {
var ok bool
ttl, ok = d.config.ServiceTTL[service]
if !ok {
ttl = d.config.ServiceTTL["*"]
}
}
// Add various responses depending on the request // Add various responses depending on the request
qType := req.Question[0].Qtype qType := req.Question[0].Qtype
@ -1155,11 +1182,7 @@ RPC:
d.logger.Printf("[WARN] dns: Failed to parse TTL '%s' for prepared query '%s', ignoring", out.DNS.TTL, query) d.logger.Printf("[WARN] dns: Failed to parse TTL '%s' for prepared query '%s', ignoring", out.DNS.TTL, query)
} }
} else if d.config.ServiceTTL != nil { } else if d.config.ServiceTTL != nil {
var ok bool ttl, _ = d.GetTTLForService(out.Service)
ttl, ok = d.config.ServiceTTL[out.Service]
if !ok {
ttl = d.config.ServiceTTL["*"]
}
} }
// If we have no nodes, return not found! // If we have no nodes, return not found!

View File

@ -4615,7 +4615,9 @@ func TestDNS_ServiceLookup_TTL(t *testing.T) {
a := NewTestAgent(t.Name(), ` a := NewTestAgent(t.Name(), `
dns_config { dns_config {
service_ttl = { service_ttl = {
"d*" = "42s"
"db" = "10s" "db" = "10s"
"db*" = "66s"
"*" = "5s" "*" = "5s"
} }
allow_stale = true allow_stale = true
@ -4624,15 +4626,17 @@ func TestDNS_ServiceLookup_TTL(t *testing.T) {
`) `)
defer a.Shutdown() defer a.Shutdown()
// Register node with 2 services for idx, service := range []string{"db", "dblb", "dk", "api"} {
nodeName := fmt.Sprintf("foo%d", idx)
address := fmt.Sprintf("127.0.0.%d", idx)
args := &structs.RegisterRequest{ args := &structs.RegisterRequest{
Datacenter: "dc1", Datacenter: "dc1",
Node: "foo", Node: nodeName,
Address: "127.0.0.1", Address: address,
Service: &structs.NodeService{ Service: &structs.NodeService{
Service: "db", Service: service,
Tags: []string{"master"}, Tags: []string{"master"},
Port: 12345, Port: 12345 + idx,
}, },
} }
@ -4640,38 +4644,28 @@ func TestDNS_ServiceLookup_TTL(t *testing.T) {
if err := a.RPC("Catalog.Register", args, &out); err != nil { if err := a.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err) t.Fatalf("err: %v", err)
} }
args = &structs.RegisterRequest{
Datacenter: "dc1",
Node: "foo",
Address: "127.0.0.1",
Service: &structs.NodeService{
Service: "api",
Port: 2222,
},
} }
if err := a.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
m := new(dns.Msg)
m.SetQuestion("db.service.consul.", dns.TypeSRV)
c := new(dns.Client) c := new(dns.Client)
expectResult := func(dnsQuery string, expectedTTL uint32) {
t.Run(dnsQuery, func(t *testing.T) {
m := new(dns.Msg)
m.SetQuestion(dnsQuery, dns.TypeSRV)
in, _, err := c.Exchange(m, a.DNSAddr()) in, _, err := c.Exchange(m, a.DNSAddr())
if err != nil { if err != nil {
t.Fatalf("err: %v", err) t.Fatalf("err: %v", err)
} }
if len(in.Answer) != 1 { if len(in.Answer) != 1 {
t.Fatalf("Bad: %#v", in) t.Fatalf("Bad: %#v, len is %d", in, len(in.Answer))
} }
srvRec, ok := in.Answer[0].(*dns.SRV) srvRec, ok := in.Answer[0].(*dns.SRV)
if !ok { if !ok {
t.Fatalf("Bad: %#v", in.Answer[0]) t.Fatalf("Bad: %#v", in.Answer[0])
} }
if srvRec.Hdr.Ttl != 10 { if srvRec.Hdr.Ttl != expectedTTL {
t.Fatalf("Bad: %#v", in.Answer[0]) t.Fatalf("Bad: %#v", in.Answer[0])
} }
@ -4679,36 +4673,19 @@ func TestDNS_ServiceLookup_TTL(t *testing.T) {
if !ok { if !ok {
t.Fatalf("Bad: %#v", in.Extra[0]) t.Fatalf("Bad: %#v", in.Extra[0])
} }
if aRec.Hdr.Ttl != 10 { if aRec.Hdr.Ttl != expectedTTL {
t.Fatalf("Bad: %#v", in.Extra[0]) t.Fatalf("Bad: %#v", in.Extra[0])
} }
})
m = new(dns.Msg)
m.SetQuestion("api.service.consul.", dns.TypeSRV)
in, _, err = c.Exchange(m, a.DNSAddr())
if err != nil {
t.Fatalf("err: %v", err)
}
if len(in.Answer) != 1 {
t.Fatalf("Bad: %#v", in)
}
srvRec, ok = in.Answer[0].(*dns.SRV)
if !ok {
t.Fatalf("Bad: %#v", in.Answer[0])
}
if srvRec.Hdr.Ttl != 5 {
t.Fatalf("Bad: %#v", in.Answer[0])
}
aRec, ok = in.Extra[0].(*dns.A)
if !ok {
t.Fatalf("Bad: %#v", in.Extra[0])
}
if aRec.Hdr.Ttl != 5 {
t.Fatalf("Bad: %#v", in.Extra[0])
} }
// Should have its exact TTL
expectResult("db.service.consul.", 10)
// Should match db*
expectResult("dblb.service.consul.", 66)
// Should match d*
expectResult("dk.service.consul.", 42)
// Should match *
expectResult("api.service.consul.", 5)
} }
func TestDNS_PreparedQuery_TTL(t *testing.T) { func TestDNS_PreparedQuery_TTL(t *testing.T) {
@ -4716,7 +4693,9 @@ func TestDNS_PreparedQuery_TTL(t *testing.T) {
a := NewTestAgent(t.Name(), ` a := NewTestAgent(t.Name(), `
dns_config { dns_config {
service_ttl = { service_ttl = {
"d*" = "42s"
"db" = "10s" "db" = "10s"
"db*" = "66s"
"*" = "5s" "*" = "5s"
} }
allow_stale = true allow_stale = true
@ -4726,16 +4705,17 @@ func TestDNS_PreparedQuery_TTL(t *testing.T) {
defer a.Shutdown() defer a.Shutdown()
testrpc.WaitForLeader(t, a.RPC, "dc1") testrpc.WaitForLeader(t, a.RPC, "dc1")
// Register a node and a service. for idx, service := range []string{"db", "dblb", "dk", "api"} {
{ nodeName := fmt.Sprintf("foo%d", idx)
address := fmt.Sprintf("127.0.0.%d", idx)
args := &structs.RegisterRequest{ args := &structs.RegisterRequest{
Datacenter: "dc1", Datacenter: "dc1",
Node: "foo", Node: nodeName,
Address: "127.0.0.1", Address: address,
Service: &structs.NodeService{ Service: &structs.NodeService{
Service: "db", Service: service,
Tags: []string{"master"}, Tags: []string{"master"},
Port: 12345, Port: 12345 + idx,
}, },
} }
@ -4743,34 +4723,15 @@ func TestDNS_PreparedQuery_TTL(t *testing.T) {
if err := a.RPC("Catalog.Register", args, &out); err != nil { if err := a.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err) t.Fatalf("err: %v", err)
} }
// Register prepared query without TTL and with TTL
args = &structs.RegisterRequest{
Datacenter: "dc1",
Node: "foo",
Address: "127.0.0.1",
Service: &structs.NodeService{
Service: "api",
Port: 2222,
},
}
if err := a.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
}
// Register prepared queries with and without a TTL set for "db", as
// well as one for "api".
{ {
args := &structs.PreparedQueryRequest{ args := &structs.PreparedQueryRequest{
Datacenter: "dc1", Datacenter: "dc1",
Op: structs.PreparedQueryCreate, Op: structs.PreparedQueryCreate,
Query: &structs.PreparedQuery{ Query: &structs.PreparedQuery{
Name: "db-ttl", Name: service,
Service: structs.ServiceQuery{ Service: structs.ServiceQuery{
Service: "db", Service: service,
},
DNS: structs.QueryDNSOptions{
TTL: "18s",
}, },
}, },
} }
@ -4779,29 +4740,17 @@ func TestDNS_PreparedQuery_TTL(t *testing.T) {
if err := a.RPC("PreparedQuery.Apply", args, &id); err != nil { if err := a.RPC("PreparedQuery.Apply", args, &id); err != nil {
t.Fatalf("err: %v", err) t.Fatalf("err: %v", err)
} }
queryTTL := fmt.Sprintf("%s-ttl", service)
args = &structs.PreparedQueryRequest{ args = &structs.PreparedQueryRequest{
Datacenter: "dc1", Datacenter: "dc1",
Op: structs.PreparedQueryCreate, Op: structs.PreparedQueryCreate,
Query: &structs.PreparedQuery{ Query: &structs.PreparedQuery{
Name: "db-nottl", Name: queryTTL,
Service: structs.ServiceQuery{ Service: structs.ServiceQuery{
Service: "db", Service: service,
}, },
}, DNS: structs.QueryDNSOptions{
} TTL: "18s",
if err := a.RPC("PreparedQuery.Apply", args, &id); err != nil {
t.Fatalf("err: %v", err)
}
args = &structs.PreparedQueryRequest{
Datacenter: "dc1",
Op: structs.PreparedQueryCreate,
Query: &structs.PreparedQuery{
Name: "api-nottl",
Service: structs.ServiceQuery{
Service: "api",
}, },
}, },
} }
@ -4810,27 +4759,28 @@ func TestDNS_PreparedQuery_TTL(t *testing.T) {
t.Fatalf("err: %v", err) t.Fatalf("err: %v", err)
} }
} }
}
// Make sure the TTL is set when requested, and overrides the agent-
// specific config since the query takes precedence.
m := new(dns.Msg)
m.SetQuestion("db-ttl.query.consul.", dns.TypeSRV)
c := new(dns.Client) c := new(dns.Client)
expectResult := func(dnsQuery string, expectedTTL uint32) {
t.Run(dnsQuery, func(t *testing.T) {
m := new(dns.Msg)
m.SetQuestion(dnsQuery, dns.TypeSRV)
in, _, err := c.Exchange(m, a.DNSAddr()) in, _, err := c.Exchange(m, a.DNSAddr())
if err != nil { if err != nil {
t.Fatalf("err: %v", err) t.Fatalf("err: %v", err)
} }
if len(in.Answer) != 1 { if len(in.Answer) != 1 {
t.Fatalf("Bad: %#v", in) t.Fatalf("Bad: %#v, len is %d", in, len(in.Answer))
} }
srvRec, ok := in.Answer[0].(*dns.SRV) srvRec, ok := in.Answer[0].(*dns.SRV)
if !ok { if !ok {
t.Fatalf("Bad: %#v", in.Answer[0]) t.Fatalf("Bad: %#v", in.Answer[0])
} }
if srvRec.Hdr.Ttl != 18 { if srvRec.Hdr.Ttl != expectedTTL {
t.Fatalf("Bad: %#v", in.Answer[0]) t.Fatalf("Bad: %#v", in.Answer[0])
} }
@ -4838,67 +4788,24 @@ func TestDNS_PreparedQuery_TTL(t *testing.T) {
if !ok { if !ok {
t.Fatalf("Bad: %#v", in.Extra[0]) t.Fatalf("Bad: %#v", in.Extra[0])
} }
if aRec.Hdr.Ttl != 18 { if aRec.Hdr.Ttl != expectedTTL {
t.Fatalf("Bad: %#v", in.Extra[0]) t.Fatalf("Bad: %#v", in.Extra[0])
} }
})
// And the TTL should take the service-specific value from the agent's
// config otherwise.
m = new(dns.Msg)
m.SetQuestion("db-nottl.query.consul.", dns.TypeSRV)
in, _, err = c.Exchange(m, a.DNSAddr())
if err != nil {
t.Fatalf("err: %v", err)
} }
if len(in.Answer) != 1 { // Should have its exact TTL
t.Fatalf("Bad: %#v", in) expectResult("db.query.consul.", 10)
} expectResult("db-ttl.query.consul.", 18)
// Should match db*
srvRec, ok = in.Answer[0].(*dns.SRV) expectResult("dblb.query.consul.", 66)
if !ok { expectResult("dblb-ttl.query.consul.", 18)
t.Fatalf("Bad: %#v", in.Answer[0]) // Should match d*
} expectResult("dk.query.consul.", 42)
if srvRec.Hdr.Ttl != 10 { expectResult("dk-ttl.query.consul.", 18)
t.Fatalf("Bad: %#v", in.Answer[0]) // Should be the default value
} expectResult("api.query.consul.", 5)
expectResult("api-ttl.query.consul.", 18)
aRec, ok = in.Extra[0].(*dns.A)
if !ok {
t.Fatalf("Bad: %#v", in.Extra[0])
}
if aRec.Hdr.Ttl != 10 {
t.Fatalf("Bad: %#v", in.Extra[0])
}
// If there's no query TTL and no service-specific value then the wild
// card value should be used.
m = new(dns.Msg)
m.SetQuestion("api-nottl.query.consul.", dns.TypeSRV)
in, _, err = c.Exchange(m, a.DNSAddr())
if err != nil {
t.Fatalf("err: %v", err)
}
if len(in.Answer) != 1 {
t.Fatalf("Bad: %#v", in)
}
srvRec, ok = in.Answer[0].(*dns.SRV)
if !ok {
t.Fatalf("Bad: %#v", in.Answer[0])
}
if srvRec.Hdr.Ttl != 5 {
t.Fatalf("Bad: %#v", in.Answer[0])
}
aRec, ok = in.Extra[0].(*dns.A)
if !ok {
t.Fatalf("Bad: %#v", in.Extra[0])
}
if aRec.Hdr.Ttl != 5 {
t.Fatalf("Bad: %#v", in.Extra[0])
}
} }
func TestDNS_PreparedQuery_Failover(t *testing.T) { func TestDNS_PreparedQuery_Failover(t *testing.T) {

View File

@ -85,7 +85,11 @@ To enable caching of node lookups (e.g. "foo.node.consul"), we can set the
Service TTLs can be specified in a more granular fashion. You can set TTLs Service TTLs can be specified in a more granular fashion. You can set TTLs
per-service, with a wildcard TTL as the default. This is specified using the per-service, with a wildcard TTL as the default. This is specified using the
[`dns_config.service_ttl`](/docs/agent/options.html#service_ttl) map. The "*" [`dns_config.service_ttl`](/docs/agent/options.html#service_ttl) map. The "*"
service is the wildcard service. is supported at the end of any prefix and a lower precedence than strict match,
so 'my-service-x' has precedence over 'my-service-*', when performing wildcard
match, the longest path is taken into account, thus 'my-service-*' TTL will
be used instead of 'my-*' or '*'. With the same rule, '*' is the default value
when nothing else matches. If no match is found the TTL defaults to 0.
For example, a [`dns_config`](/docs/agent/options.html#dns_config) that provides For example, a [`dns_config`](/docs/agent/options.html#dns_config) that provides
a wildcard TTL and a specific TTL for a service might look like this: a wildcard TTL and a specific TTL for a service might look like this:
@ -95,7 +99,9 @@ a wildcard TTL and a specific TTL for a service might look like this:
"dns_config": { "dns_config": {
"service_ttl": { "service_ttl": {
"*": "5s", "*": "5s",
"web": "30s" "web": "30s",
"db*": "10s",
"db-master": "3s"
} }
} }
} }
@ -105,6 +111,9 @@ This sets all lookups to "web.service.consul" to use a 30 second TTL
while lookups to "db.service.consul" or "api.service.consul" will use the while lookups to "db.service.consul" or "api.service.consul" will use the
5 second TTL from the wildcard. 5 second TTL from the wildcard.
All lookups matching "db*" would get a 10 seconds TTL except "db-master"
that would have a 3 seconds TTL.
[Prepared Queries](/api/query.html) provide an additional [Prepared Queries](/api/query.html) provide an additional
level of control over TTL. They allow for the TTL to be defined along with level of control over TTL. They allow for the TTL to be defined along with
the query, and they can be changed on the fly by updating the query definition. the query, and they can be changed on the fly by updating the query definition.