From fab55bee2b018e3c612aa16ef7262db09dde9c0b Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Fri, 19 Oct 2018 10:53:19 +0200 Subject: [PATCH] dns: implements prefix lookups for DNS TTL (#4605) This will fix https://github.com/hashicorp/consul/issues/4509 and allow forinstance lb-* to match services lb-001 or lb-service-007. --- agent/catalog_endpoint_test.go | 1 + agent/consul/catalog_endpoint_test.go | 2 +- agent/consul/client_test.go | 1 + agent/dns.go | 49 ++- agent/dns_test.go | 369 +++++++------------ website/source/docs/guides/dns-cache.html.md | 13 +- 6 files changed, 188 insertions(+), 247 deletions(-) diff --git a/agent/catalog_endpoint_test.go b/agent/catalog_endpoint_test.go index d913a00330..cd8aaf0cf1 100644 --- a/agent/catalog_endpoint_test.go +++ b/agent/catalog_endpoint_test.go @@ -410,6 +410,7 @@ func TestCatalogServices(t *testing.T) { t.Parallel() a := NewTestAgent(t.Name(), "") defer a.Shutdown() + testrpc.WaitForTestAgent(t, a.RPC, "dc1") // Register node args := &structs.RegisterRequest{ diff --git a/agent/consul/catalog_endpoint_test.go b/agent/consul/catalog_endpoint_test.go index d7569fa996..176909fa76 100644 --- a/agent/consul/catalog_endpoint_test.go +++ b/agent/consul/catalog_endpoint_test.go @@ -1340,7 +1340,7 @@ func TestCatalog_ListServices_NodeMetaFilter(t *testing.T) { codec := rpcClient(t, s1) defer codec.Close() - testrpc.WaitForLeader(t, s1.RPC, "dc1") + testrpc.WaitForTestAgent(t, s1.RPC, "dc1") // Add a new node with the right meta k/v pair node := &structs.Node{Node: "foo", Address: "127.0.0.1", Meta: map[string]string{"somekey": "somevalue"}} diff --git a/agent/consul/client_test.go b/agent/consul/client_test.go index f61541b5ef..d28e3994ee 100644 --- a/agent/consul/client_test.go +++ b/agent/consul/client_test.go @@ -450,6 +450,7 @@ func TestClient_SnapshotRPC(t *testing.T) { // Try to join. joinLAN(t, c1, s1) + testrpc.WaitForLeader(t, c1.RPC, "dc1") // Wait until we've got a healthy server. retry.Run(t, func(r *retry.R) { diff --git a/agent/dns.go b/agent/dns.go index 36ad19e20d..3e7213b2a7 100644 --- a/agent/dns.go +++ b/agent/dns.go @@ -12,6 +12,7 @@ import ( "regexp" "github.com/armon/go-metrics" + "github.com/armon/go-radix" "github.com/coredns/coredns/plugin/pkg/dnsutil" "github.com/hashicorp/consul/agent/config" "github.com/hashicorp/consul/agent/consul" @@ -72,6 +73,9 @@ type DNSServer struct { domain string recursors []string logger *log.Logger + // Those are handling prefix lookups + ttlRadix *radix.Tree + ttlStrict map[string]time.Duration // disableCompression is the config.DisableCompression flag that can // be safely changed at runtime. It always contains a bool and is @@ -99,7 +103,21 @@ func NewDNSServer(a *Agent) (*DNSServer, error) { domain: domain, logger: a.logger, recursors: recursors, + ttlRadix: radix.New(), + ttlStrict: make(map[string]time.Duration), } + if dnscfg.ServiceTTL != nil { + for key, ttl := range dnscfg.ServiceTTL { + // All suffix with '*' are put in radix + // This include '*' that will match anything + if strings.HasSuffix(key, "*") { + srv.ttlRadix.Insert(key[:len(key)-1], ttl) + } else { + srv.ttlStrict[key] = ttl + } + } + } + srv.disableCompression.Store(a.config.DNSDisableCompression) return srv, nil @@ -130,6 +148,22 @@ func GetDNSConfig(conf *config.RuntimeConfig) *dnsConfig { } } +// GetTTLForService Find the TTL for a given service. +// return ttl, true if found, 0, false otherwise +func (d *DNSServer) GetTTLForService(service string) (time.Duration, bool) { + if d.config.ServiceTTL != nil { + ttl, ok := d.ttlStrict[service] + if ok { + return ttl, true + } + _, ttlRaw, ok := d.ttlRadix.LongestPrefix(service) + if ok { + return ttlRaw.(time.Duration), true + } + } + return time.Duration(0), false +} + func (d *DNSServer) ListenAndServe(network, addr string, notif func()) error { mux := dns.NewServeMux() mux.HandleFunc("arpa.", d.handlePtr) @@ -1027,14 +1061,7 @@ func (d *DNSServer) serviceLookup(network, datacenter, service, tag string, conn out.Nodes.Shuffle() // Determine the TTL - var ttl time.Duration - if d.config.ServiceTTL != nil { - var ok bool - ttl, ok = d.config.ServiceTTL[service] - if !ok { - ttl = d.config.ServiceTTL["*"] - } - } + ttl, _ := d.GetTTLForService(service) // Add various responses depending on the request qType := req.Question[0].Qtype @@ -1155,11 +1182,7 @@ RPC: d.logger.Printf("[WARN] dns: Failed to parse TTL '%s' for prepared query '%s', ignoring", out.DNS.TTL, query) } } else if d.config.ServiceTTL != nil { - var ok bool - ttl, ok = d.config.ServiceTTL[out.Service] - if !ok { - ttl = d.config.ServiceTTL["*"] - } + ttl, _ = d.GetTTLForService(out.Service) } // If we have no nodes, return not found! diff --git a/agent/dns_test.go b/agent/dns_test.go index bc6078d0ab..1613852df2 100644 --- a/agent/dns_test.go +++ b/agent/dns_test.go @@ -4615,7 +4615,9 @@ func TestDNS_ServiceLookup_TTL(t *testing.T) { a := NewTestAgent(t.Name(), ` dns_config { service_ttl = { + "d*" = "42s" "db" = "10s" + "db*" = "66s" "*" = "5s" } allow_stale = true @@ -4624,91 +4626,66 @@ func TestDNS_ServiceLookup_TTL(t *testing.T) { `) defer a.Shutdown() - // Register node with 2 services - args := &structs.RegisterRequest{ - Datacenter: "dc1", - Node: "foo", - Address: "127.0.0.1", - Service: &structs.NodeService{ - Service: "db", - Tags: []string{"master"}, - Port: 12345, - }, - } + for idx, service := range []string{"db", "dblb", "dk", "api"} { + nodeName := fmt.Sprintf("foo%d", idx) + address := fmt.Sprintf("127.0.0.%d", idx) + args := &structs.RegisterRequest{ + Datacenter: "dc1", + Node: nodeName, + Address: address, + Service: &structs.NodeService{ + Service: service, + Tags: []string{"master"}, + Port: 12345 + idx, + }, + } - var out struct{} - if err := a.RPC("Catalog.Register", args, &out); err != nil { - t.Fatalf("err: %v", err) + var out struct{} + if err := a.RPC("Catalog.Register", args, &out); err != nil { + t.Fatalf("err: %v", err) + } } - args = &structs.RegisterRequest{ - Datacenter: "dc1", - Node: "foo", - Address: "127.0.0.1", - Service: &structs.NodeService{ - Service: "api", - Port: 2222, - }, - } - if err := a.RPC("Catalog.Register", args, &out); err != nil { - t.Fatalf("err: %v", err) - } - - m := new(dns.Msg) - m.SetQuestion("db.service.consul.", dns.TypeSRV) - c := new(dns.Client) - in, _, err := c.Exchange(m, a.DNSAddr()) - if err != nil { - t.Fatalf("err: %v", err) - } + expectResult := func(dnsQuery string, expectedTTL uint32) { + t.Run(dnsQuery, func(t *testing.T) { + m := new(dns.Msg) + m.SetQuestion(dnsQuery, dns.TypeSRV) - if len(in.Answer) != 1 { - t.Fatalf("Bad: %#v", in) - } + in, _, err := c.Exchange(m, a.DNSAddr()) + if err != nil { + t.Fatalf("err: %v", err) + } - srvRec, ok := in.Answer[0].(*dns.SRV) - if !ok { - t.Fatalf("Bad: %#v", in.Answer[0]) - } - if srvRec.Hdr.Ttl != 10 { - t.Fatalf("Bad: %#v", in.Answer[0]) - } + if len(in.Answer) != 1 { + t.Fatalf("Bad: %#v, len is %d", in, len(in.Answer)) + } - aRec, ok := in.Extra[0].(*dns.A) - if !ok { - t.Fatalf("Bad: %#v", in.Extra[0]) - } - if aRec.Hdr.Ttl != 10 { - t.Fatalf("Bad: %#v", in.Extra[0]) - } + srvRec, ok := in.Answer[0].(*dns.SRV) + if !ok { + t.Fatalf("Bad: %#v", in.Answer[0]) + } + if srvRec.Hdr.Ttl != expectedTTL { + t.Fatalf("Bad: %#v", in.Answer[0]) + } - m = new(dns.Msg) - m.SetQuestion("api.service.consul.", dns.TypeSRV) - in, _, err = c.Exchange(m, a.DNSAddr()) - if err != nil { - t.Fatalf("err: %v", err) - } - - if len(in.Answer) != 1 { - t.Fatalf("Bad: %#v", in) - } - - srvRec, ok = in.Answer[0].(*dns.SRV) - if !ok { - t.Fatalf("Bad: %#v", in.Answer[0]) - } - if srvRec.Hdr.Ttl != 5 { - t.Fatalf("Bad: %#v", in.Answer[0]) - } - - aRec, ok = in.Extra[0].(*dns.A) - if !ok { - t.Fatalf("Bad: %#v", in.Extra[0]) - } - if aRec.Hdr.Ttl != 5 { - t.Fatalf("Bad: %#v", in.Extra[0]) + aRec, ok := in.Extra[0].(*dns.A) + if !ok { + t.Fatalf("Bad: %#v", in.Extra[0]) + } + if aRec.Hdr.Ttl != expectedTTL { + t.Fatalf("Bad: %#v", in.Extra[0]) + } + }) } + // Should have its exact TTL + expectResult("db.service.consul.", 10) + // Should match db* + expectResult("dblb.service.consul.", 66) + // Should match d* + expectResult("dk.service.consul.", 42) + // Should match * + expectResult("api.service.consul.", 5) } func TestDNS_PreparedQuery_TTL(t *testing.T) { @@ -4716,7 +4693,9 @@ func TestDNS_PreparedQuery_TTL(t *testing.T) { a := NewTestAgent(t.Name(), ` dns_config { service_ttl = { + "d*" = "42s" "db" = "10s" + "db*" = "66s" "*" = "5s" } allow_stale = true @@ -4726,16 +4705,17 @@ func TestDNS_PreparedQuery_TTL(t *testing.T) { defer a.Shutdown() testrpc.WaitForLeader(t, a.RPC, "dc1") - // Register a node and a service. - { + for idx, service := range []string{"db", "dblb", "dk", "api"} { + nodeName := fmt.Sprintf("foo%d", idx) + address := fmt.Sprintf("127.0.0.%d", idx) args := &structs.RegisterRequest{ Datacenter: "dc1", - Node: "foo", - Address: "127.0.0.1", + Node: nodeName, + Address: address, Service: &structs.NodeService{ - Service: "db", + Service: service, Tags: []string{"master"}, - Port: 12345, + Port: 12345 + idx, }, } @@ -4743,162 +4723,89 @@ func TestDNS_PreparedQuery_TTL(t *testing.T) { if err := a.RPC("Catalog.Register", args, &out); err != nil { t.Fatalf("err: %v", err) } + // Register prepared query without TTL and with TTL + { + args := &structs.PreparedQueryRequest{ + Datacenter: "dc1", + Op: structs.PreparedQueryCreate, + Query: &structs.PreparedQuery{ + Name: service, + Service: structs.ServiceQuery{ + Service: service, + }, + }, + } - args = &structs.RegisterRequest{ - Datacenter: "dc1", - Node: "foo", - Address: "127.0.0.1", - Service: &structs.NodeService{ - Service: "api", - Port: 2222, - }, - } - if err := a.RPC("Catalog.Register", args, &out); err != nil { - t.Fatalf("err: %v", err) + var id string + if err := a.RPC("PreparedQuery.Apply", args, &id); err != nil { + t.Fatalf("err: %v", err) + } + queryTTL := fmt.Sprintf("%s-ttl", service) + args = &structs.PreparedQueryRequest{ + Datacenter: "dc1", + Op: structs.PreparedQueryCreate, + Query: &structs.PreparedQuery{ + Name: queryTTL, + Service: structs.ServiceQuery{ + Service: service, + }, + DNS: structs.QueryDNSOptions{ + TTL: "18s", + }, + }, + } + + if err := a.RPC("PreparedQuery.Apply", args, &id); err != nil { + t.Fatalf("err: %v", err) + } } } - // Register prepared queries with and without a TTL set for "db", as - // well as one for "api". - { - args := &structs.PreparedQueryRequest{ - Datacenter: "dc1", - Op: structs.PreparedQueryCreate, - Query: &structs.PreparedQuery{ - Name: "db-ttl", - Service: structs.ServiceQuery{ - Service: "db", - }, - DNS: structs.QueryDNSOptions{ - TTL: "18s", - }, - }, - } - - var id string - if err := a.RPC("PreparedQuery.Apply", args, &id); err != nil { - t.Fatalf("err: %v", err) - } - - args = &structs.PreparedQueryRequest{ - Datacenter: "dc1", - Op: structs.PreparedQueryCreate, - Query: &structs.PreparedQuery{ - Name: "db-nottl", - Service: structs.ServiceQuery{ - Service: "db", - }, - }, - } - - if err := a.RPC("PreparedQuery.Apply", args, &id); err != nil { - t.Fatalf("err: %v", err) - } - - args = &structs.PreparedQueryRequest{ - Datacenter: "dc1", - Op: structs.PreparedQueryCreate, - Query: &structs.PreparedQuery{ - Name: "api-nottl", - Service: structs.ServiceQuery{ - Service: "api", - }, - }, - } - - if err := a.RPC("PreparedQuery.Apply", args, &id); err != nil { - t.Fatalf("err: %v", err) - } - } - - // Make sure the TTL is set when requested, and overrides the agent- - // specific config since the query takes precedence. - m := new(dns.Msg) - m.SetQuestion("db-ttl.query.consul.", dns.TypeSRV) - c := new(dns.Client) - in, _, err := c.Exchange(m, a.DNSAddr()) - if err != nil { - t.Fatalf("err: %v", err) + expectResult := func(dnsQuery string, expectedTTL uint32) { + t.Run(dnsQuery, func(t *testing.T) { + m := new(dns.Msg) + m.SetQuestion(dnsQuery, dns.TypeSRV) + + in, _, err := c.Exchange(m, a.DNSAddr()) + if err != nil { + t.Fatalf("err: %v", err) + } + + if len(in.Answer) != 1 { + t.Fatalf("Bad: %#v, len is %d", in, len(in.Answer)) + } + + srvRec, ok := in.Answer[0].(*dns.SRV) + if !ok { + t.Fatalf("Bad: %#v", in.Answer[0]) + } + if srvRec.Hdr.Ttl != expectedTTL { + t.Fatalf("Bad: %#v", in.Answer[0]) + } + + aRec, ok := in.Extra[0].(*dns.A) + if !ok { + t.Fatalf("Bad: %#v", in.Extra[0]) + } + if aRec.Hdr.Ttl != expectedTTL { + t.Fatalf("Bad: %#v", in.Extra[0]) + } + }) } - if len(in.Answer) != 1 { - t.Fatalf("Bad: %#v", in) - } - - srvRec, ok := in.Answer[0].(*dns.SRV) - if !ok { - t.Fatalf("Bad: %#v", in.Answer[0]) - } - if srvRec.Hdr.Ttl != 18 { - t.Fatalf("Bad: %#v", in.Answer[0]) - } - - aRec, ok := in.Extra[0].(*dns.A) - if !ok { - t.Fatalf("Bad: %#v", in.Extra[0]) - } - if aRec.Hdr.Ttl != 18 { - t.Fatalf("Bad: %#v", in.Extra[0]) - } - - // And the TTL should take the service-specific value from the agent's - // config otherwise. - m = new(dns.Msg) - m.SetQuestion("db-nottl.query.consul.", dns.TypeSRV) - in, _, err = c.Exchange(m, a.DNSAddr()) - if err != nil { - t.Fatalf("err: %v", err) - } - - if len(in.Answer) != 1 { - t.Fatalf("Bad: %#v", in) - } - - srvRec, ok = in.Answer[0].(*dns.SRV) - if !ok { - t.Fatalf("Bad: %#v", in.Answer[0]) - } - if srvRec.Hdr.Ttl != 10 { - t.Fatalf("Bad: %#v", in.Answer[0]) - } - - aRec, ok = in.Extra[0].(*dns.A) - if !ok { - t.Fatalf("Bad: %#v", in.Extra[0]) - } - if aRec.Hdr.Ttl != 10 { - t.Fatalf("Bad: %#v", in.Extra[0]) - } - - // If there's no query TTL and no service-specific value then the wild - // card value should be used. - m = new(dns.Msg) - m.SetQuestion("api-nottl.query.consul.", dns.TypeSRV) - in, _, err = c.Exchange(m, a.DNSAddr()) - if err != nil { - t.Fatalf("err: %v", err) - } - - if len(in.Answer) != 1 { - t.Fatalf("Bad: %#v", in) - } - - srvRec, ok = in.Answer[0].(*dns.SRV) - if !ok { - t.Fatalf("Bad: %#v", in.Answer[0]) - } - if srvRec.Hdr.Ttl != 5 { - t.Fatalf("Bad: %#v", in.Answer[0]) - } - - aRec, ok = in.Extra[0].(*dns.A) - if !ok { - t.Fatalf("Bad: %#v", in.Extra[0]) - } - if aRec.Hdr.Ttl != 5 { - t.Fatalf("Bad: %#v", in.Extra[0]) - } + // Should have its exact TTL + expectResult("db.query.consul.", 10) + expectResult("db-ttl.query.consul.", 18) + // Should match db* + expectResult("dblb.query.consul.", 66) + expectResult("dblb-ttl.query.consul.", 18) + // Should match d* + expectResult("dk.query.consul.", 42) + expectResult("dk-ttl.query.consul.", 18) + // Should be the default value + expectResult("api.query.consul.", 5) + expectResult("api-ttl.query.consul.", 18) } func TestDNS_PreparedQuery_Failover(t *testing.T) { diff --git a/website/source/docs/guides/dns-cache.html.md b/website/source/docs/guides/dns-cache.html.md index 1510f675b4..b4bacec091 100644 --- a/website/source/docs/guides/dns-cache.html.md +++ b/website/source/docs/guides/dns-cache.html.md @@ -85,7 +85,11 @@ To enable caching of node lookups (e.g. "foo.node.consul"), we can set the Service TTLs can be specified in a more granular fashion. You can set TTLs per-service, with a wildcard TTL as the default. This is specified using the [`dns_config.service_ttl`](/docs/agent/options.html#service_ttl) map. The "*" -service is the wildcard service. +is supported at the end of any prefix and a lower precedence than strict match, +so 'my-service-x' has precedence over 'my-service-*', when performing wildcard +match, the longest path is taken into account, thus 'my-service-*' TTL will +be used instead of 'my-*' or '*'. With the same rule, '*' is the default value +when nothing else matches. If no match is found the TTL defaults to 0. For example, a [`dns_config`](/docs/agent/options.html#dns_config) that provides a wildcard TTL and a specific TTL for a service might look like this: @@ -95,7 +99,9 @@ a wildcard TTL and a specific TTL for a service might look like this: "dns_config": { "service_ttl": { "*": "5s", - "web": "30s" + "web": "30s", + "db*": "10s", + "db-master": "3s" } } } @@ -105,6 +111,9 @@ This sets all lookups to "web.service.consul" to use a 30 second TTL while lookups to "db.service.consul" or "api.service.consul" will use the 5 second TTL from the wildcard. +All lookups matching "db*" would get a 10 seconds TTL except "db-master" +that would have a 3 seconds TTL. + [Prepared Queries](/api/query.html) provide an additional level of control over TTL. They allow for the TTL to be defined along with the query, and they can be changed on the fly by updating the query definition.