dns: implements prefix lookups for DNS TTL (#4605)

This will fix https://github.com/hashicorp/consul/issues/4509 and allow forinstance lb-* to match services lb-001 or lb-service-007.
This commit is contained in:
Pierre Souchay 2018-10-19 10:53:19 +02:00 committed by Jack Pearkes
parent c4d6d19d8e
commit fab55bee2b
6 changed files with 188 additions and 247 deletions

View File

@ -410,6 +410,7 @@ func TestCatalogServices(t *testing.T) {
t.Parallel()
a := NewTestAgent(t.Name(), "")
defer a.Shutdown()
testrpc.WaitForTestAgent(t, a.RPC, "dc1")
// Register node
args := &structs.RegisterRequest{

View File

@ -1340,7 +1340,7 @@ func TestCatalog_ListServices_NodeMetaFilter(t *testing.T) {
codec := rpcClient(t, s1)
defer codec.Close()
testrpc.WaitForLeader(t, s1.RPC, "dc1")
testrpc.WaitForTestAgent(t, s1.RPC, "dc1")
// Add a new node with the right meta k/v pair
node := &structs.Node{Node: "foo", Address: "127.0.0.1", Meta: map[string]string{"somekey": "somevalue"}}

View File

@ -450,6 +450,7 @@ func TestClient_SnapshotRPC(t *testing.T) {
// Try to join.
joinLAN(t, c1, s1)
testrpc.WaitForLeader(t, c1.RPC, "dc1")
// Wait until we've got a healthy server.
retry.Run(t, func(r *retry.R) {

View File

@ -12,6 +12,7 @@ import (
"regexp"
"github.com/armon/go-metrics"
"github.com/armon/go-radix"
"github.com/coredns/coredns/plugin/pkg/dnsutil"
"github.com/hashicorp/consul/agent/config"
"github.com/hashicorp/consul/agent/consul"
@ -72,6 +73,9 @@ type DNSServer struct {
domain string
recursors []string
logger *log.Logger
// Those are handling prefix lookups
ttlRadix *radix.Tree
ttlStrict map[string]time.Duration
// disableCompression is the config.DisableCompression flag that can
// be safely changed at runtime. It always contains a bool and is
@ -99,7 +103,21 @@ func NewDNSServer(a *Agent) (*DNSServer, error) {
domain: domain,
logger: a.logger,
recursors: recursors,
ttlRadix: radix.New(),
ttlStrict: make(map[string]time.Duration),
}
if dnscfg.ServiceTTL != nil {
for key, ttl := range dnscfg.ServiceTTL {
// All suffix with '*' are put in radix
// This include '*' that will match anything
if strings.HasSuffix(key, "*") {
srv.ttlRadix.Insert(key[:len(key)-1], ttl)
} else {
srv.ttlStrict[key] = ttl
}
}
}
srv.disableCompression.Store(a.config.DNSDisableCompression)
return srv, nil
@ -130,6 +148,22 @@ func GetDNSConfig(conf *config.RuntimeConfig) *dnsConfig {
}
}
// GetTTLForService Find the TTL for a given service.
// return ttl, true if found, 0, false otherwise
func (d *DNSServer) GetTTLForService(service string) (time.Duration, bool) {
if d.config.ServiceTTL != nil {
ttl, ok := d.ttlStrict[service]
if ok {
return ttl, true
}
_, ttlRaw, ok := d.ttlRadix.LongestPrefix(service)
if ok {
return ttlRaw.(time.Duration), true
}
}
return time.Duration(0), false
}
func (d *DNSServer) ListenAndServe(network, addr string, notif func()) error {
mux := dns.NewServeMux()
mux.HandleFunc("arpa.", d.handlePtr)
@ -1027,14 +1061,7 @@ func (d *DNSServer) serviceLookup(network, datacenter, service, tag string, conn
out.Nodes.Shuffle()
// Determine the TTL
var ttl time.Duration
if d.config.ServiceTTL != nil {
var ok bool
ttl, ok = d.config.ServiceTTL[service]
if !ok {
ttl = d.config.ServiceTTL["*"]
}
}
ttl, _ := d.GetTTLForService(service)
// Add various responses depending on the request
qType := req.Question[0].Qtype
@ -1155,11 +1182,7 @@ RPC:
d.logger.Printf("[WARN] dns: Failed to parse TTL '%s' for prepared query '%s', ignoring", out.DNS.TTL, query)
}
} else if d.config.ServiceTTL != nil {
var ok bool
ttl, ok = d.config.ServiceTTL[out.Service]
if !ok {
ttl = d.config.ServiceTTL["*"]
}
ttl, _ = d.GetTTLForService(out.Service)
}
// If we have no nodes, return not found!

View File

@ -4615,7 +4615,9 @@ func TestDNS_ServiceLookup_TTL(t *testing.T) {
a := NewTestAgent(t.Name(), `
dns_config {
service_ttl = {
"d*" = "42s"
"db" = "10s"
"db*" = "66s"
"*" = "5s"
}
allow_stale = true
@ -4624,15 +4626,17 @@ func TestDNS_ServiceLookup_TTL(t *testing.T) {
`)
defer a.Shutdown()
// Register node with 2 services
for idx, service := range []string{"db", "dblb", "dk", "api"} {
nodeName := fmt.Sprintf("foo%d", idx)
address := fmt.Sprintf("127.0.0.%d", idx)
args := &structs.RegisterRequest{
Datacenter: "dc1",
Node: "foo",
Address: "127.0.0.1",
Node: nodeName,
Address: address,
Service: &structs.NodeService{
Service: "db",
Service: service,
Tags: []string{"master"},
Port: 12345,
Port: 12345 + idx,
},
}
@ -4640,38 +4644,28 @@ func TestDNS_ServiceLookup_TTL(t *testing.T) {
if err := a.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
args = &structs.RegisterRequest{
Datacenter: "dc1",
Node: "foo",
Address: "127.0.0.1",
Service: &structs.NodeService{
Service: "api",
Port: 2222,
},
}
if err := a.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
m := new(dns.Msg)
m.SetQuestion("db.service.consul.", dns.TypeSRV)
c := new(dns.Client)
expectResult := func(dnsQuery string, expectedTTL uint32) {
t.Run(dnsQuery, func(t *testing.T) {
m := new(dns.Msg)
m.SetQuestion(dnsQuery, dns.TypeSRV)
in, _, err := c.Exchange(m, a.DNSAddr())
if err != nil {
t.Fatalf("err: %v", err)
}
if len(in.Answer) != 1 {
t.Fatalf("Bad: %#v", in)
t.Fatalf("Bad: %#v, len is %d", in, len(in.Answer))
}
srvRec, ok := in.Answer[0].(*dns.SRV)
if !ok {
t.Fatalf("Bad: %#v", in.Answer[0])
}
if srvRec.Hdr.Ttl != 10 {
if srvRec.Hdr.Ttl != expectedTTL {
t.Fatalf("Bad: %#v", in.Answer[0])
}
@ -4679,36 +4673,19 @@ func TestDNS_ServiceLookup_TTL(t *testing.T) {
if !ok {
t.Fatalf("Bad: %#v", in.Extra[0])
}
if aRec.Hdr.Ttl != 10 {
if aRec.Hdr.Ttl != expectedTTL {
t.Fatalf("Bad: %#v", in.Extra[0])
}
m = new(dns.Msg)
m.SetQuestion("api.service.consul.", dns.TypeSRV)
in, _, err = c.Exchange(m, a.DNSAddr())
if err != nil {
t.Fatalf("err: %v", err)
}
if len(in.Answer) != 1 {
t.Fatalf("Bad: %#v", in)
}
srvRec, ok = in.Answer[0].(*dns.SRV)
if !ok {
t.Fatalf("Bad: %#v", in.Answer[0])
}
if srvRec.Hdr.Ttl != 5 {
t.Fatalf("Bad: %#v", in.Answer[0])
}
aRec, ok = in.Extra[0].(*dns.A)
if !ok {
t.Fatalf("Bad: %#v", in.Extra[0])
}
if aRec.Hdr.Ttl != 5 {
t.Fatalf("Bad: %#v", in.Extra[0])
})
}
// Should have its exact TTL
expectResult("db.service.consul.", 10)
// Should match db*
expectResult("dblb.service.consul.", 66)
// Should match d*
expectResult("dk.service.consul.", 42)
// Should match *
expectResult("api.service.consul.", 5)
}
func TestDNS_PreparedQuery_TTL(t *testing.T) {
@ -4716,7 +4693,9 @@ func TestDNS_PreparedQuery_TTL(t *testing.T) {
a := NewTestAgent(t.Name(), `
dns_config {
service_ttl = {
"d*" = "42s"
"db" = "10s"
"db*" = "66s"
"*" = "5s"
}
allow_stale = true
@ -4726,16 +4705,17 @@ func TestDNS_PreparedQuery_TTL(t *testing.T) {
defer a.Shutdown()
testrpc.WaitForLeader(t, a.RPC, "dc1")
// Register a node and a service.
{
for idx, service := range []string{"db", "dblb", "dk", "api"} {
nodeName := fmt.Sprintf("foo%d", idx)
address := fmt.Sprintf("127.0.0.%d", idx)
args := &structs.RegisterRequest{
Datacenter: "dc1",
Node: "foo",
Address: "127.0.0.1",
Node: nodeName,
Address: address,
Service: &structs.NodeService{
Service: "db",
Service: service,
Tags: []string{"master"},
Port: 12345,
Port: 12345 + idx,
},
}
@ -4743,34 +4723,15 @@ func TestDNS_PreparedQuery_TTL(t *testing.T) {
if err := a.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
args = &structs.RegisterRequest{
Datacenter: "dc1",
Node: "foo",
Address: "127.0.0.1",
Service: &structs.NodeService{
Service: "api",
Port: 2222,
},
}
if err := a.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
}
// Register prepared queries with and without a TTL set for "db", as
// well as one for "api".
// Register prepared query without TTL and with TTL
{
args := &structs.PreparedQueryRequest{
Datacenter: "dc1",
Op: structs.PreparedQueryCreate,
Query: &structs.PreparedQuery{
Name: "db-ttl",
Name: service,
Service: structs.ServiceQuery{
Service: "db",
},
DNS: structs.QueryDNSOptions{
TTL: "18s",
Service: service,
},
},
}
@ -4779,29 +4740,17 @@ func TestDNS_PreparedQuery_TTL(t *testing.T) {
if err := a.RPC("PreparedQuery.Apply", args, &id); err != nil {
t.Fatalf("err: %v", err)
}
queryTTL := fmt.Sprintf("%s-ttl", service)
args = &structs.PreparedQueryRequest{
Datacenter: "dc1",
Op: structs.PreparedQueryCreate,
Query: &structs.PreparedQuery{
Name: "db-nottl",
Name: queryTTL,
Service: structs.ServiceQuery{
Service: "db",
Service: service,
},
},
}
if err := a.RPC("PreparedQuery.Apply", args, &id); err != nil {
t.Fatalf("err: %v", err)
}
args = &structs.PreparedQueryRequest{
Datacenter: "dc1",
Op: structs.PreparedQueryCreate,
Query: &structs.PreparedQuery{
Name: "api-nottl",
Service: structs.ServiceQuery{
Service: "api",
DNS: structs.QueryDNSOptions{
TTL: "18s",
},
},
}
@ -4810,27 +4759,28 @@ func TestDNS_PreparedQuery_TTL(t *testing.T) {
t.Fatalf("err: %v", err)
}
}
// Make sure the TTL is set when requested, and overrides the agent-
// specific config since the query takes precedence.
m := new(dns.Msg)
m.SetQuestion("db-ttl.query.consul.", dns.TypeSRV)
}
c := new(dns.Client)
expectResult := func(dnsQuery string, expectedTTL uint32) {
t.Run(dnsQuery, func(t *testing.T) {
m := new(dns.Msg)
m.SetQuestion(dnsQuery, dns.TypeSRV)
in, _, err := c.Exchange(m, a.DNSAddr())
if err != nil {
t.Fatalf("err: %v", err)
}
if len(in.Answer) != 1 {
t.Fatalf("Bad: %#v", in)
t.Fatalf("Bad: %#v, len is %d", in, len(in.Answer))
}
srvRec, ok := in.Answer[0].(*dns.SRV)
if !ok {
t.Fatalf("Bad: %#v", in.Answer[0])
}
if srvRec.Hdr.Ttl != 18 {
if srvRec.Hdr.Ttl != expectedTTL {
t.Fatalf("Bad: %#v", in.Answer[0])
}
@ -4838,67 +4788,24 @@ func TestDNS_PreparedQuery_TTL(t *testing.T) {
if !ok {
t.Fatalf("Bad: %#v", in.Extra[0])
}
if aRec.Hdr.Ttl != 18 {
if aRec.Hdr.Ttl != expectedTTL {
t.Fatalf("Bad: %#v", in.Extra[0])
}
// And the TTL should take the service-specific value from the agent's
// config otherwise.
m = new(dns.Msg)
m.SetQuestion("db-nottl.query.consul.", dns.TypeSRV)
in, _, err = c.Exchange(m, a.DNSAddr())
if err != nil {
t.Fatalf("err: %v", err)
})
}
if len(in.Answer) != 1 {
t.Fatalf("Bad: %#v", in)
}
srvRec, ok = in.Answer[0].(*dns.SRV)
if !ok {
t.Fatalf("Bad: %#v", in.Answer[0])
}
if srvRec.Hdr.Ttl != 10 {
t.Fatalf("Bad: %#v", in.Answer[0])
}
aRec, ok = in.Extra[0].(*dns.A)
if !ok {
t.Fatalf("Bad: %#v", in.Extra[0])
}
if aRec.Hdr.Ttl != 10 {
t.Fatalf("Bad: %#v", in.Extra[0])
}
// If there's no query TTL and no service-specific value then the wild
// card value should be used.
m = new(dns.Msg)
m.SetQuestion("api-nottl.query.consul.", dns.TypeSRV)
in, _, err = c.Exchange(m, a.DNSAddr())
if err != nil {
t.Fatalf("err: %v", err)
}
if len(in.Answer) != 1 {
t.Fatalf("Bad: %#v", in)
}
srvRec, ok = in.Answer[0].(*dns.SRV)
if !ok {
t.Fatalf("Bad: %#v", in.Answer[0])
}
if srvRec.Hdr.Ttl != 5 {
t.Fatalf("Bad: %#v", in.Answer[0])
}
aRec, ok = in.Extra[0].(*dns.A)
if !ok {
t.Fatalf("Bad: %#v", in.Extra[0])
}
if aRec.Hdr.Ttl != 5 {
t.Fatalf("Bad: %#v", in.Extra[0])
}
// Should have its exact TTL
expectResult("db.query.consul.", 10)
expectResult("db-ttl.query.consul.", 18)
// Should match db*
expectResult("dblb.query.consul.", 66)
expectResult("dblb-ttl.query.consul.", 18)
// Should match d*
expectResult("dk.query.consul.", 42)
expectResult("dk-ttl.query.consul.", 18)
// Should be the default value
expectResult("api.query.consul.", 5)
expectResult("api-ttl.query.consul.", 18)
}
func TestDNS_PreparedQuery_Failover(t *testing.T) {

View File

@ -85,7 +85,11 @@ To enable caching of node lookups (e.g. "foo.node.consul"), we can set the
Service TTLs can be specified in a more granular fashion. You can set TTLs
per-service, with a wildcard TTL as the default. This is specified using the
[`dns_config.service_ttl`](/docs/agent/options.html#service_ttl) map. The "*"
service is the wildcard service.
is supported at the end of any prefix and a lower precedence than strict match,
so 'my-service-x' has precedence over 'my-service-*', when performing wildcard
match, the longest path is taken into account, thus 'my-service-*' TTL will
be used instead of 'my-*' or '*'. With the same rule, '*' is the default value
when nothing else matches. If no match is found the TTL defaults to 0.
For example, a [`dns_config`](/docs/agent/options.html#dns_config) that provides
a wildcard TTL and a specific TTL for a service might look like this:
@ -95,7 +99,9 @@ a wildcard TTL and a specific TTL for a service might look like this:
"dns_config": {
"service_ttl": {
"*": "5s",
"web": "30s"
"web": "30s",
"db*": "10s",
"db-master": "3s"
}
}
}
@ -105,6 +111,9 @@ This sets all lookups to "web.service.consul" to use a 30 second TTL
while lookups to "db.service.consul" or "api.service.consul" will use the
5 second TTL from the wildcard.
All lookups matching "db*" would get a 10 seconds TTL except "db-master"
that would have a 3 seconds TTL.
[Prepared Queries](/api/query.html) provide an additional
level of control over TTL. They allow for the TTL to be defined along with
the query, and they can be changed on the fly by updating the query definition.