diff --git a/agent/discovery/discovery.go b/agent/discovery/discovery.go index 2496691e1a..8d4b4082a5 100644 --- a/agent/discovery/discovery.go +++ b/agent/discovery/discovery.go @@ -107,11 +107,11 @@ const ( type Result struct { Service *Location // The name and address of the service. Node *Location // The name and address of the node. - Weight uint32 // SRV queries PortName string // Used to generate a fgdn when a specifc port was queried PortNumber uint32 // SRV queries Metadata map[string]string // Used to collect metadata into TXT Records Type ResultType // Used to reconstruct the fqdn name of the resource + DNS DNSConfig // Used for DNS-specific configuration for this result Tenancy ResultTenancy } @@ -122,6 +122,11 @@ type Location struct { Address string } +type DNSConfig struct { + TTL *uint32 // deprecated: use for V1 prepared queries only + Weight uint32 // SRV queries +} + // ResultTenancy is used to reconstruct the fqdn name of the resource. type ResultTenancy struct { Namespace string diff --git a/agent/discovery/query_fetcher_v1.go b/agent/discovery/query_fetcher_v1.go index 97e0290ced..88ce2b5413 100644 --- a/agent/discovery/query_fetcher_v1.go +++ b/agent/discovery/query_fetcher_v1.go @@ -308,35 +308,31 @@ func (f *V1DataFetcher) FetchPreparedQuery(ctx Context, req *QueryPayload) ([]*R return nil, ECSNotGlobalError{err} } - // (v2-dns) TODO: (v2-dns) get TTLS working. They come from the database so not having - // TTL on the discovery result poses challenges. + // TODO (slackpad) - What's a safe limit we can set here? It seems like + // with dup filtering done at this level we need to get everything to + // match the previous behavior. We can optimize by pushing more filtering + // into the query execution, but for now I think we need to get the full + // response. We could also choose a large arbitrary number that will + // likely work in practice, like 10*maxUDPAnswerLimit which should help + // reduce bandwidth if there are thousands of nodes available. + // Determine the TTL. The parse should never fail since we vet it when + // the query is created, but we check anyway. If the query didn't + // specify a TTL then we will try to use the agent's service-specific + // TTL configs. - /* - // TODO (slackpad) - What's a safe limit we can set here? It seems like - // with dup filtering done at this level we need to get everything to - // match the previous behavior. We can optimize by pushing more filtering - // into the query execution, but for now I think we need to get the full - // response. We could also choose a large arbitrary number that will - // likely work in practice, like 10*maxUDPAnswerLimit which should help - // reduce bandwidth if there are thousands of nodes available. - // Determine the TTL. The parse should never fail since we vet it when - // the query is created, but we check anyway. If the query didn't - // specify a TTL then we will try to use the agent's service-specific - // TTL configs. - var ttl time.Duration - if out.DNS.TTL != "" { - var err error - ttl, err = time.ParseDuration(out.DNS.TTL) - if err != nil { - f.logger.Warn("Failed to parse TTL for prepared query , ignoring", - "ttl", out.DNS.TTL, - "prepared_query", req.Name, - ) - } - } else { - ttl, _ = cfg.GetTTLForService(out.Service) + // Check is there is a TTL provided as part of the prepared query + var ttlOverride *uint32 + if out.DNS.TTL != "" { + ttl, err := time.ParseDuration(out.DNS.TTL) + if err == nil { + ttlSec := uint32(ttl / time.Second) + ttlOverride = &ttlSec } - */ + f.logger.Warn("Failed to parse TTL for prepared query , ignoring", + "ttl", out.DNS.TTL, + "prepared_query", req.Name, + ) + } // If we have no nodes, return not found! if len(out.Nodes) == 0 { @@ -345,7 +341,7 @@ func (f *V1DataFetcher) FetchPreparedQuery(ctx Context, req *QueryPayload) ([]*R // Perform a random shuffle out.Nodes.Shuffle() - return f.buildResultsFromServiceNodes(out.Nodes, req), ECSNotGlobalError{} + return f.buildResultsFromServiceNodes(out.Nodes, req, ttlOverride), ECSNotGlobalError{} } // executePreparedQuery is used to execute a PreparedQuery against the Consul catalog. @@ -402,7 +398,7 @@ func (f *V1DataFetcher) ValidateRequest(_ Context, req *QueryPayload) error { } // buildResultsFromServiceNodes builds a list of results from a list of nodes. -func (f *V1DataFetcher) buildResultsFromServiceNodes(nodes []structs.CheckServiceNode, req *QueryPayload) []*Result { +func (f *V1DataFetcher) buildResultsFromServiceNodes(nodes []structs.CheckServiceNode, req *QueryPayload, ttlOverride *uint32) []*Result { // Convert the service endpoints to results up to the limit limit := req.Limit if len(nodes) < limit || limit == 0 { @@ -421,8 +417,11 @@ func (f *V1DataFetcher) buildResultsFromServiceNodes(nodes []structs.CheckServic Name: n.Node.Node, Address: n.Node.Address, }, - Type: ResultTypeService, - Weight: uint32(findWeight(n)), + Type: ResultTypeService, + DNS: DNSConfig{ + TTL: ttlOverride, + Weight: uint32(findWeight(n)), + }, PortNumber: uint32(f.translateServicePortFunc(n.Node.Datacenter, n.Service.Port, n.Service.TaggedAddresses)), Metadata: n.Node.Meta, Tenancy: ResultTenancy{ @@ -548,7 +547,7 @@ func (f *V1DataFetcher) fetchServiceBasedOnTenancy(ctx Context, req *QueryPayloa // Perform a random shuffle out.Nodes.Shuffle() - return f.buildResultsFromServiceNodes(out.Nodes, req), nil + return f.buildResultsFromServiceNodes(out.Nodes, req, nil), nil } // findWeight returns the weight of a service node. diff --git a/agent/discovery/query_fetcher_v1_test.go b/agent/discovery/query_fetcher_v1_test.go index 95f3a9fe50..41b6b32108 100644 --- a/agent/discovery/query_fetcher_v1_test.go +++ b/agent/discovery/query_fetcher_v1_test.go @@ -147,8 +147,10 @@ func Test_FetchEndpoints(t *testing.T) { Name: "service-name", Address: "service-address", }, - Type: ResultTypeService, - Weight: 1, + Type: ResultTypeService, + DNS: DNSConfig{ + Weight: 1, + }, }, } diff --git a/agent/discovery/query_fetcher_v2.go b/agent/discovery/query_fetcher_v2.go index 0033c84dff..45fc48fe31 100644 --- a/agent/discovery/query_fetcher_v2.go +++ b/agent/discovery/query_fetcher_v2.go @@ -116,7 +116,9 @@ func (f *V2DataFetcher) FetchEndpoints(reqContext Context, req *QueryPayload, lo Namespace: resourceObj.GetId().GetTenancy().GetNamespace(), Partition: resourceObj.GetId().GetTenancy().GetPartition(), }, - Weight: weight, + DNS: DNSConfig{ + Weight: weight, + }, } results = append(results, result) } diff --git a/agent/discovery/query_fetcher_v2_test.go b/agent/discovery/query_fetcher_v2_test.go index 9ace561f13..651c2d4eb6 100644 --- a/agent/discovery/query_fetcher_v2_test.go +++ b/agent/discovery/query_fetcher_v2_test.go @@ -261,7 +261,9 @@ func Test_V2FetchEndpoints(t *testing.T) { Namespace: resource.DefaultNamespaceName, Partition: resource.DefaultPartitionName, }, - Weight: 1, + DNS: DNSConfig{ + Weight: 1, + }, }, }, }, @@ -360,7 +362,9 @@ func Test_V2FetchEndpoints(t *testing.T) { Namespace: resource.DefaultNamespaceName, Partition: resource.DefaultPartitionName, }, - Weight: 2, + DNS: DNSConfig{ + Weight: 2, + }, }, { Node: &Location{Name: "consul-2", Address: "2.3.4.5"}, @@ -369,7 +373,9 @@ func Test_V2FetchEndpoints(t *testing.T) { Namespace: resource.DefaultNamespaceName, Partition: resource.DefaultPartitionName, }, - Weight: 3, + DNS: DNSConfig{ + Weight: 3, + }, }, }, }, @@ -408,7 +414,9 @@ func Test_V2FetchEndpoints(t *testing.T) { Namespace: resource.DefaultNamespaceName, Partition: resource.DefaultPartitionName, }, - Weight: 2, + DNS: DNSConfig{ + Weight: 2, + }, }, }, }, @@ -452,7 +460,9 @@ func Test_V2FetchEndpoints(t *testing.T) { Namespace: resource.DefaultNamespaceName, Partition: resource.DefaultPartitionName, }, - Weight: 1, + DNS: DNSConfig{ + Weight: 1, + }, }, { Node: &Location{Name: "consul-2", Address: "10.0.0.2"}, @@ -461,7 +471,9 @@ func Test_V2FetchEndpoints(t *testing.T) { Namespace: resource.DefaultNamespaceName, Partition: resource.DefaultPartitionName, }, - Weight: 1, + DNS: DNSConfig{ + Weight: 1, + }, }, { Node: &Location{Name: "consul-3", Address: "10.0.0.3"}, @@ -470,7 +482,9 @@ func Test_V2FetchEndpoints(t *testing.T) { Namespace: resource.DefaultNamespaceName, Partition: resource.DefaultPartitionName, }, - Weight: 1, + DNS: DNSConfig{ + Weight: 1, + }, }, { Node: &Location{Name: "consul-4", Address: "10.0.0.4"}, @@ -479,7 +493,9 @@ func Test_V2FetchEndpoints(t *testing.T) { Namespace: resource.DefaultNamespaceName, Partition: resource.DefaultPartitionName, }, - Weight: 1, + DNS: DNSConfig{ + Weight: 1, + }, }, { Node: &Location{Name: "consul-5", Address: "10.0.0.5"}, @@ -488,7 +504,9 @@ func Test_V2FetchEndpoints(t *testing.T) { Namespace: resource.DefaultNamespaceName, Partition: resource.DefaultPartitionName, }, - Weight: 1, + DNS: DNSConfig{ + Weight: 1, + }, }, { Node: &Location{Name: "consul-6", Address: "10.0.0.6"}, @@ -497,7 +515,9 @@ func Test_V2FetchEndpoints(t *testing.T) { Namespace: resource.DefaultNamespaceName, Partition: resource.DefaultPartitionName, }, - Weight: 1, + DNS: DNSConfig{ + Weight: 1, + }, }, { Node: &Location{Name: "consul-7", Address: "10.0.0.7"}, @@ -506,7 +526,9 @@ func Test_V2FetchEndpoints(t *testing.T) { Namespace: resource.DefaultNamespaceName, Partition: resource.DefaultPartitionName, }, - Weight: 1, + DNS: DNSConfig{ + Weight: 1, + }, }, { Node: &Location{Name: "consul-8", Address: "10.0.0.8"}, @@ -515,7 +537,9 @@ func Test_V2FetchEndpoints(t *testing.T) { Namespace: resource.DefaultNamespaceName, Partition: resource.DefaultPartitionName, }, - Weight: 1, + DNS: DNSConfig{ + Weight: 1, + }, }, { Node: &Location{Name: "consul-9", Address: "10.0.0.9"}, @@ -524,7 +548,9 @@ func Test_V2FetchEndpoints(t *testing.T) { Namespace: resource.DefaultNamespaceName, Partition: resource.DefaultPartitionName, }, - Weight: 1, + DNS: DNSConfig{ + Weight: 1, + }, }, { Node: &Location{Name: "consul-10", Address: "10.0.0.10"}, @@ -533,7 +559,9 @@ func Test_V2FetchEndpoints(t *testing.T) { Namespace: resource.DefaultNamespaceName, Partition: resource.DefaultPartitionName, }, - Weight: 1, + DNS: DNSConfig{ + Weight: 1, + }, }, }, verifyShuffle: true, @@ -572,7 +600,9 @@ func Test_V2FetchEndpoints(t *testing.T) { Namespace: resource.DefaultNamespaceName, Partition: resource.DefaultPartitionName, }, - Weight: 1, + DNS: DNSConfig{ + Weight: 1, + }, }, }, }, @@ -613,7 +643,9 @@ func Test_V2FetchEndpoints(t *testing.T) { Namespace: "test-namespace", Partition: "test-partition", }, - Weight: 1, + DNS: DNSConfig{ + Weight: 1, + }, }, }, }, diff --git a/agent/dns/router.go b/agent/dns/router.go index 0ff11cdd41..387eb0f43b 100644 --- a/agent/dns/router.go +++ b/agent/dns/router.go @@ -13,13 +13,12 @@ import ( "sync/atomic" "time" - "github.com/hashicorp/consul/acl" - "github.com/armon/go-radix" "github.com/miekg/dns" "github.com/hashicorp/go-hclog" + "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/config" "github.com/hashicorp/consul/agent/discovery" "github.com/hashicorp/consul/agent/structs" @@ -286,13 +285,18 @@ func (r *Router) trimDomain(questionName string) string { } // getTTLForResult returns the TTL for a given result. -func getTTLForResult(name string, query *discovery.Query, cfg *RouterDynamicConfig) uint32 { +func getTTLForResult(name string, overrideTTL *uint32, query *discovery.Query, cfg *RouterDynamicConfig) uint32 { // In the case we are not making a discovery query, such as addr. or arpa. lookups, // use the node TTL by convention if query == nil { return uint32(cfg.NodeTTL / time.Second) } + if overrideTTL != nil { + // If a result was provided with an override, use that. This is the case for some prepared queries. + return *overrideTTL + } + switch query.QueryType { // TODO (v2-dns): currently have to do this related to the results type being changed to node whe // the v1 data fetcher encounters a blank service address and uses the node address instead. @@ -301,7 +305,7 @@ func getTTLForResult(name string, query *discovery.Query, cfg *RouterDynamicConf case discovery.QueryTypeWorkload: // TODO (v2-dns): we need to discuss what we want to do for workload TTLs return 0 - case discovery.QueryTypeService: + case discovery.QueryTypeService, discovery.QueryTypePreparedQuery: ttl, ok := cfg.getTTLForService(name) if ok { return uint32(ttl / time.Second) @@ -884,7 +888,9 @@ func (r *Router) getAnswerExtraAndNs(result *discovery.Result, req *dns.Msg, req if query != nil { ttlLookupName = query.QueryPayload.Name } - ttl := getTTLForResult(ttlLookupName, query, cfg) + + ttl := getTTLForResult(ttlLookupName, result.DNS.TTL, query, cfg) + qType := req.Question[0].Qtype // TODO (v2-dns): skip records that refer to a workload/node that don't have a valid DNS name. @@ -1278,7 +1284,7 @@ func makeSRVRecord(name, target string, result *discovery.Result, ttl uint32) *d Ttl: ttl, }, Priority: 1, - Weight: uint16(result.Weight), + Weight: uint16(result.DNS.Weight), Port: uint16(result.PortNumber), Target: target, } diff --git a/agent/dns/router_test.go b/agent/dns/router_test.go index 1e2fd09178..459ab97bda 100644 --- a/agent/dns/router_test.go +++ b/agent/dns/router_test.go @@ -1791,6 +1791,165 @@ func Test_HandleRequest(t *testing.T) { }, }, }, + // V1 Prepared Queries + { + name: "v1 prepared query w/ TTL override, ANY query, returns A record", + request: &dns.Msg{ + MsgHdr: dns.MsgHdr{ + Opcode: dns.OpcodeQuery, + }, + Question: []dns.Question{ + { + Name: "foo.query.consul.", + Qtype: dns.TypeA, + Qclass: dns.ClassINET, + }, + }, + }, + agentConfig: &config.RuntimeConfig{ + DNSDomain: "consul", + DNSNodeTTL: 123 * time.Second, + DNSSOA: config.RuntimeSOAConfig{ + Refresh: 1, + Retry: 2, + Expire: 3, + Minttl: 4, + }, + DNSUDPAnswerLimit: maxUDPAnswerLimit, + // We shouldn't use this if we have the override defined + DNSServiceTTL: map[string]time.Duration{ + "foo": 1 * time.Second, + }, + }, + configureDataFetcher: func(fetcher discovery.CatalogDataFetcher) { + fetcher.(*discovery.MockCatalogDataFetcher). + On("FetchPreparedQuery", mock.Anything, mock.Anything). + Return([]*discovery.Result{ + { + Service: &discovery.Location{Name: "foo", Address: "1.2.3.4"}, + Node: &discovery.Location{Name: "bar", Address: "1.2.3.4"}, + Type: discovery.ResultTypeService, + Tenancy: discovery.ResultTenancy{ + Datacenter: "dc1", + }, + DNS: discovery.DNSConfig{ + TTL: getUint32Ptr(3), + Weight: 1, + }, + }, + }, nil). + Run(func(args mock.Arguments) { + req := args.Get(1).(*discovery.QueryPayload) + require.Equal(t, "foo", req.Name) + }) + }, + validateAndNormalizeExpected: true, + response: &dns.Msg{ + MsgHdr: dns.MsgHdr{ + Opcode: dns.OpcodeQuery, + Response: true, + Authoritative: true, + }, + Compress: true, + Question: []dns.Question{ + { + Name: "foo.query.consul.", + Qtype: dns.TypeA, + Qclass: dns.ClassINET, + }, + }, + Answer: []dns.RR{ + &dns.A{ + Hdr: dns.RR_Header{ + Name: "foo.query.consul.", + Rrtype: dns.TypeA, + Class: dns.ClassINET, + Ttl: 3, + }, + A: net.ParseIP("1.2.3.4"), + }, + }, + }, + }, + { + name: "v1 prepared query w/ matching service TTL, ANY query, returns A record", + request: &dns.Msg{ + MsgHdr: dns.MsgHdr{ + Opcode: dns.OpcodeQuery, + }, + Question: []dns.Question{ + { + Name: "foo.query.consul.", + Qtype: dns.TypeA, + Qclass: dns.ClassINET, + }, + }, + }, + agentConfig: &config.RuntimeConfig{ + DNSDomain: "consul", + DNSNodeTTL: 123 * time.Second, + DNSSOA: config.RuntimeSOAConfig{ + Refresh: 1, + Retry: 2, + Expire: 3, + Minttl: 4, + }, + DNSUDPAnswerLimit: maxUDPAnswerLimit, + // Results should use this as the TTL + DNSServiceTTL: map[string]time.Duration{ + "foo": 1 * time.Second, + }, + }, + configureDataFetcher: func(fetcher discovery.CatalogDataFetcher) { + fetcher.(*discovery.MockCatalogDataFetcher). + On("FetchPreparedQuery", mock.Anything, mock.Anything). + Return([]*discovery.Result{ + { + Service: &discovery.Location{Name: "foo", Address: "1.2.3.4"}, + Node: &discovery.Location{Name: "bar", Address: "1.2.3.4"}, + Type: discovery.ResultTypeService, + Tenancy: discovery.ResultTenancy{ + Datacenter: "dc1", + }, + DNS: discovery.DNSConfig{ + // Intentionally no TTL here. + Weight: 1, + }, + }, + }, nil). + Run(func(args mock.Arguments) { + req := args.Get(1).(*discovery.QueryPayload) + require.Equal(t, "foo", req.Name) + }) + }, + validateAndNormalizeExpected: true, + response: &dns.Msg{ + MsgHdr: dns.MsgHdr{ + Opcode: dns.OpcodeQuery, + Response: true, + Authoritative: true, + }, + Compress: true, + Question: []dns.Question{ + { + Name: "foo.query.consul.", + Qtype: dns.TypeA, + Qclass: dns.ClassINET, + }, + }, + Answer: []dns.RR{ + &dns.A{ + Hdr: dns.RR_Header{ + Name: "foo.query.consul.", + Rrtype: dns.TypeA, + Class: dns.ClassINET, + Ttl: 1, + }, + A: net.ParseIP("1.2.3.4"), + }, + }, + }, + }, } testCases = append(testCases, getAdditionalTestCases(t)...) @@ -2174,3 +2333,8 @@ func TestDNS_syncExtra(t *testing.T) { t.Fatalf("Bad %#v vs. %#v", *resp, *expected) } } + +// getUint32Ptr return the pointer of an uint32 literal +func getUint32Ptr(i uint32) *uint32 { + return &i +} diff --git a/agent/dns_service_lookup_test.go b/agent/dns_service_lookup_test.go index 14ee6eb80e..fbca1bb1fb 100644 --- a/agent/dns_service_lookup_test.go +++ b/agent/dns_service_lookup_test.go @@ -502,6 +502,9 @@ func TestDNS_ServiceLookup(t *testing.T) { Service: structs.ServiceQuery{ Service: "db", }, + DNS: structs.QueryDNSOptions{ + TTL: "3s", + }, }, } if err := a.RPC(context.Background(), "PreparedQuery.Apply", args, &id); err != nil { @@ -538,9 +541,6 @@ func TestDNS_ServiceLookup(t *testing.T) { if srvRec.Target != "foo.node.dc1.consul." { t.Fatalf("Bad: %#v", srvRec) } - if srvRec.Hdr.Ttl != 0 { - t.Fatalf("Bad: %#v", in.Answer[0]) - } aRec, ok := in.Extra[0].(*dns.A) if !ok { @@ -552,9 +552,24 @@ func TestDNS_ServiceLookup(t *testing.T) { if aRec.A.String() != "127.0.0.1" { t.Fatalf("Bad: %#v", in.Extra[0]) } - if aRec.Hdr.Ttl != 0 { - t.Fatalf("Bad: %#v", in.Extra[0]) + + if strings.Contains(question, "query") { + // The query should have the TTL associated with the query registration. + if srvRec.Hdr.Ttl != 3 { + t.Fatalf("Bad: %#v", in.Answer[0]) + } + if aRec.Hdr.Ttl != 3 { + t.Fatalf("Bad: %#v", in.Extra[0]) + } + } else { + if srvRec.Hdr.Ttl != 0 { + t.Fatalf("Bad: %#v", in.Answer[0]) + } + if aRec.Hdr.Ttl != 0 { + t.Fatalf("Bad: %#v", in.Extra[0]) + } } + } // Lookup a non-existing service/query, we should receive an SOA.