consul/agent/dns.go

2216 lines
66 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
package agent
import (
"context"
"encoding/hex"
"errors"
"fmt"
"math"
"net"
"regexp"
"strings"
"sync/atomic"
"time"
"github.com/armon/go-metrics"
"github.com/armon/go-radix"
"github.com/hashicorp/go-hclog"
"github.com/miekg/dns"
"github.com/hashicorp/consul/acl"
cachetype "github.com/hashicorp/consul/agent/cache-types"
"github.com/hashicorp/consul/agent/config"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/api"
dnsutil "github.com/hashicorp/consul/internal/dnsutil"
libdns "github.com/hashicorp/consul/internal/dnsutil"
"github.com/hashicorp/consul/ipaddr"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/consul/logging"
)
const (
// UDP can fit ~25 A records in a 512B response, and ~14 AAAA
// records. Limit further to prevent unintentional configuration
// abuse that would have a negative effect on application response
// times.
maxUDPAnswerLimit = 8
maxRecurseRecords = 5
maxRecursionLevelDefault = 3
// Increment a counter when requests staler than this are served
staleCounterThreshold = 5 * time.Second
defaultMaxUDPSize = 512
// If a consumer sets a buffer size greater than this amount we will default it down
// to this amount to ensure that consul does respond. Previously if consumer had a larger buffer
// size than 65535 - 60 bytes (maximim 60 bytes for IP header. UDP header will be offset in the
// trimUDP call) consul would fail to respond and the consumer timesout
// the request.
maxUDPDatagramSize = math.MaxUint16 - 68
)
type dnsSOAConfig struct {
Refresh uint32 // 3600 by default
Retry uint32 // 600
Expire uint32 // 86400
Minttl uint32 // 0
}
type dnsConfig struct {
AllowStale bool
Datacenter string
EnableTruncate bool
MaxStale time.Duration
UseCache bool
CacheMaxAge time.Duration
NodeName string
NodeTTL time.Duration
OnlyPassing bool
RecursorStrategy structs.RecursorStrategy
RecursorTimeout time.Duration
Recursors []string
SegmentName string
UDPAnswerLimit int
ARecordLimit int
NodeMetaTXT bool
SOAConfig dnsSOAConfig
// TTLRadix sets service TTLs by prefix, eg: "database-*"
TTLRadix *radix.Tree
// TTLStict sets TTLs to service by full name match. It Has higher priority than TTLRadix
TTLStrict map[string]time.Duration
DisableCompression bool
enterpriseDNSConfig
}
type serviceLookup struct {
PeerName string
Datacenter string
Service string
Tag string
MaxRecursionLevel int
Connect bool
Ingress bool
acl.EnterpriseMeta
}
type nodeLookup struct {
Datacenter string
PeerName string
Node string
Tag string
MaxRecursionLevel int
acl.EnterpriseMeta
}
// DNSServer is used to wrap an Agent and expose various
// service discovery endpoints using a DNS interface.
type DNSServer struct {
*dns.Server
agent *Agent
mux *dns.ServeMux
domain string
altDomain string
logger hclog.Logger
// config stores the config as an atomic value (for hot-reloading). It is always of type *dnsConfig
config atomic.Value
// recursorEnabled stores whever the recursor handler is enabled as an atomic flag.
// the recursor handler is only enabled if recursors are configured. This flag is used during config hot-reloading
recursorEnabled uint32
defaultEnterpriseMeta acl.EnterpriseMeta
}
func NewDNSServer(a *Agent) (*DNSServer, error) {
// Make sure domains are FQDN, make them case insensitive for ServeMux
domain := dns.Fqdn(strings.ToLower(a.config.DNSDomain))
altDomain := dns.Fqdn(strings.ToLower(a.config.DNSAltDomain))
srv := &DNSServer{
agent: a,
domain: domain,
altDomain: altDomain,
logger: a.logger.Named(logging.DNS),
defaultEnterpriseMeta: *a.AgentEnterpriseMeta(),
mux: dns.NewServeMux(),
}
cfg, err := GetDNSConfig(a.config)
if err != nil {
return nil, err
}
srv.config.Store(cfg)
srv.mux.HandleFunc("arpa.", srv.handlePtr)
srv.mux.HandleFunc(srv.domain, srv.handleQuery)
// this is not an empty string check because NewDNSServer will have
// converted the configured alt domain into an FQDN which will ensure that
// the value ends with a ".". Therefore "." is the empty string equivalent
// for originally having no alternate domain set. If there is a reason
// why consul should be configured to handle the root zone I have yet
// to think of it.
if srv.altDomain != "." {
srv.mux.HandleFunc(srv.altDomain, srv.handleQuery)
}
srv.toggleRecursorHandlerFromConfig(cfg)
return srv, nil
}
// GetDNSConfig takes global config and creates the config used by DNS server
func GetDNSConfig(conf *config.RuntimeConfig) (*dnsConfig, error) {
cfg := &dnsConfig{
AllowStale: conf.DNSAllowStale,
ARecordLimit: conf.DNSARecordLimit,
Datacenter: conf.Datacenter,
EnableTruncate: conf.DNSEnableTruncate,
MaxStale: conf.DNSMaxStale,
NodeName: conf.NodeName,
NodeTTL: conf.DNSNodeTTL,
OnlyPassing: conf.DNSOnlyPassing,
RecursorStrategy: conf.DNSRecursorStrategy,
RecursorTimeout: conf.DNSRecursorTimeout,
SegmentName: conf.SegmentName,
UDPAnswerLimit: conf.DNSUDPAnswerLimit,
NodeMetaTXT: conf.DNSNodeMetaTXT,
DisableCompression: conf.DNSDisableCompression,
UseCache: conf.DNSUseCache,
CacheMaxAge: conf.DNSCacheMaxAge,
SOAConfig: dnsSOAConfig{
Expire: conf.DNSSOA.Expire,
Minttl: conf.DNSSOA.Minttl,
Refresh: conf.DNSSOA.Refresh,
Retry: conf.DNSSOA.Retry,
},
enterpriseDNSConfig: getEnterpriseDNSConfig(conf),
}
if conf.DNSServiceTTL != nil {
cfg.TTLRadix = radix.New()
cfg.TTLStrict = make(map[string]time.Duration)
for key, ttl := range conf.DNSServiceTTL {
// All suffix with '*' are put in radix
// This include '*' that will match anything
if strings.HasSuffix(key, "*") {
cfg.TTLRadix.Insert(key[:len(key)-1], ttl)
} else {
cfg.TTLStrict[key] = ttl
}
}
}
for _, r := range conf.DNSRecursors {
ra, err := recursorAddr(r)
if err != nil {
return nil, fmt.Errorf("Invalid recursor address: %v", err)
}
cfg.Recursors = append(cfg.Recursors, ra)
}
return cfg, nil
}
// GetTTLForService Find the TTL for a given service.
// return ttl, true if found, 0, false otherwise
func (cfg *dnsConfig) GetTTLForService(service string) (time.Duration, bool) {
if cfg.TTLStrict != nil {
ttl, ok := cfg.TTLStrict[service]
if ok {
return ttl, true
}
}
if cfg.TTLRadix != nil {
_, ttlRaw, ok := cfg.TTLRadix.LongestPrefix(service)
if ok {
return ttlRaw.(time.Duration), true
}
}
return 0, false
}
func (d *DNSServer) ListenAndServe(network, addr string, notif func()) error {
d.Server = &dns.Server{
Addr: addr,
Net: network,
Handler: d.mux,
NotifyStartedFunc: notif,
}
if network == "udp" {
d.UDPSize = 65535
}
return d.Server.ListenAndServe()
}
func (d *DNSServer) Shutdown() {
if d.Server != nil {
d.logger.Info("Stopping server",
"protocol", "DNS",
"address", d.Server.Addr,
"network", d.Server.Net,
)
err := d.Server.Shutdown()
if err != nil {
d.logger.Error("Error stopping DNS server", "error", err)
}
}
}
// GetAddr is a function to return the server address if is not nil.
func (d *DNSServer) GetAddr() string {
if d.Server != nil {
return d.Server.Addr
}
return ""
}
// toggleRecursorHandlerFromConfig enables or disables the recursor handler based on config idempotently
func (d *DNSServer) toggleRecursorHandlerFromConfig(cfg *dnsConfig) {
shouldEnable := len(cfg.Recursors) > 0
if shouldEnable && atomic.CompareAndSwapUint32(&d.recursorEnabled, 0, 1) {
d.mux.HandleFunc(".", d.handleRecurse)
d.logger.Debug("recursor enabled")
return
}
if !shouldEnable && atomic.CompareAndSwapUint32(&d.recursorEnabled, 1, 0) {
d.mux.HandleRemove(".")
d.logger.Debug("recursor disabled")
return
}
}
// ReloadConfig hot-reloads the server config with new parameters under config.RuntimeConfig.DNS*
func (d *DNSServer) ReloadConfig(newCfg *config.RuntimeConfig) error {
cfg, err := GetDNSConfig(newCfg)
if err != nil {
return err
}
d.config.Store(cfg)
d.toggleRecursorHandlerFromConfig(cfg)
return nil
}
// setEDNS is used to set the responses EDNS size headers and
// possibly the ECS headers as well if they were present in the
// original request
func setEDNS(request *dns.Msg, response *dns.Msg, ecsGlobal bool) {
edns := request.IsEdns0()
if edns == nil {
return
}
// cannot just use the SetEdns0 function as we need to embed
// the ECS option as well
ednsResp := new(dns.OPT)
ednsResp.Hdr.Name = "."
ednsResp.Hdr.Rrtype = dns.TypeOPT
ednsResp.SetUDPSize(edns.UDPSize())
// Setup the ECS option if present
if subnet := ednsSubnetForRequest(request); subnet != nil {
subOp := new(dns.EDNS0_SUBNET)
subOp.Code = dns.EDNS0SUBNET
subOp.Family = subnet.Family
subOp.Address = subnet.Address
subOp.SourceNetmask = subnet.SourceNetmask
if c := response.Rcode; ecsGlobal || c == dns.RcodeNameError || c == dns.RcodeServerFailure || c == dns.RcodeRefused || c == dns.RcodeNotImplemented {
// reply is globally valid and should be cached accordingly
subOp.SourceScope = 0
} else {
// reply is only valid for the subnet it was queried with
subOp.SourceScope = subnet.SourceNetmask
}
ednsResp.Option = append(ednsResp.Option, subOp)
}
response.Extra = append(response.Extra, ednsResp)
}
// recursorAddr is used to add a port to the recursor if omitted.
func recursorAddr(recursor string) (string, error) {
// Add the port if none
START:
_, _, err := net.SplitHostPort(recursor)
if ae, ok := err.(*net.AddrError); ok {
if ae.Err == "missing port in address" {
recursor = ipaddr.FormatAddressPort(recursor, 53)
goto START
} else if ae.Err == "too many colons in address" {
if ip := net.ParseIP(recursor); ip != nil && ip.To4() == nil {
recursor = ipaddr.FormatAddressPort(recursor, 53)
goto START
}
}
}
if err != nil {
return "", err
}
// Get the address
addr, err := net.ResolveTCPAddr("tcp", recursor)
if err != nil {
return "", err
}
// Return string
return addr.String(), nil
}
func serviceNodeCanonicalDNSName(sn *structs.ServiceNode, domain string) string {
return serviceCanonicalDNSName(sn.ServiceName, "service", sn.Datacenter, domain, &sn.EnterpriseMeta)
}
func serviceIngressDNSName(service, datacenter, domain string, entMeta *acl.EnterpriseMeta) string {
return serviceCanonicalDNSName(service, "ingress", datacenter, domain, entMeta)
}
// getResponseDomain returns alt-domain if it is configured and request is made with alt-domain,
// respects DNS case insensitivity
func (d *DNSServer) getResponseDomain(questionName string) string {
labels := dns.SplitDomainName(questionName)
domain := d.domain
for i := len(labels) - 1; i >= 0; i-- {
currentSuffix := strings.Join(labels[i:], ".") + "."
if strings.EqualFold(currentSuffix, d.domain) || strings.EqualFold(currentSuffix, d.altDomain) {
domain = currentSuffix
}
}
return domain
}
// handlePtr is used to handle "reverse" DNS queries
func (d *DNSServer) handlePtr(resp dns.ResponseWriter, req *dns.Msg) {
q := req.Question[0]
defer func(s time.Time) {
// V1 DNS-style metrics
metrics.MeasureSinceWithLabels([]string{"dns", "ptr_query"}, s,
[]metrics.Label{{Name: "node", Value: d.agent.config.NodeName}})
// V2 DNS-style metrics for forward compatibility
metrics.MeasureSinceWithLabels([]string{"dns", "query"}, s,
[]metrics.Label{
{Name: "node", Value: d.agent.config.NodeName},
{Name: "type", Value: dns.Type(dns.TypePTR).String()},
})
d.logger.Debug("request served from client",
"question", q,
"latency", time.Since(s).String(),
"client", resp.RemoteAddr().String(),
"client_network", resp.RemoteAddr().Network(),
)
}(time.Now())
cfg := d.config.Load().(*dnsConfig)
// Setup the message response
m := new(dns.Msg)
m.SetReply(req)
m.Compress = !cfg.DisableCompression
m.Authoritative = true
m.RecursionAvailable = (len(cfg.Recursors) > 0)
// Only add the SOA if requested
if req.Question[0].Qtype == dns.TypeSOA {
d.addSOAToMessage(cfg, m, q.Name)
}
datacenter := d.agent.config.Datacenter
// Get the QName without the domain suffix
qName := strings.ToLower(dns.Fqdn(req.Question[0].Name))
args := structs.DCSpecificRequest{
Datacenter: datacenter,
QueryOptions: structs.QueryOptions{
Token: d.coalesceDNSToken(),
AllowStale: cfg.AllowStale,
},
}
var out structs.IndexedNodes
// TODO: Replace ListNodes with an internal RPC that can do the filter
// server side to avoid transferring the entire node list.
if err := d.agent.RPC(context.Background(), "Catalog.ListNodes", &args, &out); err == nil {
for _, n := range out.Nodes {
lookup := serviceLookup{
// Peering PTR lookups are currently not supported, so we don't
// need to populate that field for creating the node FQDN.
// PeerName: n.PeerName,
Datacenter: n.Datacenter,
EnterpriseMeta: *n.GetEnterpriseMeta(),
}
arpa, _ := dns.ReverseAddr(n.Address)
if arpa == qName {
ptr := &dns.PTR{
Hdr: dns.RR_Header{Name: q.Name, Rrtype: dns.TypePTR, Class: dns.ClassINET, Ttl: 0},
Ptr: nodeCanonicalDNSName(lookup, n.Node, d.domain),
}
m.Answer = append(m.Answer, ptr)
break
}
}
}
// only look into the services if we didn't find a node
if len(m.Answer) == 0 {
// lookup the service address
ip := libdns.IPFromARPA(qName)
var serviceAddress string
if ip != nil {
serviceAddress = ip.String()
}
sargs := structs.ServiceSpecificRequest{
Datacenter: datacenter,
QueryOptions: structs.QueryOptions{
Token: d.coalesceDNSToken(),
AllowStale: cfg.AllowStale,
},
ServiceAddress: serviceAddress,
EnterpriseMeta: *d.defaultEnterpriseMeta.WithWildcardNamespace(),
}
var sout structs.IndexedServiceNodes
if err := d.agent.RPC(context.Background(), "Catalog.ServiceNodes", &sargs, &sout); err == nil {
for _, n := range sout.ServiceNodes {
if n.ServiceAddress == serviceAddress {
ptr := &dns.PTR{
Hdr: dns.RR_Header{Name: q.Name, Rrtype: dns.TypePTR, Class: dns.ClassINET, Ttl: 0},
Ptr: serviceNodeCanonicalDNSName(n, d.domain),
}
m.Answer = append(m.Answer, ptr)
break
}
}
}
}
// nothing found locally, recurse
if len(m.Answer) == 0 {
d.handleRecurse(resp, req)
return
}
// ptr record responses are globally valid
setEDNS(req, m, true)
// Write out the complete response
if err := resp.WriteMsg(m); err != nil {
d.logger.Warn("failed to respond", "error", err)
}
}
// handleQuery is used to handle DNS queries in the configured domain
func (d *DNSServer) handleQuery(resp dns.ResponseWriter, req *dns.Msg) {
q := req.Question[0]
defer func(s time.Time) {
// V1 DNS-style metrics
metrics.MeasureSinceWithLabels([]string{"dns", "domain_query"}, s,
[]metrics.Label{{Name: "node", Value: d.agent.config.NodeName}})
// V2 DNS-style metrics for forward compatibility
metrics.MeasureSinceWithLabels([]string{"dns", "query"}, s,
[]metrics.Label{
{Name: "node", Value: d.agent.config.NodeName},
{Name: "type", Value: dns.Type(q.Qtype).String()},
})
d.logger.Debug("request served from client",
"name", q.Name,
"type", dns.Type(q.Qtype).String(),
"class", dns.Class(q.Qclass).String(),
"latency", time.Since(s).String(),
"client", resp.RemoteAddr().String(),
"client_network", resp.RemoteAddr().Network(),
)
}(time.Now())
// Switch to TCP if the client is
network := "udp"
if _, ok := resp.RemoteAddr().(*net.TCPAddr); ok {
network = "tcp"
}
cfg := d.config.Load().(*dnsConfig)
// Set up the message response
m := new(dns.Msg)
m.SetReply(req)
m.Compress = !cfg.DisableCompression
m.Authoritative = true
m.RecursionAvailable = (len(cfg.Recursors) > 0)
var err error
switch req.Question[0].Qtype {
case dns.TypeSOA:
ns, glue := d.getNameserversAndNodeRecord(req.Question[0].Name, cfg, maxRecursionLevelDefault)
m.Answer = append(m.Answer, d.makeSOARecord(cfg, q.Name))
m.Ns = append(m.Ns, ns...)
m.Extra = append(m.Extra, glue...)
m.SetRcode(req, dns.RcodeSuccess)
case dns.TypeNS:
ns, glue := d.getNameserversAndNodeRecord(req.Question[0].Name, cfg, maxRecursionLevelDefault)
m.Answer = ns
m.Extra = glue
m.SetRcode(req, dns.RcodeSuccess)
case dns.TypeAXFR:
m.SetRcode(req, dns.RcodeNotImplemented)
default:
err = d.dispatch(resp.RemoteAddr(), req, m, maxRecursionLevelDefault)
rCode := rCodeFromError(err)
if rCode == dns.RcodeNameError || errors.Is(err, errNoData) {
d.addSOAToMessage(cfg, m, q.Name)
}
m.SetRcode(req, rCode)
}
setEDNS(req, m, !errors.Is(err, errECSNotGlobal))
d.trimDNSResponse(cfg, network, req, m)
if err := resp.WriteMsg(m); err != nil {
d.logger.Warn("failed to respond", "error", err)
}
}
// Craft dns records for an SOA
func (d *DNSServer) makeSOARecord(cfg *dnsConfig, questionName string) *dns.SOA {
domain := d.domain
if d.altDomain != "" && strings.HasSuffix(questionName, "."+d.altDomain) {
domain = d.altDomain
}
return &dns.SOA{
Hdr: dns.RR_Header{
Name: domain,
Rrtype: dns.TypeSOA,
Class: dns.ClassINET,
// Has to be consistent with MinTTL to avoid invalidation
Ttl: cfg.SOAConfig.Minttl,
},
Ns: "ns." + domain,
Serial: uint32(time.Now().Unix()),
Mbox: "hostmaster." + domain,
Refresh: cfg.SOAConfig.Refresh,
Retry: cfg.SOAConfig.Retry,
Expire: cfg.SOAConfig.Expire,
Minttl: cfg.SOAConfig.Minttl,
}
}
// addSOA is used to add an SOA record to a message for the given domain
func (d *DNSServer) addSOAToMessage(cfg *dnsConfig, msg *dns.Msg, questionName string) {
msg.Ns = append(msg.Ns, d.makeSOARecord(cfg, questionName))
}
// getNameserversAndNodeRecord returns the names and ip addresses of up to three random servers
// in the current cluster which serve as authoritative name servers for zone.
func (d *DNSServer) getNameserversAndNodeRecord(questionName string, cfg *dnsConfig, maxRecursionLevel int) (ns []dns.RR, extra []dns.RR) {
out, err := d.lookupServiceNodes(cfg, serviceLookup{
Datacenter: d.agent.config.Datacenter,
Service: structs.ConsulServiceName,
Connect: false,
Ingress: false,
EnterpriseMeta: d.defaultEnterpriseMeta,
})
if err != nil {
d.logger.Warn("Unable to get list of servers", "error", err)
return nil, nil
}
if len(out.Nodes) == 0 {
d.logger.Warn("no servers found")
return
}
// shuffle the nodes to randomize the output
out.Nodes.Shuffle()
for _, o := range out.Nodes {
name, dc := o.Node.Node, o.Node.Datacenter
if libdns.InvalidNameRe.MatchString(name) {
d.logger.Warn("Skipping invalid node for NS records", "node", name)
continue
}
respDomain := d.getResponseDomain(questionName)
fqdn := name + ".node." + dc + "." + respDomain
fqdn = dns.Fqdn(strings.ToLower(fqdn))
// NS record
nsrr := &dns.NS{
Hdr: dns.RR_Header{
Name: respDomain,
Rrtype: dns.TypeNS,
Class: dns.ClassINET,
Ttl: uint32(cfg.NodeTTL / time.Second),
},
Ns: fqdn,
}
ns = append(ns, nsrr)
extra = append(extra, d.makeRecordFromNode(o.Node, dns.TypeANY, fqdn, cfg.NodeTTL, maxRecursionLevel)...)
// don't provide more than 3 servers
if len(ns) >= 3 {
return
}
}
return
}
// parseDatacenter will do the following:
// - if zero labels are passed, return true without modifying the datacenter parameter
// - if one label is passed, set the datacenter parameter to the label and return true
// - Otherwise it will return false without modifying the datacenter parameter
func (d *DNSServer) parseDatacenter(labels []string, datacenter *string) bool {
switch len(labels) {
case 1:
*datacenter = labels[0]
return true
case 0:
return true
default:
return false
}
}
var errECSNotGlobal = fmt.Errorf("ECS response is not global")
var errNameNotFound = fmt.Errorf("DNS name not found")
// errNoData is used to indicate no resource records exist for the specified query type.
// Per the recommendation from Section 2.2 of RFC 2308, the server will return a TYPE 2
// NODATA response in which the RCODE is set to NOERROR (RcodeSuccess), the Answer
// section is empty, and the Authority section contains the SOA record.
var errNoData = fmt.Errorf("no DNS Answer")
// ecsNotGlobalError may be used to wrap an error or nil, to indicate that the
// EDNS client subnet source scope is not global.
type ecsNotGlobalError struct {
error
}
func (e ecsNotGlobalError) Error() string {
if e.error == nil {
return ""
}
return e.error.Error()
}
func (e ecsNotGlobalError) Is(other error) bool {
return other == errECSNotGlobal
}
func (e ecsNotGlobalError) Unwrap() error {
return e.error
}
type queryLocality struct {
// datacenter is the datacenter parsed from a label that has an explicit datacenter part.
// Example query: <service>.virtual.<namespace>.ns.<partition>.ap.<datacenter>.dc.consul
datacenter string
// peer is the peer name parsed from a label that has explicit parts.
// Example query: <service>.virtual.<namespace>.ns.<peer>.peer.<partition>.ap.consul
peer string
// peerOrDatacenter is parsed from DNS queries where the datacenter and peer name are
// specified in the same query part.
// Example query: <service>.virtual.<peerOrDatacenter>.consul
//
// Note that this field should only be a "peer" for virtual queries, since virtual IPs should
// not be shared between datacenters. In all other cases, it should be considered a DC.
peerOrDatacenter string
acl.EnterpriseMeta
}
func (l queryLocality) effectiveDatacenter(defaultDC string) string {
// Prefer the value parsed from a query with explicit parts: <namespace>.ns.<partition>.ap.<datacenter>.dc
if l.datacenter != "" {
return l.datacenter
}
// Fall back to the ambiguously parsed DC or Peer.
if l.peerOrDatacenter != "" {
return l.peerOrDatacenter
}
// If all are empty, use a default value.
return defaultDC
}
// dispatch is used to parse a request and invoke the correct handler.
// parameter maxRecursionLevel will handle whether recursive call can be performed
func (d *DNSServer) dispatch(remoteAddr net.Addr, req, resp *dns.Msg, maxRecursionLevel int) error {
// Choose correct response domain
respDomain := d.getResponseDomain(req.Question[0].Name)
// Get the QName without the domain suffix
qName := strings.ToLower(dns.Fqdn(req.Question[0].Name))
qName = d.trimDomain(qName)
// Split into the label parts
labels := dns.SplitDomainName(qName)
cfg := d.config.Load().(*dnsConfig)
var queryKind string
var queryParts []string
var querySuffixes []string
done := false
for i := len(labels) - 1; i >= 0 && !done; i-- {
switch labels[i] {
case "service", "connect", "virtual", "ingress", "node", "query", "addr":
queryParts = labels[:i]
querySuffixes = labels[i+1:]
queryKind = labels[i]
done = true
default:
// If this is a SRV query the "service" label is optional, we add it back to use the
// existing code-path.
if req.Question[0].Qtype == dns.TypeSRV && strings.HasPrefix(labels[i], "_") {
queryKind = "service"
queryParts = labels[:i+1]
querySuffixes = labels[i+1:]
done = true
}
}
}
invalid := func() error {
d.logger.Warn("QName invalid", "qname", qName)
return errNameNotFound
}
switch queryKind {
case "service":
n := len(queryParts)
if n < 1 {
return invalid()
}
localities, err := d.parseSamenessGroupLocality(cfg, querySuffixes, invalid)
if err != nil {
return err
}
// Loop over the localities and return as soon as a lookup is successful
for _, locality := range localities {
d.logger.Debug("labels", "querySuffixes", querySuffixes)
lookup := serviceLookup{
Datacenter: locality.effectiveDatacenter(d.agent.config.Datacenter),
PeerName: locality.peer,
Connect: false,
Ingress: false,
MaxRecursionLevel: maxRecursionLevel,
EnterpriseMeta: locality.EnterpriseMeta,
}
// Only one of dc or peer can be used.
if lookup.PeerName != "" {
lookup.Datacenter = ""
}
// Support RFC 2782 style syntax
if n == 2 && strings.HasPrefix(queryParts[1], "_") && strings.HasPrefix(queryParts[0], "_") {
// Grab the tag since we make nuke it if it's tcp
tag := queryParts[1][1:]
// Treat _name._tcp.service.consul as a default, no need to filter on that tag
if tag == "tcp" {
tag = ""
}
lookup.Tag = tag
lookup.Service = queryParts[0][1:]
// _name._tag.service.consul
} else {
// Consul 0.3 and prior format for SRV queries
// Support "." in the label, re-join all the parts
tag := ""
if n >= 2 {
tag = strings.Join(queryParts[:n-1], ".")
}
lookup.Tag = tag
lookup.Service = queryParts[n-1]
// tag[.tag].name.service.consul
}
err = d.handleServiceQuery(cfg, lookup, req, resp)
// Return if we are error free right away, otherwise loop again if we can
if err == nil {
return nil
}
}
// We've exhausted all DNS possibilities so return here
return err
case "connect":
if len(queryParts) < 1 {
return invalid()
}
locality, ok := d.parseLocality(querySuffixes, cfg)
if !ok {
return invalid()
}
// Peering is not currently supported for connect queries.
// Exposing this likely would not provide much value, since users would
// need to be very familiar with our TLS / SNI / mesh gateways to leverage it.
lookup := serviceLookup{
Datacenter: locality.effectiveDatacenter(d.agent.config.Datacenter),
Service: queryParts[len(queryParts)-1],
Connect: true,
Ingress: false,
MaxRecursionLevel: maxRecursionLevel,
EnterpriseMeta: locality.EnterpriseMeta,
}
// name.connect.consul
return d.handleServiceQuery(cfg, lookup, req, resp)
case "virtual":
if len(queryParts) < 1 {
return invalid()
}
locality, ok := d.parseLocality(querySuffixes, cfg)
if !ok {
return invalid()
}
args := structs.ServiceSpecificRequest{
// The datacenter of the request is not specified because cross-datacenter virtual IP
// queries are not supported. This guard rail is in place because virtual IPs are allocated
// within a DC, therefore their uniqueness is not guaranteed globally.
PeerName: locality.peer,
ServiceName: queryParts[len(queryParts)-1],
EnterpriseMeta: locality.EnterpriseMeta,
QueryOptions: structs.QueryOptions{
Token: d.coalesceDNSToken(),
},
}
if args.PeerName == "" {
// If the peer name was not explicitly defined, fall back to the ambiguously-parsed version.
args.PeerName = locality.peerOrDatacenter
}
var out string
if err := d.agent.RPC(context.Background(), "Catalog.VirtualIPForService", &args, &out); err != nil {
return err
}
if out != "" {
resp.Answer = append(resp.Answer, &dns.A{
Hdr: dns.RR_Header{
Name: qName + respDomain,
Rrtype: dns.TypeA,
Class: dns.ClassINET,
Ttl: uint32(cfg.NodeTTL / time.Second),
},
A: net.ParseIP(out),
})
}
return nil
case "ingress":
if len(queryParts) < 1 {
return invalid()
}
locality, ok := d.parseLocality(querySuffixes, cfg)
if !ok {
return invalid()
}
// Peering is not currently supported for ingress queries.
// We probably should not be encouraging chained calls from ingress to peers anyway.
lookup := serviceLookup{
Datacenter: locality.effectiveDatacenter(d.agent.config.Datacenter),
Service: queryParts[len(queryParts)-1],
Connect: false,
Ingress: true,
MaxRecursionLevel: maxRecursionLevel,
EnterpriseMeta: locality.EnterpriseMeta,
}
// name.ingress.consul
return d.handleServiceQuery(cfg, lookup, req, resp)
case "node":
if len(queryParts) < 1 {
return invalid()
}
locality, ok := d.parseLocality(querySuffixes, cfg)
if !ok {
return invalid()
}
// Nodes are only registered in the default namespace so queries
// must not specify a non-default namespace.
if !locality.InDefaultNamespace() {
return invalid()
}
// Allow a "." in the node name, just join all the parts
node := strings.Join(queryParts, ".")
lookup := nodeLookup{
Datacenter: locality.effectiveDatacenter(d.agent.config.Datacenter),
PeerName: locality.peer,
Node: node,
MaxRecursionLevel: maxRecursionLevel,
EnterpriseMeta: locality.EnterpriseMeta,
}
// Only one of dc or peer can be used.
if lookup.PeerName != "" {
lookup.Datacenter = ""
}
return d.handleNodeQuery(cfg, lookup, req, resp)
case "query":
n := len(queryParts)
datacenter := d.agent.config.Datacenter
// ensure we have a query name
if n < 1 {
return invalid()
}
if !d.parseDatacenter(querySuffixes, &datacenter) {
return invalid()
}
query := ""
// If the first and last DNS query parts begin with _, this is an RFC 2782 style SRV lookup.
// This allows for prepared query names to include "." (for backwards compatibility).
// Otherwise, this is a standard prepared query lookup.
if n >= 2 && strings.HasPrefix(queryParts[0], "_") && strings.HasPrefix(queryParts[n-1], "_") {
// The last DNS query part is the protocol field (ignored).
// All prior parts are the prepared query name or ID.
query = strings.Join(queryParts[:n-1], ".")
// Strip leading underscore
query = query[1:]
} else {
// Allow a "." in the query name, just join all the parts.
query = strings.Join(queryParts, ".")
}
err := d.handlePreparedQuery(cfg, datacenter, query, remoteAddr, req, resp, maxRecursionLevel)
return ecsNotGlobalError{error: err}
case "addr":
// <address>.addr.<suffixes>.<domain> - addr must be the second label, datacenter is optional
if len(queryParts) != 1 {
return invalid()
}
switch len(queryParts[0]) / 2 {
// IPv4
case 4:
ip, err := hex.DecodeString(queryParts[0])
if err != nil {
return invalid()
}
// check if the query type is A for IPv4 or ANY
aRecord := &dns.A{
Hdr: dns.RR_Header{
Name: qName + respDomain,
Rrtype: dns.TypeA,
Class: dns.ClassINET,
Ttl: uint32(cfg.NodeTTL / time.Second),
},
A: ip,
}
if req.Question[0].Qtype != dns.TypeA && req.Question[0].Qtype != dns.TypeANY {
resp.Extra = append(resp.Answer, aRecord)
} else {
resp.Answer = append(resp.Answer, aRecord)
}
// IPv6
case 16:
ip, err := hex.DecodeString(queryParts[0])
if err != nil {
return invalid()
}
// check if the query type is AAAA for IPv6 or ANY
aaaaRecord := &dns.AAAA{
Hdr: dns.RR_Header{
Name: qName + respDomain,
Rrtype: dns.TypeAAAA,
Class: dns.ClassINET,
Ttl: uint32(cfg.NodeTTL / time.Second),
},
AAAA: ip,
}
if req.Question[0].Qtype != dns.TypeAAAA && req.Question[0].Qtype != dns.TypeANY {
resp.Extra = append(resp.Extra, aaaaRecord)
} else {
resp.Answer = append(resp.Answer, aaaaRecord)
}
default:
return invalid()
}
return nil
default:
return invalid()
}
}
func (d *DNSServer) trimDomain(query string) string {
longer := d.domain
shorter := d.altDomain
if len(shorter) > len(longer) {
longer, shorter = shorter, longer
}
if strings.HasSuffix(query, "."+strings.TrimLeft(longer, ".")) {
return strings.TrimSuffix(query, longer)
}
return strings.TrimSuffix(query, shorter)
}
// rCodeFromError return the appropriate DNS response code for a given error
func rCodeFromError(err error) int {
switch {
case err == nil:
return dns.RcodeSuccess
case errors.Is(err, errNoData):
return dns.RcodeSuccess
case errors.Is(err, errECSNotGlobal):
return rCodeFromError(errors.Unwrap(err))
case errors.Is(err, errNameNotFound):
return dns.RcodeNameError
case structs.IsErrNoDCPath(err) || structs.IsErrQueryNotFound(err):
return dns.RcodeNameError
default:
return dns.RcodeServerFailure
}
}
// handleNodeQuery is used to handle a node query
func (d *DNSServer) handleNodeQuery(cfg *dnsConfig, lookup nodeLookup, req, resp *dns.Msg) error {
// Only handle ANY, A, AAAA, and TXT type requests
qType := req.Question[0].Qtype
if qType != dns.TypeANY && qType != dns.TypeA && qType != dns.TypeAAAA && qType != dns.TypeTXT {
return nil
}
// Make an RPC request
args := &structs.NodeSpecificRequest{
Datacenter: lookup.Datacenter,
PeerName: lookup.PeerName,
Node: lookup.Node,
QueryOptions: structs.QueryOptions{
Token: d.coalesceDNSToken(),
AllowStale: cfg.AllowStale,
},
EnterpriseMeta: lookup.EnterpriseMeta,
}
out, err := d.lookupNode(cfg, args)
if err != nil {
return fmt.Errorf("failed rpc request: %w", err)
}
// If we have no out.NodeServices.Nodeaddress, return not found!
if out.NodeServices == nil {
return errNameNotFound
}
// Add the node record
n := out.NodeServices.Node
metaTarget := &resp.Extra
if qType == dns.TypeTXT || qType == dns.TypeANY {
metaTarget = &resp.Answer
}
q := req.Question[0]
// Only compute A and CNAME record if query is not TXT type
if qType != dns.TypeTXT {
records := d.makeRecordFromNode(n, q.Qtype, q.Name, cfg.NodeTTL, lookup.MaxRecursionLevel)
resp.Answer = append(resp.Answer, records...)
}
if cfg.NodeMetaTXT || qType == dns.TypeTXT || qType == dns.TypeANY {
metas := d.makeTXTRecordFromNodeMeta(q.Name, n, cfg.NodeTTL)
*metaTarget = append(*metaTarget, metas...)
}
return nil
}
// lookupNode is used to look up a node in the Consul catalog within NodeServices.
// If the config is set to UseCache, it will get the record from the agent cache.
func (d *DNSServer) lookupNode(cfg *dnsConfig, args *structs.NodeSpecificRequest) (*structs.IndexedNodeServices, error) {
var out structs.IndexedNodeServices
useCache := cfg.UseCache
RPC:
if useCache {
raw, _, err := d.agent.cache.Get(context.TODO(), cachetype.NodeServicesName, args)
if err != nil {
return nil, err
}
reply, ok := raw.(*structs.IndexedNodeServices)
if !ok {
// This should never happen, but we want to protect against panics
return nil, fmt.Errorf("internal error: response type not correct")
}
out = *reply
} else {
if err := d.agent.RPC(context.Background(), "Catalog.NodeServices", &args, &out); err != nil {
return nil, err
}
}
// Verify that request is not too stale, redo the request
if args.AllowStale {
if out.LastContact > cfg.MaxStale {
args.AllowStale = false
useCache = false
d.logger.Warn("Query results too stale, re-requesting")
goto RPC
} else if out.LastContact > staleCounterThreshold {
metrics.IncrCounter([]string{"dns", "stale_queries"}, 1)
}
}
return &out, nil
}
// encodeKVasRFC1464 encodes a key-value pair according to RFC1464
func encodeKVasRFC1464(key, value string) (txt string) {
// For details on these replacements c.f. https://www.ietf.org/rfc/rfc1464.txt
key = strings.Replace(key, "`", "``", -1)
key = strings.Replace(key, "=", "`=", -1)
// Backquote the leading spaces
leadingSpacesRE := regexp.MustCompile("^ +")
numLeadingSpaces := len(leadingSpacesRE.FindString(key))
key = leadingSpacesRE.ReplaceAllString(key, strings.Repeat("` ", numLeadingSpaces))
// Backquote the trailing spaces
trailingSpacesRE := regexp.MustCompile(" +$")
numTrailingSpaces := len(trailingSpacesRE.FindString(key))
key = trailingSpacesRE.ReplaceAllString(key, strings.Repeat("` ", numTrailingSpaces))
value = strings.Replace(value, "`", "``", -1)
return key + "=" + value
}
// indexRRs populates a map which indexes a given list of RRs by name. NOTE that
// the names are all squashed to lower case so we can perform case-insensitive
// lookups; the RRs are not modified.
func indexRRs(rrs []dns.RR, index map[string]dns.RR) {
for _, rr := range rrs {
name := strings.ToLower(rr.Header().Name)
if _, ok := index[name]; !ok {
index[name] = rr
}
}
}
// syncExtra takes a DNS response message and sets the extra data to the most
// minimal set needed to cover the answer data. A pre-made index of RRs is given
// so that can be re-used between calls. This assumes that the extra data is
// only used to provide info for SRV records. If that's not the case, then this
// will wipe out any additional data.
func syncExtra(index map[string]dns.RR, resp *dns.Msg) {
extra := make([]dns.RR, 0, len(resp.Answer))
resolved := make(map[string]struct{}, len(resp.Answer))
for _, ansRR := range resp.Answer {
srv, ok := ansRR.(*dns.SRV)
if !ok {
continue
}
// Note that we always use lower case when using the index so
// that compares are not case-sensitive. We don't alter the actual
// RRs we add into the extra section, however.
target := strings.ToLower(srv.Target)
RESOLVE:
if _, ok := resolved[target]; ok {
continue
}
resolved[target] = struct{}{}
extraRR, ok := index[target]
if ok {
extra = append(extra, extraRR)
if cname, ok := extraRR.(*dns.CNAME); ok {
target = strings.ToLower(cname.Target)
goto RESOLVE
}
}
}
resp.Extra = extra
}
// dnsBinaryTruncate find the optimal number of records using a fast binary search and return
// it in order to return a DNS answer lower than maxSize parameter.
func dnsBinaryTruncate(resp *dns.Msg, maxSize int, index map[string]dns.RR, hasExtra bool) int {
originalAnswser := resp.Answer
startIndex := 0
endIndex := len(resp.Answer) + 1
for endIndex-startIndex > 1 {
median := startIndex + (endIndex-startIndex)/2
resp.Answer = originalAnswser[:median]
if hasExtra {
syncExtra(index, resp)
}
aLen := resp.Len()
if aLen <= maxSize {
if maxSize-aLen < 10 {
// We are good, increasing will go out of bounds
return median
}
startIndex = median
} else {
endIndex = median
}
}
return startIndex
}
// trimTCPResponse limit the MaximumSize of messages to 64k as it is the limit
// of DNS responses
func trimTCPResponse(req, resp *dns.Msg) (trimmed bool) {
hasExtra := len(resp.Extra) > 0
// There is some overhead, 65535 does not work
maxSize := 65523 // 64k - 12 bytes DNS raw overhead
// We avoid some function calls and allocations by only handling the
// extra data when necessary.
var index map[string]dns.RR
// It is not possible to return more than 4k records even with compression
// Since we are performing binary search it is not a big deal, but it
// improves a bit performance, even with binary search
truncateAt := 4096
if req.Question[0].Qtype == dns.TypeSRV {
// More than 1024 SRV records do not fit in 64k
truncateAt = 1024
}
if len(resp.Answer) > truncateAt {
resp.Answer = resp.Answer[:truncateAt]
}
if hasExtra {
index = make(map[string]dns.RR, len(resp.Extra))
indexRRs(resp.Extra, index)
}
truncated := false
// This enforces the given limit on 64k, the max limit for DNS messages
for len(resp.Answer) > 1 && resp.Len() > maxSize {
truncated = true
// first try to remove the NS section may be it will truncate enough
if len(resp.Ns) != 0 {
resp.Ns = []dns.RR{}
}
// More than 100 bytes, find with a binary search
if resp.Len()-maxSize > 100 {
bestIndex := dnsBinaryTruncate(resp, maxSize, index, hasExtra)
resp.Answer = resp.Answer[:bestIndex]
} else {
resp.Answer = resp.Answer[:len(resp.Answer)-1]
}
if hasExtra {
syncExtra(index, resp)
}
}
return truncated
}
// trimUDPResponse makes sure a UDP response is not longer than allowed by RFC
// 1035. Enforce an arbitrary limit that can be further ratcheted down by
// config, and then make sure the response doesn't exceed 512 bytes. Any extra
// records will be trimmed along with answers.
func trimUDPResponse(req, resp *dns.Msg, udpAnswerLimit int) (trimmed bool) {
numAnswers := len(resp.Answer)
hasExtra := len(resp.Extra) > 0
maxSize := defaultMaxUDPSize
// Update to the maximum edns size
if edns := req.IsEdns0(); edns != nil {
if size := edns.UDPSize(); size > uint16(maxSize) {
maxSize = int(size)
}
}
// Overriding maxSize as the maxSize cannot be larger than the
// maxUDPDatagram size. Reliability guarantees disappear > than this amount.
if maxSize > maxUDPDatagramSize {
maxSize = maxUDPDatagramSize
}
// We avoid some function calls and allocations by only handling the
// extra data when necessary.
var index map[string]dns.RR
if hasExtra {
index = make(map[string]dns.RR, len(resp.Extra))
indexRRs(resp.Extra, index)
}
// This cuts UDP responses to a useful but limited number of responses.
maxAnswers := lib.MinInt(maxUDPAnswerLimit, udpAnswerLimit)
compress := resp.Compress
if maxSize == defaultMaxUDPSize && numAnswers > maxAnswers {
// We disable computation of Len ONLY for non-eDNS request (512 bytes)
resp.Compress = false
resp.Answer = resp.Answer[:maxAnswers]
if hasExtra {
syncExtra(index, resp)
}
}
// This enforces the given limit on the number bytes. The default is 512 as
// per the RFC, but EDNS0 allows for the user to specify larger sizes. Note
// that we temporarily switch to uncompressed so that we limit to a response
// that will not exceed 512 bytes uncompressed, which is more conservative and
// will allow our responses to be compliant even if some downstream server
// uncompresses them.
// Even when size is too big for one single record, try to send it anyway
// (useful for 512 bytes messages). 8 is removed from maxSize to ensure that we account
// for the udp header (8 bytes).
for len(resp.Answer) > 1 && resp.Len() > maxSize-8 {
// first try to remove the NS section may be it will truncate enough
if len(resp.Ns) != 0 {
resp.Ns = []dns.RR{}
}
// More than 100 bytes, find with a binary search
if resp.Len()-maxSize > 100 {
bestIndex := dnsBinaryTruncate(resp, maxSize, index, hasExtra)
resp.Answer = resp.Answer[:bestIndex]
} else {
resp.Answer = resp.Answer[:len(resp.Answer)-1]
}
if hasExtra {
syncExtra(index, resp)
}
}
// For 512 non-eDNS responses, while we compute size non-compressed,
// we send result compressed
resp.Compress = compress
return len(resp.Answer) < numAnswers
}
// trimDNSResponse will trim the response for UDP and TCP
func (d *DNSServer) trimDNSResponse(cfg *dnsConfig, network string, req, resp *dns.Msg) {
var trimmed bool
originalSize := resp.Len()
originalNumRecords := len(resp.Answer)
if network != "tcp" {
trimmed = trimUDPResponse(req, resp, cfg.UDPAnswerLimit)
} else {
trimmed = trimTCPResponse(req, resp)
}
// Flag that there are more records to return in the UDP response
if trimmed {
if cfg.EnableTruncate {
resp.Truncated = true
}
d.logger.Debug("DNS response too large, truncated",
"protocol", network,
"question", req.Question,
"records", fmt.Sprintf("%d/%d", len(resp.Answer), originalNumRecords),
"size", fmt.Sprintf("%d/%d", resp.Len(), originalSize),
)
}
}
// lookupServiceNodes is used to look up a node in the Consul health catalog within ServiceNodes.
// If the config is set to UseCache, it will get the record from the agent cache.
func (d *DNSServer) lookupServiceNodes(cfg *dnsConfig, lookup serviceLookup) (structs.IndexedCheckServiceNodes, error) {
serviceTags := []string{}
if lookup.Tag != "" {
serviceTags = []string{lookup.Tag}
}
args := structs.ServiceSpecificRequest{
PeerName: lookup.PeerName,
Connect: lookup.Connect,
Ingress: lookup.Ingress,
Datacenter: lookup.Datacenter,
ServiceName: lookup.Service,
ServiceTags: serviceTags,
TagFilter: lookup.Tag != "",
QueryOptions: structs.QueryOptions{
Token: d.coalesceDNSToken(),
AllowStale: cfg.AllowStale,
MaxAge: cfg.CacheMaxAge,
UseCache: cfg.UseCache,
MaxStaleDuration: cfg.MaxStale,
},
EnterpriseMeta: lookup.EnterpriseMeta,
}
out, _, err := d.agent.rpcClientHealth.ServiceNodes(context.TODO(), args)
if err != nil {
return out, err
}
// Filter out any service nodes due to health checks
// We copy the slice to avoid modifying the result if it comes from the cache
nodes := make(structs.CheckServiceNodes, len(out.Nodes))
copy(nodes, out.Nodes)
out.Nodes = nodes.Filter(cfg.OnlyPassing)
return out, nil
}
// handleServiceQuery is used to handle a service query
func (d *DNSServer) handleServiceQuery(cfg *dnsConfig, lookup serviceLookup, req, resp *dns.Msg) error {
out, err := d.lookupServiceNodes(cfg, lookup)
if err != nil {
return fmt.Errorf("rpc request failed: %w", err)
}
// If we have no nodes, return not found!
if len(out.Nodes) == 0 {
return errNameNotFound
}
// Perform a random shuffle
out.Nodes.Shuffle()
// Determine the TTL
ttl, _ := cfg.GetTTLForService(lookup.Service)
// Add various responses depending on the request
qType := req.Question[0].Qtype
if qType == dns.TypeSRV {
d.addServiceSRVRecordsToMessage(cfg, lookup, out.Nodes, req, resp, ttl, lookup.MaxRecursionLevel)
} else {
d.addServiceNodeRecordsToMessage(cfg, lookup, out.Nodes, req, resp, ttl, lookup.MaxRecursionLevel)
}
if len(resp.Answer) == 0 {
return errNoData
}
return nil
}
func ednsSubnetForRequest(req *dns.Msg) *dns.EDNS0_SUBNET {
// IsEdns0 returns the EDNS RR if present or nil otherwise
edns := req.IsEdns0()
if edns == nil {
return nil
}
for _, o := range edns.Option {
if subnet, ok := o.(*dns.EDNS0_SUBNET); ok {
return subnet
}
}
return nil
}
// handlePreparedQuery is used to handle a prepared query.
func (d *DNSServer) handlePreparedQuery(cfg *dnsConfig, datacenter, query string, remoteAddr net.Addr, req, resp *dns.Msg, maxRecursionLevel int) error {
// Execute the prepared query.
args := structs.PreparedQueryExecuteRequest{
Datacenter: datacenter,
QueryIDOrName: query,
QueryOptions: structs.QueryOptions{
Token: d.coalesceDNSToken(),
AllowStale: cfg.AllowStale,
MaxAge: cfg.CacheMaxAge,
},
// Always pass the local agent through. In the DNS interface, there
// is no provision for passing additional query parameters, so we
// send the local agent's data through to allow distance sorting
// relative to ourself on the server side.
Agent: structs.QuerySource{
Datacenter: d.agent.config.Datacenter,
Segment: d.agent.config.SegmentName,
Node: d.agent.config.NodeName,
NodePartition: d.agent.config.PartitionOrEmpty(),
},
}
subnet := ednsSubnetForRequest(req)
if subnet != nil {
args.Source.Ip = subnet.Address.String()
} else {
switch v := remoteAddr.(type) {
case *net.UDPAddr:
args.Source.Ip = v.IP.String()
case *net.TCPAddr:
args.Source.Ip = v.IP.String()
case *net.IPAddr:
args.Source.Ip = v.IP.String()
}
}
out, err := d.lookupPreparedQuery(cfg, args)
if err != nil {
return err
}
// TODO (slackpad) - What's a safe limit we can set here? It seems like
// with dup filtering done at this level we need to get everything to
// match the previous behavior. We can optimize by pushing more filtering
// into the query execution, but for now I think we need to get the full
// response. We could also choose a large arbitrary number that will
// likely work in practice, like 10*maxUDPAnswerLimit which should help
// reduce bandwidth if there are thousands of nodes available.
// Determine the TTL. The parse should never fail since we vet it when
// the query is created, but we check anyway. If the query didn't
// specify a TTL then we will try to use the agent's service-specific
// TTL configs.
var ttl time.Duration
if out.DNS.TTL != "" {
var err error
ttl, err = time.ParseDuration(out.DNS.TTL)
if err != nil {
d.logger.Warn("Failed to parse TTL for prepared query , ignoring",
"ttl", out.DNS.TTL,
"prepared_query", query,
)
}
} else {
ttl, _ = cfg.GetTTLForService(out.Service)
}
// If we have no nodes, return not found!
if len(out.Nodes) == 0 {
return errNameNotFound
}
// Add various responses depending on the request.
qType := req.Question[0].Qtype
// This serviceLookup only needs the datacenter field populated,
// because peering is not supported with prepared queries.
lookup := serviceLookup{Datacenter: out.Datacenter}
if qType == dns.TypeSRV {
d.addServiceSRVRecordsToMessage(cfg, lookup, out.Nodes, req, resp, ttl, maxRecursionLevel)
} else {
d.addServiceNodeRecordsToMessage(cfg, lookup, out.Nodes, req, resp, ttl, maxRecursionLevel)
}
if len(resp.Answer) == 0 {
return errNoData
}
return nil
}
// lookupPreparedQuery is used to execute a PreparedQuery against the Consul catalog.
// If the config is set to UseCache, it will use agent cache.
func (d *DNSServer) lookupPreparedQuery(cfg *dnsConfig, args structs.PreparedQueryExecuteRequest) (*structs.PreparedQueryExecuteResponse, error) {
var out structs.PreparedQueryExecuteResponse
RPC:
if cfg.UseCache {
raw, m, err := d.agent.cache.Get(context.TODO(), cachetype.PreparedQueryName, &args)
if err != nil {
return nil, err
}
reply, ok := raw.(*structs.PreparedQueryExecuteResponse)
if !ok {
// This should never happen, but we want to protect against panics
return nil, err
}
d.logger.Trace("cache results for prepared query",
"cache_hit", m.Hit,
"prepared_query", args.QueryIDOrName,
)
out = *reply
} else {
if err := d.agent.RPC(context.Background(), "PreparedQuery.Execute", &args, &out); err != nil {
return nil, err
}
}
// Verify that request is not too stale, redo the request.
if args.AllowStale {
if out.LastContact > cfg.MaxStale {
args.AllowStale = false
d.logger.Warn("Query results too stale, re-requesting")
goto RPC
} else if out.LastContact > staleCounterThreshold {
metrics.IncrCounter([]string{"dns", "stale_queries"}, 1)
}
}
return &out, nil
}
// addServiceNodeRecordsToMessage is used to add the node records for a service lookup
func (d *DNSServer) addServiceNodeRecordsToMessage(cfg *dnsConfig, lookup serviceLookup, nodes structs.CheckServiceNodes, req, resp *dns.Msg, ttl time.Duration, maxRecursionLevel int) {
handled := make(map[string]struct{})
var answerCNAME []dns.RR = nil
count := 0
for _, node := range nodes {
// Add the node record
had_answer := false
records, _ := d.makeNodeServiceRecords(lookup, node, req, ttl, cfg, maxRecursionLevel)
if len(records) == 0 {
continue
}
// Avoid duplicate entries, possible if a node has
// the same service on multiple ports, etc.
if _, ok := handled[records[0].String()]; ok {
continue
}
handled[records[0].String()] = struct{}{}
switch records[0].(type) {
case *dns.CNAME:
// keep track of the first CNAME + associated RRs but don't add to the resp.Answer yet
// this will only be added if no non-CNAME RRs are found
if len(answerCNAME) == 0 {
answerCNAME = records
}
default:
resp.Answer = append(resp.Answer, records...)
had_answer = true
}
if had_answer {
count++
if count == cfg.ARecordLimit {
// We stop only if greater than 0 or we reached the limit
return
}
}
}
if len(resp.Answer) == 0 && len(answerCNAME) > 0 {
resp.Answer = answerCNAME
}
}
func findWeight(node structs.CheckServiceNode) int {
// By default, when only_passing is false, warning and passing nodes are returned
// Those values will be used if using a client with support while server has no
// support for weights
weightPassing := 1
weightWarning := 1
if node.Service.Weights != nil {
weightPassing = node.Service.Weights.Passing
weightWarning = node.Service.Weights.Warning
}
serviceChecks := make(api.HealthChecks, 0)
for _, c := range node.Checks {
if c.ServiceName == node.Service.Service || c.ServiceName == "" {
healthCheck := &api.HealthCheck{
Node: c.Node,
CheckID: string(c.CheckID),
Name: c.Name,
Status: c.Status,
Notes: c.Notes,
Output: c.Output,
ServiceID: c.ServiceID,
ServiceName: c.ServiceName,
ServiceTags: c.ServiceTags,
}
serviceChecks = append(serviceChecks, healthCheck)
}
}
status := serviceChecks.AggregatedStatus()
switch status {
case api.HealthWarning:
return weightWarning
case api.HealthPassing:
return weightPassing
case api.HealthMaint:
// Not used in theory
return 0
case api.HealthCritical:
// Should not happen since already filtered
return 0
default:
// When non-standard status, return 1
return 1
}
}
func (d *DNSServer) encodeIPAsFqdn(questionName string, lookup serviceLookup, ip net.IP) string {
ipv4 := ip.To4()
respDomain := d.getResponseDomain(questionName)
ipStr := hex.EncodeToString(ip)
if ipv4 != nil {
ipStr = ipStr[len(ipStr)-(net.IPv4len*2):]
}
if lookup.PeerName != "" {
// Exclude the datacenter from the FQDN on the addr for peers.
// This technically makes no difference, since the addr endpoint ignores the DC
// component of the request, but do it anyway for a less confusing experience.
return fmt.Sprintf("%s.addr.%s", ipStr, respDomain)
}
return fmt.Sprintf("%s.addr.%s.%s", ipStr, lookup.Datacenter, respDomain)
}
// Craft dns records for a an A record for an IP address
func makeARecord(qType uint16, ip net.IP, ttl time.Duration) dns.RR {
var ipRecord dns.RR
ipv4 := ip.To4()
if ipv4 != nil {
if qType == dns.TypeSRV || qType == dns.TypeA || qType == dns.TypeANY || qType == dns.TypeNS || qType == dns.TypeTXT {
ipRecord = &dns.A{
Hdr: dns.RR_Header{
Rrtype: dns.TypeA,
Class: dns.ClassINET,
Ttl: uint32(ttl / time.Second),
},
A: ipv4,
}
}
} else if qType == dns.TypeSRV || qType == dns.TypeAAAA || qType == dns.TypeANY || qType == dns.TypeNS || qType == dns.TypeTXT {
ipRecord = &dns.AAAA{
Hdr: dns.RR_Header{
Rrtype: dns.TypeAAAA,
Class: dns.ClassINET,
Ttl: uint32(ttl / time.Second),
},
AAAA: ip,
}
}
return ipRecord
}
// Craft dns records for a node
// In case of an SRV query the answer will be a IN SRV and additional data will store an IN A to the node IP
// Otherwise it will return a IN A record
func (d *DNSServer) makeRecordFromNode(node *structs.Node, qType uint16, qName string, ttl time.Duration, maxRecursionLevel int) []dns.RR {
addrTranslate := dnsutil.TranslateAddressAcceptDomain
if qType == dns.TypeA {
addrTranslate |= dnsutil.TranslateAddressAcceptIPv4
} else if qType == dns.TypeAAAA {
addrTranslate |= dnsutil.TranslateAddressAcceptIPv6
} else {
addrTranslate |= dnsutil.TranslateAddressAcceptAny
}
addr := d.agent.TranslateAddress(node.Datacenter, node.Address, node.TaggedAddresses, addrTranslate)
ip := net.ParseIP(addr)
var res []dns.RR
if ip == nil {
res = append(res, &dns.CNAME{
Hdr: dns.RR_Header{
Name: qName,
Rrtype: dns.TypeCNAME,
Class: dns.ClassINET,
Ttl: uint32(ttl / time.Second),
},
Target: dns.Fqdn(node.Address),
})
res = append(res,
d.resolveCNAME(d.config.Load().(*dnsConfig), dns.Fqdn(node.Address), maxRecursionLevel)...,
)
return res
}
ipRecord := makeARecord(qType, ip, ttl)
if ipRecord == nil {
return nil
}
ipRecord.Header().Name = qName
return []dns.RR{ipRecord}
}
// Craft dns records for a service
// In case of an SRV query the answer will be a IN SRV and additional data will store an IN A to the node IP
// Otherwise it will return a IN A record
func (d *DNSServer) makeRecordFromServiceNode(lookup serviceLookup, serviceNode structs.CheckServiceNode, addr net.IP, req *dns.Msg, ttl time.Duration) ([]dns.RR, []dns.RR) {
q := req.Question[0]
ipRecord := makeARecord(q.Qtype, addr, ttl)
if ipRecord == nil {
return nil, nil
}
if q.Qtype == dns.TypeSRV {
respDomain := d.getResponseDomain(q.Name)
nodeFQDN := nodeCanonicalDNSName(lookup, serviceNode.Node.Node, respDomain)
answers := []dns.RR{
&dns.SRV{
Hdr: dns.RR_Header{
Name: q.Name,
Rrtype: dns.TypeSRV,
Class: dns.ClassINET,
Ttl: uint32(ttl / time.Second),
},
Priority: 1,
Weight: uint16(findWeight(serviceNode)),
Port: uint16(d.agent.TranslateServicePort(lookup.Datacenter, serviceNode.Service.Port, serviceNode.Service.TaggedAddresses)),
Target: nodeFQDN,
},
}
ipRecord.Header().Name = nodeFQDN
return answers, []dns.RR{ipRecord}
}
ipRecord.Header().Name = q.Name
return []dns.RR{ipRecord}, nil
}
// Craft dns records for an IP
// In case of an SRV query the answer will be a IN SRV and additional data will store an IN A to the IP
// Otherwise it will return a IN A record
func (d *DNSServer) makeRecordFromIP(lookup serviceLookup, addr net.IP, serviceNode structs.CheckServiceNode, req *dns.Msg, ttl time.Duration) ([]dns.RR, []dns.RR) {
q := req.Question[0]
ipRecord := makeARecord(q.Qtype, addr, ttl)
if ipRecord == nil {
return nil, nil
}
if q.Qtype == dns.TypeSRV {
ipFQDN := d.encodeIPAsFqdn(q.Name, lookup, addr)
answers := []dns.RR{
&dns.SRV{
Hdr: dns.RR_Header{
Name: q.Name,
Rrtype: dns.TypeSRV,
Class: dns.ClassINET,
Ttl: uint32(ttl / time.Second),
},
Priority: 1,
Weight: uint16(findWeight(serviceNode)),
Port: uint16(d.agent.TranslateServicePort(lookup.Datacenter, serviceNode.Service.Port, serviceNode.Service.TaggedAddresses)),
Target: ipFQDN,
},
}
ipRecord.Header().Name = ipFQDN
return answers, []dns.RR{ipRecord}
}
ipRecord.Header().Name = q.Name
return []dns.RR{ipRecord}, nil
}
// Craft dns records for an FQDN
// In case of an SRV query the answer will be a IN SRV and additional data will store an IN A to the IP
// Otherwise it will return a CNAME and a IN A record
func (d *DNSServer) makeRecordFromFQDN(lookup serviceLookup, fqdn string, serviceNode structs.CheckServiceNode, req *dns.Msg, ttl time.Duration, cfg *dnsConfig, maxRecursionLevel int) ([]dns.RR, []dns.RR) {
edns := req.IsEdns0() != nil
q := req.Question[0]
more := d.resolveCNAME(cfg, dns.Fqdn(fqdn), maxRecursionLevel)
var additional []dns.RR
extra := 0
MORE_REC:
for _, rr := range more {
switch rr.Header().Rrtype {
case dns.TypeCNAME, dns.TypeA, dns.TypeAAAA:
// set the TTL manually
rr.Header().Ttl = uint32(ttl / time.Second)
additional = append(additional, rr)
extra++
if extra == maxRecurseRecords && !edns {
break MORE_REC
}
}
}
if q.Qtype == dns.TypeSRV {
answers := []dns.RR{
&dns.SRV{
Hdr: dns.RR_Header{
Name: q.Name,
Rrtype: dns.TypeSRV,
Class: dns.ClassINET,
Ttl: uint32(ttl / time.Second),
},
Priority: 1,
Weight: uint16(findWeight(serviceNode)),
Port: uint16(d.agent.TranslateServicePort(lookup.Datacenter, serviceNode.Service.Port, serviceNode.Service.TaggedAddresses)),
Target: dns.Fqdn(fqdn),
},
}
return answers, additional
}
answers := []dns.RR{
&dns.CNAME{
Hdr: dns.RR_Header{
Name: q.Name,
Rrtype: dns.TypeCNAME,
Class: dns.ClassINET,
Ttl: uint32(ttl / time.Second),
},
Target: dns.Fqdn(fqdn),
}}
answers = append(answers, additional...)
return answers, nil
}
// Craft dns records from a CheckServiceNode struct
func (d *DNSServer) makeNodeServiceRecords(lookup serviceLookup, node structs.CheckServiceNode, req *dns.Msg, ttl time.Duration, cfg *dnsConfig, maxRecursionLevel int) ([]dns.RR, []dns.RR) {
addrTranslate := dnsutil.TranslateAddressAcceptDomain
if req.Question[0].Qtype == dns.TypeA {
addrTranslate |= dnsutil.TranslateAddressAcceptIPv4
} else if req.Question[0].Qtype == dns.TypeAAAA {
addrTranslate |= dnsutil.TranslateAddressAcceptIPv6
} else {
addrTranslate |= dnsutil.TranslateAddressAcceptAny
}
// The datacenter should be empty during translation if it is a peering lookup.
// This should be fine because we should always prefer the WAN address.
serviceAddr := d.agent.TranslateServiceAddress(lookup.Datacenter, node.Service.Address, node.Service.TaggedAddresses, addrTranslate)
nodeAddr := d.agent.TranslateAddress(node.Node.Datacenter, node.Node.Address, node.Node.TaggedAddresses, addrTranslate)
if serviceAddr == "" && nodeAddr == "" {
return nil, nil
}
nodeIPAddr := net.ParseIP(nodeAddr)
serviceIPAddr := net.ParseIP(serviceAddr)
// There is no service address and the node address is an IP
if serviceAddr == "" && nodeIPAddr != nil {
if node.Node.Address != nodeAddr {
// Do not CNAME node address in case of WAN address
return d.makeRecordFromIP(lookup, nodeIPAddr, node, req, ttl)
}
return d.makeRecordFromServiceNode(lookup, node, nodeIPAddr, req, ttl)
}
// There is no service address and the node address is a FQDN (external service)
if serviceAddr == "" {
return d.makeRecordFromFQDN(lookup, nodeAddr, node, req, ttl, cfg, maxRecursionLevel)
}
// The service address is an IP
if serviceIPAddr != nil {
return d.makeRecordFromIP(lookup, serviceIPAddr, node, req, ttl)
}
// If the service address is a CNAME for the service we are looking
// for then use the node address.
if dns.Fqdn(serviceAddr) == req.Question[0].Name && nodeIPAddr != nil {
return d.makeRecordFromServiceNode(lookup, node, nodeIPAddr, req, ttl)
}
// The service address is a FQDN (external service)
return d.makeRecordFromFQDN(lookup, serviceAddr, node, req, ttl, cfg, maxRecursionLevel)
}
// Craft dns records for TXT from a node's metadata
func (d *DNSServer) makeTXTRecordFromNodeMeta(qName string, node *structs.Node, ttl time.Duration) []dns.RR {
extra := make([]dns.RR, 0, len(node.Meta))
for key, value := range node.Meta {
txt := value
if !strings.HasPrefix(strings.ToLower(key), "rfc1035-") {
txt = encodeKVasRFC1464(key, value)
}
extra = append(extra, &dns.TXT{
Hdr: dns.RR_Header{
Name: qName,
Rrtype: dns.TypeTXT,
Class: dns.ClassINET,
Ttl: uint32(ttl / time.Second),
},
Txt: []string{txt},
})
}
return extra
}
// addServiceSRVRecordsToMessage is used to add the SRV records for a service lookup
func (d *DNSServer) addServiceSRVRecordsToMessage(cfg *dnsConfig, lookup serviceLookup, nodes structs.CheckServiceNodes, req, resp *dns.Msg, ttl time.Duration, maxRecursionLevel int) {
handled := make(map[string]struct{})
for _, node := range nodes {
// Avoid duplicate entries, possible if a node has
// the same service the same port, etc.
// The datacenter should be empty during translation if it is a peering lookup.
// This should be fine because we should always prefer the WAN address.
serviceAddress := d.agent.TranslateServiceAddress(lookup.Datacenter, node.Service.Address, node.Service.TaggedAddresses, dnsutil.TranslateAddressAcceptAny)
servicePort := d.agent.TranslateServicePort(lookup.Datacenter, node.Service.Port, node.Service.TaggedAddresses)
tuple := fmt.Sprintf("%s:%s:%d", node.Node.Node, serviceAddress, servicePort)
if _, ok := handled[tuple]; ok {
continue
}
handled[tuple] = struct{}{}
answers, extra := d.makeNodeServiceRecords(lookup, node, req, ttl, cfg, maxRecursionLevel)
respDomain := d.getResponseDomain(req.Question[0].Name)
resp.Answer = append(resp.Answer, answers...)
resp.Extra = append(resp.Extra, extra...)
if cfg.NodeMetaTXT {
resp.Extra = append(resp.Extra, d.makeTXTRecordFromNodeMeta(nodeCanonicalDNSName(lookup, node.Node.Node, respDomain), node.Node, ttl)...)
}
}
}
// handleRecurse is used to handle recursive DNS queries
func (d *DNSServer) handleRecurse(resp dns.ResponseWriter, req *dns.Msg) {
cfg := d.config.Load().(*dnsConfig)
q := req.Question[0]
network := "udp"
defer func(s time.Time) {
d.logger.Debug("request served from client",
"question", q,
"network", network,
"latency", time.Since(s).String(),
"client", resp.RemoteAddr().String(),
"client_network", resp.RemoteAddr().Network(),
)
}(time.Now())
// Switch to TCP if the client is
if _, ok := resp.RemoteAddr().(*net.TCPAddr); ok {
network = "tcp"
}
// Recursively resolve
c := &dns.Client{Net: network, Timeout: cfg.RecursorTimeout}
var r *dns.Msg
var rtt time.Duration
var err error
for _, idx := range cfg.RecursorStrategy.Indexes(len(cfg.Recursors)) {
recursor := cfg.Recursors[idx]
r, rtt, err = c.Exchange(req, recursor)
// Check if the response is valid and has the desired Response code
if r != nil && (r.Rcode != dns.RcodeSuccess && r.Rcode != dns.RcodeNameError) {
d.logger.Debug("recurse failed for question",
"question", q,
"rtt", rtt,
"recursor", recursor,
"rcode", dns.RcodeToString[r.Rcode],
)
// If we still have recursors to forward the query to,
// we move forward onto the next one else the loop ends
continue
} else if err == nil || (r != nil && r.Truncated) {
// Compress the response; we don't know if the incoming
// response was compressed or not, so by not compressing
// we might generate an invalid packet on the way out.
r.Compress = !cfg.DisableCompression
// Forward the response
d.logger.Debug("recurse succeeded for question",
"question", q,
"rtt", rtt,
"recursor", recursor,
)
if err := resp.WriteMsg(r); err != nil {
d.logger.Warn("failed to respond", "error", err)
}
return
}
d.logger.Error("recurse failed", "error", err)
}
// If all resolvers fail, return a SERVFAIL message
d.logger.Error("all resolvers failed for question from client",
"question", q,
"client", resp.RemoteAddr().String(),
"client_network", resp.RemoteAddr().Network(),
)
m := &dns.Msg{}
m.SetReply(req)
m.Compress = !cfg.DisableCompression
m.RecursionAvailable = true
m.SetRcode(req, dns.RcodeServerFailure)
if edns := req.IsEdns0(); edns != nil {
setEDNS(req, m, true)
}
resp.WriteMsg(m)
}
// resolveCNAME is used to recursively resolve CNAME records
func (d *DNSServer) resolveCNAME(cfg *dnsConfig, name string, maxRecursionLevel int) []dns.RR {
// If the CNAME record points to a Consul address, resolve it internally
// Convert query to lowercase because DNS is case insensitive; d.domain and
// d.altDomain are already converted
if ln := strings.ToLower(name); strings.HasSuffix(ln, "."+d.domain) || strings.HasSuffix(ln, "."+d.altDomain) {
if maxRecursionLevel < 1 {
d.logger.Error("Infinite recursion detected for name, won't perform any CNAME resolution.", "name", name)
return nil
}
req := &dns.Msg{}
resp := &dns.Msg{}
req.SetQuestion(name, dns.TypeANY)
// TODO: handle error response
d.dispatch(nil, req, resp, maxRecursionLevel-1)
return resp.Answer
}
// Do nothing if we don't have a recursor
if len(cfg.Recursors) == 0 {
return nil
}
// Ask for any A records
m := new(dns.Msg)
m.SetQuestion(name, dns.TypeA)
// Make a DNS lookup request
c := &dns.Client{Net: "udp", Timeout: cfg.RecursorTimeout}
var r *dns.Msg
var rtt time.Duration
var err error
for _, idx := range cfg.RecursorStrategy.Indexes(len(cfg.Recursors)) {
recursor := cfg.Recursors[idx]
r, rtt, err = c.Exchange(m, recursor)
if err == nil {
d.logger.Debug("cname recurse RTT for name",
"name", name,
"rtt", rtt,
)
return r.Answer
}
d.logger.Error("cname recurse failed for name",
"name", name,
"error", err,
)
}
d.logger.Error("all resolvers failed for name", "name", name)
return nil
}
func (d *DNSServer) coalesceDNSToken() string {
if d.agent.tokens.DNSToken() != "" {
return d.agent.tokens.DNSToken()
} else {
return d.agent.tokens.UserToken()
}
}