consul/command/agent/dns.go

572 lines
14 KiB
Go

package agent
import (
"fmt"
"github.com/hashicorp/consul/consul/structs"
"github.com/miekg/dns"
"io"
"log"
"math/rand"
"net"
"strings"
"time"
)
const (
testQuery = "_test.consul."
consulDomain = "consul."
maxServiceResponses = 3 // For UDP only
maxRecurseRecords = 3
)
// DNSServer is used to wrap an Agent and expose various
// service discovery endpoints using a DNS interface.
type DNSServer struct {
agent *Agent
dnsHandler *dns.ServeMux
dnsServer *dns.Server
dnsServerTCP *dns.Server
domain string
recursor string
logger *log.Logger
}
// NewDNSServer starts a new DNS server to provide an agent interface
func NewDNSServer(agent *Agent, logOutput io.Writer, domain, bind, recursor string) (*DNSServer, error) {
// Make sure domain is FQDN
domain = dns.Fqdn(domain)
// Construct the DNS components
mux := dns.NewServeMux()
// Setup the servers
server := &dns.Server{
Addr: bind,
Net: "udp",
Handler: mux,
UDPSize: 65535,
}
serverTCP := &dns.Server{
Addr: bind,
Net: "tcp",
Handler: mux,
}
// Create the server
srv := &DNSServer{
agent: agent,
dnsHandler: mux,
dnsServer: server,
dnsServerTCP: serverTCP,
domain: domain,
recursor: recursor,
logger: log.New(logOutput, "", log.LstdFlags),
}
// Register mux handlers, always handle "consul."
mux.HandleFunc(domain, srv.handleQuery)
if domain != consulDomain {
mux.HandleFunc(consulDomain, srv.handleTest)
}
if recursor != "" {
recursor, err := recursorAddr(recursor)
if err != nil {
return nil, fmt.Errorf("Invalid recursor address: %v", err)
}
srv.recursor = recursor
mux.HandleFunc(".", srv.handleRecurse)
}
// Async start the DNS Servers, handle a potential error
errCh := make(chan error, 1)
go func() {
err := server.ListenAndServe()
srv.logger.Printf("[ERR] dns: error starting udp server: %v", err)
errCh <- err
}()
errChTCP := make(chan error, 1)
go func() {
err := serverTCP.ListenAndServe()
srv.logger.Printf("[ERR] dns: error starting tcp server: %v", err)
errChTCP <- err
}()
// Check the server is running, do a test lookup
checkCh := make(chan error, 1)
go func() {
// This is jank, but we have no way to edge trigger on
// the start of our server, so we just wait and hope it is up.
time.Sleep(50 * time.Millisecond)
m := new(dns.Msg)
m.SetQuestion(testQuery, dns.TypeANY)
c := new(dns.Client)
in, _, err := c.Exchange(m, bind)
if err != nil {
checkCh <- err
return
}
if len(in.Answer) == 0 {
checkCh <- fmt.Errorf("no response to test message")
return
}
close(checkCh)
}()
// Wait for either the check, listen error, or timeout
select {
case e := <-errCh:
return srv, e
case e := <-errChTCP:
return srv, e
case e := <-checkCh:
return srv, e
case <-time.After(time.Second):
return srv, fmt.Errorf("timeout setting up DNS server")
}
return srv, nil
}
// recursorAddr is used to add a port to the recursor if omitted.
func recursorAddr(recursor string) (string, error) {
// Add the port if none
START:
_, _, err := net.SplitHostPort(recursor)
if ae, ok := err.(*net.AddrError); ok && ae.Err == "missing port in address" {
recursor = fmt.Sprintf("%s:%d", recursor, 53)
goto START
}
if err != nil {
return "", err
}
// Get the address
addr, err := net.ResolveTCPAddr("tcp", recursor)
if err != nil {
return "", err
}
// Return string
return addr.String(), nil
}
// handleQUery is used to handle DNS queries in the configured domain
func (d *DNSServer) handleQuery(resp dns.ResponseWriter, req *dns.Msg) {
q := req.Question[0]
defer func(s time.Time) {
d.logger.Printf("[DEBUG] dns: request for %v (%v)", q, time.Now().Sub(s))
}(time.Now())
// Check if this is potentially a test query
if q.Name == testQuery {
d.handleTest(resp, req)
return
}
// Switch to TCP if the client is
network := "udp"
if _, ok := resp.RemoteAddr().(*net.TCPAddr); ok {
network = "tcp"
}
// Setup the message response
m := new(dns.Msg)
m.SetReply(req)
m.Authoritative = true
m.RecursionAvailable = (d.recursor != "")
// Only add the SOA if requested
if req.Question[0].Qtype == dns.TypeSOA {
d.addSOA(d.domain, m)
}
// Dispatch the correct handler
d.dispatch(network, req, m)
// Write out the complete response
if err := resp.WriteMsg(m); err != nil {
d.logger.Printf("[WARN] dns: failed to respond: %v", err)
}
}
// handleTest is used to handle DNS queries in the ".consul." domain
func (d *DNSServer) handleTest(resp dns.ResponseWriter, req *dns.Msg) {
q := req.Question[0]
defer func(s time.Time) {
d.logger.Printf("[DEBUG] dns: request for %v (%v)", q, time.Now().Sub(s))
}(time.Now())
if !(q.Qtype == dns.TypeANY || q.Qtype == dns.TypeTXT) {
return
}
if q.Name != testQuery {
return
}
// Always respond with TXT "ok"
m := new(dns.Msg)
m.SetReply(req)
m.Authoritative = true
m.RecursionAvailable = true
header := dns.RR_Header{Name: q.Name, Rrtype: dns.TypeTXT, Class: dns.ClassINET, Ttl: 0}
txt := &dns.TXT{header, []string{"ok"}}
m.Answer = append(m.Answer, txt)
d.addSOA(consulDomain, m)
if err := resp.WriteMsg(m); err != nil {
d.logger.Printf("[WARN] dns: failed to respond: %v", err)
}
}
// addSOA is used to add an SOA record to a message for the given domain
func (d *DNSServer) addSOA(domain string, msg *dns.Msg) {
soa := &dns.SOA{
Hdr: dns.RR_Header{
Name: domain,
Rrtype: dns.TypeSOA,
Class: dns.ClassINET,
Ttl: 0,
},
Ns: "ns." + domain,
Mbox: "postmaster." + domain,
Serial: uint32(time.Now().Unix()),
Refresh: 3600,
Retry: 600,
Expire: 86400,
Minttl: 0,
}
msg.Ns = append(msg.Ns, soa)
}
// dispatch is used to parse a request and invoke the correct handler
func (d *DNSServer) dispatch(network string, req, resp *dns.Msg) {
// By default the query is in the default datacenter
datacenter := d.agent.config.Datacenter
// Get the QName without the domain suffix
qName := dns.Fqdn(req.Question[0].Name)
qName = strings.TrimSuffix(qName, d.domain)
// Split into the label parts
labels := dns.SplitDomainName(qName)
// The last label is either "node", "service" or a datacenter name
PARSE:
if len(labels) == 0 {
goto INVALID
}
switch labels[len(labels)-1] {
case "service":
// Handle lookup with and without tag
switch len(labels) {
case 2:
d.serviceLookup(network, datacenter, labels[0], "", req, resp)
case 3:
d.serviceLookup(network, datacenter, labels[1], labels[0], req, resp)
default:
goto INVALID
}
case "node":
if len(labels) != 2 {
goto INVALID
}
d.nodeLookup(network, datacenter, labels[0], req, resp)
default:
// Store the DC, and re-parse
datacenter = labels[len(labels)-1]
labels = labels[:len(labels)-1]
goto PARSE
}
return
INVALID:
d.logger.Printf("[WARN] dns: QName invalid: %s", qName)
resp.SetRcode(req, dns.RcodeNameError)
}
// nodeLookup is used to handle a node query
func (d *DNSServer) nodeLookup(network, datacenter, node string, req, resp *dns.Msg) {
// Only handle ANY and A type requests
qType := req.Question[0].Qtype
if qType != dns.TypeANY && qType != dns.TypeA {
return
}
// Make an RPC request
args := structs.NodeSpecificRequest{
Datacenter: datacenter,
Node: node,
}
var out structs.IndexedNodeServices
if err := d.agent.RPC("Catalog.NodeServices", &args, &out); err != nil {
d.logger.Printf("[ERR] dns: rpc error: %v", err)
resp.SetRcode(req, dns.RcodeServerFailure)
return
}
// If we have no address, return not found!
if out.NodeServices.Node.Address == "" {
resp.SetRcode(req, dns.RcodeNameError)
return
}
// Add the node record
records := d.formatNodeRecord(&out.NodeServices.Node, req.Question[0].Name, qType)
if records != nil {
resp.Answer = append(resp.Answer, records...)
}
}
// formatNodeRecord takes a Node and returns an A, AAAA, or CNAME record
func (d *DNSServer) formatNodeRecord(node *structs.Node, qName string, qType uint16) (records []dns.RR) {
// Parse the IP
ip := net.ParseIP(node.Address)
var ipv4 net.IP
if ip != nil {
ipv4 = ip.To4()
}
switch {
case ipv4 != nil && (qType == dns.TypeANY || qType == dns.TypeA):
return []dns.RR{&dns.A{
Hdr: dns.RR_Header{
Name: qName,
Rrtype: dns.TypeA,
Class: dns.ClassINET,
Ttl: 0,
},
A: ip,
}}
case ip != nil && ipv4 == nil && (qType == dns.TypeANY || qType == dns.TypeAAAA):
return []dns.RR{&dns.AAAA{
Hdr: dns.RR_Header{
Name: qName,
Rrtype: dns.TypeAAAA,
Class: dns.ClassINET,
Ttl: 0,
},
AAAA: ip,
}}
case ip == nil && (qType == dns.TypeANY || qType == dns.TypeCNAME ||
qType == dns.TypeA || qType == dns.TypeAAAA):
// Get the CNAME
cnRec := &dns.CNAME{
Hdr: dns.RR_Header{
Name: qName,
Rrtype: dns.TypeCNAME,
Class: dns.ClassINET,
Ttl: 0,
},
Target: dns.Fqdn(node.Address),
}
records = append(records, cnRec)
// Recurse
more := d.resolveCNAME(cnRec.Target)
extra := 0
MORE_REC:
for _, rr := range more {
switch rr.Header().Rrtype {
case dns.TypeA:
fallthrough
case dns.TypeAAAA:
records = append(records, rr)
extra++
if extra == maxRecurseRecords {
break MORE_REC
}
}
}
}
return records
}
// serviceLookup is used to handle a service query
func (d *DNSServer) serviceLookup(network, datacenter, service, tag string, req, resp *dns.Msg) {
// Make an RPC request
args := structs.ServiceSpecificRequest{
Datacenter: datacenter,
ServiceName: service,
ServiceTag: tag,
TagFilter: tag != "",
}
var out structs.IndexedCheckServiceNodes
if err := d.agent.RPC("Health.ServiceNodes", &args, &out); err != nil {
d.logger.Printf("[ERR] dns: rpc error: %v", err)
resp.SetRcode(req, dns.RcodeServerFailure)
return
}
// If we have no nodes, return not found!
if len(out.Nodes) == 0 {
resp.SetRcode(req, dns.RcodeNameError)
return
}
// Filter out any service nodes due to health checks
out.Nodes = d.filterServiceNodes(out.Nodes)
// Perform a random shuffle
shuffleServiceNodes(out.Nodes)
// If the network is not TCP, restrict the number of responses
if network != "tcp" && len(out.Nodes) > maxServiceResponses {
out.Nodes = out.Nodes[:maxServiceResponses]
}
// Add various responses depending on the request
qType := req.Question[0].Qtype
d.serviceNodeRecords(out.Nodes, req, resp)
if qType == dns.TypeSRV {
d.serviceSRVRecords(datacenter, out.Nodes, req, resp)
}
}
// filterServiceNodes is used to filter out nodes that are failing
// health checks to prevent routing to unhealthy nodes
func (d *DNSServer) filterServiceNodes(nodes structs.CheckServiceNodes) structs.CheckServiceNodes {
n := len(nodes)
for i := 0; i < n; i++ {
node := nodes[i]
for _, check := range node.Checks {
if check.Status == structs.HealthCritical {
d.logger.Printf("[WARN] dns: node '%s' failing health check '%s: %s', dropping from service '%s'",
node.Node.Node, check.CheckID, check.Name, node.Service.Service)
nodes[i], nodes[n-1] = nodes[n-1], structs.CheckServiceNode{}
n--
i--
}
}
}
return nodes[:n]
}
// shuffleServiceNodes does an in-place random shuffle using the Fisher-Yates algorithm
func shuffleServiceNodes(nodes structs.CheckServiceNodes) {
for i := len(nodes) - 1; i > 0; i-- {
j := rand.Int31() % int32(i+1)
nodes[i], nodes[j] = nodes[j], nodes[i]
}
}
// serviceNodeRecords is used to add the node records for a service lookup
func (d *DNSServer) serviceNodeRecords(nodes structs.CheckServiceNodes, req, resp *dns.Msg) {
qName := req.Question[0].Name
qType := req.Question[0].Qtype
handled := make(map[string]struct{})
for _, node := range nodes {
// Avoid duplicate entries, possible if a node has
// the same service on multiple ports, etc.
addr := node.Node.Address
if _, ok := handled[addr]; ok {
continue
}
handled[addr] = struct{}{}
// Add the node record
records := d.formatNodeRecord(&node.Node, qName, qType)
if records != nil {
resp.Answer = append(resp.Answer, records...)
}
}
}
// serviceARecords is used to add the SRV records for a service lookup
func (d *DNSServer) serviceSRVRecords(dc string, nodes structs.CheckServiceNodes, req, resp *dns.Msg) {
handled := make(map[string]struct{})
for _, node := range nodes {
// Avoid duplicate entries, possible if a node has
// the same service the same port, etc.
tuple := fmt.Sprintf("%s:%d", node.Node.Node, node.Service.Port)
if _, ok := handled[tuple]; ok {
continue
}
handled[tuple] = struct{}{}
// Add the SRV record
srvRec := &dns.SRV{
Hdr: dns.RR_Header{
Name: req.Question[0].Name,
Rrtype: dns.TypeSRV,
Class: dns.ClassINET,
Ttl: 0,
},
Priority: 1,
Weight: 1,
Port: uint16(node.Service.Port),
Target: fmt.Sprintf("%s.node.%s.%s", node.Node.Node, dc, d.domain),
}
resp.Answer = append(resp.Answer, srvRec)
// Add the extra record
records := d.formatNodeRecord(&node.Node, srvRec.Target, dns.TypeANY)
if records != nil {
resp.Extra = append(resp.Extra, records...)
}
}
}
// handleRecurse is used to handle recursive DNS queries
func (d *DNSServer) handleRecurse(resp dns.ResponseWriter, req *dns.Msg) {
q := req.Question[0]
network := "udp"
defer func(s time.Time) {
d.logger.Printf("[DEBUG] dns: request for %v (%s) (%v)", q, network, time.Now().Sub(s))
}(time.Now())
// Switch to TCP if the client is
if _, ok := resp.RemoteAddr().(*net.TCPAddr); ok {
network = "tcp"
}
// Recursively resolve
c := &dns.Client{Net: network}
r, rtt, err := c.Exchange(req, d.recursor)
// On failure, return a SERVFAIL message
if err != nil {
d.logger.Printf("[ERR] dns: recurse failed: %v", err)
m := &dns.Msg{}
m.SetReply(req)
m.RecursionAvailable = true
m.SetRcode(req, dns.RcodeServerFailure)
resp.WriteMsg(m)
return
}
d.logger.Printf("[DEBUG] dns: recurse RTT for %v (%v)", q, rtt)
// Forward the response
if err := resp.WriteMsg(r); err != nil {
d.logger.Printf("[WARN] dns: failed to respond: %v", err)
}
}
// resolveCNAME is used to recursively resolve CNAME records
func (d *DNSServer) resolveCNAME(name string) []dns.RR {
// Do nothing if we don't have a recursor
if d.recursor == "" {
return nil
}
// Ask for any A records
m := new(dns.Msg)
m.SetQuestion(name, dns.TypeA)
// Make a DNS lookup request
c := &dns.Client{Net: "udp"}
r, rtt, err := c.Exchange(m, d.recursor)
if err != nil {
d.logger.Printf("[ERR] dns: cname recurse failed: %v", err)
return nil
}
d.logger.Printf("[DEBUG] dns: cname recurse RTT for %v (%v)", name, rtt)
// Return all the answers
return r.Answer
}