mirror of https://github.com/status-im/consul.git
[CE] feat(v2dns): add v2 style query metrics (#20608)
feat(v2dns): add v2 style query metrics
This commit is contained in:
parent
7575b53737
commit
0f0b080514
|
@ -13,6 +13,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
|
||||
"github.com/hashicorp/go-hclog"
|
||||
|
||||
|
@ -29,6 +30,15 @@ const (
|
|||
staleCounterThreshold = 5 * time.Second
|
||||
)
|
||||
|
||||
// DNSCounters pre-registers the staleness metric.
|
||||
// This value is used by both the V1 and V2 DNS (V1 Catalog-only) servers.
// It holds a single counter definition whose Name is {"dns", "stale_queries"}.
|
||||
var DNSCounters = []prometheus.CounterDefinition{
|
||||
{
|
||||
Name: []string{"dns", "stale_queries"},
|
||||
Help: "Increments when an agent serves a query within the allowed stale threshold.",
|
||||
},
|
||||
}
|
||||
|
||||
// v1DataFetcherDynamicConfig is used to store the dynamic configuration of the V1 data fetcher.
|
||||
type v1DataFetcherDynamicConfig struct {
|
||||
// Default request tenancy
|
||||
|
|
41
agent/dns.go
41
agent/dns.go
|
@ -16,7 +16,6 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/armon/go-radix"
|
||||
"github.com/hashicorp/go-hclog"
|
||||
"github.com/miekg/dns"
|
||||
|
@ -33,24 +32,6 @@ import (
|
|||
"github.com/hashicorp/consul/logging"
|
||||
)
|
||||
|
||||
// DNSCounters lists the Prometheus counter definitions declared for DNS.
// It holds a single entry whose Name is {"dns", "stale_queries"}.
var DNSCounters = []prometheus.CounterDefinition{
|
||||
{
|
||||
Name: []string{"dns", "stale_queries"},
|
||||
Help: "Increments when an agent serves a query within the allowed stale threshold.",
|
||||
},
|
||||
}
|
||||
|
||||
// DNSSummaries lists the Prometheus summary definitions declared for DNS
// query timing: one for reverse (PTR) queries and one for domain queries.
// The two Name keys are the same ones handlePtr and handleQuery pass to
// metrics.MeasureSinceWithLabels.
var DNSSummaries = []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"dns", "ptr_query"},
|
||||
Help: "Measures the time spent handling a reverse DNS query for the given node.",
|
||||
},
|
||||
{
|
||||
Name: []string{"dns", "domain_query"},
|
||||
Help: "Measures the time spent handling a domain query for the given node.",
|
||||
},
|
||||
}
|
||||
|
||||
const (
|
||||
// UDP can fit ~25 A records in a 512B response, and ~14 AAAA
|
||||
// records. Limit further to prevent unintentional configuration
|
||||
|
@ -406,8 +387,17 @@ func (d *DNSServer) getResponseDomain(questionName string) string {
|
|||
func (d *DNSServer) handlePtr(resp dns.ResponseWriter, req *dns.Msg) {
|
||||
q := req.Question[0]
|
||||
defer func(s time.Time) {
|
||||
// V1 DNS-style metrics
|
||||
metrics.MeasureSinceWithLabels([]string{"dns", "ptr_query"}, s,
|
||||
[]metrics.Label{{Name: "node", Value: d.agent.config.NodeName}})
|
||||
|
||||
// V2 DNS-style metrics for forward compatibility
|
||||
metrics.MeasureSinceWithLabels([]string{"dns", "query"}, s,
|
||||
[]metrics.Label{
|
||||
{Name: "node", Value: d.agent.config.NodeName},
|
||||
{Name: "type", Value: dns.Type(dns.TypePTR).String()},
|
||||
})
|
||||
|
||||
d.logger.Debug("request served from client",
|
||||
"question", q,
|
||||
"latency", time.Since(s).String(),
|
||||
|
@ -519,12 +509,21 @@ func (d *DNSServer) handlePtr(resp dns.ResponseWriter, req *dns.Msg) {
|
|||
func (d *DNSServer) handleQuery(resp dns.ResponseWriter, req *dns.Msg) {
|
||||
q := req.Question[0]
|
||||
defer func(s time.Time) {
|
||||
// V1 DNS-style metrics
|
||||
metrics.MeasureSinceWithLabels([]string{"dns", "domain_query"}, s,
|
||||
[]metrics.Label{{Name: "node", Value: d.agent.config.NodeName}})
|
||||
|
||||
// V2 DNS-style metrics for forward compatibility
|
||||
metrics.MeasureSinceWithLabels([]string{"dns", "query"}, s,
|
||||
[]metrics.Label{
|
||||
{Name: "node", Value: d.agent.config.NodeName},
|
||||
{Name: "type", Value: dns.Type(q.Qtype).String()},
|
||||
})
|
||||
|
||||
d.logger.Debug("request served from client",
|
||||
"name", q.Name,
|
||||
"type", dns.Type(q.Qtype),
|
||||
"class", dns.Class(q.Qclass),
|
||||
"type", dns.Type(q.Qtype).String(),
|
||||
"class", dns.Class(q.Qclass).String(),
|
||||
"latency", time.Since(s).String(),
|
||||
"client", resp.RemoteAddr().String(),
|
||||
"client_network", resp.RemoteAddr().Network(),
|
||||
|
|
|
@ -13,6 +13,7 @@ import (
|
|||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-radix"
|
||||
"github.com/miekg/dns"
|
||||
|
||||
|
@ -47,8 +48,6 @@ var (
|
|||
trailingSpacesRE = regexp.MustCompile(" +$")
|
||||
)
|
||||
|
||||
// TODO (v2-dns): metrics
|
||||
|
||||
// Context is used to augment a DNS message with Consul-specific metadata.
|
||||
type Context struct {
|
||||
Token string
|
||||
|
@ -105,6 +104,7 @@ type Router struct {
|
|||
domain string
|
||||
altDomain string
|
||||
datacenter string
|
||||
nodeName string
|
||||
logger hclog.Logger
|
||||
|
||||
tokenFunc func() string
|
||||
|
@ -124,8 +124,6 @@ func NewRouter(cfg Config) (*Router, error) {
|
|||
domain := dns.CanonicalName(cfg.AgentConfig.DNSDomain)
|
||||
altDomain := dns.CanonicalName(cfg.AgentConfig.DNSAltDomain)
|
||||
|
||||
// TODO (v2-dns): need to figure out tenancy information here in a way that works for V2 and V1
|
||||
|
||||
logger := cfg.Logger.Named(logging.DNS)
|
||||
|
||||
router := &Router{
|
||||
|
@ -135,6 +133,7 @@ func NewRouter(cfg Config) (*Router, error) {
|
|||
altDomain: altDomain,
|
||||
datacenter: cfg.AgentConfig.Datacenter,
|
||||
logger: logger,
|
||||
nodeName: cfg.AgentConfig.NodeName,
|
||||
tokenFunc: cfg.TokenFunc,
|
||||
translateAddressFunc: cfg.TranslateAddressFunc,
|
||||
translateServiceAddressFunc: cfg.TranslateServiceAddressFunc,
|
||||
|
@ -148,21 +147,6 @@ func NewRouter(cfg Config) (*Router, error) {
|
|||
|
||||
// HandleRequest is used to process an individual DNS request. It returns a message in success or fail cases.
// It forwards req, reqCtx and remoteAddress unchanged to handleRequestRecursively,
// passing maxRecursionLevelDefault as the maxRecursionLevel argument, and returns
// that call's result.
|
||||
func (r *Router) HandleRequest(req *dns.Msg, reqCtx Context, remoteAddress net.Addr) *dns.Msg {
|
||||
return r.handleRequestRecursively(req, reqCtx, remoteAddress, maxRecursionLevelDefault)
|
||||
}
|
||||
|
||||
// getErrorFromECSNotGlobalError returns the underlying error from an ECSNotGlobalError, if it exists.
// If err does not match discovery.ErrECSNotGlobal it is returned unchanged.
|
||||
func getErrorFromECSNotGlobalError(err error) error {
|
||||
if errors.Is(err, discovery.ErrECSNotGlobal) {
|
||||
// NOTE(review): this type assertion is unchecked. errors.Is also matches
// through wrapping errors, so if err is ever a wrapper around an
// ECSNotGlobalError this line would panic — confirm callers never wrap.
return err.(discovery.ECSNotGlobalError).Unwrap()
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// handleRequestRecursively is used to process an individual DNS request. It will recurse as needed
|
||||
// a maximum number of times and returns a message in success or fail cases.
|
||||
func (r *Router) handleRequestRecursively(req *dns.Msg, reqCtx Context,
|
||||
remoteAddress net.Addr, maxRecursionLevel int) *dns.Msg {
|
||||
configCtx := r.dynamicConfig.Load().(*RouterDynamicConfig)
|
||||
|
||||
r.logger.Trace("received request", "question", req.Question[0].Name, "type", dns.Type(req.Question[0].Qtype).String())
|
||||
|
@ -176,6 +160,45 @@ func (r *Router) handleRequestRecursively(req *dns.Msg, reqCtx Context,
|
|||
return createServerFailureResponse(req, configCtx, false)
|
||||
}
|
||||
|
||||
defer func(s time.Time, q dns.Question) {
|
||||
metrics.MeasureSinceWithLabels([]string{"dns", "query"}, s,
|
||||
[]metrics.Label{
|
||||
{Name: "node", Value: r.nodeName},
|
||||
{Name: "type", Value: dns.Type(q.Qtype).String()},
|
||||
})
|
||||
|
||||
r.logger.Debug("request served from client",
|
||||
"name", q.Name,
|
||||
"type", dns.Type(q.Qtype).String(),
|
||||
"class", dns.Class(q.Qclass).String(),
|
||||
"latency", time.Since(s).String(),
|
||||
"client", remoteAddress.String(),
|
||||
"client_network", remoteAddress.Network(),
|
||||
)
|
||||
}(time.Now(), req.Question[0])
|
||||
|
||||
return r.handleRequestRecursively(req, reqCtx, configCtx, remoteAddress, maxRecursionLevelDefault)
|
||||
}
|
||||
|
||||
// getErrorFromECSNotGlobalError returns the underlying error from an ECSNotGlobalError, if it exists.
|
||||
func getErrorFromECSNotGlobalError(err error) error {
|
||||
if errors.Is(err, discovery.ErrECSNotGlobal) {
|
||||
return err.(discovery.ECSNotGlobalError).Unwrap()
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// handleRequestRecursively is used to process an individual DNS request. It will recurse as needed
|
||||
// a maximum number of times and returns a message in success or fail cases.
|
||||
func (r *Router) handleRequestRecursively(req *dns.Msg, reqCtx Context, configCtx *RouterDynamicConfig,
|
||||
remoteAddress net.Addr, maxRecursionLevel int) *dns.Msg {
|
||||
|
||||
r.logger.Trace(
|
||||
"received request",
|
||||
"question", req.Question[0].Name,
|
||||
"type", dns.Type(req.Question[0].Qtype).String(),
|
||||
"recursion_remaining", maxRecursionLevel)
|
||||
|
||||
responseDomain, needRecurse := r.parseDomain(req.Question[0].Name)
|
||||
if needRecurse && !canRecurse(configCtx) {
|
||||
// This is the same error as an unmatched domain
|
||||
|
@ -655,7 +678,7 @@ func (r *Router) defaultAgentDNSRequestContext() Context {
|
|||
}
|
||||
|
||||
// resolveCNAME is used to recursively resolve CNAME records
|
||||
func (r *Router) resolveCNAME(cfg *RouterDynamicConfig, name string, reqCtx Context,
|
||||
func (r *Router) resolveCNAME(cfgContext *RouterDynamicConfig, name string, reqCtx Context,
|
||||
remoteAddress net.Addr, maxRecursionLevel int) []dns.RR {
|
||||
// If the CNAME record points to a Consul address, resolve it internally
|
||||
// Convert query to lowercase because DNS is case-insensitive; d.domain and
|
||||
|
@ -670,13 +693,13 @@ func (r *Router) resolveCNAME(cfg *RouterDynamicConfig, name string, reqCtx Cont
|
|||
|
||||
req.SetQuestion(name, dns.TypeANY)
|
||||
// TODO: handle error response
|
||||
resp := r.handleRequestRecursively(req, reqCtx, nil, maxRecursionLevel-1)
|
||||
resp := r.handleRequestRecursively(req, reqCtx, cfgContext, nil, maxRecursionLevel-1)
|
||||
|
||||
return resp.Answer
|
||||
}
|
||||
|
||||
// Do nothing if we don't have a recursor
|
||||
if !canRecurse(cfg) {
|
||||
if !canRecurse(cfgContext) {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -685,7 +708,7 @@ func (r *Router) resolveCNAME(cfg *RouterDynamicConfig, name string, reqCtx Cont
|
|||
m.SetQuestion(name, dns.TypeA)
|
||||
|
||||
// Make a DNS lookup request
|
||||
recursorResponse, err := r.recursor.handle(m, cfg, remoteAddress)
|
||||
recursorResponse, err := r.recursor.handle(m, cfgContext, remoteAddress)
|
||||
if err == nil {
|
||||
return recursorResponse.Answer
|
||||
}
|
||||
|
|
|
@ -6,12 +6,13 @@ package dns
|
|||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/hashicorp/consul/internal/dnsutil"
|
||||
"net"
|
||||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/consul/internal/dnsutil"
|
||||
|
||||
"github.com/miekg/dns"
|
||||
"github.com/stretchr/testify/mock"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
@ -2743,7 +2744,15 @@ func runHandleTestCases(t *testing.T, tc HandleTestCase) {
|
|||
if ctx == nil {
|
||||
ctx = &Context{}
|
||||
}
|
||||
actual := router.HandleRequest(tc.request, *ctx, tc.remoteAddress)
|
||||
|
||||
var remoteAddress net.Addr
|
||||
if tc.remoteAddress != nil {
|
||||
remoteAddress = tc.remoteAddress
|
||||
} else {
|
||||
remoteAddress = &net.UDPAddr{}
|
||||
}
|
||||
|
||||
actual := router.HandleRequest(tc.request, *ctx, remoteAddress)
|
||||
require.Equal(t, tc.response, actual)
|
||||
}
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@ import (
|
|||
"github.com/hashicorp/consul/agent/consul/stream"
|
||||
"github.com/hashicorp/consul/agent/consul/usagemetrics"
|
||||
"github.com/hashicorp/consul/agent/consul/xdscapacity"
|
||||
"github.com/hashicorp/consul/agent/discovery"
|
||||
"github.com/hashicorp/consul/agent/grpc-external/limiter"
|
||||
grpcInt "github.com/hashicorp/consul/agent/grpc-internal"
|
||||
"github.com/hashicorp/consul/agent/grpc-internal/balancer"
|
||||
|
@ -434,6 +435,7 @@ func getPrometheusDefs(cfg *config.RuntimeConfig, isServer bool) ([]prometheus.G
|
|||
consul.CatalogCounters,
|
||||
consul.ClientCounters,
|
||||
consul.RPCCounters,
|
||||
discovery.DNSCounters,
|
||||
grpcWare.StatsCounters,
|
||||
local.StateCounters,
|
||||
xds.StatsCounters,
|
||||
|
|
Loading…
Reference in New Issue