mirror of
https://github.com/status-im/consul.git
synced 2025-01-22 11:40:06 +00:00
3c44116a8f
This continues the work done in #14908 where a crude solution to prevent a goroutine leak was implemented. The former code would launch a perpetual goroutine family every iteration (+1 +1) and the fixed code simply caused a new goroutine family to first cancel the prior one to prevent the leak (-1 +1 == 0). This PR refactors this code completely to: - make it more understandable - remove the recursion-via-goroutine strangeness - prevent unnecessary RPC fetches when the prior one has errored. The core issue arose from a conflation of the entry.Fetching field to mean: - there is an RPC (blocking query) in flight right now - there is a goroutine running to manage the RPC fetch retry loop The problem is that the goroutine-leak-avoidance check would treat Fetching like (2), but within the body of a goroutine it would flip that boolean back to false before the retry sleep. This would cause a new chain of goroutines to launch which #14908 would correct crudely. The refactored code uses a plain for-loop and changes the semantics to track state for "is there a goroutine associated with this cache entry" instead of the former. We use a uint64 unique identity per goroutine instead of a boolean so that any orphaned goroutines can tell when they've been replaced when the expiry loop deletes a cache entry while the goroutine is still running and is later replaced.
1019 lines
38 KiB
Go
1019 lines
38 KiB
Go
// Package cache provides caching features for data from a Consul server.
|
|
//
|
|
// While this is similar in some ways to the "agent/ae" package, a key
|
|
// difference is that with anti-entropy, the agent is the authoritative
|
|
// source so it resolves differences the server may have. With caching (this
|
|
// package), the server is the authoritative source and we do our best to
|
|
// balance performance and correctness, depending on the type of data being
|
|
// requested.
|
|
//
|
|
// The types of data that can be cached is configurable via the Type interface.
|
|
// This allows specialized behavior for certain types of data. Each type of
|
|
// Consul data (CA roots, leaf certs, intentions, KV, catalog, etc.) will
|
|
// have to be manually implemented. This usually is not much work, see
|
|
// the "agent/cache-types" package.
|
|
package cache
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"strconv"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/armon/go-metrics"
|
|
"github.com/armon/go-metrics/prometheus"
|
|
"github.com/hashicorp/go-hclog"
|
|
"golang.org/x/time/rate"
|
|
|
|
"github.com/hashicorp/consul/acl"
|
|
"github.com/hashicorp/consul/lib"
|
|
"github.com/hashicorp/consul/lib/ttlcache"
|
|
)
|
|
|
|
// TODO(kit): remove the namespace from these once the metrics themselves change
|
|
var Gauges = []prometheus.GaugeDefinition{
|
|
{
|
|
Name: []string{"consul", "cache", "entries_count"},
|
|
Help: "Deprecated - please use cache_entries_count instead.",
|
|
},
|
|
{
|
|
Name: []string{"cache", "entries_count"},
|
|
Help: "Represents the number of entries in this cache.",
|
|
},
|
|
}
|
|
|
|
// TODO(kit): remove the namespace from these once the metrics themselves change
|
|
var Counters = []prometheus.CounterDefinition{
|
|
{
|
|
Name: []string{"consul", "cache", "bypass"},
|
|
Help: "Deprecated - please use cache_bypass instead.",
|
|
},
|
|
{
|
|
Name: []string{"cache", "bypass"},
|
|
Help: "Counts how many times a request bypassed the cache because no cache-key was provided.",
|
|
},
|
|
{
|
|
Name: []string{"consul", "cache", "fetch_success"},
|
|
Help: "Deprecated - please use cache_fetch_success instead.",
|
|
},
|
|
{
|
|
Name: []string{"cache", "fetch_success"},
|
|
Help: "Counts the number of successful fetches by the cache.",
|
|
},
|
|
{
|
|
Name: []string{"consul", "cache", "fetch_error"},
|
|
Help: "Deprecated - please use cache_fetch_error instead.",
|
|
},
|
|
{
|
|
Name: []string{"cache", "fetch_error"},
|
|
Help: "Counts the number of failed fetches by the cache.",
|
|
},
|
|
{
|
|
Name: []string{"consul", "cache", "evict_expired"},
|
|
Help: "Deprecated - please use cache_evict_expired instead.",
|
|
},
|
|
{
|
|
Name: []string{"cache", "evict_expired"},
|
|
Help: "Counts the number of expired entries that are evicted.",
|
|
},
|
|
}
|
|
|
|
// Constants related to refresh backoff. We probably don't ever need to
|
|
// make these configurable knobs since they primarily exist to lower load.
|
|
const (
|
|
DefaultCacheRefreshBackoffMin = 3 // 3 attempts before backing off
|
|
DefaultCacheRefreshMaxWait = 1 * time.Minute // maximum backoff wait time
|
|
|
|
// The following constants are default values for the cache entry
|
|
// rate limiter settings.
|
|
|
|
// DefaultEntryFetchRate is the default rate at which cache entries can
|
|
// be fetch. This defaults to not being unlimited
|
|
DefaultEntryFetchRate = rate.Inf
|
|
|
|
// DefaultEntryFetchMaxBurst is the number of cache entry fetches that can
|
|
// occur in a burst.
|
|
DefaultEntryFetchMaxBurst = 2
|
|
)
|
|
|
|
// Cache is a agent-local cache of Consul data. Create a Cache using the
|
|
// New function. A zero-value Cache is not ready for usage and will result
|
|
// in a panic.
|
|
//
|
|
// The types of data to be cached must be registered via RegisterType. Then,
|
|
// calls to Get specify the type and a Request implementation. The
|
|
// implementation of Request is usually done directly on the standard RPC
|
|
// struct in agent/structs. This API makes cache usage a mostly drop-in
|
|
// replacement for non-cached RPC calls.
|
|
//
|
|
// The cache is partitioned by ACL and datacenter/peer. This allows the cache
|
|
// to be safe for multi-DC queries and for queries where the data is modified
|
|
// due to ACLs all without the cache having to have any clever logic, at
|
|
// the slight expense of a less perfect cache.
|
|
//
|
|
// The Cache exposes various metrics via go-metrics. Please view the source
|
|
// searching for "metrics." to see the various metrics exposed. These can be
|
|
// used to explore the performance of the cache.
|
|
type Cache struct {
|
|
// types stores the list of data types that the cache knows how to service.
|
|
// These can be dynamically registered with RegisterType.
|
|
typesLock sync.RWMutex
|
|
types map[string]typeEntry
|
|
|
|
// entries contains the actual cache data. Access to entries and
|
|
// entriesExpiryHeap must be protected by entriesLock.
|
|
//
|
|
// entriesExpiryHeap is a heap of *cacheEntry values ordered by
|
|
// expiry, with the soonest to expire being first in the list (index 0).
|
|
//
|
|
// NOTE(mitchellh): The entry map key is currently a string in the format
|
|
// of "<DC>/<ACL token>/<Request key>" in order to properly partition
|
|
// requests to different datacenters and ACL tokens. This format has some
|
|
// big drawbacks: we can't evict by datacenter, ACL token, etc. For an
|
|
// initial implementation this works and the tests are agnostic to the
|
|
// internal storage format so changing this should be possible safely.
|
|
entriesLock sync.RWMutex
|
|
entries map[string]cacheEntry
|
|
entriesExpiryHeap *ttlcache.ExpiryHeap
|
|
lastGoroutineID uint64
|
|
|
|
// stopped is used as an atomic flag to signal that the Cache has been
|
|
// discarded so background fetches and expiry processing should stop.
|
|
stopped uint32
|
|
// stopCh is closed when Close is called
|
|
stopCh chan struct{}
|
|
// options includes a per Cache Rate limiter specification to avoid performing too many queries
|
|
options Options
|
|
rateLimitContext context.Context
|
|
rateLimitCancel context.CancelFunc
|
|
}
|
|
|
|
// typeEntry is a single type that is registered with a Cache.
|
|
type typeEntry struct {
|
|
// Name that was used to register the Type
|
|
Name string
|
|
Type Type
|
|
Opts *RegisterOptions
|
|
}
|
|
|
|
// ResultMeta is returned from Get calls along with the value and can be used
|
|
// to expose information about the cache status for debugging or testing.
|
|
type ResultMeta struct {
|
|
// Hit indicates whether or not the request was a cache hit
|
|
Hit bool
|
|
|
|
// Age identifies how "stale" the result is. It's semantics differ based on
|
|
// whether or not the cache type performs background refresh or not as defined
|
|
// in https://www.consul.io/api/index.html#agent-caching.
|
|
//
|
|
// For background refresh types, Age is 0 unless the background blocking query
|
|
// is currently in a failed state and so not keeping up with the server's
|
|
// values. If it is non-zero it represents the time since the first failure to
|
|
// connect during background refresh, and is reset after a background request
|
|
// does manage to reconnect and either return successfully, or block for at
|
|
// least the yamux keepalive timeout of 30 seconds (which indicates the
|
|
// connection is OK but blocked as expected).
|
|
//
|
|
// For simple cache types, Age is the time since the result being returned was
|
|
// fetched from the servers.
|
|
Age time.Duration
|
|
|
|
// Index is the internal ModifyIndex for the cache entry. Not all types
|
|
// support blocking and all that do will likely have this in their result type
|
|
// already but this allows generic code to reason about whether cache values
|
|
// have changed.
|
|
Index uint64
|
|
}
|
|
|
|
// Options are options for the Cache.
|
|
type Options struct {
|
|
Logger hclog.Logger
|
|
|
|
// EntryFetchMaxBurst max burst size of RateLimit for a single cache entry
|
|
EntryFetchMaxBurst int
|
|
// EntryFetchRate represents the max calls/sec for a single cache entry
|
|
EntryFetchRate rate.Limit
|
|
|
|
// CacheRefreshBackoffMin is the number of attempts to wait before backing off.
|
|
// Mostly configurable just for testing.
|
|
CacheRefreshBackoffMin uint
|
|
// CacheRefreshMaxWait is the maximum backoff wait time.
|
|
// Mostly configurable just for testing.
|
|
CacheRefreshMaxWait time.Duration
|
|
}
|
|
|
|
// Equal return true if both options are equivalent
|
|
func (o Options) Equal(other Options) bool {
|
|
return o.EntryFetchMaxBurst == other.EntryFetchMaxBurst && o.EntryFetchRate == other.EntryFetchRate
|
|
}
|
|
|
|
// applyDefaultValuesOnOptions set default values on options and returned updated value
|
|
func applyDefaultValuesOnOptions(options Options) Options {
|
|
if options.EntryFetchRate == 0.0 {
|
|
options.EntryFetchRate = DefaultEntryFetchRate
|
|
}
|
|
if options.EntryFetchMaxBurst == 0 {
|
|
options.EntryFetchMaxBurst = DefaultEntryFetchMaxBurst
|
|
}
|
|
if options.CacheRefreshBackoffMin == 0 {
|
|
options.CacheRefreshBackoffMin = DefaultCacheRefreshBackoffMin
|
|
}
|
|
if options.CacheRefreshMaxWait == 0 {
|
|
options.CacheRefreshMaxWait = DefaultCacheRefreshMaxWait
|
|
}
|
|
if options.Logger == nil {
|
|
options.Logger = hclog.New(nil)
|
|
}
|
|
return options
|
|
}
|
|
|
|
// New creates a new cache with the given RPC client and reasonable defaults.
|
|
// Further settings can be tweaked on the returned value.
|
|
func New(options Options) *Cache {
|
|
options = applyDefaultValuesOnOptions(options)
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
c := &Cache{
|
|
types: make(map[string]typeEntry),
|
|
entries: make(map[string]cacheEntry),
|
|
entriesExpiryHeap: ttlcache.NewExpiryHeap(),
|
|
stopCh: make(chan struct{}),
|
|
options: options,
|
|
rateLimitContext: ctx,
|
|
rateLimitCancel: cancel,
|
|
}
|
|
|
|
// Start the expiry watcher
|
|
go c.runExpiryLoop()
|
|
|
|
return c
|
|
}
|
|
|
|
// RegisterOptions are options that can be associated with a type being
|
|
// registered for the cache. This changes the behavior of the cache for
|
|
// this type.
|
|
type RegisterOptions struct {
|
|
// LastGetTTL is the time that the values returned by this type remain
|
|
// in the cache after the last get operation. If a value isn't accessed
|
|
// within this duration, the value is purged from the cache and
|
|
// background refreshing will cease.
|
|
LastGetTTL time.Duration
|
|
|
|
// Refresh configures whether the data is actively refreshed or if
|
|
// the data is only refreshed on an explicit Get. The default (false)
|
|
// is to only request data on explicit Get.
|
|
Refresh bool
|
|
|
|
// SupportsBlocking should be set to true if the type supports blocking queries.
|
|
// Types that do not support blocking queries will not be able to use
|
|
// background refresh nor will the cache attempt blocking fetches if the
|
|
// client requests them with MinIndex.
|
|
SupportsBlocking bool
|
|
|
|
// RefreshTimer is the time to sleep between attempts to refresh data.
|
|
// If this is zero, then data is refreshed immediately when a fetch
|
|
// is returned.
|
|
//
|
|
// Using different values for RefreshTimer and QueryTimeout, various
|
|
// "refresh" mechanisms can be implemented:
|
|
//
|
|
// * With a high timer duration and a low timeout, a timer-based
|
|
// refresh can be set that minimizes load on the Consul servers.
|
|
//
|
|
// * With a low timer and high timeout duration, a blocking-query-based
|
|
// refresh can be set so that changes in server data are recognized
|
|
// within the cache very quickly.
|
|
//
|
|
RefreshTimer time.Duration
|
|
|
|
// QueryTimeout is the default value for the maximum query time for a fetch
|
|
// operation. It is set as FetchOptions.Timeout so that cache.Type
|
|
// implementations can use it as the MaxQueryTime.
|
|
QueryTimeout time.Duration
|
|
}
|
|
|
|
// RegisterType registers a cacheable type.
|
|
//
|
|
// This makes the type available for Get but does not automatically perform
|
|
// any prefetching. In order to populate the cache, Get must be called.
|
|
func (c *Cache) RegisterType(n string, typ Type) {
|
|
opts := typ.RegisterOptions()
|
|
if opts.LastGetTTL == 0 {
|
|
opts.LastGetTTL = 72 * time.Hour // reasonable default is days
|
|
}
|
|
|
|
c.typesLock.Lock()
|
|
defer c.typesLock.Unlock()
|
|
c.types[n] = typeEntry{Name: n, Type: typ, Opts: &opts}
|
|
}
|
|
|
|
// ReloadOptions updates the cache with the new options
|
|
// return true if Cache is updated, false if already up to date
|
|
func (c *Cache) ReloadOptions(options Options) bool {
|
|
options = applyDefaultValuesOnOptions(options)
|
|
modified := !options.Equal(c.options)
|
|
if modified {
|
|
c.entriesLock.RLock()
|
|
defer c.entriesLock.RUnlock()
|
|
for _, entry := range c.entries {
|
|
if c.options.EntryFetchRate != options.EntryFetchRate {
|
|
entry.FetchRateLimiter.SetLimit(options.EntryFetchRate)
|
|
}
|
|
if c.options.EntryFetchMaxBurst != options.EntryFetchMaxBurst {
|
|
entry.FetchRateLimiter.SetBurst(options.EntryFetchMaxBurst)
|
|
}
|
|
}
|
|
c.options.EntryFetchRate = options.EntryFetchRate
|
|
c.options.EntryFetchMaxBurst = options.EntryFetchMaxBurst
|
|
}
|
|
return modified
|
|
}
|
|
|
|
// Get loads the data for the given type and request. If data satisfying the
|
|
// minimum index is present in the cache, it is returned immediately. Otherwise,
|
|
// this will block until the data is available or the request timeout is
|
|
// reached.
|
|
//
|
|
// Multiple Get calls for the same Request (matching CacheKey value) will
|
|
// block on a single network request.
|
|
//
|
|
// The timeout specified by the Request will be the timeout on the cache
|
|
// Get, and does not correspond to the timeout of any background data
|
|
// fetching. If the timeout is reached before data satisfying the minimum
|
|
// index is retrieved, the last known value (maybe nil) is returned. No
|
|
// error is returned on timeout. This matches the behavior of Consul blocking
|
|
// queries.
|
|
func (c *Cache) Get(ctx context.Context, t string, r Request) (interface{}, ResultMeta, error) {
|
|
c.typesLock.RLock()
|
|
tEntry, ok := c.types[t]
|
|
c.typesLock.RUnlock()
|
|
if !ok {
|
|
// Shouldn't happen given that we successfully fetched this at least
|
|
// once. But be robust against panics.
|
|
return nil, ResultMeta{}, fmt.Errorf("unknown type in cache: %s", t)
|
|
}
|
|
return c.getWithIndex(ctx, newGetOptions(tEntry, r))
|
|
}
|
|
|
|
// getOptions contains the arguments for a Get request. It is used in place of
|
|
// Request so that internal functions can modify Info without having to extract
|
|
// it from the Request each time.
|
|
type getOptions struct {
|
|
// Fetch is a closure over tEntry.Type.Fetch which provides the original
|
|
// Request from the caller.
|
|
Fetch func(opts FetchOptions) (FetchResult, error)
|
|
Info RequestInfo
|
|
TypeEntry typeEntry
|
|
}
|
|
|
|
func newGetOptions(tEntry typeEntry, r Request) getOptions {
|
|
return getOptions{
|
|
Fetch: func(opts FetchOptions) (FetchResult, error) {
|
|
return tEntry.Type.Fetch(opts, r)
|
|
},
|
|
Info: r.CacheInfo(),
|
|
TypeEntry: tEntry,
|
|
}
|
|
}
|
|
|
|
// getEntryLocked retrieves a cache entry and checks if it is ready to be
|
|
// returned given the other parameters. It reads from entries and the caller
|
|
// has to issue a read lock if necessary.
|
|
func (c *Cache) getEntryLocked(
|
|
tEntry typeEntry,
|
|
key string,
|
|
info RequestInfo,
|
|
) (entryExists bool, entryValid bool, entry cacheEntry) {
|
|
entry, ok := c.entries[key]
|
|
if !entry.Valid {
|
|
return ok, false, entry
|
|
}
|
|
|
|
// Check index is not specified or lower than value, or the type doesn't
|
|
// support blocking.
|
|
if tEntry.Opts.SupportsBlocking && info.MinIndex > 0 && info.MinIndex >= entry.Index {
|
|
// MinIndex was given and matches or is higher than current value so we
|
|
// ignore the cache and fallthrough to blocking on a new value below.
|
|
return true, false, entry
|
|
}
|
|
|
|
// Check MaxAge is not exceeded if this is not a background refreshing type
|
|
// and MaxAge was specified.
|
|
if !tEntry.Opts.Refresh && info.MaxAge > 0 && entryExceedsMaxAge(info.MaxAge, entry) {
|
|
return true, false, entry
|
|
}
|
|
|
|
// Check if re-validate is requested. If so the first time round the
|
|
// loop is not a hit but subsequent ones should be treated normally.
|
|
if !tEntry.Opts.Refresh && info.MustRevalidate {
|
|
// It is important to note that this block ONLY applies when we are not
|
|
// in indefinite refresh mode (where the underlying goroutine will
|
|
// continue to re-query for data).
|
|
//
|
|
// In this mode goroutines have a 1:1 relationship to RPCs that get
|
|
// executed, and importantly they DO NOT SLEEP after executing.
|
|
//
|
|
// This means that a running goroutine for this cache entry extremely
|
|
// strongly implies that the RPC has not yet completed, which is why
|
|
// this check works for the revalidation-avoidance optimization here.
|
|
if entry.GoroutineID != 0 {
|
|
// There is an active goroutine performing a blocking query for
|
|
// this data, which has not returned.
|
|
//
|
|
// We can logically deduce that the contents of the cache are
|
|
// actually current, and we can simply return this while leaving
|
|
// the blocking query alone.
|
|
return true, true, entry
|
|
}
|
|
return true, false, entry
|
|
}
|
|
|
|
return true, true, entry
|
|
}
|
|
|
|
func entryExceedsMaxAge(maxAge time.Duration, entry cacheEntry) bool {
|
|
return !entry.FetchedAt.IsZero() && maxAge < time.Since(entry.FetchedAt)
|
|
}
|
|
|
|
// getWithIndex implements the main Get functionality but allows internal
|
|
// callers (Watch) to manipulate the blocking index separately from the actual
|
|
// request object.
|
|
func (c *Cache) getWithIndex(ctx context.Context, r getOptions) (interface{}, ResultMeta, error) {
|
|
if r.Info.Key == "" {
|
|
metrics.IncrCounter([]string{"consul", "cache", "bypass"}, 1)
|
|
metrics.IncrCounter([]string{"cache", "bypass"}, 1)
|
|
|
|
// If no key is specified, then we do not cache this request.
|
|
// Pass directly through to the backend.
|
|
result, err := r.Fetch(FetchOptions{MinIndex: r.Info.MinIndex})
|
|
return result.Value, ResultMeta{}, err
|
|
}
|
|
|
|
key := makeEntryKey(r.TypeEntry.Name, r.Info.Datacenter, r.Info.PeerName, r.Info.Token, r.Info.Key)
|
|
|
|
// First time through
|
|
first := true
|
|
|
|
// timeoutCh for watching our timeout
|
|
var timeoutCh <-chan time.Time
|
|
|
|
RETRY_GET:
|
|
// Get the current value
|
|
c.entriesLock.RLock()
|
|
_, entryValid, entry := c.getEntryLocked(r.TypeEntry, key, r.Info)
|
|
c.entriesLock.RUnlock()
|
|
|
|
if entry.Expiry != nil {
|
|
// The entry already exists in the TTL heap, touch it to keep it alive since
|
|
// this Get is still interested in the value. Note that we used to only do
|
|
// this in the `entryValid` block below but that means that a cache entry
|
|
// will expire after it's TTL regardless of how many callers are waiting for
|
|
// updates in this method in a couple of cases:
|
|
// 1. If the agent is disconnected from servers for the TTL then the client
|
|
// will be in backoff getting errors on each call to Get and since an
|
|
// errored cache entry has Valid = false it won't be touching the TTL.
|
|
// 2. If the value is just not changing then the client's current index
|
|
// will be equal to the entry index and entryValid will be false. This
|
|
// is a common case!
|
|
//
|
|
// But regardless of the state of the entry, assuming it's already in the
|
|
// TTL heap, we should touch it every time around here since this caller at
|
|
// least still cares about the value!
|
|
c.entriesLock.Lock()
|
|
c.entriesExpiryHeap.Update(entry.Expiry.Index(), r.TypeEntry.Opts.LastGetTTL)
|
|
c.entriesLock.Unlock()
|
|
}
|
|
|
|
if entryValid {
|
|
meta := ResultMeta{Index: entry.Index}
|
|
if first {
|
|
metrics.IncrCounter([]string{"consul", "cache", r.TypeEntry.Name, "hit"}, 1)
|
|
metrics.IncrCounter([]string{"cache", r.TypeEntry.Name, "hit"}, 1)
|
|
meta.Hit = true
|
|
}
|
|
|
|
// If refresh is enabled, calculate age based on whether the background
|
|
// routine is still connected.
|
|
if r.TypeEntry.Opts.Refresh {
|
|
meta.Age = time.Duration(0)
|
|
if !entry.RefreshLostContact.IsZero() {
|
|
meta.Age = time.Since(entry.RefreshLostContact)
|
|
}
|
|
} else {
|
|
// For non-background refresh types, the age is just how long since we
|
|
// fetched it last.
|
|
if !entry.FetchedAt.IsZero() {
|
|
meta.Age = time.Since(entry.FetchedAt)
|
|
}
|
|
}
|
|
|
|
// We purposely do not return an error here since the cache only works with
|
|
// fetching values that either have a value or have an error, but not both.
|
|
// The Error may be non-nil in the entry in the case that an error has
|
|
// occurred _since_ the last good value, but we still want to return the
|
|
// good value to clients that are not requesting a specific version. The
|
|
// effect of this is that blocking clients will all see an error immediately
|
|
// without waiting a whole timeout to see it, but clients that just look up
|
|
// cache with an older index than the last valid result will still see the
|
|
// result and not the error here. I.e. the error is not "cached" without a
|
|
// new fetch attempt occurring, but the last good value can still be fetched
|
|
// from cache.
|
|
return entry.Value, meta, nil
|
|
}
|
|
|
|
// If this isn't our first time through and our last value has an error, then
|
|
// we return the error. This has the behavior that we don't sit in a retry
|
|
// loop getting the same error for the entire duration of the timeout.
|
|
// Instead, we make one effort to fetch a new value, and if there was an
|
|
// error, we return. Note that the invariant is that if both entry.Value AND
|
|
// entry.Error are non-nil, the error _must_ be more recent than the Value. In
|
|
// other words valid fetches should reset the error. See
|
|
// https://github.com/hashicorp/consul/issues/4480.
|
|
if !first && entry.Error != nil {
|
|
return entry.Value, ResultMeta{Index: entry.Index}, entry.Error
|
|
}
|
|
|
|
if first {
|
|
// We increment two different counters for cache misses depending on
|
|
// whether we're missing because we didn't have the data at all,
|
|
// or if we're missing because we're blocking on a set index.
|
|
missKey := "miss_block"
|
|
if r.Info.MinIndex == 0 {
|
|
missKey = "miss_new"
|
|
}
|
|
metrics.IncrCounter([]string{"consul", "cache", r.TypeEntry.Name, missKey}, 1)
|
|
metrics.IncrCounter([]string{"cache", r.TypeEntry.Name, missKey}, 1)
|
|
}
|
|
|
|
// Set our timeout channel if we must
|
|
if r.Info.Timeout > 0 && timeoutCh == nil {
|
|
timeoutCh = time.After(r.Info.Timeout)
|
|
}
|
|
|
|
// At this point, we know we either don't have a value at all or the
|
|
// value we have is too old. We need to wait for new data.
|
|
waiterCh := c.fetch(key, r)
|
|
|
|
// No longer our first time through
|
|
first = false
|
|
|
|
select {
|
|
case <-ctx.Done():
|
|
return nil, ResultMeta{}, ctx.Err()
|
|
case <-waiterCh:
|
|
// Our fetch returned, retry the get from the cache.
|
|
r.Info.MustRevalidate = false
|
|
goto RETRY_GET
|
|
|
|
case <-timeoutCh:
|
|
// Timeout on the cache read, just return whatever we have.
|
|
return entry.Value, ResultMeta{Index: entry.Index}, nil
|
|
}
|
|
}
|
|
|
|
func makeEntryKey(t, dc, peerName, token, key string) string {
|
|
// TODO(peering): figure out if this is the desired format
|
|
if peerName != "" {
|
|
return fmt.Sprintf("%s/%s/%s/%s", t, "peer:"+peerName, token, key)
|
|
}
|
|
return fmt.Sprintf("%s/%s/%s/%s", t, dc, token, key)
|
|
}
|
|
|
|
// fetch triggers a new background fetch for the given Request. If a background
|
|
// fetch is already running or a goroutine to manage that still exists for a
|
|
// matching Request, the waiter channel for that request is returned. The
|
|
// effect of this is that there is only ever one blocking query and goroutine
|
|
// for any matching requests.
|
|
func (c *Cache) fetch(key string, r getOptions) <-chan struct{} {
|
|
c.entriesLock.Lock()
|
|
defer c.entriesLock.Unlock()
|
|
|
|
ok, entryValid, entry := c.getEntryLocked(r.TypeEntry, key, r.Info)
|
|
|
|
switch {
|
|
case ok && entryValid:
|
|
// This handles the case where a fetch succeeded after checking for its
|
|
// existence in getWithIndex. This ensures that we don't miss updates.
|
|
ch := make(chan struct{})
|
|
close(ch)
|
|
return ch
|
|
|
|
case ok && entry.GoroutineID != 0:
|
|
// If we already have an entry and there's a goroutine to keep it
|
|
// refreshed then don't spawn another one to do the same work.
|
|
//
|
|
// Return the currently active waiter.
|
|
return entry.Waiter
|
|
|
|
case !ok:
|
|
// If we don't have an entry, then create it. The entry must be marked
|
|
// as invalid so that it isn't returned as a valid value for a zero
|
|
// index.
|
|
entry = cacheEntry{
|
|
Valid: false,
|
|
Waiter: make(chan struct{}),
|
|
FetchRateLimiter: rate.NewLimiter(
|
|
c.options.EntryFetchRate,
|
|
c.options.EntryFetchMaxBurst,
|
|
),
|
|
}
|
|
}
|
|
|
|
// Assign each background fetching goroutine a unique ID and fingerprint
|
|
// the cache entry with the same ID. This way if the cache entry is ever
|
|
// cleaned up due to expiry and later recreated the old goroutine can
|
|
// detect that and terminate rather than leak and do double work.
|
|
c.lastGoroutineID++
|
|
entry.GoroutineID = c.lastGoroutineID
|
|
c.entries[key] = entry
|
|
metrics.SetGauge([]string{"consul", "cache", "entries_count"}, float32(len(c.entries)))
|
|
metrics.SetGauge([]string{"cache", "entries_count"}, float32(len(c.entries)))
|
|
|
|
// The actual Fetch must be performed in a goroutine.
|
|
go c.launchBackgroundFetcher(entry.GoroutineID, key, r)
|
|
|
|
return entry.Waiter
|
|
}
|
|
|
|
func (c *Cache) launchBackgroundFetcher(goroutineID uint64, key string, r getOptions) {
|
|
defer func() {
|
|
c.entriesLock.Lock()
|
|
defer c.entriesLock.Unlock()
|
|
entry, ok := c.entries[key]
|
|
if ok && entry.GoroutineID == goroutineID {
|
|
entry.GoroutineID = 0
|
|
c.entries[key] = entry
|
|
}
|
|
}()
|
|
|
|
var attempt uint
|
|
for {
|
|
shouldStop, shouldBackoff := c.runBackgroundFetcherOnce(goroutineID, key, r)
|
|
if shouldStop {
|
|
return
|
|
}
|
|
|
|
if shouldBackoff {
|
|
attempt++
|
|
} else {
|
|
attempt = 0
|
|
}
|
|
// If we're over the attempt minimum, start an exponential backoff.
|
|
wait := backOffWait(c.options, attempt)
|
|
|
|
// If we have a timer, wait for it
|
|
wait += r.TypeEntry.Opts.RefreshTimer
|
|
|
|
select {
|
|
case <-time.After(wait):
|
|
case <-c.stopCh:
|
|
return // Check if cache was stopped
|
|
}
|
|
|
|
// Trigger.
|
|
r.Info.MustRevalidate = false
|
|
r.Info.MinIndex = 0
|
|
|
|
// We acquire a write lock because we may have to set Fetching to true.
|
|
c.entriesLock.Lock()
|
|
|
|
entry, ok := c.entries[key]
|
|
if !ok || entry.GoroutineID != goroutineID {
|
|
// If we don't have an existing entry, return immediately.
|
|
//
|
|
// Also if we already have an entry and it is actively fetching, then
|
|
// return immediately.
|
|
//
|
|
// If we've somehow lost control of the entry, also return.
|
|
c.entriesLock.Unlock()
|
|
return
|
|
}
|
|
|
|
c.entries[key] = entry
|
|
metrics.SetGauge([]string{"consul", "cache", "entries_count"}, float32(len(c.entries)))
|
|
metrics.SetGauge([]string{"cache", "entries_count"}, float32(len(c.entries)))
|
|
c.entriesLock.Unlock()
|
|
}
|
|
}
|
|
|
|
func (c *Cache) runBackgroundFetcherOnce(goroutineID uint64, key string, r getOptions) (shouldStop, shouldBackoff bool) {
|
|
// Freshly re-read this, rather than relying upon the caller to fetch it
|
|
// and pass it in.
|
|
c.entriesLock.RLock()
|
|
entry, ok := c.entries[key]
|
|
c.entriesLock.RUnlock()
|
|
|
|
if !ok || entry.GoroutineID != goroutineID {
|
|
// If we don't have an existing entry, return immediately.
|
|
//
|
|
// Also if something weird has happened to orphan this goroutine, also
|
|
// return immediately.
|
|
return true, false
|
|
}
|
|
|
|
tEntry := r.TypeEntry
|
|
{ // NOTE: this indentation is here to facilitate the PR review diff only
|
|
// If we have background refresh and currently are in "disconnected" state,
|
|
// waiting for a response might mean we mark our results as stale for up to
|
|
// 10 minutes (max blocking timeout) after connection is restored. To reduce
|
|
// that window, we assume that if the fetch takes more than 31 seconds then
|
|
// they are correctly blocking. We choose 31 seconds because yamux
|
|
// keepalives are every 30 seconds so the RPC should fail if the packets are
|
|
// being blackholed for more than 30 seconds.
|
|
var connectedTimer *time.Timer
|
|
if tEntry.Opts.Refresh && entry.Index > 0 && tEntry.Opts.QueryTimeout > 31*time.Second {
|
|
connectedTimer = time.AfterFunc(31*time.Second, func() {
|
|
c.entriesLock.Lock()
|
|
defer c.entriesLock.Unlock()
|
|
entry, ok := c.entries[key]
|
|
if !ok || entry.RefreshLostContact.IsZero() || entry.GoroutineID != goroutineID {
|
|
return
|
|
}
|
|
entry.RefreshLostContact = time.Time{}
|
|
c.entries[key] = entry
|
|
})
|
|
}
|
|
|
|
fOpts := FetchOptions{}
|
|
if tEntry.Opts.SupportsBlocking {
|
|
fOpts.MinIndex = entry.Index
|
|
fOpts.Timeout = tEntry.Opts.QueryTimeout
|
|
|
|
if fOpts.Timeout == 0 {
|
|
fOpts.Timeout = 10 * time.Minute
|
|
}
|
|
}
|
|
if entry.Valid {
|
|
fOpts.LastResult = &FetchResult{
|
|
Value: entry.Value,
|
|
State: entry.State,
|
|
Index: entry.Index,
|
|
}
|
|
}
|
|
|
|
if err := entry.FetchRateLimiter.Wait(c.rateLimitContext); err != nil {
|
|
if connectedTimer != nil {
|
|
connectedTimer.Stop()
|
|
}
|
|
entry.Error = fmt.Errorf("rateLimitContext canceled: %s", err.Error())
|
|
// NOTE: this can only happen when the entire cache is being
|
|
// shutdown and isn't something that can happen normally.
|
|
return true, false
|
|
}
|
|
// Start building the new entry by blocking on the fetch.
|
|
result, err := r.Fetch(fOpts)
|
|
if connectedTimer != nil {
|
|
connectedTimer.Stop()
|
|
}
|
|
|
|
// Copy the existing entry to start.
|
|
newEntry := entry
|
|
|
|
// Importantly, always reset the Error. Having both Error and a Value that
|
|
// are non-nil is allowed in the cache entry but it indicates that the Error
|
|
// is _newer_ than the last good value. So if the err is nil then we need to
|
|
// reset to replace any _older_ errors and avoid them bubbling up. If the
|
|
// error is non-nil then we need to set it anyway and used to do it in the
|
|
// code below. See https://github.com/hashicorp/consul/issues/4480.
|
|
newEntry.Error = err
|
|
|
|
if result.Value != nil {
|
|
// A new value was given, so we create a brand new entry.
|
|
if !result.NotModified {
|
|
newEntry.Value = result.Value
|
|
}
|
|
newEntry.State = result.State
|
|
newEntry.Index = result.Index
|
|
newEntry.FetchedAt = time.Now()
|
|
if newEntry.Index < 1 {
|
|
// Less than one is invalid unless there was an error and in this case
|
|
// there wasn't since a value was returned. If a badly behaved RPC
|
|
// returns 0 when it has no data, we might get into a busy loop here. We
|
|
// set this to minimum of 1 which is safe because no valid user data can
|
|
// ever be written at raft index 1 due to the bootstrap process for
|
|
// raft. This insure that any subsequent background refresh request will
|
|
// always block, but allows the initial request to return immediately
|
|
// even if there is no data.
|
|
newEntry.Index = 1
|
|
}
|
|
|
|
// This is a valid entry with a result
|
|
newEntry.Valid = true
|
|
} else if result.State != nil && err == nil {
|
|
// Also set state if it's non-nil but Value is nil. This is important in the
|
|
// case we are returning nil due to a timeout or a transient error like rate
|
|
// limiting that we want to mask from the user - there is no result yet but
|
|
// we want to manage retrying internally before we return an error to user.
|
|
// The retrying state is in State so we need to still update that in the
|
|
// entry even if we don't have an actual result yet (e.g. hit a rate limit
|
|
// on first request for a leaf certificate).
|
|
newEntry.State = result.State
|
|
}
|
|
|
|
preventRefresh := acl.IsErrNotFound(err)
|
|
|
|
// Error handling
|
|
if err == nil {
|
|
labels := []metrics.Label{{Name: "result_not_modified", Value: strconv.FormatBool(result.NotModified)}}
|
|
// TODO(kit): move tEntry.Name to a label on the first write here and deprecate the second write
|
|
metrics.IncrCounterWithLabels([]string{"consul", "cache", "fetch_success"}, 1, labels)
|
|
metrics.IncrCounterWithLabels([]string{"cache", "fetch_success"}, 1, labels)
|
|
metrics.IncrCounterWithLabels([]string{"consul", "cache", tEntry.Name, "fetch_success"}, 1, labels)
|
|
metrics.IncrCounterWithLabels([]string{"cache", tEntry.Name, "fetch_success"}, 1, labels)
|
|
|
|
if result.Index > 0 {
|
|
// Reset the attempts counter so we don't have any backoff
|
|
shouldBackoff = false
|
|
} else {
|
|
// Result having a zero index is an implicit error case. There was no
|
|
// actual error but it implies the RPC found in index (nothing written
|
|
// yet for that type) but didn't take care to return safe "1" index. We
|
|
// don't want to actually treat it like an error by setting
|
|
// newEntry.Error to something non-nil, but we should guard against 100%
|
|
// CPU burn hot loops caused by that case which will never block but
|
|
// also won't backoff either. So we treat it as a failed attempt so that
|
|
// at least the failure backoff will save our CPU while still
|
|
// periodically refreshing so normal service can resume when the servers
|
|
// actually have something to return from the RPC. If we get in this
|
|
// state it can be considered a bug in the RPC implementation (to ever
|
|
// return a zero index) however since it can happen this is a safety net
|
|
// for the future.
|
|
shouldBackoff = true
|
|
}
|
|
|
|
// If we have refresh active, this successful response means cache is now
|
|
// "connected" and should not be stale. Reset the lost contact timer.
|
|
if tEntry.Opts.Refresh {
|
|
newEntry.RefreshLostContact = time.Time{}
|
|
}
|
|
} else {
|
|
// TODO (mkeeler) maybe change the name of this label to be more indicative of it just
|
|
// stopping the background refresh
|
|
labels := []metrics.Label{{Name: "fatal", Value: strconv.FormatBool(preventRefresh)}}
|
|
|
|
// TODO(kit): Add tEntry.Name to label on fetch_error and deprecate second write
|
|
metrics.IncrCounterWithLabels([]string{"consul", "cache", "fetch_error"}, 1, labels)
|
|
metrics.IncrCounterWithLabels([]string{"cache", "fetch_error"}, 1, labels)
|
|
metrics.IncrCounterWithLabels([]string{"consul", "cache", tEntry.Name, "fetch_error"}, 1, labels)
|
|
metrics.IncrCounterWithLabels([]string{"cache", tEntry.Name, "fetch_error"}, 1, labels)
|
|
|
|
// Increment attempt counter
|
|
shouldBackoff = true
|
|
|
|
// If we are refreshing and just failed, updated the lost contact time as
|
|
// our cache will be stale until we get successfully reconnected. We only
|
|
// set this on the first failure (if it's zero) so we can track how long
|
|
// it's been since we had a valid connection/up-to-date view of the state.
|
|
if tEntry.Opts.Refresh && newEntry.RefreshLostContact.IsZero() {
|
|
newEntry.RefreshLostContact = time.Now()
|
|
}
|
|
}
|
|
|
|
// Create a new waiter that will be used for the next fetch.
|
|
newEntry.Waiter = make(chan struct{})
|
|
|
|
// Set our entry
|
|
c.entriesLock.Lock()
|
|
|
|
if currEntry, ok := c.entries[key]; !ok || currEntry.GoroutineID != goroutineID {
|
|
// This entry was evicted during our fetch. DON'T re-insert it or fall
|
|
// through to the refresh loop below otherwise it will live forever! In
|
|
// theory there should not be any Get calls waiting on entry.Waiter since
|
|
// they would have prevented the eviction, but in practice there may be
|
|
// due to timing and the fact that we don't update the TTL on the entry if
|
|
// errors are being returned for a while. So we do need to unblock them,
|
|
// which will mean they recreate the entry again right away and so "reset"
|
|
// to a good state anyway!
|
|
c.entriesLock.Unlock()
|
|
|
|
// Trigger any waiters that are around.
|
|
close(entry.Waiter)
|
|
return true, false
|
|
}
|
|
|
|
// If this is a new entry (not in the heap yet), then setup the
|
|
// initial expiry information and insert. If we're already in
|
|
// the heap we do nothing since we're reusing the same entry.
|
|
if newEntry.Expiry == nil || newEntry.Expiry.Index() == ttlcache.NotIndexed {
|
|
newEntry.Expiry = c.entriesExpiryHeap.Add(key, tEntry.Opts.LastGetTTL)
|
|
}
|
|
|
|
c.entries[key] = newEntry
|
|
c.entriesLock.Unlock()
|
|
|
|
// Trigger the old waiter
|
|
close(entry.Waiter)
|
|
|
|
// If refresh is enabled, run the refresh in due time. The refresh
|
|
// below might block, but saves us from spawning another goroutine.
|
|
//
|
|
// We want to have ACL not found errors stop cache refresh for the cases
|
|
// where the token used for the query was deleted. If the request
|
|
// was coming from a cache notification then it will start the
|
|
// request back up again shortly but in the general case this prevents
|
|
// spamming the logs with tons of ACL not found errors for days.
|
|
if tEntry.Opts.Refresh && !preventRefresh {
|
|
return false, shouldBackoff
|
|
}
|
|
}
|
|
|
|
return true, false
|
|
}
|
|
|
|
func backOffWait(opts Options, failures uint) time.Duration {
|
|
if failures > opts.CacheRefreshBackoffMin {
|
|
shift := failures - opts.CacheRefreshBackoffMin
|
|
waitTime := opts.CacheRefreshMaxWait
|
|
if shift < 31 {
|
|
waitTime = (1 << shift) * time.Second
|
|
}
|
|
if waitTime > opts.CacheRefreshMaxWait {
|
|
waitTime = opts.CacheRefreshMaxWait
|
|
}
|
|
return waitTime + lib.RandomStagger(waitTime)
|
|
}
|
|
return 0
|
|
}
|
|
|
|
// runExpiryLoop is a blocking function that watches the expiration
|
|
// heap and invalidates entries that have expired.
|
|
func (c *Cache) runExpiryLoop() {
|
|
for {
|
|
c.entriesLock.RLock()
|
|
timer := c.entriesExpiryHeap.Next()
|
|
c.entriesLock.RUnlock()
|
|
|
|
select {
|
|
case <-c.stopCh:
|
|
timer.Stop()
|
|
return
|
|
case <-c.entriesExpiryHeap.NotifyCh:
|
|
timer.Stop()
|
|
continue
|
|
|
|
case <-timer.Wait():
|
|
c.entriesLock.Lock()
|
|
|
|
entry := timer.Entry
|
|
if closer, ok := c.entries[entry.Key()].State.(io.Closer); ok {
|
|
closer.Close()
|
|
}
|
|
|
|
// Entry expired! Remove it.
|
|
delete(c.entries, entry.Key())
|
|
c.entriesExpiryHeap.Remove(entry.Index())
|
|
|
|
// Set some metrics
|
|
metrics.IncrCounter([]string{"consul", "cache", "evict_expired"}, 1)
|
|
metrics.IncrCounter([]string{"cache", "evict_expired"}, 1)
|
|
metrics.SetGauge([]string{"consul", "cache", "entries_count"}, float32(len(c.entries)))
|
|
metrics.SetGauge([]string{"cache", "entries_count"}, float32(len(c.entries)))
|
|
|
|
c.entriesLock.Unlock()
|
|
}
|
|
}
|
|
}
|
|
|
|
// Close stops any background work and frees all resources for the cache.
|
|
// Current Fetch requests are allowed to continue to completion and callers may
|
|
// still access the current cache values so coordination isn't needed with
|
|
// callers, however no background activity will continue. It's intended to close
|
|
// the cache at agent shutdown so no further requests should be made, however
|
|
// concurrent or in-flight ones won't break.
|
|
func (c *Cache) Close() error {
|
|
wasStopped := atomic.SwapUint32(&c.stopped, 1)
|
|
if wasStopped == 0 {
|
|
// First time only, close stop chan
|
|
close(c.stopCh)
|
|
c.rateLimitCancel()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Prepopulate puts something in the cache manually. This is useful when the
|
|
// correct initial value is know and the cache shouldn't refetch the same thing
|
|
// on startup. It is used to set the ConnectRootCA and AgentLeafCert when
|
|
// AutoEncrypt.TLS is turned on. The cache itself cannot fetch that the first
|
|
// time because it requires a special RPCType. Subsequent runs are fine though.
|
|
func (c *Cache) Prepopulate(t string, res FetchResult, dc, peerName, token, k string) error {
|
|
key := makeEntryKey(t, dc, peerName, token, k)
|
|
newEntry := cacheEntry{
|
|
Valid: true,
|
|
Value: res.Value,
|
|
State: res.State,
|
|
Index: res.Index,
|
|
FetchedAt: time.Now(),
|
|
Waiter: make(chan struct{}),
|
|
FetchRateLimiter: rate.NewLimiter(
|
|
c.options.EntryFetchRate,
|
|
c.options.EntryFetchMaxBurst,
|
|
),
|
|
}
|
|
c.entriesLock.Lock()
|
|
c.entries[key] = newEntry
|
|
c.entriesLock.Unlock()
|
|
return nil
|
|
}
|