mirror of https://github.com/status-im/consul.git
Merge pull request #4303 from pierresouchay/non_blocking_acl
Only send one single ACL cache refresh across network when TTL is over
This commit is contained in:
commit
d19c7d8882
|
@ -158,7 +158,7 @@ test: other-consul dev-build vet
|
|||
@# hide it from travis as it exceeds their log limits and causes job to be
|
||||
@# terminated (over 4MB and over 10k lines in the UI). We need to output
|
||||
@# _something_ to stop them terminating us due to inactivity...
|
||||
{ go test $(GOTEST_FLAGS) -tags '$(GOTAGS)' -timeout 5m $(GOTEST_PKGS) 2>&1 ; echo $$? > exit-code ; } | tee test.log | egrep '^(ok|FAIL)\s*github.com/hashicorp/consul'
|
||||
{ go test $(GOTEST_FLAGS) -tags '$(GOTAGS)' -timeout 7m $(GOTEST_PKGS) 2>&1 ; echo $$? > exit-code ; } | tee test.log | egrep '^(ok|FAIL)\s*github.com/hashicorp/consul'
|
||||
@echo "Exit code: $$(cat exit-code)" >> test.log
|
||||
@# This prints all the race report between ====== lines
|
||||
@awk '/^WARNING: DATA RACE/ {do_print=1; print "=================="} do_print==1 {print} /^={10,}/ {do_print=0}' test.log || true
|
||||
|
|
|
@ -103,7 +103,7 @@ func newACLManager(config *config.RuntimeConfig) (*aclManager, error) {
|
|||
down = acl.AllowAll()
|
||||
case "deny":
|
||||
down = acl.DenyAll()
|
||||
case "extend-cache":
|
||||
case "async-cache", "extend-cache":
|
||||
// Leave the down policy as nil to signal this.
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid ACL down policy %q", config.ACLDownPolicy)
|
||||
|
|
|
@ -274,8 +274,10 @@ func TestACL_Down_Allow(t *testing.T) {
|
|||
|
||||
func TestACL_Down_Extend(t *testing.T) {
|
||||
t.Parallel()
|
||||
aclExtendPolicies := []string{"extend-cache", "async-cache"}
|
||||
for _, aclDownPolicy := range aclExtendPolicies {
|
||||
a := NewTestAgent(t.Name(), TestACLConfig()+`
|
||||
acl_down_policy = "extend-cache"
|
||||
acl_down_policy = "`+aclDownPolicy+`"
|
||||
acl_enforce_version_8 = true
|
||||
`)
|
||||
defer a.Shutdown()
|
||||
|
@ -349,6 +351,7 @@ func TestACL_Down_Extend(t *testing.T) {
|
|||
t.Fatalf("should deny")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestACL_Cache(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
|
|
@ -94,8 +94,10 @@ type RuntimeConfig struct {
|
|||
// ACL's to be used to service requests. This
|
||||
// is the default. If the ACL is not in the cache,
|
||||
// this acts like deny.
|
||||
// * async-cache - Same behaviour as extend-cache, but performs ACL
|
||||
// lookups asynchronously when the cache TTL has expired.
|
||||
//
|
||||
// hcl: acl_down_policy = ("allow"|"deny"|"extend-cache")
|
||||
// hcl: acl_down_policy = ("allow"|"deny"|"extend-cache"|"async-cache")
|
||||
ACLDownPolicy string
|
||||
|
||||
// ACLEnforceVersion8 is used to gate a set of ACL policy features that
|
||||
|
|
|
@ -4,6 +4,7 @@ import (
|
|||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
|
@ -116,6 +117,9 @@ type aclCache struct {
|
|||
// local is a function used to look for an ACL locally if replication is
|
||||
// enabled. This will be nil if replication isn't enabled.
|
||||
local acl.FaultFunc
|
||||
|
||||
fetchMutex sync.RWMutex
|
||||
fetchMap map[string][]chan (RemoteACLResult)
|
||||
}
|
||||
|
||||
// newACLCache returns a new non-authoritative cache for ACLs. This is used for
|
||||
|
@ -142,10 +146,17 @@ func newACLCache(conf *Config, logger *log.Logger, rpc rpcFn, local acl.FaultFun
|
|||
if err != nil {
|
||||
return nil, fmt.Errorf("Failed to create ACL policy cache: %v", err)
|
||||
}
|
||||
cache.fetchMap = make(map[string][]chan (RemoteACLResult))
|
||||
|
||||
return cache, nil
|
||||
}
|
||||
|
||||
// RemoteACLResult is the result type returned when fetching remote ACLs
// asynchronously. It pairs the resolved ACL with the error of the fetch so
// that both can be delivered to a waiting caller as a single channel value.
|
||||
type RemoteACLResult struct {
|
||||
// result is the ACL produced by the fetch.
result acl.ACL
|
||||
// err is the error produced by the fetch, if any.
err error
|
||||
}
|
||||
|
||||
// lookupACL is used when we are non-authoritative, and need to resolve an ACL.
|
||||
func (c *aclCache) lookupACL(id, authDC string) (acl.ACL, error) {
|
||||
// Check the cache for the ACL.
|
||||
|
@ -161,8 +172,23 @@ func (c *aclCache) lookupACL(id, authDC string) (acl.ACL, error) {
|
|||
return cached.ACL, nil
|
||||
}
|
||||
metrics.IncrCounter([]string{"acl", "cache_miss"}, 1)
|
||||
res := c.lookupACLRemote(id, authDC, cached)
|
||||
return res.result, res.err
|
||||
}
|
||||
|
||||
// Attempt to refresh the policy from the ACL datacenter via an RPC.
|
||||
func (c *aclCache) fireResult(id string, theACL acl.ACL, err error) {
|
||||
c.fetchMutex.Lock()
|
||||
channels := c.fetchMap[id]
|
||||
delete(c.fetchMap, id)
|
||||
c.fetchMutex.Unlock()
|
||||
aclResult := RemoteACLResult{theACL, err}
|
||||
for _, cx := range channels {
|
||||
cx <- aclResult
|
||||
close(cx)
|
||||
}
|
||||
}
|
||||
|
||||
func (c *aclCache) loadACLInChan(id, authDC string, cached *aclCacheEntry) {
|
||||
args := structs.ACLPolicyRequest{
|
||||
Datacenter: authDC,
|
||||
ACL: id,
|
||||
|
@ -173,13 +199,21 @@ func (c *aclCache) lookupACL(id, authDC string) (acl.ACL, error) {
|
|||
var reply structs.ACLPolicy
|
||||
err := c.rpc("ACL.GetPolicy", &args, &reply)
|
||||
if err == nil {
|
||||
return c.useACLPolicy(id, authDC, cached, &reply)
|
||||
theACL, theError := c.useACLPolicy(id, authDC, cached, &reply)
|
||||
if cached != nil && theACL != nil {
|
||||
cached.ACL = theACL
|
||||
cached.ETag = reply.ETag
|
||||
cached.Expires = time.Now().Add(c.config.ACLTTL)
|
||||
}
|
||||
c.fireResult(id, theACL, theError)
|
||||
return
|
||||
}
|
||||
|
||||
// Check for not-found, which will cause us to bail immediately. For any
|
||||
// other error we report it in the logs but can continue.
|
||||
if acl.IsErrNotFound(err) {
|
||||
return nil, acl.ErrNotFound
|
||||
c.fireResult(id, nil, acl.ErrNotFound)
|
||||
return
|
||||
}
|
||||
c.logger.Printf("[ERR] consul.acl: Failed to get policy from ACL datacenter: %v", err)
|
||||
|
||||
|
@ -200,7 +234,7 @@ func (c *aclCache) lookupACL(id, authDC string) (acl.ACL, error) {
|
|||
// local ACL fault function is registered to query replicated ACL data,
|
||||
// and the user's policy allows it, we will try locally before we give
|
||||
// up.
|
||||
if c.local != nil && c.config.ACLDownPolicy == "extend-cache" {
|
||||
if c.local != nil && (c.config.ACLDownPolicy == "extend-cache" || c.config.ACLDownPolicy == "async-cache") {
|
||||
parent, rules, err := c.local(id)
|
||||
if err != nil {
|
||||
// We don't make an exception here for ACLs that aren't
|
||||
|
@ -227,24 +261,58 @@ func (c *aclCache) lookupACL(id, authDC string) (acl.ACL, error) {
|
|||
reply.TTL = c.config.ACLTTL
|
||||
reply.Parent = parent
|
||||
reply.Policy = policy
|
||||
return c.useACLPolicy(id, authDC, cached, &reply)
|
||||
theACL, theError := c.useACLPolicy(id, authDC, cached, &reply)
|
||||
if cached != nil && theACL != nil {
|
||||
cached.ACL = theACL
|
||||
cached.ETag = reply.ETag
|
||||
cached.Expires = time.Now().Add(c.config.ACLTTL)
|
||||
}
|
||||
c.fireResult(id, theACL, theError)
|
||||
return
|
||||
}
|
||||
|
||||
ACL_DOWN:
|
||||
// Unable to refresh, apply the down policy.
|
||||
switch c.config.ACLDownPolicy {
|
||||
case "allow":
|
||||
return acl.AllowAll(), nil
|
||||
case "extend-cache":
|
||||
c.fireResult(id, acl.AllowAll(), nil)
|
||||
return
|
||||
case "async-cache", "extend-cache":
|
||||
if cached != nil {
|
||||
return cached.ACL, nil
|
||||
c.fireResult(id, cached.ACL, nil)
|
||||
return
|
||||
}
|
||||
fallthrough
|
||||
default:
|
||||
return acl.DenyAll(), nil
|
||||
c.fireResult(id, acl.DenyAll(), nil)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func (c *aclCache) lookupACLRemote(id, authDC string, cached *aclCacheEntry) RemoteACLResult {
|
||||
// Attempt to refresh the policy from the ACL datacenter via an RPC.
|
||||
myChan := make(chan RemoteACLResult)
|
||||
mustWaitForResult := cached == nil || c.config.ACLDownPolicy != "async-cache"
|
||||
c.fetchMutex.Lock()
|
||||
clients, ok := c.fetchMap[id]
|
||||
if !ok || clients == nil {
|
||||
clients = make([]chan RemoteACLResult, 0)
|
||||
}
|
||||
if mustWaitForResult {
|
||||
c.fetchMap[id] = append(clients, myChan)
|
||||
}
|
||||
c.fetchMutex.Unlock()
|
||||
|
||||
if !ok {
|
||||
go c.loadACLInChan(id, authDC, cached)
|
||||
}
|
||||
if !mustWaitForResult {
|
||||
return RemoteACLResult{cached.ACL, nil}
|
||||
}
|
||||
res := <-myChan
|
||||
return res
|
||||
}
|
||||
|
||||
// useACLPolicy handles an ACLPolicy response
|
||||
func (c *aclCache) useACLPolicy(id, authDC string, cached *aclCacheEntry, p *structs.ACLPolicy) (acl.ACL, error) {
|
||||
// Check if we can use the cached policy
|
||||
|
|
|
@ -508,10 +508,13 @@ func TestACL_DownPolicy_Allow(t *testing.T) {
|
|||
|
||||
func TestACL_DownPolicy_ExtendCache(t *testing.T) {
|
||||
t.Parallel()
|
||||
aclExtendPolicies := []string{"extend-cache", "async-cache"} //"async-cache"
|
||||
|
||||
for _, aclDownPolicy := range aclExtendPolicies {
|
||||
dir1, s1 := testServerWithConfig(t, func(c *Config) {
|
||||
c.ACLDatacenter = "dc1"
|
||||
c.ACLTTL = 0
|
||||
c.ACLDownPolicy = "extend-cache"
|
||||
c.ACLDownPolicy = aclDownPolicy
|
||||
c.ACLMasterToken = "root"
|
||||
})
|
||||
defer os.RemoveAll(dir1)
|
||||
|
@ -522,7 +525,7 @@ func TestACL_DownPolicy_ExtendCache(t *testing.T) {
|
|||
dir2, s2 := testServerWithConfig(t, func(c *Config) {
|
||||
c.ACLDatacenter = "dc1" // Enable ACLs!
|
||||
c.ACLTTL = 0
|
||||
c.ACLDownPolicy = "extend-cache"
|
||||
c.ACLDownPolicy = aclDownPolicy
|
||||
c.Bootstrap = false // Disable bootstrap
|
||||
})
|
||||
defer os.RemoveAll(dir2)
|
||||
|
@ -582,9 +585,13 @@ func TestACL_DownPolicy_ExtendCache(t *testing.T) {
|
|||
t.Fatalf("bad acl: %#v", aclR)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestACL_Replication(t *testing.T) {
|
||||
t.Parallel()
|
||||
aclExtendPolicies := []string{"extend-cache", "async-cache"} //"async-cache"
|
||||
|
||||
for _, aclDownPolicy := range aclExtendPolicies {
|
||||
dir1, s1 := testServerWithConfig(t, func(c *Config) {
|
||||
c.ACLDatacenter = "dc1"
|
||||
c.ACLMasterToken = "root"
|
||||
|
@ -598,7 +605,7 @@ func TestACL_Replication(t *testing.T) {
|
|||
c.Datacenter = "dc2"
|
||||
c.ACLDatacenter = "dc1"
|
||||
c.ACLDefaultPolicy = "deny"
|
||||
c.ACLDownPolicy = "extend-cache"
|
||||
c.ACLDownPolicy = aclDownPolicy
|
||||
c.EnableACLReplication = true
|
||||
c.ACLReplicationInterval = 10 * time.Millisecond
|
||||
c.ACLReplicationApplyLimit = 1000000
|
||||
|
@ -697,6 +704,7 @@ func TestACL_Replication(t *testing.T) {
|
|||
t.Fatalf("unexpected read")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestACL_MultiDC_Found(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
|
|
@ -235,8 +235,9 @@ type Config struct {
|
|||
|
||||
// ACLDownPolicy controls the behavior of ACLs if the ACLDatacenter
|
||||
// cannot be contacted. It can be either "deny" to deny all requests,
|
||||
// or "extend-cache" which ignores the ACLCacheInterval and uses
|
||||
// cached policies. If a policy is not in the cache, it acts like deny.
|
||||
// "extend-cache" or "async-cache" which ignores the ACLCacheInterval and
|
||||
// uses cached policies.
|
||||
// If a policy is not in the cache, it acts like deny.
|
||||
// "allow" can be used to allow all requests. This is not recommended.
|
||||
ACLDownPolicy string
|
||||
|
||||
|
@ -378,7 +379,7 @@ func (c *Config) CheckACL() error {
|
|||
switch c.ACLDownPolicy {
|
||||
case "allow":
|
||||
case "deny":
|
||||
case "extend-cache":
|
||||
case "async-cache", "extend-cache":
|
||||
default:
|
||||
return fmt.Errorf("Unsupported down ACL policy: %s", c.ACLDownPolicy)
|
||||
}
|
||||
|
|
|
@ -496,11 +496,14 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass
|
|||
to enable ACL support.
|
||||
|
||||
* <a name="acl_down_policy"></a><a href="#acl_down_policy">`acl_down_policy`</a> - Either
|
||||
"allow", "deny" or "extend-cache"; "extend-cache" is the default. In the case that the
|
||||
"allow", "deny", "extend-cache" or "async-cache"; "extend-cache" is the default. In the case that the
|
||||
policy for a token cannot be read from the [`acl_datacenter`](#acl_datacenter) or leader
|
||||
node, the down policy is applied. In "allow" mode, all actions are permitted, "deny" restricts
|
||||
all operations, and "extend-cache" allows any cached ACLs to be used, ignoring their TTL
|
||||
values. If a non-cached ACL is used, "extend-cache" acts like "deny".
|
||||
The value "async-cache" acts the same way as "extend-cache" but performs updates
|
||||
asynchronously when an ACL is present but its TTL has expired; thus, if latency is bad between the
|
||||
ACL authoritative datacenter and other datacenters, the latency of operations is not impacted.
|
||||
|
||||
* <a name="acl_agent_master_token"></a><a href="#acl_agent_master_token">`acl_agent_master_token`</a> -
|
||||
Used to access <a href="/api/agent.html">agent endpoints</a> that require agent read
|
||||
|
|
|
@ -1061,6 +1061,11 @@ and the [`acl_down_policy`](/docs/agent/options.html#acl_down_policy)
|
|||
is set to "extend-cache", tokens will be resolved during the outage using the
|
||||
replicated set of ACLs. An [ACL replication status](/api/acl.html#acl_replication_status)
|
||||
endpoint is available to monitor the health of the replication process.
|
||||
Also note that in recent versions of Consul (greater than 1.2.0), using
|
||||
`acl_down_policy = "async-cache"` refreshes tokens asynchronously when an ACL is
|
||||
already cached but expired, while keeping semantics similar to "extend-cache".
|
||||
This avoids issues when connectivity with the authoritative datacenter is not completely
|
||||
broken, but very slow.
|
||||
|
||||
Locally-resolved ACLs will be cached using the [`acl_ttl`](/docs/agent/options.html#acl_ttl)
|
||||
setting of the non-authoritative datacenter, so these entries may persist in the
|
||||
|
|
Loading…
Reference in New Issue