From 80db61193cf8c7e709de0defb3fe720661f166ea Mon Sep 17 00:00:00 2001 From: Matt Keeler Date: Mon, 16 Mar 2020 12:54:45 -0400 Subject: [PATCH] Fix ACL mode advertisement and detection (#7451) These changes are necessary to ensure advertisement happens correctly even when datacenters are connected via network areas in Consul enterprise. This also changes how we check if ACLs can be upgraded within the local datacenter. Previously we would iterate through all LAN members. Now we just use the ServerLookup type to iterate through all known servers in the DC. --- agent/consul/acl_server.go | 94 +++++++++++++++++++++++---- agent/consul/acl_server_oss.go | 3 - agent/consul/enterprise_server_oss.go | 6 ++ agent/consul/server_lookup.go | 11 ++++ agent/consul/server_serf.go | 1 + 5 files changed, 101 insertions(+), 14 deletions(-) diff --git a/agent/consul/acl_server.go b/agent/consul/acl_server.go index eb5024acd4..3ac8eee61f 100644 --- a/agent/consul/acl_server.go +++ b/agent/consul/acl_server.go @@ -1,12 +1,15 @@ package consul import ( + "fmt" "sync/atomic" "time" "github.com/hashicorp/consul/acl" + "github.com/hashicorp/consul/agent/metadata" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/lib" + "github.com/hashicorp/serf/serf" ) var serverACLCacheConfig *structs.ACLCachesConfig = &structs.ACLCachesConfig{ @@ -84,19 +87,81 @@ func (s *Server) checkBindingRuleUUID(id string) (bool, error) { return !structs.ACLIDReserved(id), nil } +func (s *Server) updateSerfTags(key, value string) { + // Update the LAN serf + lib.UpdateSerfTag(s.serfLAN, key, value) + + if s.serfWAN != nil { + lib.UpdateSerfTag(s.serfWAN, key, value) + } + + s.updateEnterpriseSerfTags(key, value) +} + func (s *Server) updateACLAdvertisement() { // One thing to note is that once in new ACL mode the server will // never transition to legacy ACL mode. This is not currently a // supported use case. + s.updateSerfTags("acls", string(structs.ACLModeEnabled)) +} - // always advertise to all the LAN Members - lib.UpdateSerfTag(s.serfLAN, "acls", string(structs.ACLModeEnabled)) - s.updateSegmentACLAdvertisements() +type serversACLMode struct { + // leader is the address of the leader + leader string - if s.serfWAN != nil { - // advertise on the WAN only when we are inside the ACL datacenter - lib.UpdateSerfTag(s.serfWAN, "acls", string(structs.ACLModeEnabled)) + // mode indicates the overall ACL mode of the servers + mode structs.ACLMode + + // leaderMode is the ACL mode of the leader server + leaderMode structs.ACLMode + + // indicates that at least one server was processed + found bool + + // + server *Server +} + +func (s *serversACLMode) init(leader string) { + s.leader = leader + s.mode = structs.ACLModeEnabled + s.leaderMode = structs.ACLModeUnknown + s.found = false +} + +func (s *serversACLMode) update(srv *metadata.Server) bool { + fmt.Printf("Processing server acl mode for: %s - %s\n", srv.Name, srv.ACLs) + if srv.Status != serf.StatusAlive && srv.Status != serf.StatusFailed { + // they are left or something so regardless we treat these servers as meeting + // the version requirement + return true } + + // mark that we processed at least one server + s.found = true + + if srvAddr := srv.Addr.String(); srvAddr == s.leader { + s.leaderMode = srv.ACLs + } + + switch srv.ACLs { + case structs.ACLModeDisabled: + // anything disabled means we cant enable ACLs + s.mode = structs.ACLModeDisabled + case structs.ACLModeEnabled: + // do nothing + case structs.ACLModeLegacy: + // This covers legacy mode and older server versions that don't advertise ACL support + if s.mode != structs.ACLModeDisabled && s.mode != structs.ACLModeUnknown { + s.mode = structs.ACLModeLegacy + } + default: + if s.mode != structs.ACLModeDisabled { + s.mode = structs.ACLModeUnknown + } + } + + return true } func (s *Server) canUpgradeToNewACLs(isLeader bool) bool { @@ -105,24 +170,31 @@ func (s *Server) canUpgradeToNewACLs(isLeader bool) bool { return false } + var state serversACLMode if !s.InACLDatacenter() { - numServers, mode, _ := ServersGetACLMode(s.WANMembers(), "", s.config.ACLDatacenter) - if mode != structs.ACLModeEnabled || numServers == 0 { + state.init("") + // use the router to check server information for non-local datacenters + s.router.CheckServers(s.config.ACLDatacenter, state.update) + if state.mode != structs.ACLModeEnabled || !state.found { + s.logger.Info("Cannot upgrade to new ACLs, servers in acl datacenter are not yet upgraded", "ACLDatacenter", s.config.ACLDatacenter, "mode", state.mode, "found", state.found) return false } } + state.init(string(s.raft.Leader())) + // this uses the serverLookup instead of the router as its for the local datacenter + s.serverLookup.CheckServers(state.update) if isLeader { - if _, mode, _ := ServersGetACLMode(s.LANMembers(), "", ""); mode == structs.ACLModeLegacy { + if state.mode == structs.ACLModeLegacy { return true } } else { - leader := string(s.raft.Leader()) - if _, _, leaderMode := ServersGetACLMode(s.LANMembers(), leader, ""); leaderMode == structs.ACLModeEnabled { + if state.leaderMode == structs.ACLModeEnabled { return true } } + s.logger.Info("Cannot upgrade to new ACLs", "leaderMode", state.leaderMode, "mode", state.mode, "found", state.found, "leader", state.leader) return false } diff --git a/agent/consul/acl_server_oss.go b/agent/consul/acl_server_oss.go index 2730f3ce71..32aedd9405 100644 --- a/agent/consul/acl_server_oss.go +++ b/agent/consul/acl_server_oss.go @@ -16,6 +16,3 @@ func (s *Server) ResolveEntTokenToIdentityAndAuthorizer(token string) (structs.A func (s *Server) validateEnterpriseToken(identity structs.ACLIdentity) error { return nil } - -// Consul-enterprise only -func (s *Server) updateSegmentACLAdvertisements() {} diff --git a/agent/consul/enterprise_server_oss.go b/agent/consul/enterprise_server_oss.go index 2b6c782382..0ee39c9245 100644 --- a/agent/consul/enterprise_server_oss.go +++ b/agent/consul/enterprise_server_oss.go @@ -62,3 +62,9 @@ func (s *Server) validateEnterpriseRequest(entMeta *structs.EnterpriseMeta, writ func (_ *Server) addEnterpriseSerfTags(_ map[string]string) { // do nothing } + +// updateEnterpriseSerfTags in enterprise will update any instances of Serf with the tag that +// are not the normal LAN or WAN serf instances (network segments and network areas) +func (_ *Server) updateEnterpriseSerfTags(_, _ string) { + // do nothing +} diff --git a/agent/consul/server_lookup.go b/agent/consul/server_lookup.go index f40b573770..5d290b019e 100644 --- a/agent/consul/server_lookup.go +++ b/agent/consul/server_lookup.go @@ -63,3 +63,14 @@ func (sl *ServerLookup) Servers() []*metadata.Server { } return ret } + +func (sl *ServerLookup) CheckServers(fn func(srv *metadata.Server) bool) { + sl.lock.RLock() + defer sl.lock.RUnlock() + + for _, srv := range sl.addressToServer { + if !fn(srv) { + return + } + } +} diff --git a/agent/consul/server_serf.go b/agent/consul/server_serf.go index cd54572e87..d46ee71b71 100644 --- a/agent/consul/server_serf.go +++ b/agent/consul/server_serf.go @@ -207,6 +207,7 @@ func (s *Server) lanEventHandler() { case serf.EventUser: s.localEvent(e.(serf.UserEvent)) case serf.EventMemberUpdate: + s.lanNodeUpdate(e.(serf.MemberEvent)) s.localMemberEvent(e.(serf.MemberEvent)) case serf.EventQuery: // Ignore default: