From 7d3640329158e857b32cfedea7d9f84897b6e074 Mon Sep 17 00:00:00 2001 From: Kyle Havlovitz Date: Wed, 12 Apr 2017 15:28:18 -0700 Subject: [PATCH 1/4] Wait to initialize autopilot until all servers are >= 0.8.0 --- consul/autopilot.go | 29 ++++++++++++++++++++++++++++- consul/autopilot_test.go | 2 ++ consul/leader.go | 11 ++++++++--- 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/consul/autopilot.go b/consul/autopilot.go index 7ea138566e..0a3294bf49 100644 --- a/consul/autopilot.go +++ b/consul/autopilot.go @@ -10,6 +10,7 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/consul/consul/agent" "github.com/hashicorp/consul/consul/structs" + "github.com/hashicorp/go-version" "github.com/hashicorp/raft" "github.com/hashicorp/serf/serf" ) @@ -33,6 +34,8 @@ func (s *Server) stopAutopilot() { s.autopilotWaitGroup.Wait() } +var minAutopilotVersion, _ = version.NewVersion("0.8.0") + // autopilotLoop periodically looks for nonvoting servers to promote and dead servers to remove. func (s *Server) autopilotLoop() { defer s.autopilotWaitGroup.Done() @@ -53,6 +56,15 @@ func (s *Server) autopilotLoop() { break } + // Setup autopilot config if we need to + if autopilotConf == nil { + if err := s.initializeAutopilot(); err != nil { + s.logger.Printf("[ERR] autopilot: %v", err) + } + + continue + } + if err := s.autopilotPolicy.PromoteNonVoters(autopilotConf); err != nil { s.logger.Printf("[ERR] autopilot: error checking for non-voters to promote: %s", err) } @@ -68,11 +80,26 @@ func (s *Server) autopilotLoop() { } } +// lowestServerVersion returns the lowest version among the alive servers +func (s *Server) lowestServerVersion() *version.Version { + lowest := minAutopilotVersion + + for _, member := range s.LANMembers() { + if valid, parts := agent.IsConsulServer(member); valid && parts.Status == serf.StatusAlive { + if parts.Build.LessThan(lowest) { + lowest = &parts.Build + } + } + } + + return lowest +} + // pruneDeadServers removes up to numPeers/2 failed servers func (s *Server) pruneDeadServers() error { state := s.fsm.State() _, autopilotConf, err := state.AutopilotConfig() - if err != nil { + if err != nil || autopilotConf == nil { return err } diff --git a/consul/autopilot_test.go b/consul/autopilot_test.go index 5aed5719ba..744e05f7f5 100644 --- a/consul/autopilot_test.go +++ b/consul/autopilot_test.go @@ -191,6 +191,8 @@ func TestAutopilot_CleanupStaleRaftServer(t *testing.T) { } } + testutil.WaitForLeader(t, s1.RPC, "dc1") + // Add s4 to peers directly s4addr := fmt.Sprintf("127.0.0.1:%d", s4.config.SerfLANConfig.MemberlistConfig.BindPort) diff --git a/consul/leader.go b/consul/leader.go index dc41a48792..bf48048e41 100644 --- a/consul/leader.go +++ b/consul/leader.go @@ -153,10 +153,9 @@ func (s *Server) establishLeadership() error { return err } - // Setup autopilot config if we are the leader and need to + // Setup autopilot config if we need to if err := s.initializeAutopilot(); err != nil { - s.logger.Printf("[ERR] consul: Autopilot initialization failed: %v", err) - return err + s.logger.Printf("[ERR] autopilot: %v", err) } s.startAutopilot() @@ -252,6 +251,12 @@ func (s *Server) initializeACL() error { // initializeAutopilot is used to setup the autopilot config if we are // the leader and need to do this func (s *Server) initializeAutopilot() error { + lowestVersion := s.lowestServerVersion() + + if !lowestVersion.Equal(minAutopilotVersion) && !lowestVersion.GreaterThan(minAutopilotVersion) { + return fmt.Errorf("can't initialize autopilot until all servers are >= %s", minAutopilotVersion.String()) + } + // Bail if the config has already been initialized state := s.fsm.State() _, config, err := state.AutopilotConfig() From e467da5d3e5d532f5a7e140dae43cf1687407e13 Mon Sep 17 00:00:00 2001 From: Kyle Havlovitz Date: Wed, 12 Apr 2017 17:09:57 -0700 Subject: [PATCH 2/4] Reorganize version check logic for autopilot --- consul/autopilot.go | 59 ++++++++++++--------------------------------- consul/leader.go | 34 ++++++++++++-------------- consul/util.go | 16 ++++++++++++ 3 files changed, 47 insertions(+), 62 deletions(-) diff --git a/consul/autopilot.go b/consul/autopilot.go index 0a3294bf49..ed0f500abf 100644 --- a/consul/autopilot.go +++ b/consul/autopilot.go @@ -34,7 +34,7 @@ func (s *Server) stopAutopilot() { s.autopilotWaitGroup.Wait() } -var minAutopilotVersion, _ = version.NewVersion("0.8.0") +var minAutopilotVersion = version.Must(version.NewVersion("0.8.0")) // autopilotLoop periodically looks for nonvoting servers to promote and dead servers to remove. func (s *Server) autopilotLoop() { @@ -49,66 +49,39 @@ func (s *Server) autopilotLoop() { case <-s.autopilotShutdownCh: return case <-ticker.C: - state := s.fsm.State() - _, autopilotConf, err := state.AutopilotConfig() - if err != nil { - s.logger.Printf("[ERR] autopilot: error retrieving config from state store: %s", err) - break - } - - // Setup autopilot config if we need to - if autopilotConf == nil { - if err := s.initializeAutopilot(); err != nil { - s.logger.Printf("[ERR] autopilot: %v", err) - } - + autopilotConfig, ok := s.getAutopilotConfig() + if !ok { continue } - if err := s.autopilotPolicy.PromoteNonVoters(autopilotConf); err != nil { + if err := s.autopilotPolicy.PromoteNonVoters(autopilotConfig); err != nil { s.logger.Printf("[ERR] autopilot: error checking for non-voters to promote: %s", err) } - if err := s.pruneDeadServers(); err != nil { + if err := s.pruneDeadServers(autopilotConfig); err != nil { s.logger.Printf("[ERR] autopilot: error checking for dead servers to remove: %s", err) } case <-s.autopilotRemoveDeadCh: - if err := s.pruneDeadServers(); err != nil { + autopilotConfig, ok := s.getAutopilotConfig() + if !ok { + continue + } + + if err := s.pruneDeadServers(autopilotConfig); err != nil { s.logger.Printf("[ERR] autopilot: error checking for dead servers to remove: %s", err) } } } } -// lowestServerVersion returns the lowest version among the alive servers -func (s *Server) lowestServerVersion() *version.Version { - lowest := minAutopilotVersion - - for _, member := range s.LANMembers() { - if valid, parts := agent.IsConsulServer(member); valid && parts.Status == serf.StatusAlive { - if parts.Build.LessThan(lowest) { - lowest = &parts.Build - } - } - } - - return lowest -} - // pruneDeadServers removes up to numPeers/2 failed servers -func (s *Server) pruneDeadServers() error { - state := s.fsm.State() - _, autopilotConf, err := state.AutopilotConfig() - if err != nil || autopilotConf == nil { - return err - } - +func (s *Server) pruneDeadServers(autopilotConfig *structs.AutopilotConfig) error { // Find any failed servers var failed []string staleRaftServers := make(map[string]raft.Server) - if autopilotConf.CleanupDeadServers { + if autopilotConfig.CleanupDeadServers { future := s.raft.GetConfiguration() - if future.Error() != nil { + if err := future.Error(); err != nil { return err } @@ -182,7 +155,7 @@ type BasicAutopilot struct { } // PromoteNonVoters promotes eligible non-voting servers to voters. -func (b *BasicAutopilot) PromoteNonVoters(autopilotConf *structs.AutopilotConfig) error { +func (b *BasicAutopilot) PromoteNonVoters(autopilotConfig *structs.AutopilotConfig) error { minRaftProtocol, err := ServerMinRaftProtocol(b.server.LANMembers()) if err != nil { return fmt.Errorf("error getting server raft protocol versions: %s", err) @@ -205,7 +178,7 @@ func (b *BasicAutopilot) PromoteNonVoters(autopilotConf *structs.AutopilotConfig // If this server has been stable and passing for long enough, promote it to a voter if !isVoter(server.Suffrage) { health := b.server.getServerHealth(string(server.ID)) - if health.IsStable(time.Now(), autopilotConf) { + if health.IsStable(time.Now(), autopilotConfig) { promotions = append(promotions, server) } } else { diff --git a/consul/leader.go b/consul/leader.go index bf48048e41..19667b87ba 100644 --- a/consul/leader.go +++ b/consul/leader.go @@ -154,9 +154,7 @@ func (s *Server) establishLeadership() error { } // Setup autopilot config if we need to - if err := s.initializeAutopilot(); err != nil { - s.logger.Printf("[ERR] autopilot: %v", err) - } + s.getAutopilotConfig() s.startAutopilot() @@ -248,33 +246,31 @@ func (s *Server) initializeACL() error { return nil } -// initializeAutopilot is used to setup the autopilot config if we are -// the leader and need to do this -func (s *Server) initializeAutopilot() error { - lowestVersion := s.lowestServerVersion() - - if !lowestVersion.Equal(minAutopilotVersion) && !lowestVersion.GreaterThan(minAutopilotVersion) { - return fmt.Errorf("can't initialize autopilot until all servers are >= %s", minAutopilotVersion.String()) - } - - // Bail if the config has already been initialized +// getAutopilotConfig is used to get the autopilot config, initializing it if necessary +func (s *Server) getAutopilotConfig() (*structs.AutopilotConfig, bool) { state := s.fsm.State() _, config, err := state.AutopilotConfig() if err != nil { - return fmt.Errorf("failed to get autopilot config: %v", err) + s.logger.Printf("failed to get autopilot config: %v", err) + return nil, false } if config != nil { - return nil + return config, true } - req := structs.AutopilotSetConfigRequest{ - Config: *s.config.AutopilotConfig, + if !ServersMeetMinimumVersion(s.LANMembers(), minAutopilotVersion) { + s.logger.Printf("can't initialize autopilot until all servers are >= %s", minAutopilotVersion.String()) + return nil, false } + + config = s.config.AutopilotConfig + req := structs.AutopilotSetConfigRequest{Config: *config} if _, err = s.raftApply(structs.AutopilotRequestType, req); err != nil { - return fmt.Errorf("failed to initialize autopilot config") + s.logger.Printf("failed to initialize autopilot config") + return nil, false } - return nil + return config, true } // reconcile is used to reconcile the differences between Serf diff --git a/consul/util.go b/consul/util.go index f0d56e4533..044dd3d118 100644 --- a/consul/util.go +++ b/consul/util.go @@ -7,6 +7,8 @@ import ( "runtime" "strconv" + "github.com/hashicorp/consul/consul/agent" + "github.com/hashicorp/go-version" "github.com/hashicorp/serf/serf" ) @@ -295,3 +297,17 @@ func runtimeStats() map[string]string { "cpu_count": strconv.FormatInt(int64(runtime.NumCPU()), 10), } } + +// ServersMeetMinimumVersion returns whether the given alive servers are at least on the +// given Consul version +func ServersMeetMinimumVersion(members []serf.Member, minVersion *version.Version) bool { + for _, member := range members { + if valid, parts := agent.IsConsulServer(member); valid && parts.Status == serf.StatusAlive { + if parts.Build.LessThan(minVersion) { + return false + } + } + } + + return true +} \ No newline at end of file From 196dd81cc58d7c9e7ef3cfa7b5a99dfd6499652d Mon Sep 17 00:00:00 2001 From: Kyle Havlovitz Date: Wed, 12 Apr 2017 18:38:36 -0700 Subject: [PATCH 3/4] Add formatting to autopilot init messages --- consul/autopilot.go | 4 +-- consul/leader.go | 12 ++++---- consul/util.go | 2 +- consul/util_test.go | 70 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 9 deletions(-) diff --git a/consul/autopilot.go b/consul/autopilot.go index ed0f500abf..34d28f457d 100644 --- a/consul/autopilot.go +++ b/consul/autopilot.go @@ -49,7 +49,7 @@ func (s *Server) autopilotLoop() { case <-s.autopilotShutdownCh: return case <-ticker.C: - autopilotConfig, ok := s.getAutopilotConfig() + autopilotConfig, ok := s.getOrCreateAutopilotConfig() if !ok { continue } @@ -62,7 +62,7 @@ func (s *Server) autopilotLoop() { s.logger.Printf("[ERR] autopilot: error checking for dead servers to remove: %s", err) } case <-s.autopilotRemoveDeadCh: - autopilotConfig, ok := s.getAutopilotConfig() + autopilotConfig, ok := s.getOrCreateAutopilotConfig() if !ok { continue } diff --git a/consul/leader.go b/consul/leader.go index 19667b87ba..95e5a7da94 100644 --- a/consul/leader.go +++ b/consul/leader.go @@ -154,7 +154,7 @@ func (s *Server) establishLeadership() error { } // Setup autopilot config if we need to - s.getAutopilotConfig() + s.getOrCreateAutopilotConfig() s.startAutopilot() @@ -246,12 +246,12 @@ func (s *Server) initializeACL() error { return nil } -// getAutopilotConfig is used to get the autopilot config, initializing it if necessary -func (s *Server) getAutopilotConfig() (*structs.AutopilotConfig, bool) { +// getOrCreateAutopilotConfig is used to get the autopilot config, initializing it if necessary +func (s *Server) getOrCreateAutopilotConfig() (*structs.AutopilotConfig, bool) { state := s.fsm.State() _, config, err := state.AutopilotConfig() if err != nil { - s.logger.Printf("failed to get autopilot config: %v", err) + s.logger.Printf("[ERR] autopilot: failed to get config: %v", err) return nil, false } if config != nil { @@ -259,14 +259,14 @@ func (s *Server) getAutopilotConfig() (*structs.AutopilotConfig, bool) { } if !ServersMeetMinimumVersion(s.LANMembers(), minAutopilotVersion) { - s.logger.Printf("can't initialize autopilot until all servers are >= %s", minAutopilotVersion.String()) + s.logger.Printf("[ERR] autopilot: can't initialize until all servers are >= %s", minAutopilotVersion.String()) return nil, false } config = s.config.AutopilotConfig req := structs.AutopilotSetConfigRequest{Config: *config} if _, err = s.raftApply(structs.AutopilotRequestType, req); err != nil { - s.logger.Printf("failed to initialize autopilot config") + s.logger.Printf("[ERR] autopilot: failed to initialize config: %v", err) return nil, false } diff --git a/consul/util.go b/consul/util.go index 044dd3d118..dd04a340b6 100644 --- a/consul/util.go +++ b/consul/util.go @@ -310,4 +310,4 @@ func ServersMeetMinimumVersion(members []serf.Member, minVersion *version.Versio } return true -} \ No newline at end of file +} diff --git a/consul/util_test.go b/consul/util_test.go index 042a4f6779..567e77c7ed 100644 --- a/consul/util_test.go +++ b/consul/util_test.go @@ -7,6 +7,7 @@ import ( "regexp" "testing" + "github.com/hashicorp/go-version" "github.com/hashicorp/serf/serf" ) @@ -325,3 +326,72 @@ func TestGetPublicIPv6(t *testing.T) { } } } + +func TestServersMeetMinimumVersion(t *testing.T) { + makeMember := func(version string) serf.Member { + return serf.Member{ + Name: "foo", + Addr: net.IP([]byte{127, 0, 0, 1}), + Tags: map[string]string{ + "role": "consul", + "id": "asdf", + "dc": "east-aws", + "port": "10000", + "build": version, + "wan_join_port": "1234", + "vsn": "1", + "expect": "3", + "raft_vsn": "3", + }, + Status: serf.StatusAlive, + } + } + + cases := []struct { + members []serf.Member + ver *version.Version + expected bool + }{ + // One server, meets reqs + { + members: []serf.Member{ + makeMember("0.7.5"), + }, + ver: version.Must(version.NewVersion("0.7.5")), + expected: true, + }, + // One server, doesn't meet reqs + { + members: []serf.Member{ + makeMember("0.7.5"), + }, + ver: version.Must(version.NewVersion("0.8.0")), + expected: false, + }, + // Multiple servers, meets req version + { + members: []serf.Member{ + makeMember("0.7.5"), + makeMember("0.8.0"), + }, + ver: version.Must(version.NewVersion("0.7.5")), + expected: true, + }, + // Multiple servers, doesn't meet req version + { + members: []serf.Member{ + makeMember("0.7.5"), + makeMember("0.8.0"), + }, + ver: version.Must(version.NewVersion("0.8.0")), + expected: false, + }, + } + + for _, tc := range cases { + result := ServersMeetMinimumVersion(tc.members, tc.ver) + if result != tc.expected { + t.Fatalf("bad: %v, %v, %v", result, tc.ver.String(), tc) + } + } +} From 76d96e2eb03a9558ef3a74ca7580f5dc461199eb Mon Sep 17 00:00:00 2001 From: Kyle Havlovitz Date: Thu, 13 Apr 2017 10:43:07 -0700 Subject: [PATCH 4/4] Add nil check to operator autopilot endpoint --- consul/leader.go | 2 +- consul/operator_autopilot_endpoint.go | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/consul/leader.go b/consul/leader.go index 95e5a7da94..2bffdd967d 100644 --- a/consul/leader.go +++ b/consul/leader.go @@ -259,7 +259,7 @@ func (s *Server) getOrCreateAutopilotConfig() (*structs.AutopilotConfig, bool) { } if !ServersMeetMinimumVersion(s.LANMembers(), minAutopilotVersion) { - s.logger.Printf("[ERR] autopilot: can't initialize until all servers are >= %s", minAutopilotVersion.String()) + s.logger.Printf("[WARN] autopilot: can't initialize until all servers are >= %s", minAutopilotVersion.String()) return nil, false } diff --git a/consul/operator_autopilot_endpoint.go b/consul/operator_autopilot_endpoint.go index abe21dd895..133ed94591 100644 --- a/consul/operator_autopilot_endpoint.go +++ b/consul/operator_autopilot_endpoint.go @@ -26,6 +26,9 @@ func (op *Operator) AutopilotGetConfiguration(args *structs.DCSpecificRequest, r if err != nil { return err } + if config == nil { + return fmt.Errorf("autopilot config not initialized yet") + } *reply = *config