From 7d3640329158e857b32cfedea7d9f84897b6e074 Mon Sep 17 00:00:00 2001 From: Kyle Havlovitz Date: Wed, 12 Apr 2017 15:28:18 -0700 Subject: [PATCH] Wait to initialize autopilot until all servers are >= 0.8.0 --- consul/autopilot.go | 29 ++++++++++++++++++++++++++++- consul/autopilot_test.go | 2 ++ consul/leader.go | 11 ++++++++--- 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/consul/autopilot.go b/consul/autopilot.go index 7ea138566e..0a3294bf49 100644 --- a/consul/autopilot.go +++ b/consul/autopilot.go @@ -10,6 +10,7 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/consul/consul/agent" "github.com/hashicorp/consul/consul/structs" + "github.com/hashicorp/go-version" "github.com/hashicorp/raft" "github.com/hashicorp/serf/serf" ) @@ -33,6 +34,8 @@ func (s *Server) stopAutopilot() { s.autopilotWaitGroup.Wait() } +var minAutopilotVersion, _ = version.NewVersion("0.8.0") + // autopilotLoop periodically looks for nonvoting servers to promote and dead servers to remove. func (s *Server) autopilotLoop() { defer s.autopilotWaitGroup.Done() @@ -53,6 +56,15 @@ func (s *Server) autopilotLoop() { break } + // Setup autopilot config if we need to + if autopilotConf == nil { + if err := s.initializeAutopilot(); err != nil { + s.logger.Printf("[ERR] autopilot: %v", err) + } + + continue + } + if err := s.autopilotPolicy.PromoteNonVoters(autopilotConf); err != nil { s.logger.Printf("[ERR] autopilot: error checking for non-voters to promote: %s", err) } @@ -68,11 +80,26 @@ func (s *Server) autopilotLoop() { } } +// lowestServerVersion returns the lowest version among the alive servers +func (s *Server) lowestServerVersion() *version.Version { + lowest := minAutopilotVersion + + for _, member := range s.LANMembers() { + if valid, parts := agent.IsConsulServer(member); valid && parts.Status == serf.StatusAlive { + if parts.Build.LessThan(lowest) { + lowest = &parts.Build + } + } + } + + return lowest +} + // pruneDeadServers removes up to numPeers/2 failed servers func (s *Server) pruneDeadServers() error { state := s.fsm.State() _, autopilotConf, err := state.AutopilotConfig() - if err != nil { + if err != nil || autopilotConf == nil { return err } diff --git a/consul/autopilot_test.go b/consul/autopilot_test.go index 5aed5719ba..744e05f7f5 100644 --- a/consul/autopilot_test.go +++ b/consul/autopilot_test.go @@ -191,6 +191,8 @@ func TestAutopilot_CleanupStaleRaftServer(t *testing.T) { } } + testutil.WaitForLeader(t, s1.RPC, "dc1") + // Add s4 to peers directly s4addr := fmt.Sprintf("127.0.0.1:%d", s4.config.SerfLANConfig.MemberlistConfig.BindPort) diff --git a/consul/leader.go b/consul/leader.go index dc41a48792..bf48048e41 100644 --- a/consul/leader.go +++ b/consul/leader.go @@ -153,10 +153,9 @@ func (s *Server) establishLeadership() error { return err } - // Setup autopilot config if we are the leader and need to + // Setup autopilot config if we need to if err := s.initializeAutopilot(); err != nil { - s.logger.Printf("[ERR] consul: Autopilot initialization failed: %v", err) - return err + s.logger.Printf("[ERR] autopilot: %v", err) } s.startAutopilot() @@ -252,6 +251,12 @@ func (s *Server) initializeACL() error { // initializeAutopilot is used to setup the autopilot config if we are // the leader and need to do this func (s *Server) initializeAutopilot() error { + lowestVersion := s.lowestServerVersion() + + if !lowestVersion.Equal(minAutopilotVersion) && !lowestVersion.GreaterThan(minAutopilotVersion) { + return fmt.Errorf("can't initialize autopilot until all servers are >= %s", minAutopilotVersion.String()) + } + // Bail if the config has already been initialized state := s.fsm.State() _, config, err := state.AutopilotConfig()