Merge pull request #2319 from hashicorp/f-bootstrap-abort

Adds a check that aborts bootstrap mode if there's an existing cluster.
This commit is contained in:
James Phillips 2016-09-01 09:49:03 -07:00 committed by GitHub
commit 1488af4277
2 changed files with 75 additions and 19 deletions

View File

@ -1,7 +1,6 @@
package consul package consul
import ( import (
"net"
"strings" "strings"
"github.com/hashicorp/consul/consul/agent" "github.com/hashicorp/consul/consul/agent"
@ -192,7 +191,7 @@ func (s *Server) wanNodeJoin(me serf.MemberEvent) {
} }
} }
// maybeBootsrap is used to handle bootstrapping when a new consul server joins // maybeBootstrap is used to handle bootstrapping when a new consul server joins.
func (s *Server) maybeBootstrap() { func (s *Server) maybeBootstrap() {
// Bootstrap can only be done if there are no committed logs, remove our // Bootstrap can only be done if there are no committed logs, remove our
// expectations of bootstrapping. This is slightly cheaper than the full // expectations of bootstrapping. This is slightly cheaper than the full
@ -203,13 +202,14 @@ func (s *Server) maybeBootstrap() {
return return
} }
if index != 0 { if index != 0 {
s.logger.Printf("[INFO] consul: Raft data found, disabling bootstrap mode")
s.config.BootstrapExpect = 0 s.config.BootstrapExpect = 0
return return
} }
// Scan for all the known servers. // Scan for all the known servers.
members := s.serfLAN.Members() members := s.serfLAN.Members()
addrs := make([]string, 0) var servers []agent.Server
for _, member := range members { for _, member := range members {
valid, p := agent.IsConsulServer(member) valid, p := agent.IsConsulServer(member)
if !valid { if !valid {
@ -227,34 +227,62 @@ func (s *Server) maybeBootstrap() {
s.logger.Printf("[ERR] consul: Member %v has bootstrap mode. Expect disabled.", member) s.logger.Printf("[ERR] consul: Member %v has bootstrap mode. Expect disabled.", member)
return return
} }
addr := &net.TCPAddr{IP: member.Addr, Port: p.Port} servers = append(servers, *p)
addrs = append(addrs, addr.String())
} }
// Skip if we haven't met the minimum expect count. // Skip if we haven't met the minimum expect count.
if len(addrs) < s.config.BootstrapExpect { if len(servers) < s.config.BootstrapExpect {
return return
} }
// Query each of the servers and make sure they report no Raft peers.
for _, server := range servers {
var peers []string
if err := s.connPool.RPC(s.config.Datacenter, server.Addr, server.Version,
"Status.Peers", &struct{}{}, &peers); err != nil {
s.logger.Printf("[ERR] consul: Failed to confirm peer status for %s: %v", server.Name, err)
return
}
// Found a node with some Raft peers, stop bootstrap since there's
// evidence of an existing cluster. We should get folded in by the
// existing servers if that's the case, so it's cleaner to sit as a
// candidate with no peers so we don't cause spurious elections.
// It's OK that this is racy: during an initial bootstrap, things
// will work as long as one peer runs bootstrap, and it's also fine
// if multiple peers bootstrap in the same way. We
// just don't want a server added much later to do a live bootstrap
// and interfere with the cluster. This isn't required for Raft's
// correctness because no server in the existing cluster will vote
// for this server, but it makes things much more stable.
if len(peers) > 0 {
s.logger.Printf("[INFO] consul: Existing Raft peers reported by %s, disabling bootstrap mode", server.Name)
s.config.BootstrapExpect = 0
return
}
}
// Attempt a live bootstrap! // Attempt a live bootstrap!
var configuration raft.Configuration var configuration raft.Configuration
for _, addr := range addrs { var addrs []string
// TODO (slackpad) - This will need to be updated once we support for _, server := range servers {
// node IDs. addr := server.Addr.String()
server := raft.Server{ addrs = append(addrs, addr)
peer := raft.Server{
ID: raft.ServerID(addr), ID: raft.ServerID(addr),
Address: raft.ServerAddress(addr), Address: raft.ServerAddress(addr),
} }
configuration.Servers = append(configuration.Servers, server) configuration.Servers = append(configuration.Servers, peer)
} }
s.logger.Printf("[INFO] consul: Found expected number of peers (%s), attempting to bootstrap cluster...", s.logger.Printf("[INFO] consul: Found expected number of peers, attempting bootstrap: %s",
strings.Join(addrs, ",")) strings.Join(addrs, ","))
future := s.raft.BootstrapCluster(configuration) future := s.raft.BootstrapCluster(configuration)
if err := future.Error(); err != nil { if err := future.Error(); err != nil {
s.logger.Printf("[ERR] consul: Failed to bootstrap cluster: %v", err) s.logger.Printf("[ERR] consul: Failed to bootstrap cluster: %v", err)
} }
// Bootstrapping complete, don't enter this again. // Bootstrapping complete, or failed for some reason, don't enter this
// again.
s.config.BootstrapExpect = 0 s.config.BootstrapExpect = 0
} }

View File

@ -500,7 +500,9 @@ func TestServer_JoinLAN_TLS(t *testing.T) {
} }
func TestServer_Expect(t *testing.T) { func TestServer_Expect(t *testing.T) {
// all test servers should be in expect=3 mode // All test servers should be in expect=3 mode, except for the 3rd one,
// but one with expect=0 can cause a bootstrap to occur from the other
// servers as currently implemented.
dir1, s1 := testServerDCExpect(t, "dc1", 3) dir1, s1 := testServerDCExpect(t, "dc1", 3)
defer os.RemoveAll(dir1) defer os.RemoveAll(dir1)
defer s1.Shutdown() defer s1.Shutdown()
@ -513,7 +515,11 @@ func TestServer_Expect(t *testing.T) {
defer os.RemoveAll(dir3) defer os.RemoveAll(dir3)
defer s3.Shutdown() defer s3.Shutdown()
// Try to join dir4, s4 := testServerDCExpect(t, "dc1", 3)
defer os.RemoveAll(dir4)
defer s4.Shutdown()
// Join the first two servers.
addr := fmt.Sprintf("127.0.0.1:%d", addr := fmt.Sprintf("127.0.0.1:%d",
s1.config.SerfLANConfig.MemberlistConfig.BindPort) s1.config.SerfLANConfig.MemberlistConfig.BindPort)
if _, err := s2.JoinLAN([]string{addr}); err != nil { if _, err := s2.JoinLAN([]string{addr}); err != nil {
@ -523,7 +529,7 @@ func TestServer_Expect(t *testing.T) {
var p1 int var p1 int
var p2 int var p2 int
// should have no peers yet // Should have no peers yet since the bootstrap didn't occur.
testutil.WaitForResult(func() (bool, error) { testutil.WaitForResult(func() (bool, error) {
p1, _ = s1.numPeers() p1, _ = s1.numPeers()
return p1 == 0, errors.New(fmt.Sprintf("%d", p1)) return p1 == 0, errors.New(fmt.Sprintf("%d", p1))
@ -538,14 +544,14 @@ func TestServer_Expect(t *testing.T) {
t.Fatalf("should have 0 peers: %v", err) t.Fatalf("should have 0 peers: %v", err)
}) })
// join the third node // Join the third node.
if _, err := s3.JoinLAN([]string{addr}); err != nil { if _, err := s3.JoinLAN([]string{addr}); err != nil {
t.Fatalf("err: %v", err) t.Fatalf("err: %v", err)
} }
var p3 int var p3 int
// should now have all three peers // Now we have three servers so we should bootstrap.
testutil.WaitForResult(func() (bool, error) { testutil.WaitForResult(func() (bool, error) {
p1, _ = s1.numPeers() p1, _ = s1.numPeers()
return p1 == 3, errors.New(fmt.Sprintf("%d", p1)) return p1 == 3, errors.New(fmt.Sprintf("%d", p1))
@ -567,8 +573,30 @@ func TestServer_Expect(t *testing.T) {
t.Fatalf("should have 3 peers: %v", err) t.Fatalf("should have 3 peers: %v", err)
}) })
// check if there is one leader now // Make sure a leader is elected, grab the current term and then add in
// the fourth server.
testutil.WaitForLeader(t, s1.RPC, "dc1") testutil.WaitForLeader(t, s1.RPC, "dc1")
termBefore := s1.raft.Stats()["last_log_term"]
if _, err := s4.JoinLAN([]string{addr}); err != nil {
t.Fatalf("err: %v", err)
}
// Wait for the new server to see itself added to the cluster.
var p4 int
testutil.WaitForResult(func() (bool, error) {
p4, _ = s4.numPeers()
return p4 == 4, errors.New(fmt.Sprintf("%d", p4))
}, func(err error) {
t.Fatalf("should have 4 peers: %v", err)
})
// Make sure there's still a leader and that the term didn't change,
// so we know an election didn't occur.
testutil.WaitForLeader(t, s1.RPC, "dc1")
termAfter := s1.raft.Stats()["last_log_term"]
if termAfter != termBefore {
t.Fatalf("looks like an election took place")
}
} }
func TestServer_BadExpect(t *testing.T) { func TestServer_BadExpect(t *testing.T) {