mirror of https://github.com/status-im/consul.git
Merge pull request #2319 from hashicorp/f-bootstrap-abort
Adds check that aborts bootstrap mode if there's an existing cluster.
This commit is contained in:
commit
1488af4277
|
@ -1,7 +1,6 @@
|
||||||
package consul
|
package consul
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"net"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/hashicorp/consul/consul/agent"
|
"github.com/hashicorp/consul/consul/agent"
|
||||||
|
@ -192,7 +191,7 @@ func (s *Server) wanNodeJoin(me serf.MemberEvent) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// maybeBootsrap is used to handle bootstrapping when a new consul server joins
|
// maybeBootstrap is used to handle bootstrapping when a new consul server joins.
|
||||||
func (s *Server) maybeBootstrap() {
|
func (s *Server) maybeBootstrap() {
|
||||||
// Bootstrap can only be done if there are no committed logs, remove our
|
// Bootstrap can only be done if there are no committed logs, remove our
|
||||||
// expectations of bootstrapping. This is slightly cheaper than the full
|
// expectations of bootstrapping. This is slightly cheaper than the full
|
||||||
|
@ -203,13 +202,14 @@ func (s *Server) maybeBootstrap() {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if index != 0 {
|
if index != 0 {
|
||||||
|
s.logger.Printf("[INFO] consul: Raft data found, disabling bootstrap mode")
|
||||||
s.config.BootstrapExpect = 0
|
s.config.BootstrapExpect = 0
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scan for all the known servers.
|
// Scan for all the known servers.
|
||||||
members := s.serfLAN.Members()
|
members := s.serfLAN.Members()
|
||||||
addrs := make([]string, 0)
|
var servers []agent.Server
|
||||||
for _, member := range members {
|
for _, member := range members {
|
||||||
valid, p := agent.IsConsulServer(member)
|
valid, p := agent.IsConsulServer(member)
|
||||||
if !valid {
|
if !valid {
|
||||||
|
@ -227,34 +227,62 @@ func (s *Server) maybeBootstrap() {
|
||||||
s.logger.Printf("[ERR] consul: Member %v has bootstrap mode. Expect disabled.", member)
|
s.logger.Printf("[ERR] consul: Member %v has bootstrap mode. Expect disabled.", member)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
addr := &net.TCPAddr{IP: member.Addr, Port: p.Port}
|
servers = append(servers, *p)
|
||||||
addrs = append(addrs, addr.String())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip if we haven't met the minimum expect count.
|
// Skip if we haven't met the minimum expect count.
|
||||||
if len(addrs) < s.config.BootstrapExpect {
|
if len(servers) < s.config.BootstrapExpect {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Query each of the servers and make sure they report no Raft peers.
|
||||||
|
for _, server := range servers {
|
||||||
|
var peers []string
|
||||||
|
if err := s.connPool.RPC(s.config.Datacenter, server.Addr, server.Version,
|
||||||
|
"Status.Peers", &struct{}{}, &peers); err != nil {
|
||||||
|
s.logger.Printf("[ERR] consul: Failed to confirm peer status for %s: %v", server.Name, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Found a node with some Raft peers, stop bootstrap since there's
|
||||||
|
// evidence of an existing cluster. We should get folded in by the
|
||||||
|
// existing servers if that's the case, so it's cleaner to sit as a
|
||||||
|
// candidate with no peers so we don't cause spurious elections.
|
||||||
|
// It's OK this is racy, because even with an initial bootstrap
|
||||||
|
// as long as one peer runs bootstrap things will work, and if we
|
||||||
|
// have multiple peers bootstrap in the same way, that's OK. We
|
||||||
|
// just don't want a server added much later to do a live bootstrap
|
||||||
|
// and interfere with the cluster. This isn't required for Raft's
|
||||||
|
// correctness because no server in the existing cluster will vote
|
||||||
|
// for this server, but it makes things much more stable.
|
||||||
|
if len(peers) > 0 {
|
||||||
|
s.logger.Printf("[INFO] consul: Existing Raft peers reported by %s, disabling bootstrap mode", server.Name)
|
||||||
|
s.config.BootstrapExpect = 0
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Attempt a live bootstrap!
|
// Attempt a live bootstrap!
|
||||||
var configuration raft.Configuration
|
var configuration raft.Configuration
|
||||||
for _, addr := range addrs {
|
var addrs []string
|
||||||
// TODO (slackpad) - This will need to be updated once we support
|
for _, server := range servers {
|
||||||
// node IDs.
|
addr := server.Addr.String()
|
||||||
server := raft.Server{
|
addrs = append(addrs, addr)
|
||||||
|
peer := raft.Server{
|
||||||
ID: raft.ServerID(addr),
|
ID: raft.ServerID(addr),
|
||||||
Address: raft.ServerAddress(addr),
|
Address: raft.ServerAddress(addr),
|
||||||
}
|
}
|
||||||
configuration.Servers = append(configuration.Servers, server)
|
configuration.Servers = append(configuration.Servers, peer)
|
||||||
}
|
}
|
||||||
s.logger.Printf("[INFO] consul: Found expected number of peers (%s), attempting to bootstrap cluster...",
|
s.logger.Printf("[INFO] consul: Found expected number of peers, attempting bootstrap: %s",
|
||||||
strings.Join(addrs, ","))
|
strings.Join(addrs, ","))
|
||||||
future := s.raft.BootstrapCluster(configuration)
|
future := s.raft.BootstrapCluster(configuration)
|
||||||
if err := future.Error(); err != nil {
|
if err := future.Error(); err != nil {
|
||||||
s.logger.Printf("[ERR] consul: Failed to bootstrap cluster: %v", err)
|
s.logger.Printf("[ERR] consul: Failed to bootstrap cluster: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Bootstrapping complete, don't enter this again.
|
// Bootstrapping complete, or failed for some reason, don't enter this
|
||||||
|
// again.
|
||||||
s.config.BootstrapExpect = 0
|
s.config.BootstrapExpect = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -500,7 +500,9 @@ func TestServer_JoinLAN_TLS(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestServer_Expect(t *testing.T) {
|
func TestServer_Expect(t *testing.T) {
|
||||||
// all test servers should be in expect=3 mode
|
// All test servers should be in expect=3 mode, except for the 3rd one,
|
||||||
|
// but one with expect=0 can cause a bootstrap to occur from the other
|
||||||
|
// servers as currently implemented.
|
||||||
dir1, s1 := testServerDCExpect(t, "dc1", 3)
|
dir1, s1 := testServerDCExpect(t, "dc1", 3)
|
||||||
defer os.RemoveAll(dir1)
|
defer os.RemoveAll(dir1)
|
||||||
defer s1.Shutdown()
|
defer s1.Shutdown()
|
||||||
|
@ -513,7 +515,11 @@ func TestServer_Expect(t *testing.T) {
|
||||||
defer os.RemoveAll(dir3)
|
defer os.RemoveAll(dir3)
|
||||||
defer s3.Shutdown()
|
defer s3.Shutdown()
|
||||||
|
|
||||||
// Try to join
|
dir4, s4 := testServerDCExpect(t, "dc1", 3)
|
||||||
|
defer os.RemoveAll(dir4)
|
||||||
|
defer s4.Shutdown()
|
||||||
|
|
||||||
|
// Join the first two servers.
|
||||||
addr := fmt.Sprintf("127.0.0.1:%d",
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
||||||
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
||||||
if _, err := s2.JoinLAN([]string{addr}); err != nil {
|
if _, err := s2.JoinLAN([]string{addr}); err != nil {
|
||||||
|
@ -523,7 +529,7 @@ func TestServer_Expect(t *testing.T) {
|
||||||
var p1 int
|
var p1 int
|
||||||
var p2 int
|
var p2 int
|
||||||
|
|
||||||
// should have no peers yet
|
// Should have no peers yet since the bootstrap didn't occur.
|
||||||
testutil.WaitForResult(func() (bool, error) {
|
testutil.WaitForResult(func() (bool, error) {
|
||||||
p1, _ = s1.numPeers()
|
p1, _ = s1.numPeers()
|
||||||
return p1 == 0, errors.New(fmt.Sprintf("%d", p1))
|
return p1 == 0, errors.New(fmt.Sprintf("%d", p1))
|
||||||
|
@ -538,14 +544,14 @@ func TestServer_Expect(t *testing.T) {
|
||||||
t.Fatalf("should have 0 peers: %v", err)
|
t.Fatalf("should have 0 peers: %v", err)
|
||||||
})
|
})
|
||||||
|
|
||||||
// join the third node
|
// Join the third node.
|
||||||
if _, err := s3.JoinLAN([]string{addr}); err != nil {
|
if _, err := s3.JoinLAN([]string{addr}); err != nil {
|
||||||
t.Fatalf("err: %v", err)
|
t.Fatalf("err: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
var p3 int
|
var p3 int
|
||||||
|
|
||||||
// should now have all three peers
|
// Now we have three servers so we should bootstrap.
|
||||||
testutil.WaitForResult(func() (bool, error) {
|
testutil.WaitForResult(func() (bool, error) {
|
||||||
p1, _ = s1.numPeers()
|
p1, _ = s1.numPeers()
|
||||||
return p1 == 3, errors.New(fmt.Sprintf("%d", p1))
|
return p1 == 3, errors.New(fmt.Sprintf("%d", p1))
|
||||||
|
@ -567,8 +573,30 @@ func TestServer_Expect(t *testing.T) {
|
||||||
t.Fatalf("should have 3 peers: %v", err)
|
t.Fatalf("should have 3 peers: %v", err)
|
||||||
})
|
})
|
||||||
|
|
||||||
// check if there is one leader now
|
// Make sure a leader is elected, grab the current term and then add in
|
||||||
|
// the fourth server.
|
||||||
testutil.WaitForLeader(t, s1.RPC, "dc1")
|
testutil.WaitForLeader(t, s1.RPC, "dc1")
|
||||||
|
termBefore := s1.raft.Stats()["last_log_term"]
|
||||||
|
if _, err := s4.JoinLAN([]string{addr}); err != nil {
|
||||||
|
t.Fatalf("err: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for the new server to see itself added to the cluster.
|
||||||
|
var p4 int
|
||||||
|
testutil.WaitForResult(func() (bool, error) {
|
||||||
|
p4, _ = s4.numPeers()
|
||||||
|
return p4 == 4, errors.New(fmt.Sprintf("%d", p4))
|
||||||
|
}, func(err error) {
|
||||||
|
t.Fatalf("should have 4 peers: %v", err)
|
||||||
|
})
|
||||||
|
|
||||||
|
// Make sure there's still a leader and that the term didn't change,
|
||||||
|
// so we know an election didn't occur.
|
||||||
|
testutil.WaitForLeader(t, s1.RPC, "dc1")
|
||||||
|
termAfter := s1.raft.Stats()["last_log_term"]
|
||||||
|
if termAfter != termBefore {
|
||||||
|
t.Fatalf("looks like an election took place")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestServer_BadExpect(t *testing.T) {
|
func TestServer_BadExpect(t *testing.T) {
|
||||||
|
|
Loading…
Reference in New Issue