2017-03-01 14:04:40 -08:00
|
|
|
package consul
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"os"
|
|
|
|
"testing"
|
|
|
|
"time"
|
|
|
|
|
2017-04-19 16:00:11 -07:00
|
|
|
"github.com/hashicorp/consul/testrpc"
|
2017-04-29 09:34:02 -07:00
|
|
|
"github.com/hashicorp/consul/testutil/retry"
|
2017-03-01 14:04:40 -08:00
|
|
|
"github.com/hashicorp/raft"
|
|
|
|
"github.com/hashicorp/serf/serf"
|
|
|
|
)
|
|
|
|
|
|
|
|
func TestAutopilot_CleanupDeadServer(t *testing.T) {
|
2017-03-15 16:09:55 -07:00
|
|
|
for i := 1; i <= 3; i++ {
|
|
|
|
testCleanupDeadServer(t, i)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func testCleanupDeadServer(t *testing.T, raftVersion int) {
|
|
|
|
conf := func(c *Config) {
|
|
|
|
c.Datacenter = "dc1"
|
|
|
|
c.Bootstrap = false
|
|
|
|
c.BootstrapExpect = 3
|
|
|
|
c.RaftConfig.ProtocolVersion = raft.ProtocolVersion(raftVersion)
|
|
|
|
}
|
|
|
|
dir1, s1 := testServerWithConfig(t, conf)
|
2017-03-01 14:04:40 -08:00
|
|
|
defer os.RemoveAll(dir1)
|
|
|
|
defer s1.Shutdown()
|
|
|
|
|
2017-03-15 16:09:55 -07:00
|
|
|
dir2, s2 := testServerWithConfig(t, conf)
|
2017-03-01 14:04:40 -08:00
|
|
|
defer os.RemoveAll(dir2)
|
|
|
|
defer s2.Shutdown()
|
|
|
|
|
2017-03-15 16:09:55 -07:00
|
|
|
dir3, s3 := testServerWithConfig(t, conf)
|
2017-03-01 14:04:40 -08:00
|
|
|
defer os.RemoveAll(dir3)
|
|
|
|
defer s3.Shutdown()
|
|
|
|
|
|
|
|
servers := []*Server{s1, s2, s3}
|
|
|
|
|
|
|
|
// Try to join
|
|
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
|
|
if _, err := s2.JoinLAN([]string{addr}); err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if _, err := s3.JoinLAN([]string{addr}); err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, s := range servers {
|
2017-05-04 15:52:53 -07:00
|
|
|
retry.Run(t, func(r *retry.R) {
|
2017-04-29 09:34:02 -07:00
|
|
|
if got, want := numPeers(s), 3; got != want {
|
|
|
|
r.Fatalf("got %d peers want %d", got, want)
|
|
|
|
}
|
|
|
|
})
|
2017-03-01 14:04:40 -08:00
|
|
|
}
|
|
|
|
|
2017-03-15 16:09:55 -07:00
|
|
|
// Bring up a new server
|
|
|
|
dir4, s4 := testServerWithConfig(t, conf)
|
|
|
|
defer os.RemoveAll(dir4)
|
|
|
|
defer s4.Shutdown()
|
|
|
|
|
2017-03-01 14:04:40 -08:00
|
|
|
// Kill a non-leader server
|
2017-03-15 16:09:55 -07:00
|
|
|
s3.Shutdown()
|
2017-05-04 15:52:53 -07:00
|
|
|
retry.Run(t, func(r *retry.R) {
|
2017-03-01 14:04:40 -08:00
|
|
|
alive := 0
|
|
|
|
for _, m := range s1.LANMembers() {
|
|
|
|
if m.Status == serf.StatusAlive {
|
|
|
|
alive++
|
|
|
|
}
|
|
|
|
}
|
2017-04-29 09:34:02 -07:00
|
|
|
if alive != 2 {
|
|
|
|
r.Fatal(nil)
|
|
|
|
}
|
|
|
|
})
|
2017-03-01 14:04:40 -08:00
|
|
|
|
2017-03-15 16:09:55 -07:00
|
|
|
// Join the new server
|
2017-03-01 14:04:40 -08:00
|
|
|
if _, err := s4.JoinLAN([]string{addr}); err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
2017-03-15 16:09:55 -07:00
|
|
|
servers[2] = s4
|
2017-03-01 14:04:40 -08:00
|
|
|
|
|
|
|
// Make sure the dead server is removed and we're back to 3 total peers
|
|
|
|
for _, s := range servers {
|
2017-05-04 15:52:53 -07:00
|
|
|
retry.Run(t, func(r *retry.R) {
|
2017-04-29 09:34:02 -07:00
|
|
|
if got, want := numPeers(s), 3; got != want {
|
|
|
|
r.Fatalf("got %d peers want %d", got, want)
|
|
|
|
}
|
|
|
|
})
|
2017-03-01 14:04:40 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestAutopilot_CleanupDeadServerPeriodic(t *testing.T) {
|
|
|
|
dir1, s1 := testServerWithConfig(t, func(c *Config) {
|
|
|
|
c.Datacenter = "dc1"
|
|
|
|
c.Bootstrap = true
|
|
|
|
})
|
|
|
|
defer os.RemoveAll(dir1)
|
|
|
|
defer s1.Shutdown()
|
|
|
|
|
|
|
|
conf := func(c *Config) {
|
|
|
|
c.Datacenter = "dc1"
|
|
|
|
c.Bootstrap = false
|
|
|
|
}
|
|
|
|
dir2, s2 := testServerWithConfig(t, conf)
|
|
|
|
defer os.RemoveAll(dir2)
|
|
|
|
defer s2.Shutdown()
|
|
|
|
|
|
|
|
dir3, s3 := testServerWithConfig(t, conf)
|
|
|
|
defer os.RemoveAll(dir3)
|
|
|
|
defer s3.Shutdown()
|
|
|
|
|
2017-03-10 11:41:17 -08:00
|
|
|
dir4, s4 := testServerWithConfig(t, conf)
|
|
|
|
defer os.RemoveAll(dir4)
|
|
|
|
defer s4.Shutdown()
|
|
|
|
|
|
|
|
servers := []*Server{s1, s2, s3, s4}
|
2017-03-01 14:04:40 -08:00
|
|
|
|
|
|
|
// Join the servers to s1
|
2017-04-29 09:34:02 -07:00
|
|
|
addr := fmt.Sprintf("127.0.0.1:%d", s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
2017-03-10 11:41:17 -08:00
|
|
|
|
|
|
|
for _, s := range servers[1:] {
|
|
|
|
if _, err := s.JoinLAN([]string{addr}); err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
2017-03-01 14:04:40 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
for _, s := range servers {
|
2017-05-04 15:52:53 -07:00
|
|
|
retry.Run(t, func(r *retry.R) {
|
2017-04-29 09:34:02 -07:00
|
|
|
if got, want := numPeers(s), 4; got != want {
|
|
|
|
r.Fatalf("got %d peers want %d", got, want)
|
|
|
|
}
|
|
|
|
})
|
2017-03-01 14:04:40 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Kill a non-leader server
|
2017-03-10 11:41:17 -08:00
|
|
|
s4.Shutdown()
|
2017-03-01 14:04:40 -08:00
|
|
|
|
|
|
|
// Should be removed from the peers automatically
|
2017-03-10 11:41:17 -08:00
|
|
|
for _, s := range []*Server{s1, s2, s3} {
|
2017-05-04 15:52:53 -07:00
|
|
|
retry.Run(t, func(r *retry.R) {
|
2017-04-29 09:34:02 -07:00
|
|
|
if got, want := numPeers(s), 3; got != want {
|
|
|
|
r.Fatalf("got %d peers want %d", got, want)
|
|
|
|
}
|
|
|
|
})
|
2017-03-01 14:04:40 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-03-29 12:52:00 -07:00
|
|
|
func TestAutopilot_CleanupStaleRaftServer(t *testing.T) {
|
|
|
|
dir1, s1 := testServerDCBootstrap(t, "dc1", true)
|
|
|
|
defer os.RemoveAll(dir1)
|
|
|
|
defer s1.Shutdown()
|
|
|
|
|
|
|
|
dir2, s2 := testServerDCBootstrap(t, "dc1", false)
|
|
|
|
defer os.RemoveAll(dir2)
|
|
|
|
defer s2.Shutdown()
|
|
|
|
|
|
|
|
dir3, s3 := testServerDCBootstrap(t, "dc1", false)
|
|
|
|
defer os.RemoveAll(dir3)
|
|
|
|
defer s3.Shutdown()
|
|
|
|
|
|
|
|
dir4, s4 := testServerDCBootstrap(t, "dc1", false)
|
|
|
|
defer os.RemoveAll(dir4)
|
|
|
|
defer s4.Shutdown()
|
|
|
|
|
|
|
|
servers := []*Server{s1, s2, s3}
|
|
|
|
|
|
|
|
// Join the servers to s1
|
|
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
|
|
|
|
|
|
for _, s := range servers[1:] {
|
|
|
|
if _, err := s.JoinLAN([]string{addr}); err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, s := range servers {
|
2017-05-04 15:52:53 -07:00
|
|
|
retry.Run(t, func(r *retry.R) {
|
2017-04-29 09:34:02 -07:00
|
|
|
if got, want := numPeers(s), 3; got != want {
|
|
|
|
r.Fatalf("got %d peers want %d", got, want)
|
|
|
|
}
|
|
|
|
})
|
2017-03-29 12:52:00 -07:00
|
|
|
}
|
|
|
|
|
2017-04-19 16:00:11 -07:00
|
|
|
testrpc.WaitForLeader(t, s1.RPC, "dc1")
|
2017-04-12 15:28:18 -07:00
|
|
|
|
2017-03-29 12:52:00 -07:00
|
|
|
// Add s4 to peers directly
|
|
|
|
s4addr := fmt.Sprintf("127.0.0.1:%d",
|
|
|
|
s4.config.SerfLANConfig.MemberlistConfig.BindPort)
|
2017-03-29 16:01:53 -07:00
|
|
|
s1.raft.AddVoter(raft.ServerID(s4.config.NodeID), raft.ServerAddress(s4addr), 0, 0)
|
2017-03-29 12:52:00 -07:00
|
|
|
|
|
|
|
// Verify we have 4 peers
|
|
|
|
peers, err := s1.numPeers()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if peers != 4 {
|
|
|
|
t.Fatalf("bad: %v", peers)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Wait for s4 to be removed
|
|
|
|
for _, s := range []*Server{s1, s2, s3} {
|
2017-05-04 15:52:53 -07:00
|
|
|
retry.Run(t, func(r *retry.R) {
|
2017-04-29 09:34:02 -07:00
|
|
|
if got, want := numPeers(s), 3; got != want {
|
|
|
|
r.Fatalf("got %d peers want %d", got, want)
|
|
|
|
}
|
|
|
|
})
|
2017-03-29 12:52:00 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-03-01 14:04:40 -08:00
|
|
|
func TestAutopilot_PromoteNonVoter(t *testing.T) {
|
|
|
|
dir1, s1 := testServerWithConfig(t, func(c *Config) {
|
|
|
|
c.Datacenter = "dc1"
|
|
|
|
c.Bootstrap = true
|
|
|
|
c.RaftConfig.ProtocolVersion = 3
|
|
|
|
c.AutopilotConfig.ServerStabilizationTime = 200 * time.Millisecond
|
|
|
|
c.ServerHealthInterval = 100 * time.Millisecond
|
2017-03-09 16:43:07 -08:00
|
|
|
c.AutopilotInterval = 100 * time.Millisecond
|
2017-03-01 14:04:40 -08:00
|
|
|
})
|
|
|
|
defer os.RemoveAll(dir1)
|
|
|
|
defer s1.Shutdown()
|
|
|
|
codec := rpcClient(t, s1)
|
|
|
|
defer codec.Close()
|
|
|
|
|
|
|
|
dir2, s2 := testServerWithConfig(t, func(c *Config) {
|
|
|
|
c.Datacenter = "dc1"
|
|
|
|
c.Bootstrap = false
|
|
|
|
c.RaftConfig.ProtocolVersion = 3
|
|
|
|
})
|
|
|
|
defer os.RemoveAll(dir2)
|
|
|
|
defer s2.Shutdown()
|
|
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
|
|
if _, err := s2.JoinLAN([]string{addr}); err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
2017-04-19 16:00:11 -07:00
|
|
|
testrpc.WaitForLeader(t, s1.RPC, "dc1")
|
2017-05-05 12:14:43 +02:00
|
|
|
// Wait for the new server to be added as a non-voter, but make sure
|
|
|
|
// it doesn't get promoted to a voter even after ServerStabilizationTime,
|
|
|
|
// because that would result in an even-numbered quorum count.
|
|
|
|
retry.Run(t, func(r *retry.R) {
|
|
|
|
future := s1.raft.GetConfiguration()
|
|
|
|
if err := future.Error(); err != nil {
|
|
|
|
r.Fatal(err)
|
|
|
|
}
|
2017-03-01 14:04:40 -08:00
|
|
|
|
2017-05-05 12:14:43 +02:00
|
|
|
servers := future.Configuration().Servers
|
2017-03-01 14:04:40 -08:00
|
|
|
|
2017-05-05 12:14:43 +02:00
|
|
|
if len(servers) != 2 {
|
|
|
|
r.Fatalf("bad: %v", servers)
|
|
|
|
}
|
|
|
|
if servers[1].Suffrage != raft.Nonvoter {
|
|
|
|
r.Fatalf("bad: %v", servers)
|
|
|
|
}
|
|
|
|
health := s1.getServerHealth(string(servers[1].ID))
|
|
|
|
if health == nil {
|
|
|
|
r.Fatal("nil health")
|
|
|
|
}
|
|
|
|
if !health.Healthy {
|
|
|
|
r.Fatalf("bad: %v", health)
|
|
|
|
}
|
|
|
|
if time.Now().Sub(health.StableSince) < s1.config.AutopilotConfig.ServerStabilizationTime {
|
|
|
|
r.Fatal("stable period not elapsed")
|
|
|
|
}
|
|
|
|
})
|
2017-03-01 14:04:40 -08:00
|
|
|
|
|
|
|
// Now add another server and make sure they both get promoted to voters after stabilization
|
|
|
|
dir3, s3 := testServerWithConfig(t, func(c *Config) {
|
|
|
|
c.Datacenter = "dc1"
|
|
|
|
c.Bootstrap = false
|
|
|
|
c.RaftConfig.ProtocolVersion = 3
|
|
|
|
})
|
|
|
|
defer os.RemoveAll(dir3)
|
|
|
|
defer s3.Shutdown()
|
|
|
|
if _, err := s3.JoinLAN([]string{addr}); err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
2017-05-04 15:52:53 -07:00
|
|
|
retry.Run(t, func(r *retry.R) {
|
2017-03-01 14:04:40 -08:00
|
|
|
future := s1.raft.GetConfiguration()
|
|
|
|
if err := future.Error(); err != nil {
|
2017-04-29 09:34:02 -07:00
|
|
|
r.Fatal(err)
|
2017-03-01 14:04:40 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
servers := future.Configuration().Servers
|
|
|
|
if len(servers) != 3 {
|
2017-04-29 09:34:02 -07:00
|
|
|
r.Fatalf("bad: %v", servers)
|
2017-03-01 14:04:40 -08:00
|
|
|
}
|
|
|
|
if servers[1].Suffrage != raft.Voter {
|
2017-04-29 09:34:02 -07:00
|
|
|
r.Fatalf("bad: %v", servers)
|
2017-03-01 14:04:40 -08:00
|
|
|
}
|
|
|
|
if servers[2].Suffrage != raft.Voter {
|
2017-04-29 09:34:02 -07:00
|
|
|
r.Fatalf("bad: %v", servers)
|
2017-03-01 14:04:40 -08:00
|
|
|
}
|
2017-04-29 09:34:02 -07:00
|
|
|
})
|
2017-03-01 14:04:40 -08:00
|
|
|
}
|