diff --git a/consul/leader.go b/consul/leader.go index f55e99ea60..10355e54a6 100644 --- a/consul/leader.go +++ b/consul/leader.go @@ -130,6 +130,7 @@ func (s *Server) handleAliveMember(member serf.Member) error { } } } + s.logger.Printf("[INFO] consul: member '%s' joined, marking health alive", member.Name) // Register with the catalog req := structs.RegisterRequest{ @@ -163,6 +164,7 @@ func (s *Server) handleFailedMember(member serf.Member) error { } } } + s.logger.Printf("[INFO] consul: member '%s' failed, marking health critical", member.Name) // Register with the catalog req := structs.RegisterRequest{ @@ -190,6 +192,7 @@ func (s *Server) handleLeftMember(member serf.Member) error { if !found { return nil } + s.logger.Printf("[INFO] consul: member '%s' left, deregistering", member.Name) // Deregister the node req := structs.DeregisterRequest{ diff --git a/consul/leader_test.go b/consul/leader_test.go new file mode 100644 index 0000000000..67d8312e77 --- /dev/null +++ b/consul/leader_test.go @@ -0,0 +1,178 @@ +package consul + +import ( + "fmt" + "github.com/hashicorp/consul/consul/structs" + "os" + "testing" + "time" +) +/* TestLeader_RegisterMember joins a client to the server over the LAN and checks that the server state then holds the client node with exactly one check whose ID is serfCheckID, name is serfCheckName and status is structs.HealthPassing. NOTE(review): relies on fixed sleeps (100ms before joining, 10ms for registration), so it may be timing-sensitive. */ +func TestLeader_RegisterMember(t *testing.T) { + dir1, s1 := testServer(t) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + + dir2, c1 := testClient(t) + defer os.RemoveAll(dir2) + defer c1.Shutdown() + + // Wait until we have a leader + time.Sleep(100 * time.Millisecond) + + // Try to join + addr := fmt.Sprintf("127.0.0.1:%d", + s1.config.SerfLANConfig.MemberlistConfig.BindPort) + if _, err := c1.JoinLAN([]string{addr}); err != nil { + t.Fatalf("err: %v", err) + } + + // Wait for registration + time.Sleep(10 * time.Millisecond) + + // Should be registered + state := s1.fsm.State() + found, _ := state.GetNode(c1.config.NodeName) + if !found { + t.Fatalf("client not registered") + } + + // Should have a check + checks := state.NodeChecks(c1.config.NodeName) + if len(checks) != 1 { + t.Fatalf("client missing check") + } + if
checks[0].CheckID != serfCheckID { + t.Fatalf("bad check: %v", checks[0]) + } + if checks[0].Name != serfCheckName { + t.Fatalf("bad check: %v", checks[0]) + } + if checks[0].Status != structs.HealthPassing { + t.Fatalf("bad check: %v", checks[0]) + } +} +/* TestLeader_FailedMember joins a client, shuts it down without calling Leave, sleeps 500ms, and checks that the node is still present in the server state with its single serf check reporting structs.HealthCritical. */ +func TestLeader_FailedMember(t *testing.T) { + dir1, s1 := testServer(t) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + + dir2, c1 := testClient(t) + defer os.RemoveAll(dir2) + defer c1.Shutdown() + + // Wait until we have a leader + time.Sleep(100 * time.Millisecond) + + // Try to join + addr := fmt.Sprintf("127.0.0.1:%d", + s1.config.SerfLANConfig.MemberlistConfig.BindPort) + if _, err := c1.JoinLAN([]string{addr}); err != nil { + t.Fatalf("err: %v", err) + } + + // Fail the member by shutting it down without a graceful leave + c1.Shutdown() + + // Wait for failure detection + time.Sleep(500 * time.Millisecond) + + // Should be registered + state := s1.fsm.State() + found, _ := state.GetNode(c1.config.NodeName) + if !found { + t.Fatalf("client not registered") + } + + // Should have a check + checks := state.NodeChecks(c1.config.NodeName) + if len(checks) != 1 { + t.Fatalf("client missing check") + } + if checks[0].CheckID != serfCheckID { + t.Fatalf("bad check: %v", checks[0]) + } + if checks[0].Name != serfCheckName { + t.Fatalf("bad check: %v", checks[0]) + } + if checks[0].Status != structs.HealthCritical { + t.Fatalf("bad check: %v", checks[0]) + } +} +/* TestLeader_LeftMember joins a client, confirms it is registered, then calls Leave and Shutdown on it and checks after a 500ms sleep that the node is no longer present in the server state. */ +func TestLeader_LeftMember(t *testing.T) { + dir1, s1 := testServer(t) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + + dir2, c1 := testClient(t) + defer os.RemoveAll(dir2) + defer c1.Shutdown() + + // Wait until we have a leader + time.Sleep(100 * time.Millisecond) + + // Try to join + addr := fmt.Sprintf("127.0.0.1:%d", + s1.config.SerfLANConfig.MemberlistConfig.BindPort) + if _, err := c1.JoinLAN([]string{addr}); err != nil { + t.Fatalf("err: %v", err) + } + + // Wait for registration + time.Sleep(10 * time.Millisecond) + + // Should be registered + state := s1.fsm.State() + found, _ :=
state.GetNode(c1.config.NodeName) + if !found { + t.Fatalf("client not registered") + } + + // Node should leave + c1.Leave() + c1.Shutdown() + + // Wait for the leave to be processed and the node deregistered + time.Sleep(500 * time.Millisecond) + + // Should be deregistered + found, _ = state.GetNode(c1.config.NodeName) + if found { + t.Fatalf("client registered") + } +} +/* TestLeader_Reconcile joins the client without first sleeping for leader election, checks that the node is not yet in the server state, then sleeps 100ms and checks that it has been registered. NOTE(review): the initial not-registered assertion presumably depends on no leader having been elected yet; confirm this cannot race. */ +func TestLeader_Reconcile(t *testing.T) { + dir1, s1 := testServer(t) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + + dir2, c1 := testClient(t) + defer os.RemoveAll(dir2) + defer c1.Shutdown() + + // Join before we have a leader, this should cause a reconcile! + addr := fmt.Sprintf("127.0.0.1:%d", + s1.config.SerfLANConfig.MemberlistConfig.BindPort) + if _, err := c1.JoinLAN([]string{addr}); err != nil { + t.Fatalf("err: %v", err) + } + + // Should not be registered + state := s1.fsm.State() + found, _ := state.GetNode(c1.config.NodeName) + if found { + t.Fatalf("client registered") + } + + // Wait for leader + time.Sleep(100 * time.Millisecond) + + // Should be registered + found, _ = state.GetNode(c1.config.NodeName) + if !found { + t.Fatalf("client not registered") + } +} diff --git a/consul/server_test.go b/consul/server_test.go index 3d01e8c5ed..be9bfc10bc 100644 --- a/consul/server_test.go +++ b/consul/server_test.go @@ -45,14 +45,16 @@ func testServerDC(t *testing.T, dc string) (string, *Server) { } config.SerfLANConfig.MemberlistConfig.BindAddr = "127.0.0.1" config.SerfLANConfig.MemberlistConfig.BindPort = getPort() - config.SerfLANConfig.MemberlistConfig.ProbeTimeout = 200 * time.Millisecond - config.SerfLANConfig.MemberlistConfig.ProbeInterval = time.Second + config.SerfLANConfig.MemberlistConfig.SuspicionMult = 2 + config.SerfLANConfig.MemberlistConfig.ProbeTimeout = 50 * time.Millisecond + config.SerfLANConfig.MemberlistConfig.ProbeInterval = 100 * time.Millisecond config.SerfLANConfig.MemberlistConfig.GossipInterval = 100 * time.Millisecond config.SerfWANConfig.MemberlistConfig.BindAddr = "127.0.0.1"
config.SerfWANConfig.MemberlistConfig.BindPort = getPort() - config.SerfWANConfig.MemberlistConfig.ProbeTimeout = 200 * time.Millisecond - config.SerfWANConfig.MemberlistConfig.ProbeInterval = time.Second + config.SerfWANConfig.MemberlistConfig.SuspicionMult = 2 + config.SerfWANConfig.MemberlistConfig.ProbeTimeout = 50 * time.Millisecond + config.SerfWANConfig.MemberlistConfig.ProbeInterval = 100 * time.Millisecond config.SerfWANConfig.MemberlistConfig.GossipInterval = 100 * time.Millisecond config.RaftConfig.HeartbeatTimeout = 40 * time.Millisecond