From 45646ac3f48cbda03e5c6b4de2b9e30d7882f580 Mon Sep 17 00:00:00 2001 From: James Phillips Date: Mon, 25 Sep 2017 15:27:04 -0700 Subject: [PATCH] Bumps default Raft protocol to version 3. (#3477) * Changes default Raft protocol to 3. * Changes numPeers() to report only voters. This should have been there before, but it's more obvious that this is incorrect now that we default the Raft protocol to 3, which puts new servers in a read-only state while Autopilot waits for them to become healthy. * Fixes TestLeader_RollRaftServer. * Fixes TestOperator_RaftRemovePeerByAddress. * Fixes TestServer_*. Relaxed the check for a given number of voter peers and instead do a thorough check that all servers see each other in their Raft configurations. * Fixes TestACL_*. These now just check for Raft replication to be set up, and don't care about the number of voter peers. * Fixes TestOperator_Raft_ListPeers. * Fixes TestAutopilot_CleanupDeadServerPeriodic. * Fixes TestCatalog_ListNodes_ConsistentRead_Fail. * Fixes TestLeader_ChangeServerID and adjusts the conn pool to throw away sockets when it sees io.EOF. * Changes version to 1.0.0 in the options doc. * Makes metrics test more deterministic with autopilot metrics possible. --- CHANGELOG.md | 4 ++- agent/consul/acl_test.go | 12 +++---- agent/consul/autopilot_test.go | 30 +++++++++++----- agent/consul/catalog_endpoint_test.go | 40 ++++++++++++--------- agent/consul/config.go | 7 ++-- agent/consul/helper_test.go | 37 +++++++++++++++++++ agent/consul/leader_test.go | 27 ++++++++++---- agent/consul/operator_raft_endpoint_test.go | 4 ++- agent/consul/server.go | 17 +++++---- agent/consul/server_test.go | 31 ++++------------ agent/pool/pool.go | 9 +++++ api/agent_test.go | 13 ++++--- command/operator_raft_list_test.go | 4 +-- website/source/docs/agent/options.html.md | 8 +++-- 14 files changed, 160 insertions(+), 83 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 741ad4a418..0ca5fc4ab4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ BREAKING CHANGES: +* **Raft Protocol Defaults to 3:** The [`-raft-protocol`](https://www.consul.io/docs/agent/options.html#_raft_protocol) default has been changed from 2 to 3, enabling all Autopilot features by default. Version 3 requires Consul running 0.8.0 or newer on all servers in order to work, so if you are upgrading with older servers in a cluster then you will need to set this back to 2 in order to upgrade. See [Raft Protocol Version Compatibility](https://www.consul.io/docs/upgrade-specific.html#raft-protocol-version-compatibility) for more details. Also the format of `peers.json` used for outage recovery is different when running with the lastest Raft protocol. See [Manual Recovery Using peers.json](https://www.consul.io/docs/guides/outage.html#manual-recovery-using-peers-json) for a description of the required format. + FEATURES: IMPROVEMENTS: @@ -22,7 +24,7 @@ IMPROVEMENTS: * agent: Switched to using a read lock for the agent's RPC dispatcher, which prevents RPC calls from getting serialized. [GH-3376] * agent: When joining a cluster, Consul now skips the unique node ID constraint for Consul members running Consul older than 0.8.5. This makes it easier to upgrade to newer versions of Consul in an existing cluster with non-unique node IDs. [GH-3070] * build: Upgraded Go version to 1.9. [GH-3428] -* server: Consul servers can re-establish quorum after all of them change their IP addresses upon a restart. [GH-1580] +* server: Consul servers can re-establish quorum after all of them change their IP addresses upon a restart. [GH-1580] BUG FIXES: diff --git a/agent/consul/acl_test.go b/agent/consul/acl_test.go index 92027b84d2..9a1eaba6c5 100644 --- a/agent/consul/acl_test.go +++ b/agent/consul/acl_test.go @@ -235,7 +235,7 @@ func TestACL_NonAuthority_NotFound(t *testing.T) { // Try to join joinLAN(t, s2, s1) - retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s1, 2)) }) + retry.Run(t, func(r *retry.R) { r.Check(wantRaft([]*Server{s1, s2})) }) client := rpcClient(t, s1) defer client.Close() @@ -278,7 +278,7 @@ func TestACL_NonAuthority_Found(t *testing.T) { // Try to join joinLAN(t, s2, s1) - retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s1, 2)) }) + retry.Run(t, func(r *retry.R) { r.Check(wantRaft([]*Server{s1, s2})) }) testrpc.WaitForLeader(t, s1.RPC, "dc1") @@ -346,7 +346,7 @@ func TestACL_NonAuthority_Management(t *testing.T) { // Try to join joinLAN(t, s2, s1) - retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s1, 2)) }) + retry.Run(t, func(r *retry.R) { r.Check(wantRaft([]*Server{s1, s2})) }) testrpc.WaitForLeader(t, s1.RPC, "dc1") @@ -395,7 +395,7 @@ func TestACL_DownPolicy_Deny(t *testing.T) { // Try to join joinLAN(t, s2, s1) - retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s1, 2)) }) + retry.Run(t, func(r *retry.R) { r.Check(wantRaft([]*Server{s1, s2})) }) testrpc.WaitForLeader(t, s1.RPC, "dc1") @@ -461,7 +461,7 @@ func TestACL_DownPolicy_Allow(t *testing.T) { // Try to join joinLAN(t, s2, s1) - retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s1, 2)) }) + retry.Run(t, func(r *retry.R) { r.Check(wantRaft([]*Server{s1, s2})) }) testrpc.WaitForLeader(t, s1.RPC, "dc1") @@ -529,7 +529,7 @@ func TestACL_DownPolicy_ExtendCache(t *testing.T) { // Try to join joinLAN(t, s2, s1) - retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s1, 2)) }) + retry.Run(t, func(r *retry.R) { r.Check(wantRaft([]*Server{s1, s2})) }) testrpc.WaitForLeader(t, s1.RPC, "dc1") diff --git a/agent/consul/autopilot_test.go b/agent/consul/autopilot_test.go index a4ed98ecae..f9c37add01 100644 --- a/agent/consul/autopilot_test.go +++ b/agent/consul/autopilot_test.go @@ -89,6 +89,7 @@ func TestAutopilot_CleanupDeadServerPeriodic(t *testing.T) { c.Datacenter = "dc1" c.Bootstrap = false } + dir2, s2 := testServerWithConfig(t, conf) defer os.RemoveAll(dir2) defer s2.Shutdown() @@ -101,24 +102,35 @@ func TestAutopilot_CleanupDeadServerPeriodic(t *testing.T) { defer os.RemoveAll(dir4) defer s4.Shutdown() - servers := []*Server{s1, s2, s3, s4} + dir5, s5 := testServerWithConfig(t, conf) + defer os.RemoveAll(dir5) + defer s5.Shutdown() - // Join the servers to s1 + servers := []*Server{s1, s2, s3, s4, s5} + + // Join the servers to s1, and wait until they are all promoted to + // voters. for _, s := range servers[1:] { joinLAN(t, s, s1) } - - for _, s := range servers { - retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 4)) }) - } + retry.Run(t, func(r *retry.R) { + r.Check(wantRaft(servers)) + for _, s := range servers { + r.Check(wantPeers(s, 5)) + } + }) // Kill a non-leader server s4.Shutdown() // Should be removed from the peers automatically - for _, s := range []*Server{s1, s2, s3} { - retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) }) - } + servers = []*Server{s1, s2, s3, s5} + retry.Run(t, func(r *retry.R) { + r.Check(wantRaft(servers)) + for _, s := range servers { + r.Check(wantPeers(s, 4)) + } + }) } func TestAutopilot_CleanupStaleRaftServer(t *testing.T) { diff --git a/agent/consul/catalog_endpoint_test.go b/agent/consul/catalog_endpoint_test.go index f1279e32bb..0a31100aba 100644 --- a/agent/consul/catalog_endpoint_test.go +++ b/agent/consul/catalog_endpoint_test.go @@ -779,29 +779,38 @@ func TestCatalog_ListNodes_ConsistentRead_Fail(t *testing.T) { dir1, s1 := testServer(t) defer os.RemoveAll(dir1) defer s1.Shutdown() - codec1 := rpcClient(t, s1) - defer codec1.Close() dir2, s2 := testServerDCBootstrap(t, "dc1", false) defer os.RemoveAll(dir2) defer s2.Shutdown() - codec2 := rpcClient(t, s2) - defer codec2.Close() - // Try to join + dir3, s3 := testServerDCBootstrap(t, "dc1", false) + defer os.RemoveAll(dir3) + defer s3.Shutdown() + + // Try to join and wait for all servers to get promoted to voters. joinLAN(t, s2, s1) + joinLAN(t, s3, s2) + servers := []*Server{s1, s2, s3} + retry.Run(t, func(r *retry.R) { + r.Check(wantRaft(servers)) + for _, s := range servers { + r.Check(wantPeers(s, 3)) + } + }) - testrpc.WaitForLeader(t, s1.RPC, "dc1") - testrpc.WaitForLeader(t, s2.RPC, "dc1") - - // Use the leader as the client, kill the follower + // Use the leader as the client, kill the followers. var codec rpc.ClientCodec - if s1.IsLeader() { - codec = codec1 - s2.Shutdown() - } else { - codec = codec2 - s1.Shutdown() + for _, s := range servers { + if s.IsLeader() { + codec = rpcClient(t, s) + defer codec.Close() + } else { + s.Shutdown() + } + } + if codec == nil { + t.Fatalf("no leader") } args := structs.DCSpecificRequest{ @@ -813,7 +822,6 @@ func TestCatalog_ListNodes_ConsistentRead_Fail(t *testing.T) { if err == nil || !strings.HasPrefix(err.Error(), "leadership lost") { t.Fatalf("err: %v", err) } - if out.QueryMeta.LastContact != 0 { t.Fatalf("should not have a last contact time") } diff --git a/agent/consul/config.go b/agent/consul/config.go index a2cbbbb7c8..1b62af96c5 100644 --- a/agent/consul/config.go +++ b/agent/consul/config.go @@ -434,10 +434,9 @@ func DefaultConfig() *Config { conf.SerfLANConfig.MemberlistConfig.BindPort = DefaultLANSerfPort conf.SerfWANConfig.MemberlistConfig.BindPort = DefaultWANSerfPort - // TODO: default to 3 in Consul 0.9 - // Use a transitional version of the raft protocol to interoperate with - // versions 1 and 3 - conf.RaftConfig.ProtocolVersion = 2 + // Raft protocol version 3 only works with other Consul servers running + // 0.8.0 or later. + conf.RaftConfig.ProtocolVersion = 3 // Disable shutdown on removal conf.RaftConfig.ShutdownOnRemove = false diff --git a/agent/consul/helper_test.go b/agent/consul/helper_test.go index 6c2e3c2bfd..e0a3623d08 100644 --- a/agent/consul/helper_test.go +++ b/agent/consul/helper_test.go @@ -7,6 +7,7 @@ import ( "testing" "github.com/hashicorp/consul/testutil/retry" + "github.com/hashicorp/raft" "github.com/hashicorp/serf/serf" ) @@ -41,6 +42,42 @@ func wantPeers(s *Server, peers int) error { return nil } +// wantRaft determines if the servers have all of each other in their +// Raft configurations, +func wantRaft(servers []*Server) error { + // Make sure all the servers are represented in the Raft config, + // and that there are no extras. + verifyRaft := func(c raft.Configuration) error { + want := make(map[raft.ServerID]bool) + for _, s := range servers { + want[s.config.RaftConfig.LocalID] = true + } + + for _, s := range c.Servers { + if !want[s.ID] { + return fmt.Errorf("don't want %q", s.ID) + } + delete(want, s.ID) + } + + if len(want) > 0 { + return fmt.Errorf("didn't find %v", want) + } + return nil + } + + for _, s := range servers { + future := s.raft.GetConfiguration() + if err := future.Error(); err != nil { + return err + } + if err := verifyRaft(future.Configuration()); err != nil { + return err + } + } + return nil +} + // joinAddrLAN returns the address other servers can // use to join the cluster on the LAN interface. func joinAddrLAN(s *Server) string { diff --git a/agent/consul/leader_test.go b/agent/consul/leader_test.go index b879bc75a8..4de48dd834 100644 --- a/agent/consul/leader_test.go +++ b/agent/consul/leader_test.go @@ -703,6 +703,7 @@ func TestLeader_RollRaftServer(t *testing.T) { dir1, s1 := testServerWithConfig(t, func(c *Config) { c.Bootstrap = true c.Datacenter = "dc1" + c.RaftConfig.ProtocolVersion = 2 }) defer os.RemoveAll(dir1) defer s1.Shutdown() @@ -715,7 +716,11 @@ func TestLeader_RollRaftServer(t *testing.T) { defer os.RemoveAll(dir2) defer s2.Shutdown() - dir3, s3 := testServerDCBootstrap(t, "dc1", false) + dir3, s3 := testServerWithConfig(t, func(c *Config) { + c.Bootstrap = false + c.Datacenter = "dc1" + c.RaftConfig.ProtocolVersion = 2 + }) defer os.RemoveAll(dir3) defer s3.Shutdown() @@ -803,10 +808,9 @@ func TestLeader_ChangeServerID(t *testing.T) { servers := []*Server{s1, s2, s3} - // Try to join + // Try to join and wait for all servers to get promoted joinLAN(t, s2, s1) joinLAN(t, s3, s1) - for _, s := range servers { retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) }) } @@ -841,10 +845,21 @@ func TestLeader_ChangeServerID(t *testing.T) { joinLAN(t, s4, s1) servers[2] = s4 + // While integrating #3327 it uncovered that this test was flaky. The + // connection pool would use the same TCP connection to the old server + // which would give EOF errors to the autopilot health check RPC call. + // To make this more reliable we changed the connection pool to throw + // away the connection if it sees an EOF error, since there's no way + // that connection is going to work again. This made this test reliable + // since it will make a new connection to s4. + // Make sure the dead server is removed and we're back to 3 total peers - for _, s := range servers { - retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) }) - } + retry.Run(t, func(r *retry.R) { + r.Check(wantRaft(servers)) + for _, s := range servers { + r.Check(wantPeers(s, 3)) + } + }) } func TestLeader_ACL_Initialization(t *testing.T) { diff --git a/agent/consul/operator_raft_endpoint_test.go b/agent/consul/operator_raft_endpoint_test.go index 29b72115b4..e7ba78dbeb 100644 --- a/agent/consul/operator_raft_endpoint_test.go +++ b/agent/consul/operator_raft_endpoint_test.go @@ -6,6 +6,7 @@ import ( "reflect" "strings" "testing" + "time" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/structs" @@ -157,7 +158,8 @@ func TestOperator_RaftRemovePeerByAddress(t *testing.T) { // Add it manually to Raft. { - future := s1.raft.AddPeer(arg.Address) + id := raft.ServerID("fake-node-id") + future := s1.raft.AddVoter(id, arg.Address, 0, time.Second) if err := future.Error(); err != nil { t.Fatalf("err: %v", err) } diff --git a/agent/consul/server.go b/agent/consul/server.go index 390797ef9f..ac85326dfe 100644 --- a/agent/consul/server.go +++ b/agent/consul/server.go @@ -588,8 +588,6 @@ func (s *Server) setupRaft() error { return err } if !hasState { - // TODO (slackpad) - This will need to be updated when - // we add support for node IDs. configuration := raft.Configuration{ Servers: []raft.Server{ raft.Server{ @@ -835,15 +833,22 @@ func (s *Server) Leave() error { return nil } -// numPeers is used to check on the number of known peers, including the local -// node. +// numPeers is used to check on the number of known peers, including potentially +// the local node. We count only voters, since others can't actually become +// leader, so aren't considered peers. func (s *Server) numPeers() (int, error) { future := s.raft.GetConfiguration() if err := future.Error(); err != nil { return 0, err } - configuration := future.Configuration() - return len(configuration.Servers), nil + + var numPeers int + for _, server := range future.Configuration().Servers { + if server.Suffrage == raft.Voter { + numPeers++ + } + } + return numPeers, nil } // JoinLAN is used to have Consul join the inner-DC pool diff --git a/agent/consul/server_test.go b/agent/consul/server_test.go index d1ba235ec3..39b9644e8f 100644 --- a/agent/consul/server_test.go +++ b/agent/consul/server_test.go @@ -492,18 +492,10 @@ func TestServer_JoinLAN_TLS(t *testing.T) { // Try to join joinLAN(t, s2, s1) - retry.Run(t, func(r *retry.R) { - if got, want := len(s1.LANMembers()), 2; got != want { - r.Fatalf("got %d s1 LAN members want %d", got, want) - } - if got, want := len(s2.LANMembers()), 2; got != want { - r.Fatalf("got %d s2 LAN members want %d", got, want) - } - }) + // Verify Raft has established a peer retry.Run(t, func(r *retry.R) { - r.Check(wantPeers(s1, 2)) - r.Check(wantPeers(s2, 2)) + r.Check(wantRaft([]*Server{s1, s2})) }) } @@ -555,10 +547,7 @@ func TestServer_Expect(t *testing.T) { // Wait for the new server to see itself added to the cluster. retry.Run(t, func(r *retry.R) { - r.Check(wantPeers(s1, 4)) - r.Check(wantPeers(s2, 4)) - r.Check(wantPeers(s3, 4)) - r.Check(wantPeers(s4, 4)) + r.Check(wantRaft([]*Server{s1, s2, s3, s4})) }) // Make sure there's still a leader and that the term didn't change, @@ -661,16 +650,10 @@ func TestServer_Encrypted(t *testing.T) { } func testVerifyRPC(s1, s2 *Server, t *testing.T) (bool, error) { - // Try to join - addr := fmt.Sprintf("127.0.0.1:%d", - s1.config.SerfLANConfig.MemberlistConfig.BindPort) - if _, err := s2.JoinLAN([]string{addr}); err != nil { - t.Fatalf("err: %v", err) - } - - // make sure both servers know about each other - retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s1, 2)) }) - retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s2, 2)) }) + joinLAN(t, s1, s2) + retry.Run(t, func(r *retry.R) { + r.Check(wantRaft([]*Server{s1, s2})) + }) // Have s2 make an RPC call to s1 var leader *metadata.Server diff --git a/agent/pool/pool.go b/agent/pool/pool.go index 4be5b72ca5..7fb55baebc 100644 --- a/agent/pool/pool.go +++ b/agent/pool/pool.go @@ -413,6 +413,15 @@ func (p *ConnPool) RPC(dc string, addr net.Addr, version int, method string, use err = msgpackrpc.CallWithCodec(sc.codec, method, args, reply) if err != nil { sc.Close() + + // See the comment in leader_test.go TestLeader_ChangeServerID + // about how we found this. The tldr is that if we see this + // error, we know this connection is toast, so we should clear + // it and make a new one on the next attempt. + if err == io.EOF { + p.clearConn(conn) + } + p.releaseConn(conn) return fmt.Errorf("rpc error: %v", err) } diff --git a/api/agent_test.go b/api/agent_test.go index a51f467a0c..92b7b5c32a 100644 --- a/api/agent_test.go +++ b/api/agent_test.go @@ -41,12 +41,15 @@ func TestAPI_AgentMetrics(t *testing.T) { t.Fatalf("err: %v", err) } - if len(metrics.Gauges) < 0 { - t.Fatalf("bad: %v", metrics) + var found bool + for _, g := range metrics.Gauges { + if g.Name == "consul.runtime.alloc_bytes" { + found = true + break + } } - - if metrics.Gauges[0].Name != "consul.runtime.alloc_bytes" { - t.Fatalf("bad: %v", metrics.Gauges[0]) + if !found { + t.Fatalf("missing runtime metrics") } } diff --git a/command/operator_raft_list_test.go b/command/operator_raft_list_test.go index e43b820c06..204491ab53 100644 --- a/command/operator_raft_list_test.go +++ b/command/operator_raft_list_test.go @@ -19,8 +19,8 @@ func TestOperator_Raft_ListPeers(t *testing.T) { a := agent.NewTestAgent(t.Name(), ``) defer a.Shutdown() - expected := fmt.Sprintf("%s 127.0.0.1:%d 127.0.0.1:%d leader true 2", - a.Config.NodeName, a.Config.ServerPort, a.Config.ServerPort) + expected := fmt.Sprintf("%s %s 127.0.0.1:%d leader true 3", + a.Config.NodeName, a.Config.NodeID, a.Config.ServerPort) // Test the legacy mode with 'consul operator raft -list-peers' { diff --git a/website/source/docs/agent/options.html.md b/website/source/docs/agent/options.html.md index df9172e402..c249a87fcf 100644 --- a/website/source/docs/agent/options.html.md +++ b/website/source/docs/agent/options.html.md @@ -458,9 +458,11 @@ will exit with an error at startup. You can view the protocol versions supported by Consul by running `consul -v`. * `-raft-protocol` - This controls the internal - version of the Raft consensus protocol used for server communications. This defaults to 2 but must - be set to 3 in order to gain access to Autopilot features, with the exception of - [`cleanup_dead_servers`](#cleanup_dead_servers). + version of the Raft consensus protocol used for server communications. This must be set to 3 in order to + gain access to Autopilot features, with the exception of [`cleanup_dead_servers`](#cleanup_dead_servers). + Defaults to 3 in Consul 1.0.0 and later (defaulted to 2 previously). See + [Raft Protocol Version Compatibility](/docs/upgrade-specific.html#raft-protocol-version-compatibility) + for more details. * `-recursor` - Specifies the address of an upstream DNS server. This option may be provided multiple times, and is functionally