Adds support for new-style peers.json recovery for newer Raft protocol versions.

This commit is contained in:
James Phillips 2017-05-04 14:15:59 -07:00
parent 3d43031fbf
commit 5d37086506
No known key found for this signature in database
GPG Key ID: 77183E682AC5FC11
2 changed files with 109 additions and 18 deletions

View File

@ -518,10 +518,17 @@ func (s *Server) setupRaft() error {
} }
} else if _, err := os.Stat(peersFile); err == nil { } else if _, err := os.Stat(peersFile); err == nil {
s.logger.Printf("[INFO] consul: found peers.json file, recovering Raft configuration...") s.logger.Printf("[INFO] consul: found peers.json file, recovering Raft configuration...")
configuration, err := raft.ReadPeersJSON(peersFile)
var configuration raft.Configuration
if s.config.RaftConfig.ProtocolVersion < 3 {
configuration, err = raft.ReadPeersJSON(peersFile)
} else {
configuration, err = raft.ReadConfigJSON(peersFile)
}
if err != nil { if err != nil {
return fmt.Errorf("recovery failed to parse peers.json: %v", err) return fmt.Errorf("recovery failed to parse peers.json: %v", err)
} }
tmpFsm, err := NewFSM(s.tombstoneGC, s.config.LogOutput) tmpFsm, err := NewFSM(s.tombstoneGC, s.config.LogOutput)
if err != nil { if err != nil {
return fmt.Errorf("recovery failed to make temp FSM: %v", err) return fmt.Errorf("recovery failed to make temp FSM: %v", err)
@ -530,6 +537,7 @@ func (s *Server) setupRaft() error {
log, stable, snap, trans, configuration); err != nil { log, stable, snap, trans, configuration); err != nil {
return fmt.Errorf("recovery failed: %v", err) return fmt.Errorf("recovery failed: %v", err)
} }
if err := os.Remove(peersFile); err != nil { if err := os.Remove(peersFile); err != nil {
return fmt.Errorf("recovery failed to delete peers.json, please delete manually (see peers.info for details): %v", err) return fmt.Errorf("recovery failed to delete peers.json, please delete manually (see peers.info for details): %v", err)
} }
@ -965,10 +973,54 @@ func (s *Server) GetWANCoordinate() (*coordinate.Coordinate, error) {
// location. // location.
const peersInfoContent = ` const peersInfoContent = `
As of Consul 0.7.0, the peers.json file is only used for recovery As of Consul 0.7.0, the peers.json file is only used for recovery
after an outage. It should be formatted as a JSON array containing the address after an outage. The format of this file depends on what the server has
and port of each Consul server in the cluster, like this: configured for its Raft protocol version. Please see the agent configuration
page at https://www.consul.io/docs/agent/options.html#_raft_protocol for more
details about this parameter.
["10.1.0.1:8300","10.1.0.2:8300","10.1.0.3:8300"] For Raft protocol version 2 and earlier, this should be formatted as a JSON
array containing the address and port of each Consul server in the cluster, like
this:
[
"10.1.0.1:8300",
"10.1.0.2:8300",
"10.1.0.3:8300"
]
For Raft protocol version 3 and later, this should be formatted as a JSON
array containing the node ID, address:port, and suffrage information of each
Consul server in the cluster, like this:
[
{
"id": "adf4238a-882b-9ddc-4a9d-5b6758e4159e",
"address": "10.1.0.1:8300",
"non_voter": false
},
{
"id": "8b6dda82-3103-11e7-93ae-92361f002671",
"address": "10.1.0.2:8300",
"non_voter": false
},
{
"id": "97e17742-3103-11e7-93ae-92361f002671",
"address": "10.1.0.3:8300",
"non_voter": false
}
]
The "id" field is the node ID of the server. This can be found in the logs when
the server starts up, or in the "node-id" file inside the server's data
directory.
The "address" field is the address and port of the server.
The "non_voter" field controls whether the server is a non-voter, which is used
in some advanced Autopilot configurations, please see
https://www.consul.io/docs/guides/autopilot.html for more information. If
"non_voter" is omitted it will default to false, which is typical for most
clusters.
Under normal operation, the peers.json file will not be present. Under normal operation, the peers.json file will not be present.

View File

@ -124,21 +124,60 @@ periodic basis.
The next step is to go to the [`-data-dir`](/docs/agent/options.html#_data_dir) The next step is to go to the [`-data-dir`](/docs/agent/options.html#_data_dir)
of each Consul server. Inside that directory, there will be a `raft/` of each Consul server. Inside that directory, there will be a `raft/`
sub-directory. We need to create a `raft/peers.json` file. It should look sub-directory. We need to create a `raft/peers.json` file. The format of this file
something like: depends on what the server has configured for its
[Raft protocol](/docs/agent/options.html#_raft_protocol) version.
```javascript For Raft protocol version 2 and earlier, this should be formatted as a JSON
array containing the address and port of each Consul server in the cluster, like
this:
```json
[ [
"10.0.1.8:8300", "10.1.0.1:8300",
"10.0.1.6:8300", "10.1.0.2:8300",
"10.0.1.7:8300" "10.1.0.3:8300"
] ]
``` ```
Simply create entries for all remaining servers. You must confirm For Raft protocol version 3 and later, this should be formatted as a JSON
that servers you do not include here have indeed failed and will not later array containing the node ID, address:port, and suffrage information of each
rejoin the cluster. Ensure that this file is the same across all remaining Consul server in the cluster, like this:
server nodes.
```json
[
{
"id": "adf4238a-882b-9ddc-4a9d-5b6758e4159e",
"address": "10.1.0.1:8300",
"non_voter": false
},
{
"id": "8b6dda82-3103-11e7-93ae-92361f002671",
"address": "10.1.0.2:8300",
"non_voter": false
},
{
"id": "97e17742-3103-11e7-93ae-92361f002671",
"address": "10.1.0.3:8300",
"non_voter": false
}
]
```
- `id` `(string: <required>)` - Specifies the [node ID](/docs/agent/options.html#_node_id)
of the server. This can be found in the logs when the server starts up if it was auto-generated,
and it can also be found inside the `node-id` file in the server's data directory.
- `address` `(string: <required>)` - Specifies the IP and port of the server. The port is the
server's RPC port used for cluster communications.
- `non_voter` `(bool: <false>)` - This controls whether the server is a non-voter, which is used
in some advanced [Autopilot](/docs/guides/autopilot.html) configurations. If omitted, it will
default to false, which is typical for most clusters.
Simply create entries for all remaining servers. You must confirm that servers you do not include
here have indeed failed and will not later rejoin the cluster. Ensure that this file is the same
across all remaining server nodes.
At this point, you can restart all the remaining servers. In Consul 0.7 and At this point, you can restart all the remaining servers. In Consul 0.7 and
later you will see them ingest the recovery file: later you will see them ingest the recovery file:
@ -177,8 +216,8 @@ command to inspect the Raft configuration:
``` ```
$ consul operator raft -list-peers $ consul operator raft -list-peers
Node ID Address State Voter Node ID Address State Voter RaftProtocol
alice 10.0.1.8:8300 10.0.1.8:8300 follower true alice 10.0.1.8:8300 10.0.1.8:8300 follower true 2
bob 10.0.1.6:8300 10.0.1.6:8300 leader true bob 10.0.1.6:8300 10.0.1.6:8300 leader true 2
carol 10.0.1.7:8300 10.0.1.7:8300 follower true carol 10.0.1.7:8300 10.0.1.7:8300 follower true 2
``` ```