From 3a855d362f8bf6851d86f024136056d00d42bfc0 Mon Sep 17 00:00:00 2001 From: James Phillips Date: Fri, 26 Aug 2016 16:34:28 -0700 Subject: [PATCH 01/17] Preallocates result struct, which was a profiling hot spot. --- consul/state/state_store.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/consul/state/state_store.go b/consul/state/state_store.go index 7949d89f12..1f92e26e01 100644 --- a/consul/state/state_store.go +++ b/consul/state/state_store.go @@ -1248,7 +1248,7 @@ func (s *StateStore) parseCheckServiceNodes( return 0, nil, err } - var results structs.CheckServiceNodes + results := make(structs.CheckServiceNodes, 0, len(services)) for _, sn := range services { // Retrieve the node. n, err := tx.First("nodes", "id", sn.Node) From a7cfcc3634eb0b5e55f5389a68596310a2e1b09d Mon Sep 17 00:00:00 2001 From: James Phillips Date: Fri, 26 Aug 2016 21:51:07 -0700 Subject: [PATCH 02/17] Tweaks platform information in the README. --- README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c8d73d7ba2..221f3a8ae9 100644 --- a/README.md +++ b/README.md @@ -25,8 +25,7 @@ Consul provides several key features: * **Multi-Datacenter** - Consul is built to be datacenter aware, and can support any number of regions without complex configuration. -Consul runs on Linux, Mac OS X, and Windows. It is recommended to run the -Consul servers only on Linux, however. +Consul runs on Linux, Mac OS X, FreeBSD, Solaris, and Windows. ## Quick Start @@ -56,7 +55,7 @@ $ bin/consul ... ``` -*note: `make` will also place a copy of the binary in the first part of your $GOPATH* +*Note: `make` will also place a copy of the binary in the first part of your `$GOPATH`.* You can run tests by typing `make test`. @@ -85,4 +84,4 @@ with MinGW. ## Vendoring Consul currently uses [govendor](https://github.com/kardianos/govendor) for -vendoring. \ No newline at end of file +vendoring. From 53149bd2f9f8a266d317ef852bb8399d7a563391 Mon Sep 17 00:00:00 2001 From: James Phillips Date: Mon, 29 Aug 2016 19:12:07 -0700 Subject: [PATCH 03/17] Makes empty checkServiceNode return a nil. The change in #2308 had an inadvertent interface change, so we fix that with a special case in this fix. --- consul/state/state_store.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/consul/state/state_store.go b/consul/state/state_store.go index 1f92e26e01..a2c94b7df6 100644 --- a/consul/state/state_store.go +++ b/consul/state/state_store.go @@ -1248,6 +1248,12 @@ func (s *StateStore) parseCheckServiceNodes( return 0, nil, err } + // Special-case the zero return value to nil, since this ends up in + // external APIs. + if len(services) == 0 { + return idx, nil, nil + } + results := make(structs.CheckServiceNodes, 0, len(services)) for _, sn := range services { // Retrieve the node. From e5850d8a2606da8abfaa5c75100b8285e198cb84 Mon Sep 17 00:00:00 2001 From: James Phillips Date: Mon, 29 Aug 2016 19:09:57 -0700 Subject: [PATCH 04/17] Adds new consul operator endpoint, CLI, and ACL and some basic Raft commands. 
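The new `operator` ACL rule accepts "read", "write", or "deny", with write
implying read. A minimal sketch (illustrative, not part of the patch itself)
of how a parsed rule flows through the acl package, mirroring the
TestPolicyACL_Operator test added below:

```go
package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/consul/acl"
)

func main() {
	// Parse an HCL rule set that grants read-only operator access.
	policy, err := acl.Parse(`operator = "read"`)
	if err != nil {
		log.Fatal(err)
	}

	// Layer it on top of a deny-all parent, as the new tests do.
	a, err := acl.New(acl.DenyAll(), policy)
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println(a.OperatorRead())  // true: "read" covers the read-only endpoints
	fmt.Println(a.OperatorWrite()) // false: peer removal needs operator = "write"
}
```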
--- acl/acl.go | 45 +++- acl/acl_test.go | 49 +++- acl/policy.go | 6 + acl/policy_test.go | 29 ++- api/operator.go | 71 ++++++ api/operator_test.go | 38 +++ command/agent/http.go | 3 + command/agent/operator_endpoint.go | 57 +++++ command/agent/operator_endpoint_test.go | 58 +++++ command/operator.go | 180 ++++++++++++++ command/operator_test.go | 52 ++++ commands.go | 6 + consul/operator_endpoint.go | 120 +++++++++ consul/operator_endpoint_test.go | 229 ++++++++++++++++++ consul/server.go | 39 +-- consul/structs/operator.go | 42 ++++ .../docs/agent/http/operator.html.markdown | 48 ++++ .../source/docs/commands/index.html.markdown | 1 + .../docs/commands/operator.html.markdown | 100 ++++++++ website/source/layouts/docs.erb | 8 + 20 files changed, 1156 insertions(+), 25 deletions(-) create mode 100644 api/operator.go create mode 100644 api/operator_test.go create mode 100644 command/agent/operator_endpoint.go create mode 100644 command/agent/operator_endpoint_test.go create mode 100644 command/operator.go create mode 100644 command/operator_test.go create mode 100644 consul/operator_endpoint.go create mode 100644 consul/operator_endpoint_test.go create mode 100644 consul/structs/operator.go create mode 100644 website/source/docs/agent/http/operator.html.markdown create mode 100644 website/source/docs/commands/operator.html.markdown diff --git a/acl/acl.go b/acl/acl.go index 33f5e23372..f13dc5b569 100644 --- a/acl/acl.go +++ b/acl/acl.go @@ -73,6 +73,14 @@ type ACL interface { // KeyringWrite determines if the keyring can be manipulated KeyringWrite() bool + // OperatorRead determines if the read-only Consul operator functions + // can be used. + OperatorRead() bool + + // OperatorWrite determines if the state-changing Consul operator + // functions can be used. + OperatorWrite() bool + // ACLList checks for permission to list all the ACLs ACLList() bool @@ -132,6 +140,14 @@ func (s *StaticACL) KeyringWrite() bool { return s.defaultAllow } +func (s *StaticACL) OperatorRead() bool { + return s.defaultAllow +} + +func (s *StaticACL) OperatorWrite() bool { + return s.defaultAllow +} + func (s *StaticACL) ACLList() bool { return s.allowManage } @@ -188,10 +204,13 @@ type PolicyACL struct { // preparedQueryRules contains the prepared query policies preparedQueryRules *radix.Tree - // keyringRules contains the keyring policies. The keyring has + // keyringRule contains the keyring policies. The keyring has // a very simple yes/no without prefix matching, so here we // don't need to use a radix tree. keyringRule string + + // operatorRule contains the operator policies. + operatorRule string } // New is used to construct a policy based ACL from a set of policies @@ -228,6 +247,9 @@ func New(parent ACL, policy *Policy) (*PolicyACL, error) { // Load the keyring policy p.keyringRule = policy.Keyring + // Load the operator policy + p.operatorRule = policy.Operator + return p, nil } @@ -422,6 +444,27 @@ func (p *PolicyACL) KeyringWrite() bool { return p.parent.KeyringWrite() } +// OperatorRead determines if the read-only operator functions are allowed. +func (p *PolicyACL) OperatorRead() bool { + switch p.operatorRule { + case PolicyRead, PolicyWrite: + return true + case PolicyDeny: + return false + default: + return p.parent.OperatorRead() + } +} + +// OperatorWrite determines if the state-changing operator functions are +// allowed. 
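+// As with KeyringWrite above, only an explicit "write" rule short-circuits
+// to true here; anything else defers to the parent ACL.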
+func (p *PolicyACL) OperatorWrite() bool { + if p.operatorRule == PolicyWrite { + return true + } + return p.parent.OperatorWrite() +} + // ACLList checks if listing of ACLs is allowed func (p *PolicyACL) ACLList() bool { return p.parent.ACLList() diff --git a/acl/acl_test.go b/acl/acl_test.go index 69c4f0a1bf..aa1c7972d4 100644 --- a/acl/acl_test.go +++ b/acl/acl_test.go @@ -65,6 +65,12 @@ func TestStaticACL(t *testing.T) { if !all.KeyringWrite() { t.Fatalf("should allow") } + if !all.OperatorRead() { + t.Fatalf("should allow") + } + if !all.OperatorWrite() { + t.Fatalf("should allow") + } if all.ACLList() { t.Fatalf("should not allow") } @@ -108,6 +114,12 @@ func TestStaticACL(t *testing.T) { if none.KeyringWrite() { t.Fatalf("should not allow") } + if none.OperatorRead() { + t.Fatalf("should now allow") + } + if none.OperatorWrite() { + t.Fatalf("should not allow") + } if none.ACLList() { t.Fatalf("should not allow") } @@ -145,6 +157,12 @@ func TestStaticACL(t *testing.T) { if !manage.KeyringWrite() { t.Fatalf("should allow") } + if !manage.OperatorRead() { + t.Fatalf("should allow") + } + if !manage.OperatorWrite() { + t.Fatalf("should allow") + } if !manage.ACLList() { t.Fatalf("should allow") } @@ -480,19 +498,18 @@ func TestPolicyACL_Parent(t *testing.T) { } func TestPolicyACL_Keyring(t *testing.T) { - // Test keyring ACLs type keyringcase struct { inp string read bool write bool } - keyringcases := []keyringcase{ + cases := []keyringcase{ {"", false, false}, {PolicyRead, true, false}, {PolicyWrite, true, true}, {PolicyDeny, false, false}, } - for _, c := range keyringcases { + for _, c := range cases { acl, err := New(DenyAll(), &Policy{Keyring: c.inp}) if err != nil { t.Fatalf("bad: %s", err) @@ -505,3 +522,29 @@ func TestPolicyACL_Keyring(t *testing.T) { } } } + +func TestPolicyACL_Operator(t *testing.T) { + type operatorcase struct { + inp string + read bool + write bool + } + cases := []operatorcase{ + {"", false, false}, + {PolicyRead, true, false}, + {PolicyWrite, true, true}, + {PolicyDeny, false, false}, + } + for _, c := range cases { + acl, err := New(DenyAll(), &Policy{Operator: c.inp}) + if err != nil { + t.Fatalf("bad: %s", err) + } + if acl.OperatorRead() != c.read { + t.Fatalf("bad: %#v", c) + } + if acl.OperatorWrite() != c.write { + t.Fatalf("bad: %#v", c) + } + } +} diff --git a/acl/policy.go b/acl/policy.go index a0e56da425..ae69067fea 100644 --- a/acl/policy.go +++ b/acl/policy.go @@ -21,6 +21,7 @@ type Policy struct { Events []*EventPolicy `hcl:"event,expand"` PreparedQueries []*PreparedQueryPolicy `hcl:"query,expand"` Keyring string `hcl:"keyring"` + Operator string `hcl:"operator"` } // KeyPolicy represents a policy for a key @@ -125,5 +126,10 @@ func Parse(rules string) (*Policy, error) { return nil, fmt.Errorf("Invalid keyring policy: %#v", p.Keyring) } + // Validate the operator policy - this one is allowed to be empty + if p.Operator != "" && !isPolicyValid(p.Operator) { + return nil, fmt.Errorf("Invalid operator policy: %#v", p.Operator) + } + return p, nil } diff --git a/acl/policy_test.go b/acl/policy_test.go index c59a4e0146..7f31bf8608 100644 --- a/acl/policy_test.go +++ b/acl/policy_test.go @@ -45,6 +45,7 @@ query "bar" { policy = "deny" } keyring = "deny" +operator = "deny" ` exp := &Policy{ Keys: []*KeyPolicy{ @@ -103,7 +104,8 @@ keyring = "deny" Policy: PolicyDeny, }, }, - Keyring: PolicyDeny, + Keyring: PolicyDeny, + Operator: PolicyDeny, } out, err := Parse(inp) @@ -162,7 +164,8 @@ func TestACLPolicy_Parse_JSON(t *testing.T) { "policy": "deny" 
 	}
 },
-	"keyring": "deny"
+	"keyring": "deny",
+	"operator": "deny"
 }`
 	exp := &Policy{
 		Keys: []*KeyPolicy{
@@ -221,7 +224,8 @@ func TestACLPolicy_Parse_JSON(t *testing.T) {
 				Policy: PolicyDeny,
 			},
 		},
-		Keyring: PolicyDeny,
+		Keyring:  PolicyDeny,
+		Operator: PolicyDeny,
 	}
 
 	out, err := Parse(inp)
@@ -252,6 +256,24 @@ keyring = ""
 	}
 }
 
+func TestACLPolicy_Operator_Empty(t *testing.T) {
+	inp := `
+operator = ""
+	`
+	exp := &Policy{
+		Operator: "",
+	}
+
+	out, err := Parse(inp)
+	if err != nil {
+		t.Fatalf("err: %v", err)
+	}
+
+	if !reflect.DeepEqual(out, exp) {
+		t.Fatalf("bad: %#v %#v", out, exp)
+	}
+}
+
 func TestACLPolicy_Bad_Policy(t *testing.T) {
 	cases := []string{
 		`key "" { policy = "nope" }`,
@@ -259,6 +281,7 @@ func TestACLPolicy_Bad_Policy(t *testing.T) {
 		`event "" { policy = "nope" }`,
 		`query "" { policy = "nope" }`,
 		`keyring = "nope"`,
+		`operator = "nope"`,
 	}
 	for _, c := range cases {
 		_, err := Parse(c)
diff --git a/api/operator.go b/api/operator.go
new file mode 100644
index 0000000000..b7389f0f59
--- /dev/null
+++ b/api/operator.go
@@ -0,0 +1,71 @@
+package api
+
+import (
+	"github.com/hashicorp/raft"
+)
+
+// Operator can be used to perform low-level operator tasks for Consul.
+type Operator struct {
+	c *Client
+}
+
+// Operator returns a handle to the operator endpoints.
+func (c *Client) Operator() *Operator {
+	return &Operator{c}
+}
+
+// RaftConfigration is returned when querying for the current Raft configuration.
+// This has the low-level Raft structure, as well as some supplemental
+// information from Consul.
+type RaftConfiguration struct {
+	// Configuration is the low-level Raft configuration structure.
+	Configuration raft.Configuration
+
+	// NodeMap maps IDs in the Raft configuration to node names known by
+	// Consul. It's possible that not all configuration entries may have
+	// an entry here if the node isn't known to Consul. Given how this is
+	// generated, this may also contain entries that aren't present in the
+	// Raft configuration.
+	NodeMap map[raft.ServerID]string
+
+	// Leader is the ID of the current Raft leader. This may be blank if
+	// there isn't one.
+	Leader raft.ServerID
+}
+
+// RaftGetConfiguration is used to query the current Raft peer set.
+func (op *Operator) RaftGetConfiguration(q *QueryOptions) (*RaftConfiguration, error) {
+	r := op.c.newRequest("GET", "/v1/operator/raft/configuration")
+	r.setQueryOptions(q)
+	_, resp, err := requireOK(op.c.doRequest(r))
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	var out RaftConfiguration
+	if err := decodeBody(resp, &out); err != nil {
+		return nil, err
+	}
+	return &out, nil
+}
+
+// RaftRemovePeerByAddress is used to kick a stale peer (one that is in the Raft
+// quorum but no longer known to Serf or the catalog) by address in the form of
+// "IP:port".
+func (op *Operator) RaftRemovePeerByAddress(address raft.ServerAddress, q *WriteOptions) error {
+	r := op.c.newRequest("DELETE", "/v1/operator/raft/peer")
+	r.setWriteOptions(q)
+
+	// TODO (slackpad) Currently we made address a query parameter. Once
+	// IDs are in place this will be DELETE /v1/raft-peer/.
+ r.params.Set("address", string(address)) + + _, resp, err := requireOK(op.c.doRequest(r)) + if err != nil { + return err + } + + resp.Body.Close() + return nil +} diff --git a/api/operator_test.go b/api/operator_test.go new file mode 100644 index 0000000000..a0d8af69e2 --- /dev/null +++ b/api/operator_test.go @@ -0,0 +1,38 @@ +package api + +import ( + "strings" + "testing" +) + +func TestOperator_RaftGetConfiguration(t *testing.T) { + t.Parallel() + c, s := makeClient(t) + defer s.Stop() + + operator := c.Operator() + out, err := operator.RaftGetConfiguration(nil) + if err != nil { + t.Fatalf("err: %v", err) + } + if len(out.Configuration.Servers) != 1 || + len(out.NodeMap) != 1 || + len(out.Leader) == 0 { + t.Fatalf("bad: %v", out) + } +} + +func TestOperator_RaftRemovePeerByAddress(t *testing.T) { + t.Parallel() + c, s := makeClient(t) + defer s.Stop() + + // If we get this error, it proves we sent the address all the way + // through. + operator := c.Operator() + err := operator.RaftRemovePeerByAddress("nope", nil) + if err == nil || !strings.Contains(err.Error(), + "address \"nope\" was not found in the Raft configuration") { + t.Fatalf("err: %v", err) + } +} diff --git a/command/agent/http.go b/command/agent/http.go index 5d7dcce7c6..52ed69e8e1 100644 --- a/command/agent/http.go +++ b/command/agent/http.go @@ -230,6 +230,9 @@ func (s *HTTPServer) registerHandlers(enableDebug bool) { s.handleFuncMetrics("/v1/status/leader", s.wrap(s.StatusLeader)) s.handleFuncMetrics("/v1/status/peers", s.wrap(s.StatusPeers)) + s.handleFuncMetrics("/v1/operator/raft/configuration", s.wrap(s.OperatorRaftConfiguration)) + s.handleFuncMetrics("/v1/operator/raft/peer", s.wrap(s.OperatorRaftPeer)) + s.handleFuncMetrics("/v1/catalog/register", s.wrap(s.CatalogRegister)) s.handleFuncMetrics("/v1/catalog/deregister", s.wrap(s.CatalogDeregister)) s.handleFuncMetrics("/v1/catalog/datacenters", s.wrap(s.CatalogDatacenters)) diff --git a/command/agent/operator_endpoint.go b/command/agent/operator_endpoint.go new file mode 100644 index 0000000000..cdab48c387 --- /dev/null +++ b/command/agent/operator_endpoint.go @@ -0,0 +1,57 @@ +package agent + +import ( + "net/http" + + "github.com/hashicorp/consul/consul/structs" + "github.com/hashicorp/raft" +) + +// OperatorRaftConfiguration is used to inspect the current Raft configuration. +// This supports the stale query mode in case the cluster doesn't have a leader. +func (s *HTTPServer) OperatorRaftConfiguration(resp http.ResponseWriter, req *http.Request) (interface{}, error) { + if req.Method != "GET" { + resp.WriteHeader(http.StatusMethodNotAllowed) + return nil, nil + } + + var args structs.DCSpecificRequest + if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done { + return nil, nil + } + + var reply structs.RaftConfigurationResponse + if err := s.agent.RPC("Operator.RaftGetConfiguration", &args, &reply); err != nil { + return nil, err + } + + return reply, nil +} + +// OperatorRaftPeer supports actions on Raft peers. Currently we only support +// removing peers by address. 
+func (s *HTTPServer) OperatorRaftPeer(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
+	if req.Method != "DELETE" {
+		resp.WriteHeader(http.StatusMethodNotAllowed)
+		return nil, nil
+	}
+
+	var args structs.RaftPeerByAddressRequest
+	s.parseDC(req, &args.Datacenter)
+	s.parseToken(req, &args.Token)
+
+	params := req.URL.Query()
+	if _, ok := params["address"]; ok {
+		args.Address = raft.ServerAddress(params.Get("address"))
+	} else {
+		resp.WriteHeader(http.StatusBadRequest)
+		resp.Write([]byte("Must specify ?address with IP:port of peer to remove"))
+		return nil, nil
+	}
+
+	var reply struct{}
+	if err := s.agent.RPC("Operator.RaftRemovePeerByAddress", &args, &reply); err != nil {
+		return nil, err
+	}
+	return nil, nil
+}
diff --git a/command/agent/operator_endpoint_test.go b/command/agent/operator_endpoint_test.go
new file mode 100644
index 0000000000..8e3ebe7200
--- /dev/null
+++ b/command/agent/operator_endpoint_test.go
@@ -0,0 +1,58 @@
+package agent
+
+import (
+	"bytes"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"github.com/hashicorp/consul/consul/structs"
+)
+
+func TestOperator_OperatorRaftConfiguration(t *testing.T) {
+	httpTest(t, func(srv *HTTPServer) {
+		body := bytes.NewBuffer(nil)
+		req, err := http.NewRequest("GET", "/v1/operator/raft/configuration", body)
+		if err != nil {
+			t.Fatalf("err: %v", err)
+		}
+
+		resp := httptest.NewRecorder()
+		obj, err := srv.OperatorRaftConfiguration(resp, req)
+		if err != nil {
+			t.Fatalf("err: %v", err)
+		}
+		if resp.Code != 200 {
+			t.Fatalf("bad code: %d", resp.Code)
+		}
+		out, ok := obj.(structs.RaftConfigurationResponse)
+		if !ok {
+			t.Fatalf("unexpected: %T", obj)
+		}
+		if len(out.Configuration.Servers) != 1 ||
+			len(out.NodeMap) != 1 ||
+			len(out.Leader) == 0 {
+			t.Fatalf("bad: %v", out)
+		}
+	})
+}
+
+func TestOperator_OperatorRaftPeer(t *testing.T) {
+	httpTest(t, func(srv *HTTPServer) {
+		body := bytes.NewBuffer(nil)
+		req, err := http.NewRequest("DELETE", "/v1/operator/raft/peer?address=nope", body)
+		if err != nil {
+			t.Fatalf("err: %v", err)
+		}
+
+		// If we get this error, it proves we sent the address all the
+		// way through.
+		resp := httptest.NewRecorder()
+		_, err = srv.OperatorRaftPeer(resp, req)
+		if err == nil || !strings.Contains(err.Error(),
+			"address \"nope\" was not found in the Raft configuration") {
+			t.Fatalf("err: %v", err)
+		}
+	})
+}
diff --git a/command/operator.go b/command/operator.go
new file mode 100644
index 0000000000..d1a6d8d3b3
--- /dev/null
+++ b/command/operator.go
@@ -0,0 +1,180 @@
+package command
+
+import (
+	"flag"
+	"fmt"
+	"strings"
+
+	"github.com/hashicorp/consul/api"
+	"github.com/hashicorp/raft"
+	"github.com/mitchellh/cli"
+	"github.com/ryanuber/columnize"
+)
+
+// OperatorCommand is used to provide various low-level tools for Consul
+// operators.
+type OperatorCommand struct {
+	Ui cli.Ui
+}
+
+func (c *OperatorCommand) Help() string {
+	helpText := `
+Usage: consul operator <subcommand> [common options] [action] [options]
+
+  Provides cluster-level tools for Consul operators, such as interacting with
+  the Raft subsystem. NOTE: Use this command with extreme caution, as improper
+  use could lead to a Consul outage and even loss of data.
+
+  If ACLs are enabled then a token with operator privileges may be required in
+  order to use this command. Requests are forwarded internally to the leader
+  if required, so this can be run from any Consul node in a cluster.
+
+  Run consul operator <subcommand> with no arguments for help on that
+  subcommand.
+
+Common Options:
+
+  -http-addr=127.0.0.1:8500  HTTP address of the Consul agent.
+  -token=""                  ACL token to use. Defaults to that of agent.
+
+Subcommands:
+
+  raft  View and modify Consul's Raft configuration.
+`
+	return strings.TrimSpace(helpText)
+}
+
+func (c *OperatorCommand) Run(args []string) int {
+	if len(args) < 1 {
+		c.Ui.Error("A subcommand must be specified")
+		c.Ui.Error("")
+		c.Ui.Error(c.Help())
+		return 1
+	}
+
+	var err error
+	subcommand := args[0]
+	switch subcommand {
+	case "raft":
+		err = c.raft(args[1:])
+	default:
+		err = fmt.Errorf("unknown subcommand %q", subcommand)
+	}
+
+	if err != nil {
+		c.Ui.Error(fmt.Sprintf("Operator %q subcommand failed: %v", subcommand, err))
+		return 1
+	}
+	return 0
+}
+
+// Synopsis returns a one-line description of this command.
+func (c *OperatorCommand) Synopsis() string {
+	return "Provides cluster-level tools for Consul operators"
+}
+
+const raftHelp = `
+Raft Subcommand Actions:
+
+  raft -list-peers -stale=[true|false]
+
+    Displays the current Raft peer configuration.
+
+    The -stale argument defaults to "false" which means the leader provides the
+    result. If the cluster is in an outage state without a leader, you may need
+    to set -stale to "true" to get the configuration from a non-leader server.
+
+  raft -remove-peer -address="IP:port"
+
+    Removes the Consul server with the given -address from the Raft
+    configuration.
+
+    There are rare cases where a peer may be left behind in the Raft quorum even
+    though the server is no longer present and known to the cluster. This
+    command can be used to remove the failed server so that it no longer
+    affects the Raft quorum. If the server still shows in the output of the
+    "consul members" command, it is preferable to clean up by simply running
+    "consul force-leave" instead of this command.
+`
+
+// raft handles the raft subcommands.
+func (c *OperatorCommand) raft(args []string) error {
+	cmdFlags := flag.NewFlagSet("raft", flag.ContinueOnError)
+	cmdFlags.Usage = func() { c.Ui.Output(c.Help()) }
+
+	// Parse verb arguments.
+	var listPeers, removePeer bool
+	cmdFlags.BoolVar(&listPeers, "list-peers", false, "")
+	cmdFlags.BoolVar(&removePeer, "remove-peer", false, "")
+
+	// Parse other arguments.
+	var stale bool
+	var address, token string
+	cmdFlags.StringVar(&address, "address", "", "")
+	cmdFlags.BoolVar(&stale, "stale", false, "")
+	cmdFlags.StringVar(&token, "token", "", "")
+	httpAddr := HTTPAddrFlag(cmdFlags)
+	if err := cmdFlags.Parse(args); err != nil {
+		return err
+	}
+
+	// Set up a client.
+	conf := api.DefaultConfig()
+	conf.Address = *httpAddr
+	client, err := api.NewClient(conf)
+	if err != nil {
+		return fmt.Errorf("error connecting to Consul agent: %s", err)
+	}
+	operator := client.Operator()
+
+	// Dispatch based on the verb argument.
+	if listPeers {
+		// Fetch the current configuration.
+		q := &api.QueryOptions{
+			AllowStale: stale,
+			Token:      token,
+		}
+		reply, err := operator.RaftGetConfiguration(q)
+		if err != nil {
+			return err
+		}
+
+		// Format it as a nice table.
+		result := []string{"Node|ID|Address|State|Voter"}
+		for _, s := range reply.Configuration.Servers {
+			node := "(unknown)"
+			if mappedNode, ok := reply.NodeMap[s.ID]; ok {
+				node = mappedNode
+			}
+			state := "follower"
+			if s.ID == reply.Leader {
+				state = "leader"
+			}
+			voter := s.Suffrage == raft.Voter
+			result = append(result, fmt.Sprintf("%s|%s|%s|%s|%v",
+				node, s.ID, s.Address, state, voter))
+		}
+		c.Ui.Output(columnize.SimpleFormat(result))
+	} else if removePeer {
+		// TODO (slackpad) Once we expose IDs, add support for removing
+		// by ID.
+		if len(address) == 0 {
+			return fmt.Errorf("an address is required for the peer to remove")
+		}
+
+		// Try to kick the peer.
+		w := &api.WriteOptions{
+			Token: token,
+		}
+		sa := raft.ServerAddress(address)
+		if err := operator.RaftRemovePeerByAddress(sa, w); err != nil {
+			return err
+		}
+		c.Ui.Output(fmt.Sprintf("Removed peer with address %q", address))
+	} else {
+		c.Ui.Output(c.Help())
+		c.Ui.Output("")
+		c.Ui.Output(strings.TrimSpace(raftHelp))
+	}
+
+	return nil
+}
diff --git a/command/operator_test.go b/command/operator_test.go
new file mode 100644
index 0000000000..e65434b75d
--- /dev/null
+++ b/command/operator_test.go
@@ -0,0 +1,52 @@
+package command
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/mitchellh/cli"
+)
+
+func TestOperator_Implements(t *testing.T) {
+	var _ cli.Command = &OperatorCommand{}
+}
+
+func TestOperator_Raft_ListPeers(t *testing.T) {
+	a1 := testAgent(t)
+	defer a1.Shutdown()
+	waitForLeader(t, a1.httpAddr)
+
+	ui := new(cli.MockUi)
+	c := &OperatorCommand{Ui: ui}
+	args := []string{"raft", "-http-addr=" + a1.httpAddr, "-list-peers"}
+
+	code := c.Run(args)
+	if code != 0 {
+		t.Fatalf("bad: %d. %#v", code, ui.ErrorWriter.String())
+	}
+	output := strings.TrimSpace(ui.OutputWriter.String())
+	if !strings.Contains(output, "leader") {
+		t.Fatalf("bad: %s", output)
+	}
+}
+
+func TestOperator_Raft_RemovePeer(t *testing.T) {
+	a1 := testAgent(t)
+	defer a1.Shutdown()
+	waitForLeader(t, a1.httpAddr)
+
+	ui := new(cli.MockUi)
+	c := &OperatorCommand{Ui: ui}
+	args := []string{"raft", "-http-addr=" + a1.httpAddr, "-remove-peer", "-address=nope"}
+
+	code := c.Run(args)
+	if code != 1 {
+		t.Fatalf("bad: %d. %#v", code, ui.ErrorWriter.String())
+	}
+
+	// If we get this error, it proves we sent the address all the way
+	// through.
+	output := strings.TrimSpace(ui.ErrorWriter.String())
+	if !strings.Contains(output, "address \"nope\" was not found in the Raft configuration") {
+		t.Fatalf("bad: %s", output)
+	}
+}
diff --git a/commands.go b/commands.go
index 84f0c07fe6..2a25c77f81 100644
--- a/commands.go
+++ b/commands.go
@@ -103,6 +103,12 @@ func init() {
 			}, nil
 		},
 
+		"operator": func() (cli.Command, error) {
+			return &command.OperatorCommand{
+				Ui: ui,
+			}, nil
+		},
+
 		"info": func() (cli.Command, error) {
 			return &command.InfoCommand{
 				Ui: ui,
diff --git a/consul/operator_endpoint.go b/consul/operator_endpoint.go
new file mode 100644
index 0000000000..a33c07a781
--- /dev/null
+++ b/consul/operator_endpoint.go
@@ -0,0 +1,120 @@
+package consul
+
+import (
+	"fmt"
+	"net"
+
+	"github.com/hashicorp/consul/consul/agent"
+	"github.com/hashicorp/consul/consul/structs"
+	"github.com/hashicorp/raft"
+)
+
+// Operator endpoint is used to perform low-level operator tasks for Consul.
+type Operator struct {
+	srv *Server
+}
+
+// RaftGetConfiguration is used to retrieve the current Raft configuration.
+func (op *Operator) RaftGetConfiguration(args *structs.DCSpecificRequest, reply *structs.RaftConfigurationResponse) error {
+	if done, err := op.srv.forward("Operator.RaftGetConfiguration", args, args, reply); done {
+		return err
+	}
+
+	// This action requires operator read access.
+	acl, err := op.srv.resolveToken(args.Token)
+	if err != nil {
+		return err
+	}
+	if acl != nil && !acl.OperatorRead() {
+		return permissionDeniedErr
+	}
+
+	// We can't fetch the leader and the configuration atomically with
+	// the current Raft API.
+	future := op.srv.raft.GetConfiguration()
+	if err := future.Error(); err != nil {
+		return err
+	}
+	reply.Configuration = future.Configuration()
+	leader := op.srv.raft.Leader()
+
+	// Index the configuration so we can easily look up IDs by address.
+	idMap := make(map[raft.ServerAddress]raft.ServerID)
+	for _, s := range reply.Configuration.Servers {
+		idMap[s.Address] = s.ID
+	}
+
+	// Fill out the node map and leader.
+	reply.NodeMap = make(map[raft.ServerID]string)
+	members := op.srv.serfLAN.Members()
+	for _, member := range members {
+		valid, parts := agent.IsConsulServer(member)
+		if !valid {
+			continue
+		}
+
+		// TODO (slackpad) We need to add a Raft API to get the leader by
+		// ID so we don't have to do this mapping.
+		addr := (&net.TCPAddr{IP: member.Addr, Port: parts.Port}).String()
+		if id, ok := idMap[raft.ServerAddress(addr)]; ok {
+			reply.NodeMap[id] = member.Name
+			if leader == raft.ServerAddress(addr) {
+				reply.Leader = id
+			}
+		}
+	}
+	return nil
+}
+
+// RaftRemovePeerByAddress is used to kick a stale peer (one that is in the Raft
+// quorum but no longer known to Serf or the catalog) by address in the form of
+// "IP:port". The reply argument is not used, but it is required to fulfill the
+// RPC interface.
+func (op *Operator) RaftRemovePeerByAddress(args *structs.RaftPeerByAddressRequest, reply *struct{}) error {
+	if done, err := op.srv.forward("Operator.RaftRemovePeerByAddress", args, args, reply); done {
+		return err
+	}
+
+	// This is a super dangerous operation that requires operator write
+	// access.
+	acl, err := op.srv.resolveToken(args.Token)
+	if err != nil {
+		return err
+	}
+	if acl != nil && !acl.OperatorWrite() {
+		return permissionDeniedErr
+	}
+
+	// Since this is an operation designed for humans to use, we will return
+	// an error if the supplied address isn't among the peers since it's
+	// likely they screwed up.
+	{
+		future := op.srv.raft.GetConfiguration()
+		if err := future.Error(); err != nil {
+			return err
+		}
+		for _, s := range future.Configuration().Servers {
+			if s.Address == args.Address {
+				goto REMOVE
+			}
+		}
+		return fmt.Errorf("address %q was not found in the Raft configuration",
+			args.Address)
+	}
+
+REMOVE:
+	// The Raft library itself will prevent various forms of foot-shooting,
+	// like making a configuration with no voters. Some consideration was
+	// given here to adding more checks, but it was decided to make this as
+	// low-level and direct as possible. We've got ACL coverage to lock this
+	// down, and if you are an operator, it's assumed you know what you are
+	// doing if you are calling this. If you remove a peer that's known to
+	// Serf, for example, it will come back when the leader does a reconcile
+	// pass.
+ future := op.srv.raft.RemovePeer(args.Address) + if err := future.Error(); err != nil { + return err + } + + return nil +} diff --git a/consul/operator_endpoint_test.go b/consul/operator_endpoint_test.go new file mode 100644 index 0000000000..c48ff83814 --- /dev/null +++ b/consul/operator_endpoint_test.go @@ -0,0 +1,229 @@ +package consul + +import ( + "fmt" + "os" + "reflect" + "strings" + "testing" + + "github.com/hashicorp/consul/consul/structs" + "github.com/hashicorp/consul/testutil" + "github.com/hashicorp/net-rpc-msgpackrpc" + "github.com/hashicorp/raft" +) + +func TestOperator_RaftGetConfiguration(t *testing.T) { + dir1, s1 := testServer(t) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + codec := rpcClient(t, s1) + defer codec.Close() + + testutil.WaitForLeader(t, s1.RPC, "dc1") + + arg := structs.DCSpecificRequest{ + Datacenter: "dc1", + } + var reply structs.RaftConfigurationResponse + if err := msgpackrpc.CallWithCodec(codec, "Operator.RaftGetConfiguration", &arg, &reply); err != nil { + t.Fatalf("err: %v", err) + } + + future := s1.raft.GetConfiguration() + if err := future.Error(); err != nil { + t.Fatalf("err: %v", err) + } + + expected := structs.RaftConfigurationResponse{ + Configuration: future.Configuration(), + NodeMap: map[raft.ServerID]string{ + raft.ServerID(s1.config.RPCAddr.String()): s1.config.NodeName, + }, + Leader: raft.ServerID(s1.config.RPCAddr.String()), + } + if !reflect.DeepEqual(reply, expected) { + t.Fatalf("bad: %v", reply) + } +} + +func TestOperator_RaftGetConfiguration_ACLDeny(t *testing.T) { + dir1, s1 := testServerWithConfig(t, func(c *Config) { + c.ACLDatacenter = "dc1" + c.ACLMasterToken = "root" + c.ACLDefaultPolicy = "deny" + }) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + codec := rpcClient(t, s1) + defer codec.Close() + + testutil.WaitForLeader(t, s1.RPC, "dc1") + + // Make a request with no token to make sure it gets denied. + arg := structs.DCSpecificRequest{ + Datacenter: "dc1", + } + var reply structs.RaftConfigurationResponse + err := msgpackrpc.CallWithCodec(codec, "Operator.RaftGetConfiguration", &arg, &reply) + if err == nil || !strings.Contains(err.Error(), permissionDenied) { + t.Fatalf("err: %v", err) + } + + // Create an ACL with operator read permissions. + var token string + { + var rules = ` + operator = "read" + ` + + req := structs.ACLRequest{ + Datacenter: "dc1", + Op: structs.ACLSet, + ACL: structs.ACL{ + Name: "User token", + Type: structs.ACLTypeClient, + Rules: rules, + }, + WriteRequest: structs.WriteRequest{Token: "root"}, + } + if err := msgpackrpc.CallWithCodec(codec, "ACL.Apply", &req, &token); err != nil { + t.Fatalf("err: %v", err) + } + } + + // Now it should go through. 
+ arg.Token = token + if err := msgpackrpc.CallWithCodec(codec, "Operator.RaftGetConfiguration", &arg, &reply); err != nil { + t.Fatalf("err: %v", err) + } + + future := s1.raft.GetConfiguration() + if err := future.Error(); err != nil { + t.Fatalf("err: %v", err) + } + + expected := structs.RaftConfigurationResponse{ + Configuration: future.Configuration(), + NodeMap: map[raft.ServerID]string{ + raft.ServerID(s1.config.RPCAddr.String()): s1.config.NodeName, + }, + Leader: raft.ServerID(s1.config.RPCAddr.String()), + } + if !reflect.DeepEqual(reply, expected) { + t.Fatalf("bad: %v", reply) + } +} + +func TestOperator_RaftRemovePeerByAddress(t *testing.T) { + dir1, s1 := testServer(t) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + codec := rpcClient(t, s1) + defer codec.Close() + + testutil.WaitForLeader(t, s1.RPC, "dc1") + + // Try to remove a peer that's not there. + arg := structs.RaftPeerByAddressRequest{ + Datacenter: "dc1", + Address: raft.ServerAddress(fmt.Sprintf("127.0.0.1:%d", getPort())), + } + var reply struct{} + err := msgpackrpc.CallWithCodec(codec, "Operator.RaftRemovePeerByAddress", &arg, &reply) + if err == nil || !strings.Contains(err.Error(), "not found in the Raft configuration") { + t.Fatalf("err: %v", err) + } + + // Add it manually to Raft. + { + future := s1.raft.AddPeer(arg.Address) + if err := future.Error(); err != nil { + t.Fatalf("err: %v", err) + } + } + + // Make sure it's there. + { + future := s1.raft.GetConfiguration() + if err := future.Error(); err != nil { + t.Fatalf("err: %v", err) + } + configuration := future.Configuration() + if len(configuration.Servers) != 2 { + t.Fatalf("bad: %v", configuration) + } + } + + // Remove it, now it should go through. + if err := msgpackrpc.CallWithCodec(codec, "Operator.RaftRemovePeerByAddress", &arg, &reply); err != nil { + t.Fatalf("err: %v", err) + } + + // Make sure it's not there. + { + future := s1.raft.GetConfiguration() + if err := future.Error(); err != nil { + t.Fatalf("err: %v", err) + } + configuration := future.Configuration() + if len(configuration.Servers) != 1 { + t.Fatalf("bad: %v", configuration) + } + } +} + +func TestOperator_RaftRemovePeerByAddress_ACLDeny(t *testing.T) { + dir1, s1 := testServerWithConfig(t, func(c *Config) { + c.ACLDatacenter = "dc1" + c.ACLMasterToken = "root" + c.ACLDefaultPolicy = "deny" + }) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + codec := rpcClient(t, s1) + defer codec.Close() + + testutil.WaitForLeader(t, s1.RPC, "dc1") + + // Make a request with no token to make sure it gets denied. + arg := structs.RaftPeerByAddressRequest{ + Datacenter: "dc1", + Address: raft.ServerAddress(s1.config.RPCAddr.String()), + } + var reply struct{} + err := msgpackrpc.CallWithCodec(codec, "Operator.RaftRemovePeerByAddress", &arg, &reply) + if err == nil || !strings.Contains(err.Error(), permissionDenied) { + t.Fatalf("err: %v", err) + } + + // Create an ACL with operator write permissions. + var token string + { + var rules = ` + operator = "write" + ` + + req := structs.ACLRequest{ + Datacenter: "dc1", + Op: structs.ACLSet, + ACL: structs.ACL{ + Name: "User token", + Type: structs.ACLTypeClient, + Rules: rules, + }, + WriteRequest: structs.WriteRequest{Token: "root"}, + } + if err := msgpackrpc.CallWithCodec(codec, "ACL.Apply", &req, &token); err != nil { + t.Fatalf("err: %v", err) + } + } + + // Now it should kick back for being an invalid config, which means it + // tried to do the operation. 
+ arg.Token = token + err = msgpackrpc.CallWithCodec(codec, "Operator.RaftRemovePeerByAddress", &arg, &reply) + if err == nil || !strings.Contains(err.Error(), "at least one voter") { + t.Fatalf("err: %v", err) + } +} diff --git a/consul/server.go b/consul/server.go index ab240ce45f..509bc32944 100644 --- a/consul/server.go +++ b/consul/server.go @@ -162,15 +162,16 @@ type Server struct { // Holds the RPC endpoints type endpoints struct { - Catalog *Catalog - Health *Health - Status *Status - KVS *KVS - Session *Session - Internal *Internal ACL *ACL + Catalog *Catalog Coordinate *Coordinate + Health *Health + Internal *Internal + KVS *KVS + Operator *Operator PreparedQuery *PreparedQuery + Session *Session + Status *Status Txn *Txn } @@ -496,27 +497,29 @@ func (s *Server) setupRaft() error { // setupRPC is used to setup the RPC listener func (s *Server) setupRPC(tlsWrap tlsutil.DCWrapper) error { // Create endpoints - s.endpoints.Status = &Status{s} - s.endpoints.Catalog = &Catalog{s} - s.endpoints.Health = &Health{s} - s.endpoints.KVS = &KVS{s} - s.endpoints.Session = &Session{s} - s.endpoints.Internal = &Internal{s} s.endpoints.ACL = &ACL{s} + s.endpoints.Catalog = &Catalog{s} s.endpoints.Coordinate = NewCoordinate(s) + s.endpoints.Health = &Health{s} + s.endpoints.Internal = &Internal{s} + s.endpoints.KVS = &KVS{s} + s.endpoints.Operator = &Operator{s} s.endpoints.PreparedQuery = &PreparedQuery{s} + s.endpoints.Session = &Session{s} + s.endpoints.Status = &Status{s} s.endpoints.Txn = &Txn{s} // Register the handlers - s.rpcServer.Register(s.endpoints.Status) - s.rpcServer.Register(s.endpoints.Catalog) - s.rpcServer.Register(s.endpoints.Health) - s.rpcServer.Register(s.endpoints.KVS) - s.rpcServer.Register(s.endpoints.Session) - s.rpcServer.Register(s.endpoints.Internal) s.rpcServer.Register(s.endpoints.ACL) + s.rpcServer.Register(s.endpoints.Catalog) s.rpcServer.Register(s.endpoints.Coordinate) + s.rpcServer.Register(s.endpoints.Health) + s.rpcServer.Register(s.endpoints.Internal) + s.rpcServer.Register(s.endpoints.KVS) + s.rpcServer.Register(s.endpoints.Operator) s.rpcServer.Register(s.endpoints.PreparedQuery) + s.rpcServer.Register(s.endpoints.Session) + s.rpcServer.Register(s.endpoints.Status) s.rpcServer.Register(s.endpoints.Txn) list, err := net.ListenTCP("tcp", s.config.RPCAddr) diff --git a/consul/structs/operator.go b/consul/structs/operator.go new file mode 100644 index 0000000000..83372d1316 --- /dev/null +++ b/consul/structs/operator.go @@ -0,0 +1,42 @@ +package structs + +import ( + "github.com/hashicorp/raft" +) + +// RaftConfigrationResponse is returned when querying for the current Raft +// configuration. This has the low-level Raft structure, as well as some +// supplemental information from Consul. +type RaftConfigurationResponse struct { + // Configuration is the low-level Raft configuration structure. + Configuration raft.Configuration + + // NodeMap maps IDs in the Raft configuration to node names known by + // Consul. It's possible that not all configuration entries may have + // an entry here if the node isn't known to Consul. Given how this is + // generated, this may also contain entries that aren't present in the + // Raft configuration. + NodeMap map[raft.ServerID]string + + // Leader is the ID of the current Raft leader. This may be blank if + // there isn't one. + Leader raft.ServerID +} + +// RaftPeerByAddressRequest is used by the Operator endpoint to apply a Raft +// operation on a specific Raft peer by address in the form of "IP:port". 
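+// The request is forwarded to the leader like other writes, and it requires
+// "operator" write privileges when ACLs are enabled.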
+type RaftPeerByAddressRequest struct {
+	// Datacenter is the target this request is intended for.
+	Datacenter string
+
+	// Address is the peer to remove, in the form "IP:port".
+	Address raft.ServerAddress
+
+	// WriteRequest holds the ACL token to go along with this request.
+	WriteRequest
+}
+
+// RequestDatacenter returns the datacenter for a given request.
+func (op *RaftPeerByAddressRequest) RequestDatacenter() string {
+	return op.Datacenter
+}
diff --git a/website/source/docs/agent/http/operator.html.markdown b/website/source/docs/agent/http/operator.html.markdown
new file mode 100644
index 0000000000..f13e5c4b62
--- /dev/null
+++ b/website/source/docs/agent/http/operator.html.markdown
@@ -0,0 +1,48 @@
+---
+layout: "docs"
+page_title: "Operator (HTTP)"
+sidebar_current: "docs-agent-http-operator"
+description: >
+  The operator endpoint provides cluster-level tools for Consul operators.
+---
+
+# Operator HTTP Endpoint
+
+The Operator endpoints provide cluster-level tools for Consul operators, such
+as interacting with the Raft subsystem. This was added in Consul 0.7.
+
+~> Use this interface with extreme caution, as improper use could lead to a Consul
+   outage and even loss of data.
+
+If ACLs are enabled then a token with operator privileges may be required in
+order to use this interface. See the [ACL](/docs/internals/acl.html#operator)
+internals guide for more information.
+
+See the [Outage Recovery](/docs/guides/outage.html) guide for some examples of how
+these capabilities are used. For a CLI to perform these operations manually, please
+see the documentation for the [`consul operator`](/docs/commands/operator.html)
+command.
+
+The following endpoints are supported:
+
+* [`/v1/operator/raft/configuration`](#raft-configuration): Inspects the Raft configuration
+* [`/v1/operator/raft/peer`](#raft-peer): Operates on Raft peers
+
+Not all endpoints support blocking queries and all consistency modes;
+see details in the sections below.
+
+The operator endpoints support the use of ACL Tokens. See the
+[ACL](/docs/internals/acl.html#operator) internals guide for more information.
+
+### /v1/operator/raft/configuration
+
+The Raft configuration endpoint supports the `GET` method.
+
+#### GET Method
+
+### /v1/operator/raft/peer
+
+The Raft peer endpoint supports the `DELETE` method.
+
+#### DELETE Method
+
diff --git a/website/source/docs/commands/index.html.markdown b/website/source/docs/commands/index.html.markdown
index e466a94b44..7e0302754d 100644
--- a/website/source/docs/commands/index.html.markdown
+++ b/website/source/docs/commands/index.html.markdown
@@ -38,6 +38,7 @@ Available commands are:
     lock           Execute a command holding a lock
     members        Lists the members of a Consul cluster
     monitor        Stream logs from a Consul agent
+    operator       Provides cluster-level tools for Consul operators
     reload         Triggers the agent to reload configuration files
     rtt            Estimates network round trip time between nodes
     version        Prints the Consul version
diff --git a/website/source/docs/commands/operator.html.markdown b/website/source/docs/commands/operator.html.markdown
new file mode 100644
index 0000000000..542cfee81f
--- /dev/null
+++ b/website/source/docs/commands/operator.html.markdown
@@ -0,0 +1,100 @@
+---
+layout: "docs"
+page_title: "Commands: Operator"
+sidebar_current: "docs-commands-operator"
+description: >
+  The operator command provides cluster-level tools for Consul operators.
+---
+
+# Consul Operator
+
+Command: `consul operator`
+
+The `operator` command provides cluster-level tools for Consul operators, such
+as interacting with the Raft subsystem. This was added in Consul 0.7.
+
+~> Use this command with extreme caution, as improper use could lead to a Consul
+   outage and even loss of data.
+
+If ACLs are enabled then a token with operator privileges may be required in
+order to use this command. Requests are forwarded internally to the leader
+if required, so this can be run from any Consul node in a cluster. See the
+[ACL](/docs/internals/acl.html#operator) internals guide for more information.
+
+See the [Outage Recovery](/docs/guides/outage.html) guide for some examples of how
+this command is used. For an API to perform these operations programmatically,
+please see the documentation for the [Operator](/docs/agent/http/operator.html)
+endpoint.
+
+## Usage
+
+Usage: `consul operator <subcommand> [common options] [action] [options]`
+
+Run `consul operator <subcommand>` with no arguments for help on that
+subcommand. The following subcommands are available:
+
+* `raft` - View and modify Consul's Raft configuration.
+
+Options common to all subcommands include:
+
+* `-http-addr` - Address to the HTTP server of the agent you want to contact
+  to send this command. If this isn't specified, the command will contact
+  "127.0.0.1:8500" which is the default HTTP address of a Consul agent.
+
+* `-token` - ACL token to use. Defaults to that of agent.
+
+## Raft Operations
+
+The `raft` subcommand is used to view and modify Consul's Raft configuration.
+Two actions are available, as detailed in this section.
+
+`raft -list-peers -stale=[true|false]`
+
+This action displays the current Raft peer configuration.
+
+The `-stale` argument defaults to "false" which means the leader provides the
+result. If the cluster is in an outage state without a leader, you may need
+to set `-stale` to "true" to get the configuration from a non-leader server.
+
+The output looks like this:
+
+```
+Node  ID              Address         State     Voter
+alice 127.0.0.1:8300  127.0.0.1:8300  follower  true
+bob   127.0.0.2:8300  127.0.0.2:8300  leader    true
+carol 127.0.0.3:8300  127.0.0.3:8300  follower  true
+```
+
+* `Node` is the node name of the server, as known to Consul, or "(unknown)" if
+  the node is stale at not known.
+
+* `ID` is the ID of the server. This is the same as the `Address` in Consul 0.7
+  but may be upgraded to a GUID in a future version of Consul.
+
+* `Address` is the IP:port for the server.
+
+* `State` is either "follower" or "leader" depending on the server's role in the
+  Raft configuration.
+
+* `Voter` is "true" or "false", indicating if the server has a vote in the Raft
+  configuration. Future versions of Consul may add support for non-voting
+  servers.
+
+`raft -remove-peer -address="IP:port"`
+
+This command removes the Consul server with the given -address from the Raft
+configuration.
+
+The `-address` argument is required and is the "IP:port" for the server to
+remove. The port number is usually 8300, unless configured otherwise.
+
+There are rare cases where a peer may be left behind in the Raft quorum even
+though the server is no longer present and known to the cluster. This command
+can be used to remove the failed server so that it no longer affects the
+Raft quorum. If the server still shows in the output of the
+[`consul members`](/docs/commands/members.html) command, it is preferable to
+clean up by simply running
+[`consul force-leave`](/docs/commands/force-leave.html)
+instead of this command.

From 0558b9c44b35b22c21d37f68c01525b6cc6df990 Mon Sep 17 00:00:00 2001
From: James Phillips
Date: Tue, 30 Aug 2016 00:10:46 -0700
Subject: [PATCH 05/17] Fixes a stale comment.

---
 api/operator.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/operator.go b/api/operator.go
index b7389f0f59..8d74da3643 100644
--- a/api/operator.go
+++ b/api/operator.go
@@ -58,7 +58,7 @@ func (op *Operator) RaftRemovePeerByAddress(address raft.ServerAddress, q *Write
 	r.setWriteOptions(q)
 
 	// TODO (slackpad) Currently we made address a query parameter. Once
-	// IDs are in place this will be DELETE /v1/raft-peer/.
+	// IDs are in place this will be DELETE /v1/operator/raft/peer/.
 	r.params.Set("address", string(address))
 
 	_, resp, err := requireOK(op.c.doRequest(r))

From 3f16142b4090eaf89ef7825466df6943971c3aa8 Mon Sep 17 00:00:00 2001
From: James Phillips
Date: Tue, 30 Aug 2016 10:23:32 -0700
Subject: [PATCH 06/17] Adds a log warning when operator peer changes occur.

---
 consul/operator_endpoint.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/consul/operator_endpoint.go b/consul/operator_endpoint.go
index a33c07a781..dac7ff748f 100644
--- a/consul/operator_endpoint.go
+++ b/consul/operator_endpoint.go
@@ -113,8 +113,11 @@ REMOVE:
 	// pass.
 	future := op.srv.raft.RemovePeer(args.Address)
 	if err := future.Error(); err != nil {
+		op.srv.logger.Printf("[WARN] consul.operator: Failed to remove Raft peer %q: %v",
+			args.Address, err)
 		return err
 	}
 
+	op.srv.logger.Printf("[WARN] consul.operator: Removed Raft peer %q by", args.Address)
 	return nil
 }

From 3c9188c38b07e3179f732f9f86a1ad3b3bcec865 Mon Sep 17 00:00:00 2001
From: James Phillips
Date: Tue, 30 Aug 2016 11:30:56 -0700
Subject: [PATCH 07/17] Makes the Raft configuration API easier to consume.

---
 api/operator.go                         | 42 +++++++++++++++--------
 api/operator_test.go                    |  6 ++--
 command/agent/operator_endpoint_test.go |  6 ++--
 command/operator.go                     | 11 ++-----
 consul/operator_endpoint.go             | 44 ++++++++++++++-----------
 consul/operator_endpoint_test.go        | 36 ++++++++++++++------
 consul/structs/operator.go              | 43 ++++++++++++++++--------
 7 files changed, 116 insertions(+), 72 deletions(-)

diff --git a/api/operator.go b/api/operator.go
index 8d74da3643..b39a015be3 100644
--- a/api/operator.go
+++ b/api/operator.go
@@ -14,23 +14,37 @@ func (c *Client) Operator() *Operator {
 	return &Operator{c}
 }
 
+// RaftServer has information about a server in the Raft configuration.
+type RaftServer struct {
+	// ID is the unique ID for the server. These are currently the same
+	// as the address, but they will be changed to a real GUID in a future
+	// release of Consul.
+	ID raft.ServerID
+
+	// Node is the node name of the server, as known by Consul, or this
+	// will be set to "(unknown)" otherwise.
+	Node string
+
+	// Address is the IP:port of the server, used for Raft communications.
+	Address raft.ServerAddress
+
+	// Leader is true if this server is the current cluster leader.
+ Leader bool + + // Voter is true if this server has a vote in the cluster. This might + // be false if the server is staging and still coming online, or if + // it's a non-voting server, which will be added in a future release of + // Consul. + Voter bool +} + // RaftConfigration is returned when querying for the current Raft configuration. -// This has the low-level Raft structure, as well as some supplemental -// information from Consul. type RaftConfiguration struct { - // Configuration is the low-level Raft configuration structure. - Configuration raft.Configuration + // Servers has the list of servers in the Raft configuration. + Servers []*RaftServer - // NodeMap maps IDs in the Raft configuration to node names known by - // Consul. It's possible that not all configuration entries may have - // an entry here if the node isn't known to Consul. Given how this is - // generated, this may also contain entries that aren't present in the - // Raft configuration. - NodeMap map[raft.ServerID]string - - // Leader is the ID of the current Raft leader. This may be blank if - // there isn't one. - Leader raft.ServerID + // Index has the Raft index of this configuration. + Index uint64 } // RaftGetConfiguration is used to query the current Raft peer set. diff --git a/api/operator_test.go b/api/operator_test.go index a0d8af69e2..f9d242b810 100644 --- a/api/operator_test.go +++ b/api/operator_test.go @@ -15,9 +15,9 @@ func TestOperator_RaftGetConfiguration(t *testing.T) { if err != nil { t.Fatalf("err: %v", err) } - if len(out.Configuration.Servers) != 1 || - len(out.NodeMap) != 1 || - len(out.Leader) == 0 { + if len(out.Servers) != 1 || + !out.Servers[0].Leader || + !out.Servers[0].Voter { t.Fatalf("bad: %v", out) } } diff --git a/command/agent/operator_endpoint_test.go b/command/agent/operator_endpoint_test.go index 8e3ebe7200..bc9b51ad4e 100644 --- a/command/agent/operator_endpoint_test.go +++ b/command/agent/operator_endpoint_test.go @@ -30,9 +30,9 @@ func TestOperator_OperatorRaftConfiguration(t *testing.T) { if !ok { t.Fatalf("unexpected: %T", obj) } - if len(out.Configuration.Servers) != 1 || - len(out.NodeMap) != 1 || - len(out.Leader) == 0 { + if len(out.Servers) != 1 || + !out.Servers[0].Leader || + !out.Servers[0].Voter { t.Fatalf("bad: %v", out) } }) diff --git a/command/operator.go b/command/operator.go index d1a6d8d3b3..7d69739243 100644 --- a/command/operator.go +++ b/command/operator.go @@ -140,18 +140,13 @@ func (c *OperatorCommand) raft(args []string) error { // Format it as a nice table. result := []string{"Node|ID|Address|State|Voter"} - for _, s := range reply.Configuration.Servers { - node := "(unknown)" - if mappedNode, ok := reply.NodeMap[s.ID]; ok { - node = mappedNode - } + for _, s := range reply.Servers { state := "follower" - if s.ID == reply.Leader { + if s.Leader { state = "leader" } - voter := s.Suffrage == raft.Voter result = append(result, fmt.Sprintf("%s|%s|%s|%s|%v", - node, s.ID, s.Address, state, voter)) + s.Node, s.ID, s.Address, state, s.Voter)) } c.Ui.Output(columnize.SimpleFormat(result)) } else if removePeer { diff --git a/consul/operator_endpoint.go b/consul/operator_endpoint.go index dac7ff748f..2add169a27 100644 --- a/consul/operator_endpoint.go +++ b/consul/operator_endpoint.go @@ -7,6 +7,7 @@ import ( "github.com/hashicorp/consul/consul/agent" "github.com/hashicorp/consul/consul/structs" "github.com/hashicorp/raft" + "github.com/hashicorp/serf/serf" ) // Operator endpoint is used to perform low-level operator tasks for Consul. 
@@ -35,33 +36,36 @@ func (op *Operator) RaftGetConfiguration(args *structs.DCSpecificRequest, reply if err := future.Error(); err != nil { return err } - reply.Configuration = future.Configuration() - leader := op.srv.raft.Leader() - // Index the configuration so we can easily look up IDs by address. - idMap := make(map[raft.ServerAddress]raft.ServerID) - for _, s := range reply.Configuration.Servers { - idMap[s.Address] = s.ID - } - - // Fill out the node map and leader. - reply.NodeMap = make(map[raft.ServerID]string) - members := op.srv.serfLAN.Members() - for _, member := range members { + // Index the Consul information about the servers. + serverMap := make(map[raft.ServerAddress]*serf.Member) + for _, member := range op.srv.serfLAN.Members() { valid, parts := agent.IsConsulServer(member) if !valid { continue } - // TODO (slackpad) We need to add a Raft API to get the leader by - // ID so we don't have to do this mapping. addr := (&net.TCPAddr{IP: member.Addr, Port: parts.Port}).String() - if id, ok := idMap[raft.ServerAddress(addr)]; ok { - reply.NodeMap[id] = member.Name - if leader == raft.ServerAddress(addr) { - reply.Leader = id - } + serverMap[raft.ServerAddress(addr)] = &member + } + + // Fill out the reply. + leader := op.srv.raft.Leader() + reply.Index = future.Index() + for _, server := range future.Configuration().Servers { + node := "(unknown)" + if member, ok := serverMap[server.Address]; ok { + node = member.Name } + + entry := &structs.RaftServer{ + ID: server.ID, + Node: node, + Address: server.Address, + Leader: server.Address == leader, + Voter: server.Suffrage == raft.Voter, + } + reply.Servers = append(reply.Servers, entry) } return nil } @@ -118,6 +122,6 @@ REMOVE: return err } - op.srv.logger.Printf("[WARN] consul.operator: Removed Raft peer %q by", args.Address) + op.srv.logger.Printf("[WARN] consul.operator: Removed Raft peer %q", args.Address) return nil } diff --git a/consul/operator_endpoint_test.go b/consul/operator_endpoint_test.go index c48ff83814..6fcc1bc7de 100644 --- a/consul/operator_endpoint_test.go +++ b/consul/operator_endpoint_test.go @@ -34,13 +34,21 @@ func TestOperator_RaftGetConfiguration(t *testing.T) { if err := future.Error(); err != nil { t.Fatalf("err: %v", err) } - + if len(future.Configuration().Servers) != 1 { + t.Fatalf("bad: %v", future.Configuration().Servers) + } + me := future.Configuration().Servers[0] expected := structs.RaftConfigurationResponse{ - Configuration: future.Configuration(), - NodeMap: map[raft.ServerID]string{ - raft.ServerID(s1.config.RPCAddr.String()): s1.config.NodeName, + Servers: []*structs.RaftServer{ + &structs.RaftServer{ + ID: me.ID, + Node: s1.config.NodeName, + Address: me.Address, + Leader: true, + Voter: true, + }, }, - Leader: raft.ServerID(s1.config.RPCAddr.String()), + Index: future.Index(), } if !reflect.DeepEqual(reply, expected) { t.Fatalf("bad: %v", reply) @@ -102,13 +110,21 @@ func TestOperator_RaftGetConfiguration_ACLDeny(t *testing.T) { if err := future.Error(); err != nil { t.Fatalf("err: %v", err) } - + if len(future.Configuration().Servers) != 1 { + t.Fatalf("bad: %v", future.Configuration().Servers) + } + me := future.Configuration().Servers[0] expected := structs.RaftConfigurationResponse{ - Configuration: future.Configuration(), - NodeMap: map[raft.ServerID]string{ - raft.ServerID(s1.config.RPCAddr.String()): s1.config.NodeName, + Servers: []*structs.RaftServer{ + &structs.RaftServer{ + ID: me.ID, + Node: s1.config.NodeName, + Address: me.Address, + Leader: true, + Voter: true, + }, }, 
- Leader: raft.ServerID(s1.config.RPCAddr.String()), + Index: future.Index(), } if !reflect.DeepEqual(reply, expected) { t.Fatalf("bad: %v", reply) diff --git a/consul/structs/operator.go b/consul/structs/operator.go index 83372d1316..d564400bf9 100644 --- a/consul/structs/operator.go +++ b/consul/structs/operator.go @@ -4,23 +4,38 @@ import ( "github.com/hashicorp/raft" ) +// RaftServer has information about a server in the Raft configuration. +type RaftServer struct { + // ID is the unique ID for the server. These are currently the same + // as the address, but they will be changed to a real GUID in a future + // release of Consul. + ID raft.ServerID + + // Node is the node name of the server, as known by Consul, or this + // will be set to "(unknown)" otherwise. + Node string + + // Address is the IP:port of the server, used for Raft communications. + Address raft.ServerAddress + + // Leader is true if this server is the current cluster leader. + Leader bool + + // Voter is true if this server has a vote in the cluster. This might + // be false if the server is staging and still coming online, or if + // it's a non-voting server, which will be added in a future release of + // Consul. + Voter bool +} + // RaftConfigrationResponse is returned when querying for the current Raft -// configuration. This has the low-level Raft structure, as well as some -// supplemental information from Consul. +// configuration. type RaftConfigurationResponse struct { - // Configuration is the low-level Raft configuration structure. - Configuration raft.Configuration + // Servers has the list of servers in the Raft configuration. + Servers []*RaftServer - // NodeMap maps IDs in the Raft configuration to node names known by - // Consul. It's possible that not all configuration entries may have - // an entry here if the node isn't known to Consul. Given how this is - // generated, this may also contain entries that aren't present in the - // Raft configuration. - NodeMap map[raft.ServerID]string - - // Leader is the ID of the current Raft leader. This may be blank if - // there isn't one. - Leader raft.ServerID + // Index has the Raft index of this configuration. + Index uint64 } // RaftPeerByAddressRequest is used by the Operator endpoint to apply a Raft From 2c9885d10d5ca5709dae8e93af364723ab4c7af6 Mon Sep 17 00:00:00 2001 From: James Phillips Date: Tue, 30 Aug 2016 13:15:37 -0700 Subject: [PATCH 08/17] Updates documentation with details on the Consul operator actions. --- .../docs/agent/http/operator.html.markdown | 86 ++++++++++- .../docs/commands/operator.html.markdown | 50 ++++--- .../source/docs/guides/outage.html.markdown | 138 +++++++++--------- .../source/docs/internals/acl.html.markdown | 36 ++++- 4 files changed, 213 insertions(+), 97 deletions(-) diff --git a/website/source/docs/agent/http/operator.html.markdown b/website/source/docs/agent/http/operator.html.markdown index f13e5c4b62..763625c703 100644 --- a/website/source/docs/agent/http/operator.html.markdown +++ b/website/source/docs/agent/http/operator.html.markdown @@ -8,7 +8,7 @@ description: > # Operator HTTP Endpoint -The Operator endpoints provide cluster-level tools for Consul operators, such +The Operator endpoint provides cluster-level tools for Consul operators, such as interacting with the Raft subsystem. This was added in Consul 0.7. ~> Use this interface with extreme caution, as improper use could lead to a Consul @@ -40,9 +40,93 @@ The Raft configuration endpoint supports the `GET` method. 
#### GET Method
 
+When using the `GET` method, the request will be forwarded to the cluster
+leader to retrieve its latest Raft peer configuration.
+
+If the cluster doesn't currently have a leader, an error will be returned. You
+can use the "?stale" query parameter to read the Raft configuration from any
+of the Consul servers.
+
+By default, the datacenter of the agent is queried; however, the `dc` can be
+provided using the "?dc=" query parameter.
+
+If ACLs are enabled, the client will need to supply an ACL Token with
+[`operator`](/docs/internals/acl.html#operator) read privileges.
+
+A JSON body is returned that looks like this:
+
+```javascript
+{
+  "Servers": [
+    {
+      "ID": "127.0.0.1:8300",
+      "Node": "alice",
+      "Address": "127.0.0.1:8300",
+      "Leader": true,
+      "Voter": true
+    },
+    {
+      "ID": "127.0.0.2:8300",
+      "Node": "bob",
+      "Address": "127.0.0.2:8300",
+      "Leader": false,
+      "Voter": true
+    },
+    {
+      "ID": "127.0.0.3:8300",
+      "Node": "carol",
+      "Address": "127.0.0.3:8300",
+      "Leader": false,
+      "Voter": true
+    }
+  ],
+  "Index": 22
+}
+```
+
+The `Servers` array has information about the servers in the Raft peer
+configuration:
+
+`ID` is the ID of the server. This is the same as the `Address` in Consul 0.7
+but may be upgraded to a GUID in a future version of Consul.
+
+`Node` is the node name of the server, as known to Consul, or "(unknown)" if
+the node is stale and not known.
+
+`Address` is the IP:port for the server.
+
+`Leader` is either "true" or "false" depending on the server's role in the
+Raft configuration.
+
+`Voter` is "true" or "false", indicating if the server has a vote in the Raft
+configuration. Future versions of Consul may add support for non-voting servers.
+
+The `Index` value is the Raft index corresponding to this configuration. Note
+that the latest configuration may not yet be committed if changes are in flight.
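+
+As an example, the configuration could be fetched with any HTTP client; this
+sketch assumes a local agent listening on Consul's default HTTP port:
+
+```
+$ curl http://127.0.0.1:8500/v1/operator/raft/configuration
+```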
+
 ### /v1/operator/raft/peer
 
 The Raft peer endpoint supports the `DELETE` method.
 
 #### DELETE Method
 
+Using the `DELETE` method, this endpoint will remove the Consul server with
+the given address from the Raft configuration.
+
+There are rare cases where a peer may be left behind in the Raft configuration
+even though the server is no longer present and known to the cluster. This
+endpoint can be used to remove the failed server so that it no longer
+affects the Raft quorum.
+
+An "?address=" query parameter is required and should be set to the
+"IP:port" for the server to remove. The port number is usually 8300, unless
+configured otherwise. Nothing is required in the body of the request.
+
+By default, the datacenter of the agent is targeted; however, the `dc` can be
+provided using the "?dc=" query parameter.
+
+If ACLs are enabled, the client will need to supply an ACL Token with
+[`operator`](/docs/internals/acl.html#operator) write privileges.
+
+The return code will indicate success or failure.
+
diff --git a/website/source/docs/commands/operator.html.markdown b/website/source/docs/commands/operator.html.markdown
index 542cfee81f..d9acf8cbc2 100644
--- a/website/source/docs/commands/operator.html.markdown
+++ b/website/source/docs/commands/operator.html.markdown
@@ -49,13 +49,14 @@ The `raft` subcommand is used to view and modify Consul's Raft
 configuration. Two actions are available, as detailed in this section.
 
-`raft -list-peers -stale=[true|false]`
-
+#### Display Peer Configuration
 This action displays the current Raft peer configuration.
 
-The `-stale` argument defaults to "false" which means the leader provides the
-result. If the cluster is in an outage state without a leader, you may need
-to set `-stale` to "true" to get the configuration from a non-leader server.
+Usage: `raft -list-peers -stale=[true|false]`
+
+* `-stale` - Optional and defaults to "false", which means the leader provides
+the result. If the cluster is in an outage state without a leader, you may need
+to set this to "true" to get the configuration from a non-leader server.
 
 The output looks like this:
 
@@ -66,35 +67,36 @@ bob 127.0.0.2:8300 127.0.0.2:8300 leader true
 carol 127.0.0.3:8300 127.0.0.3:8300 follower true
 ```
 
-* `Node` is the node name of the server, as known to Consul, or "(unknown)" if
-  the node is stale at not known.
+`Node` is the node name of the server, as known to Consul, or "(unknown)" if
+the node is stale and not known.
 
-* `ID` is the ID of the server. This is the same as the `Address` in Consul 0.7
-  but may be upgraded to a GUID in a future version of Consul.
+`ID` is the ID of the server. This is the same as the `Address` in Consul 0.7
+but may be upgraded to a GUID in a future version of Consul.
 
-* `Address` is the IP:port for the server.
+`Address` is the IP:port for the server.
 
-* `State` is either "follower" or "leader" depending on the server's role in the
-  Raft configuration.
+`State` is either "follower" or "leader" depending on the server's role in the
+Raft configuration.
 
-* `Voter` is "true" or "false", indicating if the server has a vote in the Raft
-  configuration. Future versions of Consul may add support for non-voting
-  servers.
+`Voter` is "true" or "false", indicating if the server has a vote in the Raft
+configuration. Future versions of Consul may add support for non-voting servers.
 
-`raft -remove-peer -address="IP:port"`
+#### Remove a Peer
+This command removes the Consul server with the given address from the Raft
+configuration.
 
-This command removes Consul server with given -address from the Raft
-configuration.
-
-The `-address` argument is required and is the "IP:port" for the server to
-remove. The port number is usually 8300, unless configured otherwise.
-
-There are rare cases where a peer may be left behind in the Raft quorum even
-though the server is no longer present and known to the cluster. This command
+There are rare cases where a peer may be left behind in the Raft configuration
+even though the server is no longer present and known to the cluster. This command
 can be used to remove the failed server so that it no longer affects the
 Raft quorum. If the server still shows in the output of the
 [`consul members`](/docs/commands/members.html) command, it is preferable to
 clean up by simply running
 [`consul force-leave`](/docs/commands/force-leave.html)
 instead of this command.
+
+Usage: `raft -remove-peer -address="IP:port"`
+
+* `-address` - "IP:port" for the server to remove. The port number is usually
+8300, unless configured otherwise.
+
+The return code will indicate success or failure.
diff --git a/website/source/docs/guides/outage.html.markdown b/website/source/docs/guides/outage.html.markdown
index 15bb657a12..b82e1bf6ac 100644
--- a/website/source/docs/guides/outage.html.markdown
+++ b/website/source/docs/guides/outage.html.markdown
@@ -38,20 +38,72 @@ comes online as agents perform
 [anti-entropy](/docs/internals/anti-entropy.html).
 
 ## Failure of a Server in a Multi-Server Cluster
 
 If you think the failed server is recoverable, the easiest option is to bring
-it back online and have it rejoin the cluster, returning the cluster to a fully
-healthy state.
Similarly, even if you need to rebuild a new Consul server to
-replace the failed node, you may wish to do that immediately. Keep in mind that
-the rebuilt server needs to have the same IP as the failed server. Again, once
-this server is online, the cluster will return to a fully healthy state.
+it back online and have it rejoin the cluster with the same IP address, returning
+the cluster to a fully healthy state. Similarly, even if you need to rebuild a
+new Consul server to replace the failed node, you may wish to do that immediately.
+Keep in mind that the rebuilt server needs to have the same IP address as the failed
+server. Again, once this server is online and has rejoined, the cluster will return
+to a fully healthy state.
 
 Both of these strategies involve a potentially lengthy time to reboot or rebuild
 a failed server. If this is impractical or if building a new server with the same
 IP isn't an option, you need to remove the failed server. Usually, you can issue
-a [`force-leave`](/docs/commands/force-leave.html) command to remove the failed
+a [`consul force-leave`](/docs/commands/force-leave.html) command to remove the failed
 server if it's still a member of the cluster.
 
-If the `force-leave` isn't able to remove the server, you can remove it manually
-using the `raft/peers.json` recovery file on all remaining servers.
+If [`consul force-leave`](/docs/commands/force-leave.html) isn't able to remove the
+server, you have two methods available to remove it, depending on your version of Consul:
+
+* In Consul 0.7 and later, you can use the [`consul operator`](/docs/commands/operator.html#raft-remove-peer)
+command to remove the stale peer server on the fly with no downtime.
+
+* In versions of Consul prior to 0.7, you can manually remove the stale peer
+server using the `raft/peers.json` recovery file on all remaining servers. See
+the [section below](#peers.json) for details on this procedure. This process
+requires Consul downtime to complete.
+
+In Consul 0.7 and later, you can use the [`consul operator`](/docs/commands/operator.html#raft-list-peers)
+command to inspect the Raft configuration:
+
+```
+$ consul operator raft -list-peers
+Node  ID             Address        State     Voter
+alice 10.0.1.8:8300  10.0.1.8:8300  follower  true
+bob   10.0.1.6:8300  10.0.1.6:8300  leader    true
+carol 10.0.1.7:8300  10.0.1.7:8300  follower  true
+```
+
+## Failure of Multiple Servers in a Multi-Server Cluster
+
+In the event that multiple servers are lost, causing a loss of quorum and a
+complete outage, partial recovery is possible using data on the remaining
+servers in the cluster. There may be data loss in this situation because multiple
+servers were lost, so information about what's committed could be incomplete.
+The recovery process implicitly commits all outstanding Raft log entries, so
+it's also possible to commit data that was uncommitted before the failure.
+
+See the [section below](#peers.json) for details of the recovery procedure. You
+simply include the remaining servers in the `raft/peers.json` recovery file.
+The cluster should be able to elect a leader once the remaining servers are all
+restarted with an identical `raft/peers.json` configuration.
+
+Any new servers you introduce later can be fresh with totally clean data directories
+and joined using Consul's `join` command.
+
+In extreme cases, it should be possible to recover with just a single remaining
+server by starting that single server with itself as the only peer in the
+`raft/peers.json` recovery file.
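+
+For example, with a hypothetical lone surviving server at 10.0.1.8, the
+`raft/peers.json` recovery file would contain just that one entry:
+
+```javascript
+[
+"10.0.1.8:8300"
+]
+```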
+ +Note that prior to Consul 0.7 it wasn't always possible to recover from certain +types of outages with `raft/peers.json` because this was ingested before any Raft +log entries were played back. In Consul 0.7 and later, the `raft/peers.json` +recovery file is final, and a snapshot is taken after it is ingested, so you are +guaranteed to start with your recovered configuration. This does implicitly commit +all Raft log entries, so should only be used to recover from an outage, but it +should allow recovery from any situation where there's some cluster data available. + + +## Manual Recovery Using peers.json To begin, stop all remaining servers. You can attempt a graceful leave, but it will not work in most cases. Do not worry if the leave exits with an @@ -70,11 +122,6 @@ implicitly committed, so this should only be used after an outage where no other option is available to recover a lost server. Make sure you don't have any automated processes that will put the peers file in place on a periodic basis, for example. -
-
-When the final version of Consul 0.7 ships, it should include a command to
-remove a dead peer without having to stop servers and edit the `raft/peers.json`
-recovery file.
 
 The next step is to go to the
 [`-data-dir`](/docs/agent/options.html#_data_dir) of each Consul server. Inside
 that directory, there will be a `raft/` sub-directory. We need to create a
 `raft/peers.json` file. It should look something like:
 
 ```javascript
 [
-  "10.0.1.8:8300",
-  "10.0.1.6:8300",
-  "10.0.1.7:8300"
+"10.0.1.8:8300",
+"10.0.1.6:8300",
+"10.0.1.7:8300"
 ]
 ```
@@ -126,56 +173,13 @@ nodes should claim leadership and emit a log like:
 
 [INFO] consul: cluster leadership acquired
 ```
 
-Additionally, the [`info`](/docs/commands/info.html) command can be a useful
-debugging tool:
+In Consul 0.7 and later, you can use the [`consul operator`](/docs/commands/operator.html#raft-list-peers)
+command to inspect the Raft configuration:
 
-```text
-$ consul info
-...
-raft:
-	applied_index = 47244
-	commit_index = 47244
-	fsm_pending = 0
-	last_log_index = 47244
-	last_log_term = 21
-	last_snapshot_index = 40966
-	last_snapshot_term = 20
-	num_peers = 2
-	state = Leader
-	term = 21
-...
 ```
-
-You should verify that one server claims to be the `Leader` and all the
-others should be in the `Follower` state. All the nodes should agree on the
-peer count as well. This count is (N-1), since a server does not count itself
-as a peer.
-
-## Failure of Multiple Servers in a Multi-Server Cluster
-
-In the event that multiple servers are lost, causing a loss of quorum and a
-complete outage, partial recovery is possible using data on the remaining
-servers in the cluster. There may be data loss in this situation because multiple
-servers were lost, so information about what's committed could be incomplete.
-The recovery process implicitly commits all outstanding Raft log entries, so
-it's also possible to commit data that was uncommitted before the failure.
-
-The procedure is the same as for the single-server case above; you simply include
-just the remaining servers in the `raft/peers.json` recovery file. The cluster
-should be able to elect a leader once the remaining servers are all restarted with
-an identical `raft/peers.json` configuration.
-
-Any new servers you introduce later can be fresh with totally clean data directories
-and joined using Consul's `join` command.
-
-In extreme cases, it should be possible to recover with just a single remaining
-server by starting that single server with itself as the only peer in the
-`raft/peers.json` recovery file.
-
-Note that prior to Consul 0.7 it wasn't always possible to recover from certain
-types of outages with `raft/peers.json` because this was ingested before any Raft
-log entries were played back. In Consul 0.7 and later, the `raft/peers.json`
-recovery file is final, and a snapshot is taken after it is ingested, so you are
-guaranteed to start with your recovered configuration. This does implicitly commit
-all Raft log entries, so should only be used to recover from an outage, but it
-should allow recovery from any situation where there's some cluster data available.
+$ consul operator raft -list-peers
+Node  ID             Address        State     Voter
+alice 10.0.1.8:8300  10.0.1.8:8300  follower  true
+bob   10.0.1.6:8300  10.0.1.6:8300  leader    true
+carol 10.0.1.7:8300  10.0.1.7:8300  follower  true
+```
diff --git a/website/source/docs/internals/acl.html.markdown b/website/source/docs/internals/acl.html.markdown
index 527b366fbf..78ba000e7c 100644
--- a/website/source/docs/internals/acl.html.markdown
+++ b/website/source/docs/internals/acl.html.markdown
@@ -210,6 +210,9 @@ query "" {
 
 # Read-only mode for the encryption keyring by default (list only)
 keyring = "read"
+
+# Read-only mode for Consul operator interfaces
+operator = "read"
 ```
 
 This is equivalent to the following JSON input:
@@ -248,13 +251,14 @@ This is equivalent to the following JSON input:
     "policy": "read"
   }
 },
-  "keyring": "read"
+  "keyring": "read",
+  "operator": "read"
 }
 ```
 
 ## Building ACL Policies
 
-#### Blacklist mode and `consul exec`
+#### Blacklist Mode and `consul exec`
 
 If you set [`acl_default_policy`](/docs/agent/options.html#acl_default_policy)
 to `deny`, the `anonymous` token won't have permission to read the default
@@ -279,7 +283,7 @@ Alternatively, you can, of course, add an explicit
 [`acl_token`](/docs/agent/options.html#acl_token) to each agent, giving it
 access to that prefix.
 
-#### Blacklist mode and Service Discovery
+#### Blacklist Mode and Service Discovery
 
 If your [`acl_default_policy`](/docs/agent/options.html#acl_default_policy) is
 set to `deny`, the `anonymous` token will be unable to read any service
@@ -327,12 +331,12 @@ event "" {
 As always, the more secure way to handle user events is to explicitly grant
 access to each API token based on the events they should be able to fire.
 
-#### Blacklist mode and Prepared Queries
+#### Blacklist Mode and Prepared Queries
 
 After Consul 0.6.3, significant changes were made to ACLs for prepared queries,
 including a new `query` ACL policy. See
 [Prepared Query ACLs](#prepared_query_acls) below for more details.
 
-#### Blacklist mode and Keyring Operations
+#### Blacklist Mode and Keyring Operations
 
 Consul 0.6 and later supports securing the encryption keyring operations using
 ACLs. Encryption is an optional component of the gossip layer. More information
@@ -353,6 +357,28 @@ Encryption keyring operations are sensitive and should be properly secured. It
 is recommended that instead of configuring a wide-open policy like above, a
 per-token policy is applied to maximize security.
 
+
+#### Blacklist Mode and Consul Operator Actions
+
+Consul 0.7 added special Consul operator actions which are protected by a new
+`operator` ACL policy. The operator actions cover:
+
+* [Operator HTTP endpoint](/docs/agent/http/operator.html)
+* [Operator CLI command](/docs/commands/operator.html)
+
+If your [`acl_default_policy`](/docs/agent/options.html#acl_default_policy) is
+set to `deny`, then the `anonymous` token will not have access to Consul operator
+actions. Granting `read` access allows reading information for diagnostic purposes
+without making any changes to state. Granting `write` access allows reading
+information and changing state. Here's an example policy:
+
+```
+operator = "write"
+```
+
+~> Grant `write` access to operator actions with extreme caution, as improper use
+   could lead to a Consul outage and even loss of data.
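+
+As with the other policies shown above, this can also be written in the
+equivalent JSON form:
+
+```javascript
+{
+  "operator": "write"
+}
+```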
+
 #### Services and Checks with ACLs
 
 Consul allows configuring ACL policies which may control access to service and
From 672365b7d9d45af60137ea17e79b158ba6ef719d Mon Sep 17 00:00:00 2001
From: James Phillips
Date: Tue, 30 Aug 2016 13:18:33 -0700
Subject: [PATCH 09/17] Update CHANGELOG.md

---
 CHANGELOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 186c6aabbc..2c743371c2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -32,6 +32,10 @@ FEATURES:
   quorum. This version also provides a foundation for new features that will
   appear in future Consul versions once the remainder of the v2 library is
   complete. [GH-2222]
+* Added new `consul operator` command, HTTP endpoint, and associated ACL to
+  allow Consul operators to view and update the Raft configuration. This allows
+  a stale server to be removed without requiring downtime or use of the
+  peers.json recovery file. [GH-2312]
 * Extended the [`translate_wan_addrs`](https://www.consul.io/docs/agent/options.html#translate_wan_addrs)
   config option to also translate node addresses in HTTP responses, making it
   easy to use this feature from non-DNS clients. [GH-2118]
From ed7356dd5c1c8b63ff1b7cb7c84b55a2ba25e42c Mon Sep 17 00:00:00 2001
From: James Phillips
Date: Tue, 30 Aug 2016 13:40:43 -0700
Subject: [PATCH 10/17] Changes default DNS allow_stale to true.

---
 command/agent/config.go                        |  7 ++++---
 command/agent/config_test.go                   |  6 +++---
 command/agent/dns.go                           |  8 ++++----
 command/agent/dns_test.go                      |  8 ++++----
 .../source/docs/agent/options.html.markdown    |  5 +++--
 .../docs/upgrade-specific.html.markdown        | 19 ++++++++++++++++---
 6 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/command/agent/config.go b/command/agent/config.go
index 0e2574e402..d94dbb2c85 100644
--- a/command/agent/config.go
+++ b/command/agent/config.go
@@ -68,7 +68,7 @@ type DNSConfig struct {
 	// data. This gives horizontal read scalability since
 	// any Consul server can service the query instead of
 	// only the leader.
-	AllowStale bool `mapstructure:"allow_stale"`
+	AllowStale *bool `mapstructure:"allow_stale"`
 
 	// EnableTruncate is used to enable setting the truncate
 	// flag for UDP DNS queries.
This allows unmodified @@ -644,6 +644,7 @@ func DefaultConfig() *Config { Server: 8300, }, DNSConfig: DNSConfig{ + AllowStale: Bool(true), UDPAnswerLimit: 3, MaxStale: 5 * time.Second, }, @@ -1335,8 +1336,8 @@ func MergeConfig(a, b *Config) *Config { result.DNSConfig.ServiceTTL[service] = dur } } - if b.DNSConfig.AllowStale { - result.DNSConfig.AllowStale = true + if b.DNSConfig.AllowStale != nil { + result.DNSConfig.AllowStale = b.DNSConfig.AllowStale } if b.DNSConfig.UDPAnswerLimit != 0 { result.DNSConfig.UDPAnswerLimit = b.DNSConfig.UDPAnswerLimit diff --git a/command/agent/config_test.go b/command/agent/config_test.go index ed174a9f82..1da34a3db8 100644 --- a/command/agent/config_test.go +++ b/command/agent/config_test.go @@ -544,13 +544,13 @@ func TestDecodeConfig(t *testing.T) { } // DNS node ttl, max stale - input = `{"dns_config": {"allow_stale": true, "enable_truncate": false, "max_stale": "15s", "node_ttl": "5s", "only_passing": true, "udp_answer_limit": 6}}` + input = `{"dns_config": {"allow_stale": false, "enable_truncate": false, "max_stale": "15s", "node_ttl": "5s", "only_passing": true, "udp_answer_limit": 6}}` config, err = DecodeConfig(bytes.NewReader([]byte(input))) if err != nil { t.Fatalf("err: %s", err) } - if !config.DNSConfig.AllowStale { + if *config.DNSConfig.AllowStale { t.Fatalf("bad: %#v", config) } if config.DNSConfig.EnableTruncate { @@ -1408,7 +1408,7 @@ func TestMergeConfig(t *testing.T) { DataDir: "/tmp/bar", DNSRecursors: []string{"127.0.0.2:1001"}, DNSConfig: DNSConfig{ - AllowStale: false, + AllowStale: Bool(false), EnableTruncate: true, DisableCompression: true, MaxStale: 30 * time.Second, diff --git a/command/agent/dns.go b/command/agent/dns.go index e75c5a7913..c859f9a278 100644 --- a/command/agent/dns.go +++ b/command/agent/dns.go @@ -198,7 +198,7 @@ func (d *DNSServer) handlePtr(resp dns.ResponseWriter, req *dns.Msg) { Datacenter: datacenter, QueryOptions: structs.QueryOptions{ Token: d.agent.config.ACLToken, - AllowStale: d.config.AllowStale, + AllowStale: *d.config.AllowStale, }, } var out structs.IndexedNodes @@ -384,7 +384,7 @@ func (d *DNSServer) nodeLookup(network, datacenter, node string, req, resp *dns. Node: node, QueryOptions: structs.QueryOptions{ Token: d.agent.config.ACLToken, - AllowStale: d.config.AllowStale, + AllowStale: *d.config.AllowStale, }, } var out structs.IndexedNodeServices @@ -584,7 +584,7 @@ func (d *DNSServer) serviceLookup(network, datacenter, service, tag string, req, TagFilter: tag != "", QueryOptions: structs.QueryOptions{ Token: d.agent.config.ACLToken, - AllowStale: d.config.AllowStale, + AllowStale: *d.config.AllowStale, }, } var out structs.IndexedCheckServiceNodes @@ -658,7 +658,7 @@ func (d *DNSServer) preparedQueryLookup(network, datacenter, query string, req, QueryIDOrName: query, QueryOptions: structs.QueryOptions{ Token: d.agent.config.ACLToken, - AllowStale: d.config.AllowStale, + AllowStale: *d.config.AllowStale, }, // Always pass the local agent through. 
In the DNS interface, there
diff --git a/command/agent/dns_test.go b/command/agent/dns_test.go
index a909050739..f686fc922f 100644
--- a/command/agent/dns_test.go
+++ b/command/agent/dns_test.go
@@ -2308,7 +2308,7 @@ func TestDNS_NodeLookup_TTL(t *testing.T) {
 		c.DNSRecursor = recursor.Addr
 	}, func(c *DNSConfig) {
 		c.NodeTTL = 10 * time.Second
-		c.AllowStale = true
+		*c.AllowStale = true
 		c.MaxStale = time.Second
 	})
 	defer os.RemoveAll(dir)
@@ -2428,7 +2428,7 @@ func TestDNS_ServiceLookup_TTL(t *testing.T) {
 			"db": 10 * time.Second,
 			"*": 5 * time.Second,
 		}
-		c.AllowStale = true
+		*c.AllowStale = true
 		c.MaxStale = time.Second
 	}
 	dir, srv := makeDNSServerConfig(t, nil, confFn)
@@ -2531,7 +2531,7 @@ func TestDNS_PreparedQuery_TTL(t *testing.T) {
 			"db": 10 * time.Second,
 			"*": 5 * time.Second,
 		}
-		c.AllowStale = true
+		*c.AllowStale = true
 		c.MaxStale = time.Second
 	}
 	dir, srv := makeDNSServerConfig(t, nil, confFn)
@@ -3192,7 +3192,7 @@ func TestDNS_NonExistingLookupEmptyAorAAAA(t *testing.T) {
 
 func TestDNS_PreparedQuery_AllowStale(t *testing.T) {
 	confFn := func(c *DNSConfig) {
-		c.AllowStale = true
+		*c.AllowStale = true
 		c.MaxStale = time.Second
 	}
 	dir, srv := makeDNSServerConfig(t, nil, confFn)
diff --git a/website/source/docs/agent/options.html.markdown b/website/source/docs/agent/options.html.markdown
index 7d439cd357..318c28f5e5 100644
--- a/website/source/docs/agent/options.html.markdown
+++ b/website/source/docs/agent/options.html.markdown
@@ -485,8 +485,9 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass
 * `allow_stale` - Enables a stale query for DNS information. This allows any Consul
   server, rather than only the leader, to service the request. The advantage of this is
   you get linear read scalability with Consul servers.
-  By default, this is false, meaning all requests are serviced by the leader, providing stronger
-  consistency but less throughput and higher latency.
+  In versions of Consul prior to 0.7, this defaulted to false, meaning all requests were serviced
+  by the leader, providing stronger consistency but less throughput and higher latency. In Consul
+  0.7 and later, this defaults to true for better utilization of available servers.
 
 * `max_stale` - When [`allow_stale`](#allow_stale) is specified, this is used to limit how
diff --git a/website/source/docs/upgrade-specific.html.markdown b/website/source/docs/upgrade-specific.html.markdown
index 069caa36f5..9896631f57 100644
--- a/website/source/docs/upgrade-specific.html.markdown
+++ b/website/source/docs/upgrade-specific.html.markdown
@@ -19,9 +19,22 @@ standard upgrade flow.
 Consul version 0.7 is a very large release with many important changes. Changes
 to be aware of during an upgrade are categorized below.
 
-#### Performance Tuning and New Defaults
+#### Defaults Changed for Better Performance
 
-Consul 0.7 introduced support for tuning Raft performance using a new
+Consul 0.7 now defaults the DNS configuration to allow for stale queries by defaulting
+[`allow_stale`](/docs/agent/options.html#allow_stale) to true for better utilization
+of available servers. If you want to retain the previous behavior, set the following
+configuration:
+
+```javascript
+{
+  "dns_config": {
+    "allow_stale": false
+  }
+}
+```
+
+Consul 0.7 also introduced support for tuning Raft performance using a new
 [performance configuration block](/docs/agent/options.html#performance). Also, the
 default Raft timing is set to a lower-performance mode suitable for
 [minimal Consul servers](/docs/guides/performance.html#minumum).
@@ -40,7 +53,7 @@ to all Consul servers when upgrading:
 
 See the [Server Performance](/docs/guides/performance.html) guide for more details.
 
-#### Default Configuration Changes
+#### Servers No Longer Default to Leave on Interrupt
 
 The default behavior of [`skip_leave_on_interrupt`](/docs/agent/options.html#skip_leave_on_interrupt)
 is now dependent on whether or not the agent is acting as a server or client. When Consul is started as a
From 74a40a2de902f6c03922bc649e93e0cc9cf0de43 Mon Sep 17 00:00:00 2001
From: James Phillips
Date: Tue, 30 Aug 2016 14:45:15 -0700
Subject: [PATCH 11/17] Update CHANGELOG.md

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2c743371c2..fec492de2a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -58,6 +58,9 @@ BACKWARDS INCOMPATIBILITIES:
 * `skip_leave_on_interrupt`'s default behavior is now dependent on whether or
   not the agent is acting as a server or client. When Consul is started as a
   server the default is `true` and `false` when a client. [GH-1909]
+* `allow_stale` for DNS queries now defaults to `true`, allowing for better
+  utilization of available Consul servers and higher throughput at the expense of
+  weaker consistency. [GH-2315]
 * HTTP check output is truncated to 4k, similar to script check output. [GH-1952]
 * Consul's Go API client will now send ACL tokens using HTTP headers instead of
   query parameters, requiring Consul 0.6.0 or later. [GH-2233]
From 84e8fc584877786aa6f6b9bc08dd485a5a688a59 Mon Sep 17 00:00:00 2001
From: James Phillips
Date: Tue, 30 Aug 2016 14:59:16 -0700
Subject: [PATCH 12/17] Removes Raft types from public API interface.

This will cause a lot of breakage because we've currently vendored a
branch of the Raft library.
---
 api/operator.go     | 10 +++-------
 command/operator.go |  4 +---
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/api/operator.go b/api/operator.go
index b39a015be3..48d74f3ca6 100644
--- a/api/operator.go
+++ b/api/operator.go
@@ -1,9 +1,5 @@
 package api
 
-import (
-	"github.com/hashicorp/raft"
-)
-
 // Operator can be used to perform low-level operator tasks for Consul.
 type Operator struct {
 	c *Client
@@ -19,14 +15,14 @@ type RaftServer struct {
 	// ID is the unique ID for the server. These are currently the same
 	// as the address, but they will be changed to a real GUID in a future
 	// release of Consul.
-	ID raft.ServerID
+	ID string
 
 	// Node is the node name of the server, as known by Consul, or this
 	// will be set to "(unknown)" otherwise.
 	Node string
 
 	// Address is the IP:port of the server, used for Raft communications.
-	Address raft.ServerAddress
+	Address string
 
 	// Leader is true if this server is the current cluster leader.
 	Leader bool
@@ -67,7 +63,7 @@ func (op *Operator) RaftGetConfiguration(q *QueryOptions) (*RaftConfiguration, e
 // RaftRemovePeerByAddress is used to kick a stale peer (one that is in the Raft
 // quorum but no longer known to Serf or the catalog) by address in the form of
 // "IP:port".
-func (op *Operator) RaftRemovePeerByAddress(address raft.ServerAddress, q *WriteOptions) error { +func (op *Operator) RaftRemovePeerByAddress(address string, q *WriteOptions) error { r := op.c.newRequest("DELETE", "/v1/operator/raft/peer") r.setWriteOptions(q) diff --git a/command/operator.go b/command/operator.go index 7d69739243..68ae585319 100644 --- a/command/operator.go +++ b/command/operator.go @@ -6,7 +6,6 @@ import ( "strings" "github.com/hashicorp/consul/api" - "github.com/hashicorp/raft" "github.com/mitchellh/cli" "github.com/ryanuber/columnize" ) @@ -160,8 +159,7 @@ func (c *OperatorCommand) raft(args []string) error { w := &api.WriteOptions{ Token: token, } - sa := raft.ServerAddress(address) - if err := operator.RaftRemovePeerByAddress(sa, w); err != nil { + if err := operator.RaftRemovePeerByAddress(address, w); err != nil { return err } c.Ui.Output(fmt.Sprintf("Removed peer with address %q", address)) From cda2bd29a90846f7ded55933763d7993314626da Mon Sep 17 00:00:00 2001 From: James Phillips Date: Tue, 30 Aug 2016 16:54:21 -0700 Subject: [PATCH 13/17] Copies the member data instead of referencing by pointer. --- consul/operator_endpoint.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/consul/operator_endpoint.go b/consul/operator_endpoint.go index 2add169a27..027e1d1e4e 100644 --- a/consul/operator_endpoint.go +++ b/consul/operator_endpoint.go @@ -38,7 +38,7 @@ func (op *Operator) RaftGetConfiguration(args *structs.DCSpecificRequest, reply } // Index the Consul information about the servers. - serverMap := make(map[raft.ServerAddress]*serf.Member) + serverMap := make(map[raft.ServerAddress]serf.Member) for _, member := range op.srv.serfLAN.Members() { valid, parts := agent.IsConsulServer(member) if !valid { @@ -46,7 +46,7 @@ func (op *Operator) RaftGetConfiguration(args *structs.DCSpecificRequest, reply } addr := (&net.TCPAddr{IP: member.Addr, Port: parts.Port}).String() - serverMap[raft.ServerAddress(addr)] = &member + serverMap[raft.ServerAddress(addr)] = member } // Fill out the reply. From a3fd12aecf3f7f0aa0d8868e95f53771af26a6ab Mon Sep 17 00:00:00 2001 From: James Phillips Date: Tue, 30 Aug 2016 17:32:01 -0700 Subject: [PATCH 14/17] Preps tree for 0.7.0-rc2 release. --- version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.go b/version.go index e158f79dc1..94fe962cfa 100644 --- a/version.go +++ b/version.go @@ -17,7 +17,7 @@ const Version = "0.7.0" // A pre-release marker for the version. If this is "" (empty string) // then it means that it is a final release. Otherwise, this is a pre-release // such as "dev" (in development), "beta", "rc1", etc. -const VersionPrerelease = "dev" +const VersionPrerelease = "rc2" // GetHumanVersion composes the parts of the version in a way that's suitable // for displaying to humans. From 4fd419d609f4d5ee0a828b9d0b5d0de02da74201 Mon Sep 17 00:00:00 2001 From: James Phillips Date: Tue, 30 Aug 2016 18:11:05 -0700 Subject: [PATCH 16/17] Adds a note about stale reads to the performance guide. --- website/source/docs/guides/performance.html.markdown | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/website/source/docs/guides/performance.html.markdown b/website/source/docs/guides/performance.html.markdown index dc42d406a2..c33c3bebca 100644 --- a/website/source/docs/guides/performance.html.markdown +++ b/website/source/docs/guides/performance.html.markdown @@ -78,8 +78,9 @@ or add more powerful servers. 
* For DNS-heavy workloads, configuring all Consul agents in a cluster with the [`allow_stale`](/docs/agent/options.html#allow_stale) configuration option will allow reads to -scale across all Consul servers, not just the leader. See [Stale Reads](/docs/guides/dns-cache.html#stale) -in the [DNS Caching](/docs/guides/dns-cache.html) guide for more details. It's also good to set +scale across all Consul servers, not just the leader. Consul 0.7 and later enables stale reads +for DNS by default. See [Stale Reads](/docs/guides/dns-cache.html#stale) in the +[DNS Caching](/docs/guides/dns-cache.html) guide for more details. It's also good to set reasonable, non-zero [DNS TTL values](/docs/guides/dns-cache.html#ttl) if your clients will respect them. From 7bcb34d4985ffa4a1334c8f7b18a8ef9e3d854de Mon Sep 17 00:00:00 2001 From: James Phillips Date: Wed, 31 Aug 2016 10:49:31 -0700 Subject: [PATCH 17/17] Puts tree back in dev mode. --- version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.go b/version.go index 94fe962cfa..e158f79dc1 100644 --- a/version.go +++ b/version.go @@ -17,7 +17,7 @@ const Version = "0.7.0" // A pre-release marker for the version. If this is "" (empty string) // then it means that it is a final release. Otherwise, this is a pre-release // such as "dev" (in development), "beta", "rc1", etc. -const VersionPrerelease = "rc2" +const VersionPrerelease = "dev" // GetHumanVersion composes the parts of the version in a way that's suitable // for displaying to humans.
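
For reference, a rough sketch (not part of the patch series) of how the new
operator API surface from patches 4 and 12 can be exercised from Go. It assumes
the `api` package's `Client.Operator()` accessor, in the style of its other
endpoint handles, and a local agent on the default address:

```go
package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/consul/api"
)

func main() {
	// Connect to the local agent using the default configuration
	// (127.0.0.1:8500).
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	// Read the current Raft configuration; a nil QueryOptions queries
	// the leader with the default settings.
	conf, err := client.Operator().RaftGetConfiguration(nil)
	if err != nil {
		log.Fatal(err)
	}
	for _, s := range conf.Servers {
		fmt.Printf("%s %s leader=%v voter=%v\n", s.Node, s.Address, s.Leader, s.Voter)
	}

	// A stale peer could then be removed by its "IP:port" address, e.g.:
	//   err = client.Operator().RaftRemovePeerByAddress("10.0.1.8:8300", nil)
}
```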