mirror of https://github.com/status-im/consul.git
parent
4d8e5a18bc
commit
5e26971864
|
@ -127,7 +127,7 @@ func (a *TestACLAgent) LocalMember() serf.Member {
|
|||
func (a *TestACLAgent) JoinLAN(addrs []string) (n int, err error) {
|
||||
return 0, fmt.Errorf("Unimplemented")
|
||||
}
|
||||
func (a *TestACLAgent) RemoveFailedNode(node string) error {
|
||||
func (a *TestACLAgent) RemoveFailedNode(node string, prune bool) error {
|
||||
return fmt.Errorf("Unimplemented")
|
||||
}
|
||||
|
||||
|
|
|
@ -6,7 +6,6 @@ import (
|
|||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/hashicorp/go-memdb"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
|
@ -20,6 +19,8 @@ import (
|
|||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-memdb"
|
||||
|
||||
"google.golang.org/grpc"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
|
@ -133,7 +134,7 @@ type delegate interface {
|
|||
LANSegmentMembers(segment string) ([]serf.Member, error)
|
||||
LocalMember() serf.Member
|
||||
JoinLAN(addrs []string) (n int, err error)
|
||||
RemoveFailedNode(node string) error
|
||||
RemoveFailedNode(node string, prune bool) error
|
||||
ResolveToken(secretID string) (acl.Authorizer, error)
|
||||
RPC(method string, args interface{}, reply interface{}) error
|
||||
ACLsEnabled() bool
|
||||
|
@ -1778,9 +1779,9 @@ func (a *Agent) JoinWAN(addrs []string) (n int, err error) {
|
|||
}
|
||||
|
||||
// ForceLeave is used to remove a failed node from the cluster
|
||||
func (a *Agent) ForceLeave(node string) (err error) {
|
||||
func (a *Agent) ForceLeave(node string, prune bool) (err error) {
|
||||
a.logger.Printf("[INFO] agent: Force leaving node: %v", node)
|
||||
err = a.delegate.RemoveFailedNode(node)
|
||||
err = a.delegate.RemoveFailedNode(node, prune)
|
||||
if err != nil {
|
||||
a.logger.Printf("[WARN] agent: Failed to remove node: %v", err)
|
||||
}
|
||||
|
|
|
@ -438,8 +438,11 @@ func (s *HTTPServer) AgentForceLeave(resp http.ResponseWriter, req *http.Request
|
|||
return nil, acl.ErrPermissionDenied
|
||||
}
|
||||
|
||||
//Check the value of the prune query
|
||||
_, prune := req.URL.Query()["prune"]
|
||||
|
||||
addr := strings.TrimPrefix(req.URL.Path, "/v1/agent/force-leave/")
|
||||
return nil, s.agent.ForceLeave(addr)
|
||||
return nil, s.agent.ForceLeave(addr, prune)
|
||||
}
|
||||
|
||||
// syncChanges is a helper function which wraps a blocking call to sync
|
||||
|
|
|
@ -1655,6 +1655,54 @@ func TestAgent_ForceLeave_ACLDeny(t *testing.T) {
|
|||
})
|
||||
}
|
||||
|
||||
func TestAgent_ForceLeavePrune(t *testing.T) {
|
||||
t.Parallel()
|
||||
a1 := NewTestAgent(t, t.Name()+"-a1", "")
|
||||
defer a1.Shutdown()
|
||||
a2 := NewTestAgent(t, t.Name()+"-a2", "")
|
||||
testrpc.WaitForLeader(t, a1.RPC, "dc1")
|
||||
testrpc.WaitForLeader(t, a2.RPC, "dc1")
|
||||
|
||||
// Join first
|
||||
addr := fmt.Sprintf("127.0.0.1:%d", a2.Config.SerfPortLAN)
|
||||
_, err := a1.JoinLAN([]string{addr})
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// this test probably needs work
|
||||
a2.Shutdown()
|
||||
// Wait for agent being marked as failed, so we wait for full shutdown of Agent
|
||||
retry.Run(t, func(r *retry.R) {
|
||||
m := a1.LANMembers()
|
||||
for _, member := range m {
|
||||
if member.Name == a2.Config.NodeName {
|
||||
if member.Status != serf.StatusFailed {
|
||||
r.Fatalf("got status %q want %q", member.Status, serf.StatusFailed)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
// Force leave now
|
||||
req, _ := http.NewRequest("PUT", fmt.Sprintf("/v1/agent/force-leave/%s?prune=true", a2.Config.NodeName), nil)
|
||||
obj, err := a1.srv.AgentForceLeave(nil, req)
|
||||
if err != nil {
|
||||
t.Fatalf("Err: %v", err)
|
||||
}
|
||||
if obj != nil {
|
||||
t.Fatalf("Err: %v", obj)
|
||||
}
|
||||
retry.Run(t, func(r *retry.R) {
|
||||
m := len(a1.LANMembers())
|
||||
if m != 1 {
|
||||
r.Fatalf("want one member, got %v", m)
|
||||
}
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
func TestAgent_RegisterCheck(t *testing.T) {
|
||||
t.Parallel()
|
||||
a := NewTestAgent(t, t.Name(), "")
|
||||
|
|
|
@ -263,7 +263,10 @@ func (c *Client) LANSegmentMembers(segment string) ([]serf.Member, error) {
|
|||
}
|
||||
|
||||
// RemoveFailedNode is used to remove a failed node from the cluster
|
||||
func (c *Client) RemoveFailedNode(node string) error {
|
||||
func (c *Client) RemoveFailedNode(node string, prune bool) error {
|
||||
if prune {
|
||||
c.serf.RemoveFailedNodePrune(node)
|
||||
}
|
||||
return c.serf.RemoveFailedNode(node)
|
||||
}
|
||||
|
||||
|
|
|
@ -549,7 +549,7 @@ func TestLeader_LeftServer(t *testing.T) {
|
|||
servers[0].Shutdown()
|
||||
|
||||
// Force remove the non-leader (transition to left state)
|
||||
if err := servers[1].RemoveFailedNode(servers[0].config.NodeName); err != nil {
|
||||
if err := servers[1].RemoveFailedNode(servers[0].config.NodeName, false); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
|
|
|
@ -1008,8 +1008,15 @@ func (s *Server) WANMembers() []serf.Member {
|
|||
}
|
||||
|
||||
// RemoveFailedNode is used to remove a failed node from the cluster
|
||||
func (s *Server) RemoveFailedNode(node string) error {
|
||||
if err := s.serfLAN.RemoveFailedNode(node); err != nil {
|
||||
func (s *Server) RemoveFailedNode(node string, prune bool) error {
|
||||
var removeFn func(*serf.Serf, string) error
|
||||
if prune {
|
||||
removeFn = (*serf.Serf).RemoveFailedNodePrune
|
||||
} else {
|
||||
removeFn = (*serf.Serf).RemoveFailedNode
|
||||
}
|
||||
|
||||
if err := removeFn(s.serfLAN, node); err != nil {
|
||||
return err
|
||||
}
|
||||
// The Serf WAN pool stores members as node.datacenter
|
||||
|
@ -1018,7 +1025,7 @@ func (s *Server) RemoveFailedNode(node string) error {
|
|||
node = node + "." + s.config.Datacenter
|
||||
}
|
||||
if s.serfWAN != nil {
|
||||
if err := s.serfWAN.RemoveFailedNode(node); err != nil {
|
||||
if err := removeFn(s.serfWAN, node); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
|
13
api/agent.go
13
api/agent.go
|
@ -730,6 +730,19 @@ func (a *Agent) ForceLeave(node string) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
//ForceLeavePrune is used to have an a failed agent removed
|
||||
//from the list of members
|
||||
func (a *Agent) ForceLeavePrune(node string) error {
|
||||
r := a.c.newRequest("PUT", "/v1/agent/force-leave/"+node)
|
||||
r.params.Set("prune", "1")
|
||||
_, resp, err := requireOK(a.c.doRequest(r))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
resp.Body.Close()
|
||||
return nil
|
||||
}
|
||||
|
||||
// ConnectAuthorize is used to authorize an incoming connection
|
||||
// to a natively integrated Connect service.
|
||||
func (a *Agent) ConnectAuthorize(auth *AgentAuthorizeParams) (*AgentAuthorize, error) {
|
||||
|
|
|
@ -1070,6 +1070,20 @@ func TestAPI_AgentForceLeave(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestAPI_AgentForceLeavePrune(t *testing.T) {
|
||||
t.Parallel()
|
||||
c, s := makeClient(t)
|
||||
defer s.Stop()
|
||||
|
||||
agent := c.Agent()
|
||||
|
||||
// Eject somebody
|
||||
err := agent.ForceLeavePrune("foo")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAPI_AgentMonitor(t *testing.T) {
|
||||
t.Parallel()
|
||||
c, s := makeClient(t)
|
||||
|
|
|
@ -690,6 +690,7 @@ func (r *request) setQueryOptions(q *QueryOptions) {
|
|||
r.header.Set("Cache-Control", strings.Join(cc, ", "))
|
||||
}
|
||||
}
|
||||
|
||||
r.ctx = q.ctx
|
||||
}
|
||||
|
||||
|
|
|
@ -19,10 +19,15 @@ type cmd struct {
|
|||
flags *flag.FlagSet
|
||||
http *flags.HTTPFlags
|
||||
help string
|
||||
|
||||
//flags
|
||||
prune bool
|
||||
}
|
||||
|
||||
func (c *cmd) init() {
|
||||
c.flags = flag.NewFlagSet("", flag.ContinueOnError)
|
||||
c.flags.BoolVar(&c.prune, "prune", false,
|
||||
"Remove agent completely from list of members")
|
||||
c.http = &flags.HTTPFlags{}
|
||||
flags.Merge(c.flags, c.http.ClientFlags())
|
||||
c.help = flags.Usage(help, c.flags)
|
||||
|
@ -47,7 +52,12 @@ func (c *cmd) Run(args []string) int {
|
|||
return 1
|
||||
}
|
||||
|
||||
err = client.Agent().ForceLeave(nodes[0])
|
||||
if c.prune {
|
||||
err = client.Agent().ForceLeavePrune(nodes[0])
|
||||
} else {
|
||||
err = client.Agent().ForceLeave(nodes[0])
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
c.UI.Error(fmt.Sprintf("Error force leaving: %s", err))
|
||||
return 1
|
||||
|
@ -74,4 +84,6 @@ Usage: consul force-leave [options] name
|
|||
that are never coming back. If you do not force leave a failed node,
|
||||
Consul will attempt to reconnect to those failed nodes for some period of
|
||||
time before eventually reaping them.
|
||||
|
||||
-prune Remove agent completely from list of members
|
||||
`
|
||||
|
|
|
@ -56,6 +56,42 @@ func TestForceLeaveCommand(t *testing.T) {
|
|||
})
|
||||
}
|
||||
|
||||
func TestForceLeaveCommand_prune(t *testing.T) {
|
||||
t.Parallel()
|
||||
a1 := agent.NewTestAgent(t, t.Name()+"-a1", ``)
|
||||
defer a1.Shutdown()
|
||||
a2 := agent.NewTestAgent(t, t.Name()+"-a2", ``)
|
||||
defer a2.Shutdown()
|
||||
|
||||
_, err := a2.JoinLAN([]string{a1.Config.SerfBindAddrLAN.String()})
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
// Forcibly shutdown a2 so that it appears "failed" in a1
|
||||
a2.Shutdown()
|
||||
|
||||
ui := cli.NewMockUi()
|
||||
c := New(ui)
|
||||
args := []string{
|
||||
"-http-addr=" + a1.HTTPAddr(),
|
||||
"-prune",
|
||||
a2.Config.NodeName,
|
||||
}
|
||||
|
||||
code := c.Run(args)
|
||||
if code != 0 {
|
||||
t.Fatalf("bad: %d. %#v", code, ui.ErrorWriter.String())
|
||||
}
|
||||
retry.Run(t, func(r *retry.R) {
|
||||
m := len(a1.LANMembers())
|
||||
if m != 1 {
|
||||
r.Fatalf("should have 1 members, got %#v", m)
|
||||
}
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
func TestForceLeaveCommand_noAddrs(t *testing.T) {
|
||||
t.Parallel()
|
||||
ui := cli.NewMockUi()
|
||||
|
|
4
go.mod
4
go.mod
|
@ -30,7 +30,7 @@ require (
|
|||
github.com/hashicorp/go-checkpoint v0.0.0-20171009173528-1545e56e46de
|
||||
github.com/hashicorp/go-cleanhttp v0.5.1
|
||||
github.com/hashicorp/go-discover v0.0.0-20190403160810-22221edb15cd
|
||||
github.com/hashicorp/go-hclog v0.9.1
|
||||
github.com/hashicorp/go-hclog v0.9.2
|
||||
github.com/hashicorp/go-memdb v0.0.0-20180223233045-1289e7fffe71
|
||||
github.com/hashicorp/go-msgpack v0.5.5
|
||||
github.com/hashicorp/go-multierror v1.0.0
|
||||
|
@ -49,7 +49,7 @@ require (
|
|||
github.com/hashicorp/net-rpc-msgpackrpc v0.0.0-20151116020338-a14192a58a69
|
||||
github.com/hashicorp/raft v1.1.1
|
||||
github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea
|
||||
github.com/hashicorp/serf v0.8.2
|
||||
github.com/hashicorp/serf v0.8.5
|
||||
github.com/hashicorp/vault/api v1.0.4
|
||||
github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d
|
||||
github.com/imdario/mergo v0.3.6
|
||||
|
|
4
go.sum
4
go.sum
|
@ -123,6 +123,8 @@ github.com/hashicorp/go-hclog v0.0.0-20180709165350-ff2cf002a8dd/go.mod h1:9bjs9
|
|||
github.com/hashicorp/go-hclog v0.8.0/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ=
|
||||
github.com/hashicorp/go-hclog v0.9.1 h1:9PZfAcVEvez4yhLH2TBU64/h/z4xlFI80cWXRrxuKuM=
|
||||
github.com/hashicorp/go-hclog v0.9.1/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ=
|
||||
github.com/hashicorp/go-hclog v0.9.2 h1:CG6TE5H9/JXsFWJCfoIVpKFIkFe6ysEuHirp4DxCsHI=
|
||||
github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ=
|
||||
github.com/hashicorp/go-immutable-radix v1.0.0 h1:AKDB1HM5PWEA7i4nhcpwOrO2byshxBjXVn/J/3+z5/0=
|
||||
github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=
|
||||
github.com/hashicorp/go-memdb v0.0.0-20180223233045-1289e7fffe71 h1:yxxFgVz31vFoKKTtRUNbXLNe4GFnbLKqg+0N7yG42L8=
|
||||
|
@ -184,6 +186,8 @@ github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea h1:xykPFhrBA
|
|||
github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea/go.mod h1:pNv7Wc3ycL6F5oOWn+tPGo2gWD4a5X+yp/ntwdKLjRk=
|
||||
github.com/hashicorp/serf v0.8.2 h1:YZ7UKsJv+hKjqGVUUbtE3HNj79Eln2oQ75tniF6iPt0=
|
||||
github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc=
|
||||
github.com/hashicorp/serf v0.8.5 h1:ZynDUIQiA8usmRgPdGPHFdPnb1wgGI9tK3mO9hcAJjc=
|
||||
github.com/hashicorp/serf v0.8.5/go.mod h1:UpNcs7fFbpKIyZaUuSW6EPiH+eZC7OuyFD+wc1oal+k=
|
||||
github.com/hashicorp/vault/api v1.0.4 h1:j08Or/wryXT4AcHj1oCbMd7IijXcKzYUGw59LGu9onU=
|
||||
github.com/hashicorp/vault/api v1.0.4/go.mod h1:gDcqh3WGcR1cpF5AJz/B1UFheUEneMoIospckxBxk6Q=
|
||||
github.com/hashicorp/vault/sdk v0.1.13 h1:mOEPeOhT7jl0J4AMl1E705+BcmeRs1VmKNb9F0sMLy8=
|
||||
|
|
|
@ -22,7 +22,11 @@ var (
|
|||
// to be used in more specific contexts.
|
||||
func Default() Logger {
|
||||
protect.Do(func() {
|
||||
def = New(DefaultOptions)
|
||||
// If SetDefault was used before Default() was called, we need to
|
||||
// detect that here.
|
||||
if def == nil {
|
||||
def = New(DefaultOptions)
|
||||
}
|
||||
})
|
||||
|
||||
return def
|
||||
|
@ -32,3 +36,13 @@ func Default() Logger {
|
|||
func L() Logger {
|
||||
return Default()
|
||||
}
|
||||
|
||||
// SetDefault changes the logger to be returned by Default()and L()
|
||||
// to the one given. This allows packages to use the default logger
|
||||
// and have higher level packages change it to match the execution
|
||||
// environment. It returns any old default if there is one.
|
||||
func SetDefault(log Logger) Logger {
|
||||
old := def
|
||||
def = log
|
||||
return old
|
||||
}
|
||||
|
|
|
@ -242,6 +242,10 @@ type Config struct {
|
|||
// Merge can be optionally provided to intercept a cluster merge
|
||||
// and conditionally abort the merge.
|
||||
Merge MergeDelegate
|
||||
|
||||
// UserEventSizeLimit is maximum byte size limit of user event `name` + `payload` in bytes.
|
||||
// It's optimal to be relatively small, since it's going to be gossiped through the cluster.
|
||||
UserEventSizeLimit int
|
||||
}
|
||||
|
||||
// Init allocates the subdata structures
|
||||
|
@ -282,5 +286,6 @@ func DefaultConfig() *Config {
|
|||
QuerySizeLimit: 1024,
|
||||
EnableNameConflictResolution: true,
|
||||
DisableCoordinates: false,
|
||||
UserEventSizeLimit: 512,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -55,6 +55,7 @@ type messageJoin struct {
|
|||
type messageLeave struct {
|
||||
LTime LamportTime
|
||||
Node string
|
||||
Prune bool
|
||||
}
|
||||
|
||||
// messagePushPullType is used when doing a state exchange. This
|
||||
|
|
|
@ -223,8 +223,8 @@ type queries struct {
|
|||
}
|
||||
|
||||
const (
|
||||
UserEventSizeLimit = 512 // Maximum byte size for event name and payload
|
||||
snapshotSizeLimit = 128 * 1024 // Maximum 128 KB snapshot
|
||||
UserEventSizeLimit = 9 * 1024 // Maximum 9KB for event name and payload
|
||||
)
|
||||
|
||||
// Create creates a new Serf instance, starting all the background tasks
|
||||
|
@ -242,6 +242,10 @@ func Create(conf *Config) (*Serf, error) {
|
|||
conf.ProtocolVersion, ProtocolVersionMin, ProtocolVersionMax)
|
||||
}
|
||||
|
||||
if conf.UserEventSizeLimit > UserEventSizeLimit {
|
||||
return nil, fmt.Errorf("user event size limit exceeds limit of %d bytes", UserEventSizeLimit)
|
||||
}
|
||||
|
||||
logger := conf.Logger
|
||||
if logger == nil {
|
||||
logOutput := conf.LogOutput
|
||||
|
@ -437,14 +441,25 @@ func (s *Serf) KeyManager() *KeyManager {
|
|||
}
|
||||
|
||||
// UserEvent is used to broadcast a custom user event with a given
|
||||
// name and payload. The events must be fairly small, and if the
|
||||
// size limit is exceeded and error will be returned. If coalesce is enabled,
|
||||
// nodes are allowed to coalesce this event. Coalescing is only available
|
||||
// starting in v0.2
|
||||
// name and payload. If the configured size limit is exceeded and error will be returned.
|
||||
// If coalesce is enabled, nodes are allowed to coalesce this event.
|
||||
// Coalescing is only available starting in v0.2
|
||||
func (s *Serf) UserEvent(name string, payload []byte, coalesce bool) error {
|
||||
// Check the size limit
|
||||
if len(name)+len(payload) > UserEventSizeLimit {
|
||||
return fmt.Errorf("user event exceeds limit of %d bytes", UserEventSizeLimit)
|
||||
payloadSizeBeforeEncoding := len(name) + len(payload)
|
||||
|
||||
// Check size before encoding to prevent needless encoding and return early if it's over the specified limit.
|
||||
if payloadSizeBeforeEncoding > s.config.UserEventSizeLimit {
|
||||
return fmt.Errorf(
|
||||
"user event exceeds configured limit of %d bytes before encoding",
|
||||
s.config.UserEventSizeLimit,
|
||||
)
|
||||
}
|
||||
|
||||
if payloadSizeBeforeEncoding > UserEventSizeLimit {
|
||||
return fmt.Errorf(
|
||||
"user event exceeds sane limit of %d bytes before encoding",
|
||||
UserEventSizeLimit,
|
||||
)
|
||||
}
|
||||
|
||||
// Create a message
|
||||
|
@ -454,16 +469,34 @@ func (s *Serf) UserEvent(name string, payload []byte, coalesce bool) error {
|
|||
Payload: payload,
|
||||
CC: coalesce,
|
||||
}
|
||||
s.eventClock.Increment()
|
||||
|
||||
// Process update locally
|
||||
s.handleUserEvent(&msg)
|
||||
|
||||
// Start broadcasting the event
|
||||
raw, err := encodeMessage(messageUserEventType, &msg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Check the size after encoding to be sure again that
|
||||
// we're not attempting to send over the specified size limit.
|
||||
if len(raw) > s.config.UserEventSizeLimit {
|
||||
return fmt.Errorf(
|
||||
"encoded user event exceeds configured limit of %d bytes after encoding",
|
||||
s.config.UserEventSizeLimit,
|
||||
)
|
||||
}
|
||||
|
||||
if len(raw) > UserEventSizeLimit {
|
||||
return fmt.Errorf(
|
||||
"encoded user event exceeds sane limit of %d bytes before encoding",
|
||||
UserEventSizeLimit,
|
||||
)
|
||||
}
|
||||
|
||||
s.eventClock.Increment()
|
||||
|
||||
// Process update locally
|
||||
s.handleUserEvent(&msg)
|
||||
|
||||
s.eventBroadcasts.QueueBroadcast(&broadcast{
|
||||
msg: raw,
|
||||
})
|
||||
|
@ -748,15 +781,26 @@ func (s *Serf) Members() []Member {
|
|||
return members
|
||||
}
|
||||
|
||||
// RemoveFailedNode forcibly removes a failed node from the cluster
|
||||
// RemoveFailedNode is a backwards compatible form
|
||||
// of forceleave
|
||||
func (s *Serf) RemoveFailedNode(node string) error {
|
||||
return s.forceLeave(node, false)
|
||||
}
|
||||
|
||||
func (s *Serf) RemoveFailedNodePrune(node string) error {
|
||||
return s.forceLeave(node, true)
|
||||
}
|
||||
|
||||
// ForceLeave forcibly removes a failed node from the cluster
|
||||
// immediately, instead of waiting for the reaper to eventually reclaim it.
|
||||
// This also has the effect that Serf will no longer attempt to reconnect
|
||||
// to this node.
|
||||
func (s *Serf) RemoveFailedNode(node string) error {
|
||||
func (s *Serf) forceLeave(node string, prune bool) error {
|
||||
// Construct the message to broadcast
|
||||
msg := messageLeave{
|
||||
LTime: s.clock.Time(),
|
||||
Node: node,
|
||||
Prune: prune,
|
||||
}
|
||||
s.clock.Increment()
|
||||
|
||||
|
@ -1027,6 +1071,7 @@ func (s *Serf) handleNodeUpdate(n *memberlist.Node) {
|
|||
|
||||
// handleNodeLeaveIntent is called when an intent to leave is received.
|
||||
func (s *Serf) handleNodeLeaveIntent(leaveMsg *messageLeave) bool {
|
||||
|
||||
// Witness a potentially newer time
|
||||
s.clock.Witness(leaveMsg.LTime)
|
||||
|
||||
|
@ -1057,6 +1102,10 @@ func (s *Serf) handleNodeLeaveIntent(leaveMsg *messageLeave) bool {
|
|||
case StatusAlive:
|
||||
member.Status = StatusLeaving
|
||||
member.statusLTime = leaveMsg.LTime
|
||||
|
||||
if leaveMsg.Prune {
|
||||
s.handlePrune(member)
|
||||
}
|
||||
return true
|
||||
case StatusFailed:
|
||||
member.Status = StatusLeft
|
||||
|
@ -1065,6 +1114,7 @@ func (s *Serf) handleNodeLeaveIntent(leaveMsg *messageLeave) bool {
|
|||
// Remove from the failed list and add to the left list. We add
|
||||
// to the left list so that when we do a sync, other nodes will
|
||||
// remove it from their failed list.
|
||||
|
||||
s.failedMembers = removeOldMember(s.failedMembers, member.Name)
|
||||
s.leftMembers = append(s.leftMembers, member)
|
||||
|
||||
|
@ -1079,12 +1129,40 @@ func (s *Serf) handleNodeLeaveIntent(leaveMsg *messageLeave) bool {
|
|||
Members: []Member{member.Member},
|
||||
}
|
||||
}
|
||||
|
||||
if leaveMsg.Prune {
|
||||
s.handlePrune(member)
|
||||
}
|
||||
|
||||
return true
|
||||
|
||||
case StatusLeaving, StatusLeft:
|
||||
if leaveMsg.Prune {
|
||||
s.handlePrune(member)
|
||||
}
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// handlePrune waits for nodes that are leaving and then forcibly
|
||||
// erases a member from the list of members
|
||||
func (s *Serf) handlePrune(member *memberState) {
|
||||
if member.Status == StatusLeaving {
|
||||
time.Sleep(s.config.BroadcastTimeout + s.config.LeavePropagateDelay)
|
||||
}
|
||||
|
||||
s.logger.Printf("[INFO] serf: EventMemberReap (forced): %s %s", member.Name, member.Member.Addr)
|
||||
|
||||
//If we are leaving or left we may be in that list of members
|
||||
if member.Status == StatusLeaving || member.Status == StatusLeft {
|
||||
s.leftMembers = removeOldMember(s.leftMembers, member.Name)
|
||||
}
|
||||
s.eraseNode(member)
|
||||
|
||||
}
|
||||
|
||||
// handleNodeJoinIntent is called when a node broadcasts a
|
||||
// join message to set the lamport time of its join
|
||||
func (s *Serf) handleNodeJoinIntent(joinMsg *messageJoin) bool {
|
||||
|
@ -1405,6 +1483,30 @@ func (s *Serf) resolveNodeConflict() {
|
|||
}
|
||||
}
|
||||
|
||||
//eraseNode takes a node completely out of the member list
|
||||
func (s *Serf) eraseNode(m *memberState) {
|
||||
// Delete from members
|
||||
delete(s.members, m.Name)
|
||||
|
||||
// Tell the coordinate client the node has gone away and delete
|
||||
// its cached coordinates.
|
||||
if !s.config.DisableCoordinates {
|
||||
s.coordClient.ForgetNode(m.Name)
|
||||
|
||||
s.coordCacheLock.Lock()
|
||||
delete(s.coordCache, m.Name)
|
||||
s.coordCacheLock.Unlock()
|
||||
}
|
||||
|
||||
// Send an event along
|
||||
if s.config.EventCh != nil {
|
||||
s.config.EventCh <- MemberEvent{
|
||||
Type: EventMemberReap,
|
||||
Members: []Member{m.Member},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handleReap periodically reaps the list of failed and left members, as well
|
||||
// as old buffered intents.
|
||||
func (s *Serf) handleReap() {
|
||||
|
@ -1455,27 +1557,10 @@ func (s *Serf) reap(old []*memberState, now time.Time, timeout time.Duration) []
|
|||
n--
|
||||
i--
|
||||
|
||||
// Delete from members
|
||||
delete(s.members, m.Name)
|
||||
|
||||
// Tell the coordinate client the node has gone away and delete
|
||||
// its cached coordinates.
|
||||
if !s.config.DisableCoordinates {
|
||||
s.coordClient.ForgetNode(m.Name)
|
||||
|
||||
s.coordCacheLock.Lock()
|
||||
delete(s.coordCache, m.Name)
|
||||
s.coordCacheLock.Unlock()
|
||||
}
|
||||
|
||||
// Send an event along
|
||||
// Delete from members and send out event
|
||||
s.logger.Printf("[INFO] serf: EventMemberReap: %s", m.Name)
|
||||
if s.config.EventCh != nil {
|
||||
s.config.EventCh <- MemberEvent{
|
||||
Type: EventMemberReap,
|
||||
Members: []Member{m.Member},
|
||||
}
|
||||
}
|
||||
s.eraseNode(m)
|
||||
|
||||
}
|
||||
|
||||
return old
|
||||
|
|
|
@ -184,7 +184,7 @@ github.com/hashicorp/go-discover/provider/scaleway
|
|||
github.com/hashicorp/go-discover/provider/softlayer
|
||||
github.com/hashicorp/go-discover/provider/triton
|
||||
github.com/hashicorp/go-discover/provider/vsphere
|
||||
# github.com/hashicorp/go-hclog v0.9.1
|
||||
# github.com/hashicorp/go-hclog v0.9.2
|
||||
github.com/hashicorp/go-hclog
|
||||
# github.com/hashicorp/go-immutable-radix v1.0.0
|
||||
github.com/hashicorp/go-immutable-radix
|
||||
|
@ -242,7 +242,7 @@ github.com/hashicorp/net-rpc-msgpackrpc
|
|||
github.com/hashicorp/raft
|
||||
# github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea
|
||||
github.com/hashicorp/raft-boltdb
|
||||
# github.com/hashicorp/serf v0.8.2
|
||||
# github.com/hashicorp/serf v0.8.5
|
||||
github.com/hashicorp/serf/coordinate
|
||||
github.com/hashicorp/serf/serf
|
||||
# github.com/hashicorp/vault/api v1.0.4
|
||||
|
|
|
@ -490,6 +490,14 @@ state allows its old entries to be removed.
|
|||
| ------ | ---------------------------- | -------------------------- |
|
||||
| `PUT` | `/agent/force-leave/:node` | `application/json` |
|
||||
|
||||
Additionally, by specifying the `prune` flag, a node can be forcibly removed from
|
||||
the list of members entirely.
|
||||
|
||||
| Method | Path | Produces |
|
||||
| ------ | --------------------------------------- | -------------------------- |
|
||||
| `PUT` | `/agent/force-leave/:node?prune` | `application/json` |
|
||||
|
||||
|
||||
The table below shows this endpoint's support for
|
||||
[blocking queries](/api/features/blocking.html),
|
||||
[consistency modes](/api/features/consistency.html),
|
||||
|
|
|
@ -56,3 +56,7 @@ datacenter `us-east1`, run:
|
|||
```
|
||||
consul force-leave server1.us-east1
|
||||
```
|
||||
#### Command Options
|
||||
|
||||
* `-prune` - Removes failed or left agent from the list of
|
||||
members entirely
|
||||
|
|
Loading…
Reference in New Issue