consul operator raft transfer-leader should send the id (#17107)

Fixes #16955

Co-authored-by: Dhia Ayachi <dhia@hashicorp.com>
This commit is contained in:
James Hartig 2023-09-15 14:38:59 -04:00 committed by GitHub
parent aff13cd4c2
commit b2e21c103f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 232 additions and 24 deletions

3
.changelog/17107.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:breaking-change
api: RaftLeaderTransfer now requires an id string. An empty string can be specified to keep the old behavior.
```

View File

@ -68,9 +68,14 @@ func (op *Operator) RaftGetConfiguration(q *QueryOptions) (*RaftConfiguration, e
} }
// RaftLeaderTransfer is used to transfer the current raft leader to another node // RaftLeaderTransfer is used to transfer the current raft leader to another node
func (op *Operator) RaftLeaderTransfer(q *QueryOptions) (*TransferLeaderResponse, error) { // Optionally accepts a non-empty id of another node to transfer leadership to.
func (op *Operator) RaftLeaderTransfer(id string, q *QueryOptions) (*TransferLeaderResponse, error) {
r := op.c.newRequest("POST", "/v1/operator/raft/transfer-leader") r := op.c.newRequest("POST", "/v1/operator/raft/transfer-leader")
r.setQueryOptions(q) r.setQueryOptions(q)
if id != "" {
r.params.Set("id", id)
}
_, resp, err := op.c.doRequest(r) _, resp, err := op.c.doRequest(r)
if err != nil { if err != nil {
return nil, err return nil, err

View File

@ -4,8 +4,9 @@
package api package api
import ( import (
"strings"
"testing" "testing"
"github.com/hashicorp/consul/sdk/testutil"
) )
func TestAPI_OperatorRaftGetConfiguration(t *testing.T) { func TestAPI_OperatorRaftGetConfiguration(t *testing.T) {
@ -27,33 +28,181 @@ func TestAPI_OperatorRaftGetConfiguration(t *testing.T) {
func TestAPI_OperatorRaftRemovePeerByAddress(t *testing.T) { func TestAPI_OperatorRaftRemovePeerByAddress(t *testing.T) {
t.Parallel() t.Parallel()
c, s := makeClient(t) c1, s1 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) {
defer s.Stop() if conf.Autopilot == nil {
conf.Autopilot = &testutil.TestAutopilotConfig{}
}
conf.Autopilot.ServerStabilizationTime = "1ms"
})
defer s1.Stop()
// If we get this error, it proves we sent the address all the way _, s2 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) {
// through. conf.Server = true
operator := c.Operator() conf.Bootstrap = false
err := operator.RaftRemovePeerByAddress("nope", nil) conf.RetryJoin = []string{s1.LANAddr}
if err == nil || !strings.Contains(err.Error(), if conf.Autopilot == nil {
"address \"nope\" was not found in the Raft configuration") { conf.Autopilot = &testutil.TestAutopilotConfig{}
}
conf.Autopilot.ServerStabilizationTime = "1ms"
})
defer s2.Stop()
s2.WaitForVoting(t)
operator := c1.Operator()
err := operator.RaftRemovePeerByAddress(s2.ServerAddr, nil)
if err != nil {
t.Fatalf("err: %v", err) t.Fatalf("err: %v", err)
} }
cfg, err := c1.Operator().RaftGetConfiguration(nil)
if err != nil {
t.Fatalf("err: %v", err)
}
if len(cfg.Servers) != 1 {
t.Fatalf("more than 1 server left: %+v", cfg.Servers)
}
}
func TestAPI_OperatorRaftRemovePeerByID(t *testing.T) {
t.Parallel()
c1, s1 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) {
if conf.Autopilot == nil {
conf.Autopilot = &testutil.TestAutopilotConfig{}
}
conf.Autopilot.ServerStabilizationTime = "1ms"
})
defer s1.Stop()
_, s2 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) {
conf.Server = true
conf.Bootstrap = false
conf.RetryJoin = []string{s1.LANAddr}
if conf.Autopilot == nil {
conf.Autopilot = &testutil.TestAutopilotConfig{}
}
conf.Autopilot.ServerStabilizationTime = "1ms"
})
defer s2.Stop()
s2.WaitForVoting(t)
operator := c1.Operator()
err := operator.RaftRemovePeerByID(s2.Config.NodeID, nil)
if err != nil {
t.Fatalf("err: %v", err)
}
cfg, err := c1.Operator().RaftGetConfiguration(nil)
if err != nil {
t.Fatalf("err: %v", err)
}
if len(cfg.Servers) != 1 {
t.Fatalf("more than 1 server left: %+v", cfg.Servers)
}
} }
func TestAPI_OperatorRaftLeaderTransfer(t *testing.T) { func TestAPI_OperatorRaftLeaderTransfer(t *testing.T) {
t.Parallel() t.Parallel()
c, s := makeClient(t) c1, s1 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) {
defer s.Stop() if conf.Autopilot == nil {
conf.Autopilot = &testutil.TestAutopilotConfig{}
}
conf.Autopilot.ServerStabilizationTime = "1ms"
})
defer s1.Stop()
// If we get this error, it proves we sent the address all the way _, s2 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) {
// through. conf.Server = true
operator := c.Operator() conf.Bootstrap = false
transfer, err := operator.RaftLeaderTransfer(nil) conf.RetryJoin = []string{s1.LANAddr}
if err == nil || !strings.Contains(err.Error(), if conf.Autopilot == nil {
"cannot find peer") { conf.Autopilot = &testutil.TestAutopilotConfig{}
}
conf.Autopilot.ServerStabilizationTime = "1ms"
})
defer s2.Stop()
s2.WaitForVoting(t)
cfg, err := c1.Operator().RaftGetConfiguration(nil)
if err != nil {
t.Fatalf("err: %v", err) t.Fatalf("err: %v", err)
} }
if transfer != nil { if len(cfg.Servers) != 2 {
t.Fatalf("err:%v", transfer) t.Fatalf("not 2 servers: %#v", cfg.Servers)
}
var leaderID string
for _, srv := range cfg.Servers {
if srv.Leader {
leaderID = srv.ID
}
}
if leaderID == "" {
t.Fatalf("no leader: %+v", cfg.Servers)
}
transfer, err := c1.Operator().RaftLeaderTransfer("", nil)
if err != nil {
t.Fatalf("err: %v", err)
}
if !transfer.Success {
t.Fatal("unsuccessful transfer")
}
s2.WaitForLeader(t)
cfg, err = c1.Operator().RaftGetConfiguration(nil)
if err != nil {
t.Fatalf("err: %v", err)
}
var newLeaderID string
for _, srv := range cfg.Servers {
if srv.Leader {
newLeaderID = srv.ID
}
}
if newLeaderID == "" {
t.Fatalf("no leader: %#v", cfg.Servers)
}
if newLeaderID == leaderID {
t.Fatalf("leader did not change: %v == %v", newLeaderID, leaderID)
}
_, s3 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) {
conf.Server = true
conf.Bootstrap = false
conf.RetryJoin = []string{s1.LANAddr, s2.LANAddr}
if conf.Autopilot == nil {
conf.Autopilot = &testutil.TestAutopilotConfig{}
}
conf.Autopilot.ServerStabilizationTime = "1ms"
})
defer s3.Stop()
s3.WaitForVoting(t)
// Transfer it to another member
transfer, err = c1.Operator().RaftLeaderTransfer(s3.Config.NodeID, nil)
if err != nil {
t.Fatalf("err: %v", err)
}
if !transfer.Success {
t.Fatal("unsuccessful transfer")
}
s3.WaitForLeader(t)
cfg, err = c1.Operator().RaftGetConfiguration(nil)
if err != nil {
t.Fatalf("err: %v", err)
}
newLeaderID = ""
for _, srv := range cfg.Servers {
if srv.Leader {
newLeaderID = srv.ID
}
}
if newLeaderID == "" {
t.Fatalf("no leader: %#v", cfg.Servers)
}
if newLeaderID != s3.Config.NodeID {
t.Fatalf("leader is not s3: %v != %v", newLeaderID, s3.Config.NodeID)
} }
} }

View File

@ -53,7 +53,7 @@ func (c *cmd) Run(args []string) int {
} }
// Fetch the current configuration. // Fetch the current configuration.
result, err := raftTransferLeader(client, c.http.Stale()) result, err := raftTransferLeader(client, c.http.Stale(), c.id)
if err != nil { if err != nil {
c.UI.Error(fmt.Sprintf("Error transfering leadership: %v", err)) c.UI.Error(fmt.Sprintf("Error transfering leadership: %v", err))
return 1 return 1
@ -63,11 +63,11 @@ func (c *cmd) Run(args []string) int {
return 0 return 0
} }
func raftTransferLeader(client *api.Client, stale bool) (string, error) { func raftTransferLeader(client *api.Client, stale bool, id string) (string, error) {
q := &api.QueryOptions{ q := &api.QueryOptions{
AllowStale: stale, AllowStale: stale,
} }
reply, err := client.Operator().RaftLeaderTransfer(q) reply, err := client.Operator().RaftLeaderTransfer(id, q)
if err != nil { if err != nil {
return "", fmt.Errorf("Failed to transfer leadership %w", err) return "", fmt.Errorf("Failed to transfer leadership %w", err)
} }

View File

@ -86,6 +86,11 @@ type Locality struct {
Zone string `json:"zone"` Zone string `json:"zone"`
} }
// TestAutopilotConfig contains the configuration for autopilot.
// It is attached to TestServerConfig through its Autopilot field, which is
// serialized under the "autopilot" JSON key and omitted when nil.
type TestAutopilotConfig struct {
// ServerStabilizationTime is passed through as a string under the
// "server_stabilization_time" key and omitted when empty. The API tests in
// this change set it to "1ms" before calling WaitForVoting.
// NOTE(review): presumably this shortens how long a newly joined server
// waits before becoming a voter; confirm against the autopilot docs.
ServerStabilizationTime string `json:"server_stabilization_time,omitempty"`
}
// TestServerConfig is the main server configuration struct. // TestServerConfig is the main server configuration struct.
type TestServerConfig struct { type TestServerConfig struct {
NodeName string `json:"node_name"` NodeName string `json:"node_name"`
@ -123,6 +128,7 @@ type TestServerConfig struct {
EnableDebug bool `json:"enable_debug,omitempty"` EnableDebug bool `json:"enable_debug,omitempty"`
SkipLeaveOnInt bool `json:"skip_leave_on_interrupt"` SkipLeaveOnInt bool `json:"skip_leave_on_interrupt"`
Peering *TestPeeringConfig `json:"peering,omitempty"` Peering *TestPeeringConfig `json:"peering,omitempty"`
Autopilot *TestAutopilotConfig `json:"autopilot,omitempty"`
ReadyTimeout time.Duration `json:"-"` ReadyTimeout time.Duration `json:"-"`
StopTimeout time.Duration `json:"-"` StopTimeout time.Duration `json:"-"`
Stdout io.Writer `json:"-"` Stdout io.Writer `json:"-"`
@ -260,6 +266,7 @@ type TestServer struct {
HTTPSAddr string HTTPSAddr string
LANAddr string LANAddr string
WANAddr string WANAddr string
ServerAddr string
GRPCAddr string GRPCAddr string
GRPCTLSAddr string GRPCTLSAddr string
@ -344,6 +351,7 @@ func NewTestServerConfigT(t TestingTB, cb ServerConfigCallback) (*TestServer, er
HTTPSAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.HTTPS), HTTPSAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.HTTPS),
LANAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.SerfLan), LANAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.SerfLan),
WANAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.SerfWan), WANAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.SerfWan),
ServerAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.Server),
GRPCAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.GRPC), GRPCAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.GRPC),
GRPCTLSAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.GRPCTLS), GRPCTLSAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.GRPCTLS),
@ -442,7 +450,7 @@ func (s *TestServer) waitForAPI() error {
return nil return nil
} }
// waitForLeader waits for the Consul server's HTTP API to become // WaitForLeader waits for the Consul server's HTTP API to become
// available, and then waits for a known leader and an index of // available, and then waits for a known leader and an index of
// 2 or more to be observed to confirm leader election is done. // 2 or more to be observed to confirm leader election is done.
func (s *TestServer) WaitForLeader(t testing.TB) { func (s *TestServer) WaitForLeader(t testing.TB) {
@ -472,6 +480,49 @@ func (s *TestServer) WaitForLeader(t testing.TB) {
}) })
} }
// WaitForVoting blocks until this server shows up as a voter in the raft
// configuration served by /v1/operator/raft/configuration, re-querying via
// retry.Run until it does. You probably want to lower the
// ServerStabilizationTime autopilot configuration, otherwise this could take
// 10 seconds.
func (s *TestServer) WaitForVoting(t testing.TB) {
	// Only the fields needed to locate this server and read its voter flag
	// are declared; everything else in the response is ignored on decode.
	type raftServer struct {
		ID    string
		Voter bool
	}
	type raftCfgResponse struct {
		Servers []raftServer
	}

	retry.Run(t, func(r *retry.R) {
		// Fetch the current raft configuration from this server's HTTP API.
		endpoint := s.url("/v1/operator/raft/configuration")
		resp, err := s.privilegedGet(endpoint)
		if err != nil {
			r.Fatalf("failed http get '%s': %v", endpoint, err)
		}
		defer resp.Body.Close()

		if okErr := s.requireOK(resp); okErr != nil {
			r.Fatalf("failed OK response: %v", okErr)
		}

		var raftCfg raftCfgResponse
		if decErr := json.NewDecoder(resp.Body).Decode(&raftCfg); decErr != nil {
			r.Fatal(decErr)
		}

		// Only the first entry whose ID matches this node is consulted.
		voting := false
		for _, member := range raftCfg.Servers {
			if member.ID != s.Config.NodeID {
				continue
			}
			voting = member.Voter
			break
		}
		if !voting {
			// Fail this attempt so retry.Run polls again.
			r.Fatalf("Server is not voting: %#v", raftCfg.Servers)
		}
	})
}
// WaitForActiveCARoot waits until the server can return a Connect CA meaning // WaitForActiveCARoot waits until the server can return a Connect CA meaning
// connect has completed bootstrapping and is ready to use. // connect has completed bootstrapping and is ready to use.
func (s *TestServer) WaitForActiveCARoot(t testing.TB) { func (s *TestServer) WaitForActiveCARoot(t testing.TB) {