consul operator raft transfer-leader should send the id (#17107)

Fixes #16955

Co-authored-by: Dhia Ayachi <dhia@hashicorp.com>
This commit is contained in:
James Hartig 2023-09-15 14:38:59 -04:00 committed by GitHub
parent aff13cd4c2
commit b2e21c103f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 232 additions and 24 deletions

3
.changelog/17107.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:breaking-change
api: RaftLeaderTransfer now requires an id string. An empty string can be specified to keep the old behavior.
```

View File

@ -68,9 +68,14 @@ func (op *Operator) RaftGetConfiguration(q *QueryOptions) (*RaftConfiguration, e
}
// RaftLeaderTransfer is used to transfer the current raft leader to another node
func (op *Operator) RaftLeaderTransfer(q *QueryOptions) (*TransferLeaderResponse, error) {
// Optionally accepts a non-empty id of another node to transfer leadership to.
func (op *Operator) RaftLeaderTransfer(id string, q *QueryOptions) (*TransferLeaderResponse, error) {
r := op.c.newRequest("POST", "/v1/operator/raft/transfer-leader")
r.setQueryOptions(q)
if id != "" {
r.params.Set("id", id)
}
_, resp, err := op.c.doRequest(r)
if err != nil {
return nil, err

View File

@ -4,8 +4,9 @@
package api
import (
"strings"
"testing"
"github.com/hashicorp/consul/sdk/testutil"
)
func TestAPI_OperatorRaftGetConfiguration(t *testing.T) {
@ -27,33 +28,181 @@ func TestAPI_OperatorRaftGetConfiguration(t *testing.T) {
func TestAPI_OperatorRaftRemovePeerByAddress(t *testing.T) {
t.Parallel()
c, s := makeClient(t)
defer s.Stop()
c1, s1 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) {
if conf.Autopilot == nil {
conf.Autopilot = &testutil.TestAutopilotConfig{}
}
conf.Autopilot.ServerStabilizationTime = "1ms"
})
defer s1.Stop()
// If we get this error, it proves we sent the address all the way
// through.
operator := c.Operator()
err := operator.RaftRemovePeerByAddress("nope", nil)
if err == nil || !strings.Contains(err.Error(),
"address \"nope\" was not found in the Raft configuration") {
_, s2 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) {
conf.Server = true
conf.Bootstrap = false
conf.RetryJoin = []string{s1.LANAddr}
if conf.Autopilot == nil {
conf.Autopilot = &testutil.TestAutopilotConfig{}
}
conf.Autopilot.ServerStabilizationTime = "1ms"
})
defer s2.Stop()
s2.WaitForVoting(t)
operator := c1.Operator()
err := operator.RaftRemovePeerByAddress(s2.ServerAddr, nil)
if err != nil {
t.Fatalf("err: %v", err)
}
cfg, err := c1.Operator().RaftGetConfiguration(nil)
if err != nil {
t.Fatalf("err: %v", err)
}
if len(cfg.Servers) != 1 {
t.Fatalf("more than 1 server left: %+v", cfg.Servers)
}
}
func TestAPI_OperatorRaftRemovePeerByID(t *testing.T) {
t.Parallel()
c1, s1 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) {
if conf.Autopilot == nil {
conf.Autopilot = &testutil.TestAutopilotConfig{}
}
conf.Autopilot.ServerStabilizationTime = "1ms"
})
defer s1.Stop()
_, s2 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) {
conf.Server = true
conf.Bootstrap = false
conf.RetryJoin = []string{s1.LANAddr}
if conf.Autopilot == nil {
conf.Autopilot = &testutil.TestAutopilotConfig{}
}
conf.Autopilot.ServerStabilizationTime = "1ms"
})
defer s2.Stop()
s2.WaitForVoting(t)
operator := c1.Operator()
err := operator.RaftRemovePeerByID(s2.Config.NodeID, nil)
if err != nil {
t.Fatalf("err: %v", err)
}
cfg, err := c1.Operator().RaftGetConfiguration(nil)
if err != nil {
t.Fatalf("err: %v", err)
}
if len(cfg.Servers) != 1 {
t.Fatalf("more than 1 server left: %+v", cfg.Servers)
}
}
func TestAPI_OperatorRaftLeaderTransfer(t *testing.T) {
t.Parallel()
c, s := makeClient(t)
defer s.Stop()
c1, s1 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) {
if conf.Autopilot == nil {
conf.Autopilot = &testutil.TestAutopilotConfig{}
}
conf.Autopilot.ServerStabilizationTime = "1ms"
})
defer s1.Stop()
// If we get this error, it proves we sent the address all the way
// through.
operator := c.Operator()
transfer, err := operator.RaftLeaderTransfer(nil)
if err == nil || !strings.Contains(err.Error(),
"cannot find peer") {
_, s2 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) {
conf.Server = true
conf.Bootstrap = false
conf.RetryJoin = []string{s1.LANAddr}
if conf.Autopilot == nil {
conf.Autopilot = &testutil.TestAutopilotConfig{}
}
conf.Autopilot.ServerStabilizationTime = "1ms"
})
defer s2.Stop()
s2.WaitForVoting(t)
cfg, err := c1.Operator().RaftGetConfiguration(nil)
if err != nil {
t.Fatalf("err: %v", err)
}
if transfer != nil {
t.Fatalf("err:%v", transfer)
if len(cfg.Servers) != 2 {
t.Fatalf("not 2 servers: %#v", cfg.Servers)
}
var leaderID string
for _, srv := range cfg.Servers {
if srv.Leader {
leaderID = srv.ID
}
}
if leaderID == "" {
t.Fatalf("no leader: %+v", cfg.Servers)
}
transfer, err := c1.Operator().RaftLeaderTransfer("", nil)
if err != nil {
t.Fatalf("err: %v", err)
}
if !transfer.Success {
t.Fatal("unsuccessful transfer")
}
s2.WaitForLeader(t)
cfg, err = c1.Operator().RaftGetConfiguration(nil)
if err != nil {
t.Fatalf("err: %v", err)
}
var newLeaderID string
for _, srv := range cfg.Servers {
if srv.Leader {
newLeaderID = srv.ID
}
}
if newLeaderID == "" {
t.Fatalf("no leader: %#v", cfg.Servers)
}
if newLeaderID == leaderID {
t.Fatalf("leader did not change: %v == %v", newLeaderID, leaderID)
}
_, s3 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) {
conf.Server = true
conf.Bootstrap = false
conf.RetryJoin = []string{s1.LANAddr, s2.LANAddr}
if conf.Autopilot == nil {
conf.Autopilot = &testutil.TestAutopilotConfig{}
}
conf.Autopilot.ServerStabilizationTime = "1ms"
})
defer s3.Stop()
s3.WaitForVoting(t)
// Transfer it to another member
transfer, err = c1.Operator().RaftLeaderTransfer(s3.Config.NodeID, nil)
if err != nil {
t.Fatalf("err: %v", err)
}
if !transfer.Success {
t.Fatal("unsuccessful transfer")
}
s3.WaitForLeader(t)
cfg, err = c1.Operator().RaftGetConfiguration(nil)
if err != nil {
t.Fatalf("err: %v", err)
}
newLeaderID = ""
for _, srv := range cfg.Servers {
if srv.Leader {
newLeaderID = srv.ID
}
}
if newLeaderID == "" {
t.Fatalf("no leader: %#v", cfg.Servers)
}
if newLeaderID != s3.Config.NodeID {
t.Fatalf("leader is not s3: %v != %v", newLeaderID, s3.Config.NodeID)
}
}

View File

@ -53,7 +53,7 @@ func (c *cmd) Run(args []string) int {
}
// Fetch the current configuration.
result, err := raftTransferLeader(client, c.http.Stale())
result, err := raftTransferLeader(client, c.http.Stale(), c.id)
if err != nil {
c.UI.Error(fmt.Sprintf("Error transfering leadership: %v", err))
return 1
@ -63,11 +63,11 @@ func (c *cmd) Run(args []string) int {
return 0
}
func raftTransferLeader(client *api.Client, stale bool) (string, error) {
func raftTransferLeader(client *api.Client, stale bool, id string) (string, error) {
q := &api.QueryOptions{
AllowStale: stale,
}
reply, err := client.Operator().RaftLeaderTransfer(q)
reply, err := client.Operator().RaftLeaderTransfer(id, q)
if err != nil {
return "", fmt.Errorf("Failed to transfer leadership %w", err)
}

View File

@ -86,6 +86,11 @@ type Locality struct {
Zone string `json:"zone"`
}
// TestAutopilotConfig contains the configuration for autopilot.
type TestAutopilotConfig struct {
ServerStabilizationTime string `json:"server_stabilization_time,omitempty"`
}
// TestServerConfig is the main server configuration struct.
type TestServerConfig struct {
NodeName string `json:"node_name"`
@ -123,6 +128,7 @@ type TestServerConfig struct {
EnableDebug bool `json:"enable_debug,omitempty"`
SkipLeaveOnInt bool `json:"skip_leave_on_interrupt"`
Peering *TestPeeringConfig `json:"peering,omitempty"`
Autopilot *TestAutopilotConfig `json:"autopilot,omitempty"`
ReadyTimeout time.Duration `json:"-"`
StopTimeout time.Duration `json:"-"`
Stdout io.Writer `json:"-"`
@ -260,6 +266,7 @@ type TestServer struct {
HTTPSAddr string
LANAddr string
WANAddr string
ServerAddr string
GRPCAddr string
GRPCTLSAddr string
@ -344,6 +351,7 @@ func NewTestServerConfigT(t TestingTB, cb ServerConfigCallback) (*TestServer, er
HTTPSAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.HTTPS),
LANAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.SerfLan),
WANAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.SerfWan),
ServerAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.Server),
GRPCAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.GRPC),
GRPCTLSAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.GRPCTLS),
@ -442,7 +450,7 @@ func (s *TestServer) waitForAPI() error {
return nil
}
// waitForLeader waits for the Consul server's HTTP API to become
// WaitForLeader waits for the Consul server's HTTP API to become
// available, and then waits for a known leader and an index of
// 2 or more to be observed to confirm leader election is done.
func (s *TestServer) WaitForLeader(t testing.TB) {
@ -472,6 +480,49 @@ func (s *TestServer) WaitForLeader(t testing.TB) {
})
}
// WaitForVoting waits for the Consul server to become a voter in the current raft
// configuration. You probably want to adjust the ServerStablizationTime autopilot
// configuration otherwise this could take 10 seconds.
func (s *TestServer) WaitForVoting(t testing.TB) {
// don't need to fully decode the response
type raftServer struct {
ID string
Voter bool
}
type raftCfgResponse struct {
Servers []raftServer
}
retry.Run(t, func(r *retry.R) {
// Query the API and get the current raft configuration.
url := s.url("/v1/operator/raft/configuration")
resp, err := s.privilegedGet(url)
if err != nil {
r.Fatalf("failed http get '%s': %v", url, err)
}
defer resp.Body.Close()
if err := s.requireOK(resp); err != nil {
r.Fatalf("failed OK response: %v", err)
}
var cfg raftCfgResponse
dec := json.NewDecoder(resp.Body)
if err := dec.Decode(&cfg); err != nil {
r.Fatal(err)
}
for _, srv := range cfg.Servers {
if srv.ID == s.Config.NodeID {
if srv.Voter {
return
}
break
}
}
r.Fatalf("Server is not voting: %#v", cfg.Servers)
})
}
// WaitForActiveCARoot waits until the server can return a Connect CA meaning
// connect has completed bootstrapping and is ready to use.
func (s *TestServer) WaitForActiveCARoot(t testing.TB) {