2016-08-29 19:09:57 -07:00
|
|
|
package structs
|
|
|
|
|
|
|
|
import (
|
2017-03-01 14:04:40 -08:00
|
|
|
"time"
|
|
|
|
|
2016-08-29 19:09:57 -07:00
|
|
|
"github.com/hashicorp/raft"
|
2017-03-01 14:04:40 -08:00
|
|
|
"github.com/hashicorp/serf/serf"
|
2016-08-29 19:09:57 -07:00
|
|
|
)
|
|
|
|
|
2017-03-08 11:31:32 -08:00
|
|
|
// AutopilotConfig holds the Autopilot configuration for a cluster.
|
2017-02-23 20:32:13 -08:00
|
|
|
type AutopilotConfig struct {
|
2017-02-28 14:23:14 -08:00
|
|
|
// CleanupDeadServers controls whether to remove dead servers when a new
|
2017-03-01 14:04:40 -08:00
|
|
|
// server is added to the Raft peers.
|
2017-02-28 14:23:14 -08:00
|
|
|
CleanupDeadServers bool
|
2017-02-24 13:08:49 -08:00
|
|
|
|
2017-03-01 14:04:40 -08:00
|
|
|
// LastContactThreshold is the limit on the amount of time a server can go
|
|
|
|
// without leader contact before being considered unhealthy.
|
|
|
|
LastContactThreshold time.Duration
|
|
|
|
|
|
|
|
// MaxTrailingLogs is the amount of entries in the Raft Log that a server can
|
|
|
|
// be behind before being considered unhealthy.
|
|
|
|
MaxTrailingLogs uint64
|
|
|
|
|
|
|
|
// ServerStabilizationTime is the minimum amount of time a server must be
|
|
|
|
// in a stable, healthy state before it can be added to the cluster. Only
|
|
|
|
// applicable with Raft protocol version 3 or higher.
|
|
|
|
ServerStabilizationTime time.Duration
|
|
|
|
|
2017-03-21 16:36:44 -07:00
|
|
|
// (Enterprise-only) RedundancyZoneTag is the node tag to use for separating
|
|
|
|
// servers into zones for redundancy. If left blank, this feature will be disabled.
|
|
|
|
RedundancyZoneTag string
|
|
|
|
|
|
|
|
// (Enterprise-only) DisableUpgradeMigration will disable Autopilot's upgrade migration
|
|
|
|
// strategy of waiting until enough newer-versioned servers have been added to the
|
|
|
|
// cluster before promoting them to voters.
|
|
|
|
DisableUpgradeMigration bool
|
|
|
|
|
2017-07-17 19:05:24 -07:00
|
|
|
// (Enterprise-only) UpgradeVersionTag is the node tag to use for version info when
|
|
|
|
// performing upgrade migrations. If left blank, the Consul version will be used.
|
|
|
|
UpgradeVersionTag string
|
|
|
|
|
2017-03-01 14:04:40 -08:00
|
|
|
// RaftIndex stores the create/modify indexes of this configuration.
|
2017-02-24 13:08:49 -08:00
|
|
|
RaftIndex
|
2017-02-23 20:32:13 -08:00
|
|
|
}
|
|
|
|
|
2016-08-30 11:30:56 -07:00
|
|
|
// RaftServer has information about a server in the Raft configuration.
|
|
|
|
type RaftServer struct {
|
|
|
|
// ID is the unique ID for the server. These are currently the same
|
|
|
|
// as the address, but they will be changed to a real GUID in a future
|
|
|
|
// release of Consul.
|
|
|
|
ID raft.ServerID
|
|
|
|
|
|
|
|
// Node is the node name of the server, as known by Consul, or this
|
|
|
|
// will be set to "(unknown)" otherwise.
|
|
|
|
Node string
|
|
|
|
|
|
|
|
// Address is the IP:port of the server, used for Raft communications.
|
|
|
|
Address raft.ServerAddress
|
|
|
|
|
|
|
|
// Leader is true if this server is the current cluster leader.
|
|
|
|
Leader bool
|
|
|
|
|
|
|
|
// Voter is true if this server has a vote in the cluster. This might
|
|
|
|
// be false if the server is staging and still coming online, or if
|
|
|
|
// it's a non-voting server, which will be added in a future release of
|
|
|
|
// Consul.
|
|
|
|
Voter bool
|
|
|
|
}
|
|
|
|
|
2016-08-29 19:09:57 -07:00
|
|
|
// RaftConfigrationResponse is returned when querying for the current Raft
|
2016-08-30 11:30:56 -07:00
|
|
|
// configuration.
|
2016-08-29 19:09:57 -07:00
|
|
|
type RaftConfigurationResponse struct {
|
2016-08-30 11:30:56 -07:00
|
|
|
// Servers has the list of servers in the Raft configuration.
|
|
|
|
Servers []*RaftServer
|
|
|
|
|
|
|
|
// Index has the Raft index of this configuration.
|
|
|
|
Index uint64
|
2016-08-29 19:09:57 -07:00
|
|
|
}
|
|
|
|
|
2017-03-29 18:09:41 -07:00
|
|
|
// RaftRemovePeerRequest is used by the Operator endpoint to apply a Raft
|
2016-08-29 19:09:57 -07:00
|
|
|
// operation on a specific Raft peer by address in the form of "IP:port".
|
2017-03-29 18:09:41 -07:00
|
|
|
type RaftRemovePeerRequest struct {
|
2016-08-29 19:09:57 -07:00
|
|
|
// Datacenter is the target this request is intended for.
|
|
|
|
Datacenter string
|
|
|
|
|
|
|
|
// Address is the peer to remove, in the form "IP:port".
|
|
|
|
Address raft.ServerAddress
|
|
|
|
|
2017-03-29 18:09:41 -07:00
|
|
|
// ID is the peer ID to remove.
|
|
|
|
ID raft.ServerID
|
|
|
|
|
2016-08-29 19:09:57 -07:00
|
|
|
// WriteRequest holds the ACL token to go along with this request.
|
|
|
|
WriteRequest
|
|
|
|
}
|
|
|
|
|
|
|
|
// RequestDatacenter returns the datacenter for a given request.
|
2017-03-29 18:09:41 -07:00
|
|
|
func (op *RaftRemovePeerRequest) RequestDatacenter() string {
|
2016-08-29 19:09:57 -07:00
|
|
|
return op.Datacenter
|
|
|
|
}
|
2017-02-23 20:32:13 -08:00
|
|
|
|
|
|
|
// AutopilotSetConfigRequest is used by the Operator endpoint to update the
|
|
|
|
// current Autopilot configuration of the cluster.
|
|
|
|
type AutopilotSetConfigRequest struct {
|
|
|
|
// Datacenter is the target this request is intended for.
|
|
|
|
Datacenter string
|
|
|
|
|
|
|
|
// Config is the new Autopilot configuration to use.
|
|
|
|
Config AutopilotConfig
|
|
|
|
|
2017-02-24 13:08:49 -08:00
|
|
|
// CAS controls whether to use check-and-set semantics for this request.
|
|
|
|
CAS bool
|
|
|
|
|
2017-02-23 20:32:13 -08:00
|
|
|
// WriteRequest holds the ACL token to go along with this request.
|
|
|
|
WriteRequest
|
|
|
|
}
|
|
|
|
|
|
|
|
// RequestDatacenter returns the datacenter for a given request.
|
|
|
|
func (op *AutopilotSetConfigRequest) RequestDatacenter() string {
|
|
|
|
return op.Datacenter
|
|
|
|
}
|
2017-03-01 14:04:40 -08:00
|
|
|
|
|
|
|
// ServerHealth is the health (from the leader's point of view) of a server.
|
|
|
|
type ServerHealth struct {
|
|
|
|
// ID is the raft ID of the server.
|
|
|
|
ID string
|
|
|
|
|
|
|
|
// Name is the node name of the server.
|
|
|
|
Name string
|
|
|
|
|
2017-03-15 18:27:17 -07:00
|
|
|
// Address is the address of the server.
|
|
|
|
Address string
|
|
|
|
|
2017-03-01 14:04:40 -08:00
|
|
|
// The status of the SerfHealth check for the server.
|
2017-03-09 16:43:07 -08:00
|
|
|
SerfStatus serf.MemberStatus
|
2017-03-01 14:04:40 -08:00
|
|
|
|
2017-03-21 16:36:44 -07:00
|
|
|
// Version is the Consul version of the server.
|
|
|
|
Version string
|
|
|
|
|
|
|
|
// Leader is whether this server is currently the leader.
|
|
|
|
Leader bool
|
|
|
|
|
2017-03-01 14:04:40 -08:00
|
|
|
// LastContact is the time since this node's last contact with the leader.
|
2017-03-09 16:43:07 -08:00
|
|
|
LastContact time.Duration
|
2017-03-01 14:04:40 -08:00
|
|
|
|
|
|
|
// LastTerm is the highest leader term this server has a record of in its Raft log.
|
|
|
|
LastTerm uint64
|
|
|
|
|
|
|
|
// LastIndex is the last log index this server has a record of in its Raft log.
|
|
|
|
LastIndex uint64
|
|
|
|
|
|
|
|
// Healthy is whether or not the server is healthy according to the current
|
|
|
|
// Autopilot config.
|
|
|
|
Healthy bool
|
|
|
|
|
2017-03-15 16:09:55 -07:00
|
|
|
// Voter is whether this is a voting server.
|
|
|
|
Voter bool
|
|
|
|
|
2017-03-09 16:43:07 -08:00
|
|
|
// StableSince is the last time this server's Healthy value changed.
|
2017-03-01 14:04:40 -08:00
|
|
|
StableSince time.Time
|
|
|
|
}
|
|
|
|
|
2017-03-09 16:43:07 -08:00
|
|
|
// IsHealthy determines whether this ServerHealth is considered healthy
|
|
|
|
// based on the given Autopilot config
|
2017-03-19 20:48:42 -07:00
|
|
|
func (h *ServerHealth) IsHealthy(lastTerm uint64, leaderLastIndex uint64, autopilotConf *AutopilotConfig) bool {
|
2017-03-09 16:43:07 -08:00
|
|
|
if h.SerfStatus != serf.StatusAlive {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
if h.LastContact > autopilotConf.LastContactThreshold || h.LastContact < 0 {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
if h.LastTerm != lastTerm {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2017-03-19 20:48:42 -07:00
|
|
|
if leaderLastIndex > autopilotConf.MaxTrailingLogs && h.LastIndex < leaderLastIndex-autopilotConf.MaxTrailingLogs {
|
2017-03-09 16:43:07 -08:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
// IsStable returns true if the ServerHealth is in a stable, passing state
|
|
|
|
// according to the given AutopilotConfig
|
|
|
|
func (h *ServerHealth) IsStable(now time.Time, conf *AutopilotConfig) bool {
|
|
|
|
if h == nil {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
if !h.Healthy {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
if now.Sub(h.StableSince) < conf.ServerStabilizationTime {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2017-03-07 13:58:06 -08:00
|
|
|
// ServerStats carries miscellaneous Raft metrics for a server.
type ServerStats struct {
	// LastContact is how long it has been since this node last had
	// contact with the leader.
	LastContact string

	// LastTerm is the highest leader term recorded in this server's Raft
	// log.
	LastTerm uint64

	// LastIndex is the last log index recorded in this server's Raft log.
	LastIndex uint64
}
|
|
|
|
|
2017-03-01 14:04:40 -08:00
|
|
|
// OperatorHealthReply is a representation of the overall health of the cluster
|
|
|
|
type OperatorHealthReply struct {
|
|
|
|
// Healthy is true if all the servers in the cluster are healthy.
|
|
|
|
Healthy bool
|
|
|
|
|
|
|
|
// FailureTolerance is the number of healthy servers that could be lost without
|
|
|
|
// an outage occurring.
|
|
|
|
FailureTolerance int
|
|
|
|
|
|
|
|
// Servers holds the health of each server.
|
|
|
|
Servers []ServerHealth
|
|
|
|
}
|