package autopilot

import (
	"context"
	"fmt"
	"sort"
	"time"

	"github.com/hashicorp/raft"
)

// aliveServers will filter the input map of servers and output one with all of
// the servers in a Left state removed.
func aliveServers(servers map[raft.ServerID]*Server) map[raft.ServerID]*Server {
	serverMap := make(map[raft.ServerID]*Server)
	for _, server := range servers {
		if server.NodeStatus == NodeLeft {
			continue
		}

		serverMap[server.ID] = server
	}

	return serverMap
}

// nextStateInputs is the collection of values that can influence
// creation of the next State.
type nextStateInputs struct {
	Now            time.Time
	FirstStateTime time.Time
	Config         *Config
	RaftConfig     *raft.Configuration
	KnownServers   map[raft.ServerID]*Server
	LatestIndex    uint64
	LastTerm       uint64
	FetchedStats   map[raft.ServerID]*ServerStats
	LeaderID       raft.ServerID
}

// gatherNextStateInputs gathers all the information used to create the next
// updated state:
//
// - the time provider's current time
// - the Autopilot config (needed to determine whether the stats should
//   indicate unhealthiness)
// - the current state
// - the Raft configuration
// - the known servers
// - the latest raft index (gathered right before the remote server stats so
//   that both are from about the same point in time)
// - stats for all non-left servers
func (a *Autopilot) gatherNextStateInputs(ctx context.Context) (*nextStateInputs, error) {
	// There are a lot of inputs to computing the next state, so they get put
	// into a struct to avoid returning 8 separate values.

	now := a.time.Now()

	// We need to pull the previous state's knowledge of the first time a
	// state was generated. This is really only important for when autopilot
	// is first started. We will use the first state's time when determining
	// if a server is stable. Under normal circumstances we just need to check
	// that the current time minus the server's StableSince time is greater
	// than the configured stabilization time. However, while autopilot has
	// been running for less time than the stabilization time, we need to
	// consider all servers as stable to prevent unnecessary leader elections.
	// Therefore it's important to track the first time a state was generated
	// so we know whether we have a state old enough where there is any chance
	// of seeing servers as stable based on that configured threshold.
	var firstStateTime time.Time
	a.stateLock.Lock()
	if a.state != nil {
		firstStateTime = a.state.firstStateTime
	}
	a.stateLock.Unlock()

	// firstStateTime will be the zero value if we are in the process of
	// generating the first state. In that case we set it to the now time.
	if firstStateTime.IsZero() {
		firstStateTime = now
	}

	inputs := &nextStateInputs{
		Now:            now,
		FirstStateTime: firstStateTime,
	}

	// grab the latest autopilot configuration
	config := a.delegate.AutopilotConfig()
	if config == nil {
		return nil, fmt.Errorf("delegate did not return an Autopilot configuration")
	}
	inputs.Config = config

	// retrieve the raft configuration
	raftConfig, err := a.getRaftConfiguration()
	if err != nil {
		return nil, fmt.Errorf("failed to get the Raft configuration: %w", err)
	}
	inputs.RaftConfig = raftConfig

	// get the known servers, which may include left/failed ones
	inputs.KnownServers = a.delegate.KnownServers()

	// Try to retrieve the leader id from the delegate.
	for id, srv := range inputs.KnownServers {
		if srv.IsLeader {
			inputs.LeaderID = id
			break
		}
	}
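	// As a hedged sketch (hypothetical code, not part of this package), a
	// delegate can surface the leader through the servers it returns from
	// KnownServers, for example:
	//
	//	func (d *myDelegate) KnownServers() map[raft.ServerID]*autopilot.Server {
	//		servers := d.buildServerMap() // hypothetical helper
	//		leaderAddr := d.raft.Leader()
	//		for _, srv := range servers {
	//			srv.IsLeader = srv.Address == leaderAddr
	//		}
	//		return servers
	//	}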
	// Having the delegate set the leader information is optional. If leader
	// detection was not successful, fall back on the raft configuration to
	// find the leader by its address.
	if inputs.LeaderID == "" {
		leader := a.raft.Leader()
		for _, s := range inputs.RaftConfig.Servers {
			if s.Address == leader {
				inputs.LeaderID = s.ID
				break
			}
		}

		if inputs.LeaderID == "" {
			return nil, fmt.Errorf("cannot detect the current leader server id from its address: %s", leader)
		}
	}

	// Get the latest Raft index. This should be kept close to the call that
	// fetches the statistics so that the index values are as close in time as
	// possible, making for the best decision regarding an individual server's
	// healthiness.
	inputs.LatestIndex = a.raft.LastIndex()

	term, err := a.lastTerm()
	if err != nil {
		return nil, fmt.Errorf("failed to determine the last Raft term: %w", err)
	}
	inputs.LastTerm = term

	// Getting the raft configuration could have blocked for a while, and in
	// most cases getting the known servers should be quick, but as we cannot
	// account for every potential delegate and prevent it from making blocking
	// network requests, now is a good time to check for context cancellation
	// before the potentially expensive stats fetch below.
	if ctx.Err() != nil {
		return nil, ctx.Err()
	}

	// We only allow the fetch to take place for up to half the health interval.
	// The next health interval will attempt to fetch the stats again, but if
	// we do not see responses within this time then we can assume they are
	// unhealthy.
	d := inputs.Now.Add(a.updateInterval / 2)
	fetchCtx, cancel := context.WithDeadline(ctx, d)
	defer cancel()
	inputs.FetchedStats = a.delegate.FetchServerStats(fetchCtx, aliveServers(inputs.KnownServers))

	// The error might be nil, but we propagate ctx.Err() just in case our
	// context was cancelled since the last time we checked.
	return inputs, ctx.Err()
}

// nextState will gather many inputs about the current state of servers from
// the delegate, raft and the time provider among other sources and then
// compute the next Autopilot state.
func (a *Autopilot) nextState(ctx context.Context) (*State, error) {
	inputs, err := a.gatherNextStateInputs(ctx)
	if err != nil {
		return nil, err
	}

	state := a.nextStateWithInputs(inputs)
	if state.Leader == "" {
		return nil, fmt.Errorf("unable to detect the leader server")
	}

	return state, nil
}

// nextStateWithInputs computes the next state given pre-gathered inputs.
func (a *Autopilot) nextStateWithInputs(inputs *nextStateInputs) *State {
	nextServers := a.nextServers(inputs)

	// We record the firstStateTime so that we can ignore the server
	// stabilization time until the time we generated the first state is far
	// enough in the past. Until that point in time all servers are considered
	// stable.
	newState := &State{
		firstStateTime: inputs.FirstStateTime,
		Healthy:        true,
		Servers:        nextServers,
	}

	voterCount := 0
	healthyVoters := 0

	// This loop will:
	// 1. Determine the ID of the leader server and set it in the state
	// 2. Count the number of voters in the cluster
	// 3. Count the number of healthy voters in the cluster
	// 4. Detect unhealthy servers and mark the overall health as false
	for id, srv := range nextServers {
		if !srv.Health.Healthy {
			// any unhealthiness results in overall unhealthiness
			newState.Healthy = false
		}

		switch srv.State {
		case RaftLeader:
			newState.Leader = id
			fallthrough
		case RaftVoter:
			newState.Voters = append(newState.Voters, id)
			voterCount++

			if srv.Health.Healthy {
				healthyVoters++
			}
		}
	}

	// If we have extra healthy voters, update FailureTolerance from its
	// zero value in the struct.
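	// As a worked example (assuming requiredQuorum computes the usual Raft
	// majority of (voters/2)+1): with 5 voters of which 4 are healthy, the
	// required quorum is 3, so FailureTolerance becomes 4-3 = 1, meaning one
	// more healthy voter can fail before the cluster loses quorum.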
	requiredQuorum := requiredQuorum(voterCount)
	if healthyVoters > requiredQuorum {
		newState.FailureTolerance = healthyVoters - requiredQuorum
	}

	// update any promoter specific overall state
	if newExt := a.promoter.GetStateExt(inputs.Config, newState); newExt != nil {
		newState.Ext = newExt
	}

	// Update the node types. These are really informational for users to know
	// how autopilot and the associated promoter algorithms have classed each
	// server, as some promotion algorithms may want to keep certain servers as
	// non-voters for their own reasons. The node type can then be used to
	// indicate why that might be happening.
	for id, typ := range a.promoter.GetNodeTypes(inputs.Config, newState) {
		if srv, ok := newState.Servers[id]; ok {
			srv.Server.NodeType = typ
		}
	}

	// Sort the voters list to keep the output stable. This is done near the
	// end as SortServers may use other parts of the state that were created
	// in this method and populated in the newState. Requiring output stability
	// makes tests easier to manage, and it means that if you happen to be
	// dumping the state periodically you shouldn't see things change unless
	// there are real changes to server health or the overall configuration.
	SortServers(newState.Voters, newState)

	return newState
}

// nextServers will build out the servers map for the next state to be created
// from the given inputs. This will take into account all the various sources
// of partial state (current state, raft config, application known servers etc.)
// and combine them into the final server map.
func (a *Autopilot) nextServers(inputs *nextStateInputs) map[raft.ServerID]*ServerState {
	newServers := make(map[raft.ServerID]*ServerState)

	for _, srv := range inputs.RaftConfig.Servers {
		state := a.buildServerState(inputs, srv)

		// Update any promoter specific information. This isn't done within
		// buildServerState to keep that function "pure" and not require
		// mocking for tests.
		if newExt := a.promoter.GetServerExt(inputs.Config, &state); newExt != nil {
			state.Server.Ext = newExt
		}

		newServers[srv.ID] = &state
	}

	return newServers
}

// buildServerState takes all the nextStateInputs and builds out a ServerState
// for the given Raft server. This will take into account the raft
// configuration, the existing state, the application's known servers and the
// recently fetched stats.
func (a *Autopilot) buildServerState(inputs *nextStateInputs, srv raft.Server) ServerState {
	// Note that the ordering of operations in this method is very important.
	// We are building up the ServerState from the least important sources
	// and overriding them with more up to date values.

	// build the basic state from the Raft server
	state := ServerState{
		Server: Server{
			ID:      srv.ID,
			Address: srv.Address,
		},
	}

	switch srv.Suffrage {
	case raft.Voter:
		state.State = RaftVoter
	case raft.Nonvoter:
		state.State = RaftNonVoter
	case raft.Staging:
		state.State = RaftStaging
	default:
		// Should be impossible unless the constants in Raft were updated
		// to have a new state.
		// TODO (mkeeler) maybe a panic would be better here. The downside is
		// that it would be hard to catch that in tests when updating the Raft
		// version.
		state.State = RaftNone
	}

	// overwrite the raft state to mark the leader as such instead of just
	// a regular voter
	if srv.ID == inputs.LeaderID {
		state.State = RaftLeader
	}

	var previousHealthy *bool

	a.stateLock.RLock()
	// copy some state from an existing server into the new state - most of
	// this should be overridden soon, but at this point we are just building
	// the base
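	// To illustrate the layering performed from here on (the "merge"/"from"
	// helpers below are hypothetical and exist only in this comment):
	//
	//	state := fromRaftConfig(srv)        // ID, Address, suffrage (weakest source)
	//	mergePrevious(&state, existing)     // prior Stats/Health as a baseline
	//	mergeKnown(&state, known)           // delegate's NodeStatus/Meta override
	//	mergeFetched(&state, fetchedStats)  // freshly fetched Stats win (strongest)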
	if existing, found := a.state.Servers[srv.ID]; found {
		state.Stats = existing.Stats
		state.Health = existing.Health
		previousHealthy = &state.Health.Healthy

		// It is important to note that we are copying the Server here rather
		// than taking a reference, so we are modifying a copy of what is in
		// the existing state and not the actual state itself. We want to
		// ensure that the Address is what Raft will know about.
		state.Server = existing.Server
		state.Server.Address = srv.Address
	}
	a.stateLock.RUnlock()

	// Pull in the latest information from the application's knowledge of the
	// server. Mainly we want the NodeStatus & Meta.
	if known, found := inputs.KnownServers[srv.ID]; found {
		// Note that we are modifying a copy of the known Server, as we
		// dereference the pointer stored in the map. We definitely do not
		// want to modify the currently known servers, and we also want to
		// ensure that we do not overwrite the Address.
		state.Server = *known
		state.Server.Address = srv.Address
	} else {
		// TODO (mkeeler) do we need a None state? In the previous autopilot
		// code we would have set this to serf.StatusNone.
		state.Server.NodeStatus = NodeLeft
	}

	// override the Stats if any were in the fetched results
	if stats, found := inputs.FetchedStats[srv.ID]; found {
		state.Stats = *stats
	}

	// now populate the Healthy field given the stats
	state.Health.Healthy = state.isHealthy(inputs.LastTerm, inputs.LatestIndex, inputs.Config)

	// Overwrite the StableSince field if this is a new server or when the
	// health status changes. No need for an else, as we previously set it
	// when we copied the whole Health structure after finding the server in
	// the existing state.
	if previousHealthy == nil || *previousHealthy != state.Health.Healthy {
		state.Health.StableSince = inputs.Now
	}

	return state
}

// updateState will compute the nextState, set it on the Autopilot instance and
// then notify the delegate of the update.
func (a *Autopilot) updateState(ctx context.Context) {
	newState, err := a.nextState(ctx)
	if err != nil {
		a.logger.Error("Error when computing next state", "error", err)
		return
	}

	a.stateLock.Lock()
	defer a.stateLock.Unlock()
	a.state = newState
	a.delegate.NotifyState(newState)
}

// SortServers will take a list of raft ServerIDs and sort it using
// information from the State. See the ServerLessThan function for
// details about how two servers get compared.
func SortServers(ids []raft.ServerID, s *State) {
	sort.Slice(ids, func(i, j int) bool {
		return ServerLessThan(ids[i], ids[j], s)
	})
}

// ServerLessThan will look up both servers in the given State and return true
// if the first id corresponds to a server that is logically less than (sorts
// before, is preferred over) the second server. The following criteria are
// considered in order from most important to least important:
//
// 1. A leader server is always less than all others
// 2. A voter is less than non-voters
// 3. Healthy servers are less than unhealthy servers
// 4. Servers that have been stable longer are considered less than
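//
// As a worked example: given a healthy leader L, a healthy voter V, an
// unhealthy voter U and a healthy non-voter N, SortServers orders them
// [L, V, U, N]: voter status (criterion 2) outranks health (criterion 3),
// so the unhealthy voter U still sorts before the healthy non-voter N.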
func ServerLessThan(id1 raft.ServerID, id2 raft.ServerID, s *State) bool {
	srvI := s.Servers[id1]
	srvJ := s.Servers[id2]

	// the leader always comes first
	if srvI.State == RaftLeader {
		return true
	} else if srvJ.State == RaftLeader {
		return false
	}

	// voters come before non-voters & staging
	if srvI.State == RaftVoter && srvJ.State != RaftVoter {
		return true
	} else if srvI.State != RaftVoter && srvJ.State == RaftVoter {
		return false
	}

	// At this point we know that the raft state of both nodes is roughly
	// equivalent, so we want to sort based on health.
	if srvI.Health.Healthy == srvJ.Health.Healthy {
		if srvI.Health.StableSince.Before(srvJ.Health.StableSince) {
			return srvI.Health.Healthy
		} else if srvJ.Health.StableSince.Before(srvI.Health.StableSince) {
			return !srvI.Health.Healthy
		}

		// with all else equal sort by the IDs
		return id1 < id2
	}

	// One of the two servers is unhealthy, and the healthy one sorts first.
	// Return true when server I is the healthy one, and false otherwise, as
	// in that case server J is the healthy one and should come before
	// server I.
	return srvI.Health.Healthy
}
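// The sketch below (hypothetical, not used by this package) shows how a caller
// holding a *State, for example the one passed to a delegate's NotifyState,
// might produce a stable, preference-ordered list of voters with SortServers;
// rankVoters is an assumed name used only for illustration:
//
//	func rankVoters(state *State) []raft.ServerID {
//		ids := append([]raft.ServerID(nil), state.Voters...)
//		SortServers(ids, state)
//		return ids
//	}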