consul/vendor/github.com/hashicorp/raft/inflight.go

214 lines
4.8 KiB
Go

package raft
import (
"container/list"
"sync"
)
// QuorumPolicy allows individual logFutures to have different
// commitment rules while still using the inflight mechanism.
type quorumPolicy interface {
// Checks if a commit from a given peer is enough to
// satisfy the commitment rules
Commit() bool
// Checks if a commit is committed
IsCommitted() bool
}
// MajorityQuorum is used by Apply transactions and requires
// a simple majority of nodes.
type majorityQuorum struct {
count int
votesNeeded int
}
func newMajorityQuorum(clusterSize int) *majorityQuorum {
votesNeeded := (clusterSize / 2) + 1
return &majorityQuorum{count: 0, votesNeeded: votesNeeded}
}
func (m *majorityQuorum) Commit() bool {
m.count++
return m.count >= m.votesNeeded
}
func (m *majorityQuorum) IsCommitted() bool {
return m.count >= m.votesNeeded
}
// Inflight is used to track operations that are still in-flight.
type inflight struct {
sync.Mutex
committed *list.List
commitCh chan struct{}
minCommit uint64
maxCommit uint64
operations map[uint64]*logFuture
stopCh chan struct{}
}
// NewInflight returns an inflight struct that notifies
// the provided channel when logs are finished committing.
func newInflight(commitCh chan struct{}) *inflight {
return &inflight{
committed: list.New(),
commitCh: commitCh,
minCommit: 0,
maxCommit: 0,
operations: make(map[uint64]*logFuture),
stopCh: make(chan struct{}),
}
}
// Start is used to mark a logFuture as being inflight. It
// also commits the entry, as it is assumed the leader is
// starting.
func (i *inflight) Start(l *logFuture) {
i.Lock()
defer i.Unlock()
i.start(l)
}
// StartAll is used to mark a list of logFuture's as being
// inflight. It also commits each entry as the leader is
// assumed to be starting.
func (i *inflight) StartAll(logs []*logFuture) {
i.Lock()
defer i.Unlock()
for _, l := range logs {
i.start(l)
}
}
// start is used to mark a single entry as inflight,
// must be invoked with the lock held.
func (i *inflight) start(l *logFuture) {
idx := l.log.Index
i.operations[idx] = l
if idx > i.maxCommit {
i.maxCommit = idx
}
if i.minCommit == 0 {
i.minCommit = idx
}
i.commit(idx)
}
// Cancel is used to cancel all in-flight operations.
// This is done when the leader steps down, and all futures
// are sent the given error.
func (i *inflight) Cancel(err error) {
// Close the channel first to unblock any pending commits
close(i.stopCh)
// Lock after close to avoid deadlock
i.Lock()
defer i.Unlock()
// Respond to all inflight operations
for _, op := range i.operations {
op.respond(err)
}
// Clear all the committed but not processed
for e := i.committed.Front(); e != nil; e = e.Next() {
e.Value.(*logFuture).respond(err)
}
// Clear the map
i.operations = make(map[uint64]*logFuture)
// Clear the list of committed
i.committed = list.New()
// Close the commmitCh
close(i.commitCh)
// Reset indexes
i.minCommit = 0
i.maxCommit = 0
}
// Committed returns all the committed operations in order.
func (i *inflight) Committed() (l *list.List) {
i.Lock()
l, i.committed = i.committed, list.New()
i.Unlock()
return l
}
// Commit is used by leader replication routines to indicate that
// a follower was finished committing a log to disk.
func (i *inflight) Commit(index uint64) {
i.Lock()
defer i.Unlock()
i.commit(index)
}
// CommitRange is used to commit a range of indexes inclusively.
// It is optimized to avoid commits for indexes that are not tracked.
func (i *inflight) CommitRange(minIndex, maxIndex uint64) {
i.Lock()
defer i.Unlock()
// Update the minimum index
minIndex = max(i.minCommit, minIndex)
// Commit each index
for idx := minIndex; idx <= maxIndex; idx++ {
i.commit(idx)
}
}
// commit is used to commit a single index. Must be called with the lock held.
func (i *inflight) commit(index uint64) {
op, ok := i.operations[index]
if !ok {
// Ignore if not in the map, as it may be committed already
return
}
// Check if we've satisfied the commit
if !op.policy.Commit() {
return
}
// Cannot commit if this is not the minimum inflight. This can happen
// if the quorum size changes, meaning a previous commit requires a larger
// quorum that this commit. We MUST block until the previous log is committed,
// otherwise logs will be applied out of order.
if index != i.minCommit {
return
}
NOTIFY:
// Add the operation to the committed list
i.committed.PushBack(op)
// Stop tracking since it is committed
delete(i.operations, index)
// Update the indexes
if index == i.maxCommit {
i.minCommit = 0
i.maxCommit = 0
} else {
i.minCommit++
}
// Check if the next in-flight operation is ready
if i.minCommit != 0 {
op = i.operations[i.minCommit]
if op.policy.IsCommitted() {
index = i.minCommit
goto NOTIFY
}
}
// Async notify of ready operations
asyncNotifyCh(i.commitCh)
}