2014-01-15 15:14:50 -10:00
|
|
|
package agent
|
|
|
|
|
|
|
|
import (
|
2015-01-27 18:11:57 +09:00
|
|
|
"fmt"
|
2014-01-21 11:52:25 -08:00
|
|
|
"log"
|
2016-02-07 13:12:42 -08:00
|
|
|
"reflect"
|
2014-12-01 11:43:01 -08:00
|
|
|
"strings"
|
2014-01-15 15:14:50 -10:00
|
|
|
"sync"
|
2014-02-07 11:58:24 -08:00
|
|
|
"sync/atomic"
|
2014-01-15 15:14:50 -10:00
|
|
|
"time"
|
2014-12-01 11:43:01 -08:00
|
|
|
|
|
|
|
"github.com/hashicorp/consul/consul"
|
|
|
|
"github.com/hashicorp/consul/consul/structs"
|
2016-01-29 11:42:34 -08:00
|
|
|
"github.com/hashicorp/consul/lib"
|
2016-06-06 13:19:31 -07:00
|
|
|
"github.com/hashicorp/consul/types"
|
2014-01-15 15:14:50 -10:00
|
|
|
)
|
|
|
|
|
|
|
|
const (
	// syncStaggerIntv is the base interval that gets scaled and randomized
	// to delay a sync retry after a consul server has been reported up.
	syncStaggerIntv time.Duration = 3 * time.Second

	// syncRetryIntv is the base wait before a failed sync of the remote
	// state is attempted again.
	syncRetryIntv time.Duration = 15 * time.Second

	// permissionDenied is the text looked for in an RPC error to recognize
	// an ACL based rejection.
	permissionDenied = "Permission denied"
)
|
|
|
|
|
|
|
|
// syncStatus describes how one local service or check entry compares to
// the state held by the server, and so which action a sync has to take.
type syncStatus struct {
	// remoteDelete is true when the entry should be deleted from the server.
	remoteDelete bool

	// inSync is true when the entry is in sync with the server.
	inSync bool
}
|
|
|
|
|
|
|
|
// localState is used to represent the node's services,
|
|
|
|
// and checks. We used it to perform anti-entropy with the
|
|
|
|
// catalog representation
|
|
|
|
type localState struct {
|
2014-02-07 11:58:24 -08:00
|
|
|
// paused is used to check if we are paused. Must be the first
|
|
|
|
// element due to a go bug.
|
|
|
|
paused int32
|
|
|
|
|
2015-04-27 22:01:01 -07:00
|
|
|
sync.RWMutex
|
2014-01-21 11:52:25 -08:00
|
|
|
logger *log.Logger
|
2014-01-15 15:14:50 -10:00
|
|
|
|
2014-01-21 11:52:25 -08:00
|
|
|
// Config is the agent config
|
|
|
|
config *Config
|
|
|
|
|
|
|
|
// iface is the consul interface to use for keeping in sync
|
|
|
|
iface consul.Interface
|
2014-01-15 15:14:50 -10:00
|
|
|
|
2016-02-07 13:12:42 -08:00
|
|
|
// nodeInfoInSync tracks whether the server has our correct top-level
|
|
|
|
// node information in sync (currently only used for tagged addresses)
|
|
|
|
nodeInfoInSync bool
|
|
|
|
|
2014-01-15 15:14:50 -10:00
|
|
|
// Services tracks the local services
|
|
|
|
services map[string]*structs.NodeService
|
|
|
|
serviceStatus map[string]syncStatus
|
2015-04-27 18:26:23 -07:00
|
|
|
serviceTokens map[string]string
|
2014-01-15 15:14:50 -10:00
|
|
|
|
|
|
|
// Checks tracks the local checks
|
2016-08-16 00:05:55 -07:00
|
|
|
checks map[types.CheckID]*structs.HealthCheck
|
|
|
|
checkStatus map[types.CheckID]syncStatus
|
|
|
|
checkTokens map[types.CheckID]string
|
|
|
|
checkCriticalTime map[types.CheckID]time.Time
|
2014-01-15 15:14:50 -10:00
|
|
|
|
2014-12-04 23:25:06 +00:00
|
|
|
// Used to track checks that are being deferred
|
2016-06-06 13:19:31 -07:00
|
|
|
deferCheck map[types.CheckID]*time.Timer
|
2014-06-10 10:42:55 -07:00
|
|
|
|
2014-02-07 12:03:31 -08:00
|
|
|
// consulCh is used to inform of a change to the known
|
|
|
|
// consul nodes. This may be used to retry a sync run
|
|
|
|
consulCh chan struct{}
|
|
|
|
|
2014-01-15 15:14:50 -10:00
|
|
|
// triggerCh is used to inform of a change to local state
|
|
|
|
// that requires anti-entropy with the server
|
|
|
|
triggerCh chan struct{}
|
|
|
|
}
|
|
|
|
|
2014-01-21 11:52:25 -08:00
|
|
|
// Init is used to initialize the local state
|
2014-02-07 12:11:34 -08:00
|
|
|
func (l *localState) Init(config *Config, logger *log.Logger) {
|
2014-01-21 11:52:25 -08:00
|
|
|
l.config = config
|
|
|
|
l.logger = logger
|
|
|
|
l.services = make(map[string]*structs.NodeService)
|
|
|
|
l.serviceStatus = make(map[string]syncStatus)
|
2015-04-27 18:26:23 -07:00
|
|
|
l.serviceTokens = make(map[string]string)
|
2016-06-06 13:19:31 -07:00
|
|
|
l.checks = make(map[types.CheckID]*structs.HealthCheck)
|
|
|
|
l.checkStatus = make(map[types.CheckID]syncStatus)
|
|
|
|
l.checkTokens = make(map[types.CheckID]string)
|
2016-08-16 00:05:55 -07:00
|
|
|
l.checkCriticalTime = make(map[types.CheckID]time.Time)
|
2016-06-06 13:19:31 -07:00
|
|
|
l.deferCheck = make(map[types.CheckID]*time.Timer)
|
2014-02-07 12:03:31 -08:00
|
|
|
l.consulCh = make(chan struct{}, 1)
|
2014-01-21 11:52:25 -08:00
|
|
|
l.triggerCh = make(chan struct{}, 1)
|
|
|
|
}
|
|
|
|
|
2014-02-07 12:11:34 -08:00
|
|
|
// SetIface is used to set the Consul interface. Must be set prior to
|
|
|
|
// starting anti-entropy
|
|
|
|
func (l *localState) SetIface(iface consul.Interface) {
|
|
|
|
l.iface = iface
|
|
|
|
}
|
|
|
|
|
2014-01-15 15:14:50 -10:00
|
|
|
// changeMade is used to trigger an anti-entropy run
|
|
|
|
func (l *localState) changeMade() {
|
|
|
|
select {
|
|
|
|
case l.triggerCh <- struct{}{}:
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-02-07 12:03:31 -08:00
|
|
|
// ConsulServerUp is used to inform that a new consul server is now
|
|
|
|
// up. This can be used to speed up the sync process if we are blocking
|
|
|
|
// waiting to discover a consul server
|
|
|
|
func (l *localState) ConsulServerUp() {
|
|
|
|
select {
|
|
|
|
case l.consulCh <- struct{}{}:
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-12-04 23:25:06 +00:00
|
|
|
// Pause is used to pause state synchronization, this can be
|
2014-02-07 11:58:24 -08:00
|
|
|
// used to make batch changes
|
|
|
|
func (l *localState) Pause() {
|
2015-09-11 18:28:06 +02:00
|
|
|
atomic.AddInt32(&l.paused, 1)
|
2014-02-07 11:58:24 -08:00
|
|
|
}
|
|
|
|
|
2014-12-04 23:25:06 +00:00
|
|
|
// Resume is used to resume state synchronization
|
2014-02-07 12:19:56 -08:00
|
|
|
func (l *localState) Resume() {
|
2015-09-17 11:32:08 +02:00
|
|
|
paused := atomic.AddInt32(&l.paused, -1)
|
|
|
|
if paused < 0 {
|
|
|
|
panic("unbalanced localState.Resume() detected")
|
|
|
|
}
|
2014-02-07 11:58:24 -08:00
|
|
|
l.changeMade()
|
|
|
|
}
|
|
|
|
|
|
|
|
// isPaused is used to check if we are paused
|
|
|
|
func (l *localState) isPaused() bool {
|
2015-09-11 18:28:06 +02:00
|
|
|
return atomic.LoadInt32(&l.paused) > 0
|
2014-02-07 11:58:24 -08:00
|
|
|
}
|
|
|
|
|
2015-04-27 22:01:01 -07:00
|
|
|
// ServiceToken returns the configured ACL token for the given
|
|
|
|
// service ID. If none is present, the agent's token is returned.
|
|
|
|
func (l *localState) ServiceToken(id string) string {
|
2015-04-28 11:53:53 -07:00
|
|
|
l.RLock()
|
|
|
|
defer l.RUnlock()
|
|
|
|
return l.serviceToken(id)
|
|
|
|
}
|
|
|
|
|
|
|
|
// serviceToken returns an ACL token associated with a service.
|
|
|
|
func (l *localState) serviceToken(id string) string {
|
2015-04-27 22:01:01 -07:00
|
|
|
token := l.serviceTokens[id]
|
|
|
|
if token == "" {
|
|
|
|
token = l.config.ACLToken
|
|
|
|
}
|
|
|
|
return token
|
|
|
|
}
|
|
|
|
|
2014-01-15 15:14:50 -10:00
|
|
|
// AddService is used to add a service entry to the local state.
|
|
|
|
// This entry is persistent and the agent will make a best effort to
|
|
|
|
// ensure it is registered
|
2015-05-04 17:36:17 -07:00
|
|
|
func (l *localState) AddService(service *structs.NodeService, token string) {
|
2014-01-20 14:22:59 -10:00
|
|
|
// Assign the ID if none given
|
|
|
|
if service.ID == "" && service.Service != "" {
|
|
|
|
service.ID = service.Service
|
|
|
|
}
|
|
|
|
|
2014-01-21 11:52:25 -08:00
|
|
|
l.Lock()
|
|
|
|
defer l.Unlock()
|
2014-01-15 15:14:50 -10:00
|
|
|
|
2014-01-21 11:52:25 -08:00
|
|
|
l.services[service.ID] = service
|
|
|
|
l.serviceStatus[service.ID] = syncStatus{}
|
2015-05-04 17:36:17 -07:00
|
|
|
l.serviceTokens[service.ID] = token
|
2014-01-21 11:52:25 -08:00
|
|
|
l.changeMade()
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
|
|
|
|
|
|
|
// RemoveService is used to remove a service entry from the local state.
|
|
|
|
// The agent will make a best effort to ensure it is deregistered
|
2014-01-21 11:52:25 -08:00
|
|
|
func (l *localState) RemoveService(serviceID string) {
|
|
|
|
l.Lock()
|
|
|
|
defer l.Unlock()
|
2014-01-15 15:14:50 -10:00
|
|
|
|
2014-01-21 11:52:25 -08:00
|
|
|
delete(l.services, serviceID)
|
2015-05-04 17:36:17 -07:00
|
|
|
delete(l.serviceTokens, serviceID)
|
2014-01-21 11:52:25 -08:00
|
|
|
l.serviceStatus[serviceID] = syncStatus{remoteDelete: true}
|
|
|
|
l.changeMade()
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
|
|
|
|
2014-01-20 15:00:52 -10:00
|
|
|
// Services returns the locally registered services that the
|
|
|
|
// agent is aware of and are being kept in sync with the server
|
2014-01-21 11:52:25 -08:00
|
|
|
func (l *localState) Services() map[string]*structs.NodeService {
|
2014-01-20 15:00:52 -10:00
|
|
|
services := make(map[string]*structs.NodeService)
|
2015-04-27 22:01:01 -07:00
|
|
|
l.RLock()
|
|
|
|
defer l.RUnlock()
|
2014-01-20 15:00:52 -10:00
|
|
|
|
2014-01-21 11:52:25 -08:00
|
|
|
for name, serv := range l.services {
|
2014-01-20 15:00:52 -10:00
|
|
|
services[name] = serv
|
|
|
|
}
|
|
|
|
return services
|
|
|
|
}
|
|
|
|
|
2016-06-07 15:24:51 -05:00
|
|
|
// CheckToken is used to return the configured health check token for a
|
|
|
|
// Check, or if none is configured, the default agent ACL token.
|
|
|
|
func (l *localState) CheckToken(checkID types.CheckID) string {
|
2015-04-28 11:53:53 -07:00
|
|
|
l.RLock()
|
|
|
|
defer l.RUnlock()
|
2016-06-07 15:24:51 -05:00
|
|
|
return l.checkToken(checkID)
|
2015-04-28 11:53:53 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// checkToken returns an ACL token associated with a check.
|
2016-06-07 15:24:51 -05:00
|
|
|
func (l *localState) checkToken(checkID types.CheckID) string {
|
|
|
|
token := l.checkTokens[checkID]
|
2015-04-27 22:01:01 -07:00
|
|
|
if token == "" {
|
|
|
|
token = l.config.ACLToken
|
|
|
|
}
|
|
|
|
return token
|
|
|
|
}
|
|
|
|
|
2014-01-15 15:14:50 -10:00
|
|
|
// AddCheck is used to add a health check to the local state.
|
|
|
|
// This entry is persistent and the agent will make a best effort to
|
|
|
|
// ensure it is registered
|
2015-05-04 17:36:17 -07:00
|
|
|
func (l *localState) AddCheck(check *structs.HealthCheck, token string) {
|
2014-01-20 15:06:44 -10:00
|
|
|
// Set the node name
|
2014-01-21 11:52:25 -08:00
|
|
|
check.Node = l.config.NodeName
|
2014-01-20 15:06:44 -10:00
|
|
|
|
2014-01-21 11:52:25 -08:00
|
|
|
l.Lock()
|
|
|
|
defer l.Unlock()
|
2014-01-15 15:14:50 -10:00
|
|
|
|
2014-01-21 11:52:25 -08:00
|
|
|
l.checks[check.CheckID] = check
|
|
|
|
l.checkStatus[check.CheckID] = syncStatus{}
|
2015-05-04 17:36:17 -07:00
|
|
|
l.checkTokens[check.CheckID] = token
|
2016-08-16 00:05:55 -07:00
|
|
|
delete(l.checkCriticalTime, check.CheckID)
|
2014-01-21 11:52:25 -08:00
|
|
|
l.changeMade()
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
|
|
|
|
|
|
|
// RemoveCheck is used to remove a health check from the local state.
|
|
|
|
// The agent will make a best effort to ensure it is deregistered
|
2016-06-06 13:19:31 -07:00
|
|
|
func (l *localState) RemoveCheck(checkID types.CheckID) {
|
2014-01-21 11:52:25 -08:00
|
|
|
l.Lock()
|
|
|
|
defer l.Unlock()
|
2014-01-15 15:14:50 -10:00
|
|
|
|
2014-01-21 11:52:25 -08:00
|
|
|
delete(l.checks, checkID)
|
2015-05-04 17:36:17 -07:00
|
|
|
delete(l.checkTokens, checkID)
|
2016-08-16 00:05:55 -07:00
|
|
|
delete(l.checkCriticalTime, checkID)
|
2014-01-21 11:52:25 -08:00
|
|
|
l.checkStatus[checkID] = syncStatus{remoteDelete: true}
|
|
|
|
l.changeMade()
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
|
|
|
|
|
|
|
// UpdateCheck is used to update the status of a check
|
2016-06-06 13:19:31 -07:00
|
|
|
func (l *localState) UpdateCheck(checkID types.CheckID, status, output string) {
|
2014-01-21 11:52:25 -08:00
|
|
|
l.Lock()
|
|
|
|
defer l.Unlock()
|
2014-01-15 15:14:50 -10:00
|
|
|
|
2014-01-21 11:52:25 -08:00
|
|
|
check, ok := l.checks[checkID]
|
2014-01-15 15:14:50 -10:00
|
|
|
if !ok {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2016-08-16 00:05:55 -07:00
|
|
|
// Update the critical time tracking (this doesn't cause a server updates
|
|
|
|
// so we can always keep this up to date).
|
|
|
|
if status == structs.HealthCritical {
|
|
|
|
_, wasCritical := l.checkCriticalTime[checkID]
|
|
|
|
if !wasCritical {
|
|
|
|
l.checkCriticalTime[checkID] = time.Now()
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
delete(l.checkCriticalTime, checkID)
|
|
|
|
}
|
|
|
|
|
2014-01-15 15:14:50 -10:00
|
|
|
// Do nothing if update is idempotent
|
2014-04-21 16:20:22 -07:00
|
|
|
if check.Status == status && check.Output == output {
|
2014-01-15 15:14:50 -10:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2014-06-09 12:46:29 -07:00
|
|
|
// Defer a sync if the output has changed. This is an optimization around
|
|
|
|
// frequent updates of output. Instead, we update the output internally,
|
|
|
|
// and periodically do a write-back to the servers. If there is a status
|
|
|
|
// change we do the write immediately.
|
|
|
|
if l.config.CheckUpdateInterval > 0 && check.Status == status {
|
|
|
|
check.Output = output
|
2014-06-10 10:42:55 -07:00
|
|
|
if _, ok := l.deferCheck[checkID]; !ok {
|
2016-01-29 11:42:34 -08:00
|
|
|
intv := time.Duration(uint64(l.config.CheckUpdateInterval)/2) + lib.RandomStagger(l.config.CheckUpdateInterval)
|
2015-04-23 15:37:20 -05:00
|
|
|
deferSync := time.AfterFunc(intv, func() {
|
2014-06-09 12:46:29 -07:00
|
|
|
l.Lock()
|
2014-06-10 10:42:55 -07:00
|
|
|
if _, ok := l.checkStatus[checkID]; ok {
|
2014-06-09 16:00:25 -07:00
|
|
|
l.checkStatus[checkID] = syncStatus{inSync: false}
|
|
|
|
l.changeMade()
|
|
|
|
}
|
2014-06-10 10:42:55 -07:00
|
|
|
delete(l.deferCheck, checkID)
|
2014-06-09 12:46:29 -07:00
|
|
|
l.Unlock()
|
|
|
|
})
|
2014-06-10 10:42:55 -07:00
|
|
|
l.deferCheck[checkID] = deferSync
|
2014-06-09 12:46:29 -07:00
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2014-01-15 15:14:50 -10:00
|
|
|
// Update status and mark out of sync
|
|
|
|
check.Status = status
|
2014-04-21 16:20:22 -07:00
|
|
|
check.Output = output
|
2014-01-21 11:52:25 -08:00
|
|
|
l.checkStatus[checkID] = syncStatus{inSync: false}
|
|
|
|
l.changeMade()
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
|
|
|
|
2014-01-20 15:00:52 -10:00
|
|
|
// Checks returns the locally registered checks that the
|
|
|
|
// agent is aware of and are being kept in sync with the server
|
2016-06-06 13:19:31 -07:00
|
|
|
func (l *localState) Checks() map[types.CheckID]*structs.HealthCheck {
|
|
|
|
checks := make(map[types.CheckID]*structs.HealthCheck)
|
2015-04-27 22:01:01 -07:00
|
|
|
l.RLock()
|
|
|
|
defer l.RUnlock()
|
2014-01-20 15:00:52 -10:00
|
|
|
|
2016-06-06 01:53:30 -07:00
|
|
|
for checkID, check := range l.checks {
|
|
|
|
checks[checkID] = check
|
2014-01-20 15:00:52 -10:00
|
|
|
}
|
|
|
|
return checks
|
|
|
|
}
|
|
|
|
|
2016-08-16 00:05:55 -07:00
|
|
|
// CriticalCheck is used to return the duration a check has been critical along
|
|
|
|
// with its associated health check.
|
|
|
|
type CriticalCheck struct {
|
|
|
|
CriticalFor time.Duration
|
|
|
|
Check *structs.HealthCheck
|
|
|
|
}
|
|
|
|
|
|
|
|
// CriticalChecks returns locally registered health checks that the agent is
|
|
|
|
// aware of and are being kept in sync with the server, and that are in a
|
|
|
|
// critical state. This also returns information about how long each check has
|
|
|
|
// been critical.
|
|
|
|
func (l *localState) CriticalChecks() map[types.CheckID]CriticalCheck {
|
|
|
|
checks := make(map[types.CheckID]CriticalCheck)
|
|
|
|
|
|
|
|
l.RLock()
|
|
|
|
defer l.RUnlock()
|
|
|
|
|
|
|
|
now := time.Now()
|
|
|
|
for checkID, criticalTime := range l.checkCriticalTime {
|
|
|
|
checks[checkID] = CriticalCheck{
|
|
|
|
CriticalFor: now.Sub(criticalTime),
|
|
|
|
Check: l.checks[checkID],
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return checks
|
|
|
|
}
|
|
|
|
|
2014-01-15 15:14:50 -10:00
|
|
|
// antiEntropy is a long running method used to perform anti-entropy
|
|
|
|
// between local and remote state.
|
2014-01-21 11:52:25 -08:00
|
|
|
func (l *localState) antiEntropy(shutdownCh chan struct{}) {
|
2014-01-15 15:14:50 -10:00
|
|
|
SYNC:
|
|
|
|
// Sync our state with the servers
|
2014-01-21 11:52:25 -08:00
|
|
|
for {
|
2014-04-14 12:47:58 -07:00
|
|
|
err := l.setSyncState()
|
|
|
|
if err == nil {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
l.logger.Printf("[ERR] agent: failed to sync remote state: %v", err)
|
|
|
|
select {
|
|
|
|
case <-l.consulCh:
|
2014-04-23 12:21:34 -07:00
|
|
|
// Stagger the retry on leader election, avoid a thundering heard
|
|
|
|
select {
|
2016-01-29 11:42:34 -08:00
|
|
|
case <-time.After(lib.RandomStagger(aeScale(syncStaggerIntv, len(l.iface.LANMembers())))):
|
2014-04-23 12:21:34 -07:00
|
|
|
case <-shutdownCh:
|
|
|
|
return
|
|
|
|
}
|
2016-01-29 11:42:34 -08:00
|
|
|
case <-time.After(syncRetryIntv + lib.RandomStagger(aeScale(syncRetryIntv, len(l.iface.LANMembers())))):
|
2014-04-14 12:47:58 -07:00
|
|
|
case <-shutdownCh:
|
|
|
|
return
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Force-trigger AE to pickup any changes
|
2014-01-21 11:52:25 -08:00
|
|
|
l.changeMade()
|
2014-01-15 15:14:50 -10:00
|
|
|
|
|
|
|
// Schedule the next full sync, with a random stagger
|
2014-01-21 11:52:25 -08:00
|
|
|
aeIntv := aeScale(l.config.AEInterval, len(l.iface.LANMembers()))
|
2016-01-29 11:42:34 -08:00
|
|
|
aeIntv = aeIntv + lib.RandomStagger(aeIntv)
|
2014-01-15 15:14:50 -10:00
|
|
|
aeTimer := time.After(aeIntv)
|
|
|
|
|
|
|
|
// Wait for sync events
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-aeTimer:
|
|
|
|
goto SYNC
|
2014-01-21 11:52:25 -08:00
|
|
|
case <-l.triggerCh:
|
2014-02-07 11:58:24 -08:00
|
|
|
// Skip the sync if we are paused
|
|
|
|
if l.isPaused() {
|
|
|
|
continue
|
|
|
|
}
|
2014-01-21 11:52:25 -08:00
|
|
|
if err := l.syncChanges(); err != nil {
|
|
|
|
l.logger.Printf("[ERR] agent: failed to sync changes: %v", err)
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
2014-01-21 11:52:25 -08:00
|
|
|
case <-shutdownCh:
|
2014-01-15 15:14:50 -10:00
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// setSyncState does a read of the server state, and updates
|
|
|
|
// the local syncStatus as appropriate
|
2014-01-21 11:52:25 -08:00
|
|
|
func (l *localState) setSyncState() error {
|
2014-01-15 15:14:50 -10:00
|
|
|
req := structs.NodeSpecificRequest{
|
2014-12-01 11:43:01 -08:00
|
|
|
Datacenter: l.config.Datacenter,
|
|
|
|
Node: l.config.NodeName,
|
|
|
|
QueryOptions: structs.QueryOptions{Token: l.config.ACLToken},
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
2014-02-05 14:36:13 -08:00
|
|
|
var out1 structs.IndexedNodeServices
|
|
|
|
var out2 structs.IndexedHealthChecks
|
|
|
|
if e := l.iface.RPC("Catalog.NodeServices", &req, &out1); e != nil {
|
2014-01-15 15:14:50 -10:00
|
|
|
return e
|
|
|
|
}
|
2014-02-05 14:36:13 -08:00
|
|
|
if err := l.iface.RPC("Health.NodeChecks", &req, &out2); err != nil {
|
2014-01-15 15:14:50 -10:00
|
|
|
return err
|
|
|
|
}
|
2014-02-05 14:36:13 -08:00
|
|
|
checks := out2.HealthChecks
|
2014-01-15 15:14:50 -10:00
|
|
|
|
2014-01-21 11:52:25 -08:00
|
|
|
l.Lock()
|
|
|
|
defer l.Unlock()
|
2014-01-15 15:14:50 -10:00
|
|
|
|
2016-02-07 13:12:42 -08:00
|
|
|
// Check the node info (currently limited to tagged addresses since
|
|
|
|
// everything else is managed by the Serf layer)
|
2016-02-07 15:07:23 -08:00
|
|
|
if out1.NodeServices == nil || out1.NodeServices.Node == nil ||
|
|
|
|
!reflect.DeepEqual(out1.NodeServices.Node.TaggedAddresses, l.config.TaggedAddresses) {
|
2016-02-07 13:12:42 -08:00
|
|
|
l.nodeInfoInSync = false
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check all our services
|
2015-04-10 11:04:15 -07:00
|
|
|
services := make(map[string]*structs.NodeService)
|
|
|
|
if out1.NodeServices != nil {
|
|
|
|
services = out1.NodeServices.Services
|
|
|
|
}
|
|
|
|
|
2015-04-08 12:20:34 -07:00
|
|
|
for id, _ := range l.services {
|
|
|
|
// If the local service doesn't exist remotely, then sync it
|
2015-04-10 11:04:15 -07:00
|
|
|
if _, ok := services[id]; !ok {
|
2015-04-08 12:20:34 -07:00
|
|
|
l.serviceStatus[id] = syncStatus{inSync: false}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-10 11:04:15 -07:00
|
|
|
for id, service := range services {
|
|
|
|
// If we don't have the service locally, deregister it
|
|
|
|
existing, ok := l.services[id]
|
|
|
|
if !ok {
|
|
|
|
l.serviceStatus[id] = syncStatus{remoteDelete: true}
|
|
|
|
continue
|
2014-03-05 15:03:23 -08:00
|
|
|
}
|
2015-04-10 11:04:15 -07:00
|
|
|
|
2016-04-11 14:53:18 -07:00
|
|
|
// If our definition is different, we need to update it. Make a
|
|
|
|
// copy so that we don't retain a pointer to any actual state
|
|
|
|
// store info for in-memory RPCs.
|
2015-09-11 08:35:29 -07:00
|
|
|
if existing.EnableTagOverride {
|
2016-04-11 14:53:18 -07:00
|
|
|
existing.Tags = make([]string, len(service.Tags))
|
|
|
|
copy(existing.Tags, service.Tags)
|
2015-08-18 14:03:48 -07:00
|
|
|
}
|
2015-10-28 14:32:00 -07:00
|
|
|
equal := existing.IsSame(service)
|
2015-04-10 11:04:15 -07:00
|
|
|
l.serviceStatus[id] = syncStatus{inSync: equal}
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
|
|
|
|
2015-10-12 20:30:11 -07:00
|
|
|
// Index the remote health checks to improve efficiency
|
2016-06-06 13:19:31 -07:00
|
|
|
checkIndex := make(map[types.CheckID]*structs.HealthCheck, len(checks))
|
2015-10-12 20:30:11 -07:00
|
|
|
for _, check := range checks {
|
|
|
|
checkIndex[check.CheckID] = check
|
|
|
|
}
|
|
|
|
|
|
|
|
// Sync any check which doesn't exist on the remote side
|
2015-04-08 12:20:34 -07:00
|
|
|
for id, _ := range l.checks {
|
2015-10-12 20:30:11 -07:00
|
|
|
if _, ok := checkIndex[id]; !ok {
|
2015-04-08 12:20:34 -07:00
|
|
|
l.checkStatus[id] = syncStatus{inSync: false}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-01-15 15:14:50 -10:00
|
|
|
for _, check := range checks {
|
|
|
|
// If we don't have the check locally, deregister it
|
|
|
|
id := check.CheckID
|
2014-01-21 11:52:25 -08:00
|
|
|
existing, ok := l.checks[id]
|
2014-01-15 15:14:50 -10:00
|
|
|
if !ok {
|
2014-01-15 17:28:23 -10:00
|
|
|
// The Serf check is created automatically, and does not
|
|
|
|
// need to be registered
|
|
|
|
if id == consul.SerfCheckID {
|
|
|
|
continue
|
|
|
|
}
|
2014-01-21 11:52:25 -08:00
|
|
|
l.checkStatus[id] = syncStatus{remoteDelete: true}
|
2014-01-15 15:14:50 -10:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// If our definition is different, we need to update it
|
2014-06-09 16:00:25 -07:00
|
|
|
var equal bool
|
|
|
|
if l.config.CheckUpdateInterval == 0 {
|
2015-10-28 14:32:00 -07:00
|
|
|
equal = existing.IsSame(check)
|
2014-06-09 16:00:25 -07:00
|
|
|
} else {
|
2016-04-10 21:20:39 -07:00
|
|
|
// Copy the existing check before potentially modifying
|
|
|
|
// it before the compare operation.
|
2016-04-11 00:05:39 -07:00
|
|
|
eCopy := existing.Clone()
|
2016-04-10 21:20:39 -07:00
|
|
|
|
|
|
|
// Copy the server's check before modifying, otherwise
|
2016-04-11 08:58:17 -07:00
|
|
|
// in-memory RPCs will have side effects.
|
2016-04-11 00:05:39 -07:00
|
|
|
cCopy := check.Clone()
|
2016-04-10 21:20:39 -07:00
|
|
|
|
|
|
|
// If there's a defer timer active then we've got a
|
|
|
|
// potentially spammy check so we don't sync the output
|
|
|
|
// during this sweep since the timer will mark the check
|
|
|
|
// out of sync for us. Otherwise, it is safe to sync the
|
|
|
|
// output now. This is especially important for checks
|
|
|
|
// that don't change state after they are created, in
|
|
|
|
// which case we'd never see their output synced back ever.
|
|
|
|
if _, ok := l.deferCheck[id]; ok {
|
|
|
|
eCopy.Output = ""
|
|
|
|
cCopy.Output = ""
|
|
|
|
}
|
|
|
|
equal = eCopy.IsSame(cCopy)
|
2014-06-09 16:00:25 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// Update the status
|
2014-01-21 11:52:25 -08:00
|
|
|
l.checkStatus[id] = syncStatus{inSync: equal}
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// syncChanges is used to scan the status our local services and checks
|
|
|
|
// and update any that are out of sync with the server
|
2014-01-21 11:52:25 -08:00
|
|
|
func (l *localState) syncChanges() error {
|
|
|
|
l.Lock()
|
|
|
|
defer l.Unlock()
|
2014-01-15 15:14:50 -10:00
|
|
|
|
2016-02-07 13:12:42 -08:00
|
|
|
// We will do node-level info syncing at the end, since it will get
|
|
|
|
// updated by a service or check sync anyway, given how the register
|
|
|
|
// API works.
|
|
|
|
|
2015-01-14 11:48:36 -08:00
|
|
|
// Sync the services
|
|
|
|
for id, status := range l.serviceStatus {
|
2014-01-15 15:14:50 -10:00
|
|
|
if status.remoteDelete {
|
2015-01-14 11:48:36 -08:00
|
|
|
if err := l.deleteService(id); err != nil {
|
2014-01-15 15:14:50 -10:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
} else if !status.inSync {
|
2015-01-14 11:48:36 -08:00
|
|
|
if err := l.syncService(id); err != nil {
|
|
|
|
return err
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
2014-04-23 12:21:47 -07:00
|
|
|
} else {
|
2015-01-14 11:48:36 -08:00
|
|
|
l.logger.Printf("[DEBUG] agent: Service '%s' in sync", id)
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-14 11:48:36 -08:00
|
|
|
// Sync the checks
|
|
|
|
for id, status := range l.checkStatus {
|
2014-01-15 15:14:50 -10:00
|
|
|
if status.remoteDelete {
|
2015-01-14 11:48:36 -08:00
|
|
|
if err := l.deleteCheck(id); err != nil {
|
2014-01-15 15:14:50 -10:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
} else if !status.inSync {
|
2015-01-14 11:48:36 -08:00
|
|
|
// Cancel a deferred sync
|
|
|
|
if timer := l.deferCheck[id]; timer != nil {
|
|
|
|
timer.Stop()
|
|
|
|
delete(l.deferCheck, id)
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := l.syncCheck(id); err != nil {
|
2014-01-15 15:14:50 -10:00
|
|
|
return err
|
|
|
|
}
|
2014-04-23 12:21:47 -07:00
|
|
|
} else {
|
2015-01-14 11:48:36 -08:00
|
|
|
l.logger.Printf("[DEBUG] agent: Check '%s' in sync", id)
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
|
|
|
}
|
2016-02-07 13:12:42 -08:00
|
|
|
|
|
|
|
// Now sync the node level info if we need to, and didn't do any of
|
|
|
|
// the other sync operations.
|
|
|
|
if !l.nodeInfoInSync {
|
|
|
|
if err := l.syncNodeInfo(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2016-04-10 21:20:39 -07:00
|
|
|
} else {
|
|
|
|
l.logger.Printf("[DEBUG] agent: Node info in sync")
|
2016-02-07 13:12:42 -08:00
|
|
|
}
|
|
|
|
|
2014-01-15 15:14:50 -10:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// deleteService is used to delete a service from the server
|
2014-01-21 11:52:25 -08:00
|
|
|
func (l *localState) deleteService(id string) error {
|
2015-01-27 18:11:57 +09:00
|
|
|
if id == "" {
|
|
|
|
return fmt.Errorf("ServiceID missing")
|
|
|
|
}
|
|
|
|
|
2014-01-15 15:14:50 -10:00
|
|
|
req := structs.DeregisterRequest{
|
2014-12-01 11:43:01 -08:00
|
|
|
Datacenter: l.config.Datacenter,
|
|
|
|
Node: l.config.NodeName,
|
|
|
|
ServiceID: id,
|
2015-04-28 11:53:53 -07:00
|
|
|
WriteRequest: structs.WriteRequest{Token: l.serviceToken(id)},
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
|
|
|
var out struct{}
|
2014-01-21 11:52:25 -08:00
|
|
|
err := l.iface.RPC("Catalog.Deregister", &req, &out)
|
2014-01-15 15:14:50 -10:00
|
|
|
if err == nil {
|
2014-01-21 11:52:25 -08:00
|
|
|
delete(l.serviceStatus, id)
|
|
|
|
l.logger.Printf("[INFO] agent: Deregistered service '%s'", id)
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2016-08-16 00:05:55 -07:00
|
|
|
// deleteCheck is used to delete a check from the server
|
2016-06-06 13:19:31 -07:00
|
|
|
func (l *localState) deleteCheck(id types.CheckID) error {
|
2015-01-27 18:11:57 +09:00
|
|
|
if id == "" {
|
|
|
|
return fmt.Errorf("CheckID missing")
|
|
|
|
}
|
|
|
|
|
2014-01-15 15:14:50 -10:00
|
|
|
req := structs.DeregisterRequest{
|
2014-12-01 11:43:01 -08:00
|
|
|
Datacenter: l.config.Datacenter,
|
|
|
|
Node: l.config.NodeName,
|
|
|
|
CheckID: id,
|
2015-04-28 11:53:53 -07:00
|
|
|
WriteRequest: structs.WriteRequest{Token: l.checkToken(id)},
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
|
|
|
var out struct{}
|
2014-01-21 11:52:25 -08:00
|
|
|
err := l.iface.RPC("Catalog.Deregister", &req, &out)
|
2014-01-15 15:14:50 -10:00
|
|
|
if err == nil {
|
2014-01-21 11:52:25 -08:00
|
|
|
delete(l.checkStatus, id)
|
|
|
|
l.logger.Printf("[INFO] agent: Deregistered check '%s'", id)
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// syncService is used to sync a service to the server
|
2014-01-21 11:52:25 -08:00
|
|
|
func (l *localState) syncService(id string) error {
|
2014-01-15 15:14:50 -10:00
|
|
|
req := structs.RegisterRequest{
|
2016-02-07 10:37:34 -08:00
|
|
|
Datacenter: l.config.Datacenter,
|
|
|
|
Node: l.config.NodeName,
|
|
|
|
Address: l.config.AdvertiseAddr,
|
|
|
|
TaggedAddresses: l.config.TaggedAddresses,
|
|
|
|
Service: l.services[id],
|
|
|
|
WriteRequest: structs.WriteRequest{Token: l.serviceToken(id)},
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
2015-01-14 11:48:36 -08:00
|
|
|
|
2015-01-14 23:09:42 -08:00
|
|
|
// If the service has associated checks that are out of sync,
|
|
|
|
// piggyback them on the service sync so they are part of the
|
|
|
|
// same transaction and are registered atomically.
|
2015-01-14 11:48:36 -08:00
|
|
|
var checks structs.HealthChecks
|
|
|
|
for _, check := range l.checks {
|
|
|
|
if check.ServiceID == id {
|
|
|
|
if stat, ok := l.checkStatus[check.CheckID]; !ok || !stat.inSync {
|
|
|
|
checks = append(checks, check)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-14 23:09:42 -08:00
|
|
|
// Backwards-compatibility for Consul < 0.5
|
2015-01-14 11:48:36 -08:00
|
|
|
if len(checks) == 1 {
|
|
|
|
req.Check = checks[0]
|
|
|
|
} else {
|
|
|
|
req.Checks = checks
|
|
|
|
}
|
|
|
|
|
2014-01-15 15:14:50 -10:00
|
|
|
var out struct{}
|
2014-01-21 11:52:25 -08:00
|
|
|
err := l.iface.RPC("Catalog.Register", &req, &out)
|
2014-01-15 15:14:50 -10:00
|
|
|
if err == nil {
|
2014-01-21 11:52:25 -08:00
|
|
|
l.serviceStatus[id] = syncStatus{inSync: true}
|
2016-02-07 13:12:42 -08:00
|
|
|
// Given how the register API works, this info is also updated
|
|
|
|
// every time we sync a service.
|
|
|
|
l.nodeInfoInSync = true
|
2014-01-21 11:52:25 -08:00
|
|
|
l.logger.Printf("[INFO] agent: Synced service '%s'", id)
|
2015-01-14 11:48:36 -08:00
|
|
|
for _, check := range checks {
|
|
|
|
l.checkStatus[check.CheckID] = syncStatus{inSync: true}
|
|
|
|
}
|
2014-12-01 11:43:01 -08:00
|
|
|
} else if strings.Contains(err.Error(), permissionDenied) {
|
|
|
|
l.serviceStatus[id] = syncStatus{inSync: true}
|
|
|
|
l.logger.Printf("[WARN] agent: Service '%s' registration blocked by ACLs", id)
|
2015-01-14 11:48:36 -08:00
|
|
|
for _, check := range checks {
|
|
|
|
l.checkStatus[check.CheckID] = syncStatus{inSync: true}
|
|
|
|
}
|
2014-12-01 11:43:01 -08:00
|
|
|
return nil
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2015-12-08 17:45:01 +08:00
|
|
|
// syncCheck is used to sync a check to the server
|
2016-06-06 13:19:31 -07:00
|
|
|
func (l *localState) syncCheck(id types.CheckID) error {
|
2015-01-14 11:48:36 -08:00
|
|
|
// Pull in the associated service if any
|
|
|
|
check := l.checks[id]
|
|
|
|
var service *structs.NodeService
|
|
|
|
if check.ServiceID != "" {
|
|
|
|
if serv, ok := l.services[check.ServiceID]; ok {
|
|
|
|
service = serv
|
2015-01-13 23:23:52 -08:00
|
|
|
}
|
|
|
|
}
|
2015-04-27 18:26:23 -07:00
|
|
|
|
2015-01-14 11:48:36 -08:00
|
|
|
req := structs.RegisterRequest{
|
2016-02-07 10:37:34 -08:00
|
|
|
Datacenter: l.config.Datacenter,
|
|
|
|
Node: l.config.NodeName,
|
|
|
|
Address: l.config.AdvertiseAddr,
|
|
|
|
TaggedAddresses: l.config.TaggedAddresses,
|
|
|
|
Service: service,
|
|
|
|
Check: l.checks[id],
|
|
|
|
WriteRequest: structs.WriteRequest{Token: l.checkToken(id)},
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
2015-01-14 11:48:36 -08:00
|
|
|
var out struct{}
|
|
|
|
err := l.iface.RPC("Catalog.Register", &req, &out)
|
|
|
|
if err == nil {
|
|
|
|
l.checkStatus[id] = syncStatus{inSync: true}
|
2016-02-07 13:12:42 -08:00
|
|
|
// Given how the register API works, this info is also updated
|
|
|
|
// every time we sync a service.
|
|
|
|
l.nodeInfoInSync = true
|
2015-01-14 11:48:36 -08:00
|
|
|
l.logger.Printf("[INFO] agent: Synced check '%s'", id)
|
|
|
|
} else if strings.Contains(err.Error(), permissionDenied) {
|
|
|
|
l.checkStatus[id] = syncStatus{inSync: true}
|
|
|
|
l.logger.Printf("[WARN] agent: Check '%s' registration blocked by ACLs", id)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
return err
|
2014-01-15 15:14:50 -10:00
|
|
|
}
|
2016-02-07 13:12:42 -08:00
|
|
|
|
|
|
|
func (l *localState) syncNodeInfo() error {
|
|
|
|
req := structs.RegisterRequest{
|
|
|
|
Datacenter: l.config.Datacenter,
|
|
|
|
Node: l.config.NodeName,
|
|
|
|
Address: l.config.AdvertiseAddr,
|
|
|
|
TaggedAddresses: l.config.TaggedAddresses,
|
|
|
|
WriteRequest: structs.WriteRequest{Token: l.config.ACLToken},
|
|
|
|
}
|
|
|
|
var out struct{}
|
|
|
|
err := l.iface.RPC("Catalog.Register", &req, &out)
|
|
|
|
if err == nil {
|
|
|
|
l.nodeInfoInSync = true
|
|
|
|
l.logger.Printf("[INFO] agent: Synced node info")
|
|
|
|
} else if strings.Contains(err.Error(), permissionDenied) {
|
|
|
|
l.nodeInfoInSync = true
|
|
|
|
l.logger.Printf("[WARN] agent: Node info update blocked by ACLs")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|