package agent

import (
	"fmt"
	"sync"

	"github.com/hashicorp/consul/agent/cache"
	cachetype "github.com/hashicorp/consul/agent/cache-types"
	"github.com/hashicorp/consul/agent/structs"
	"github.com/imdario/mergo"
	"github.com/mitchellh/copystructure"
	"golang.org/x/net/context"
)

// ServiceManager is a layer for service registration in between the agent
// and the local state. Any services must be registered with the ServiceManager,
// which then maintains a long-running watch of any globally-set service or proxy
// configuration that applies to the service in order to register the final, merged
// service configuration locally in the agent state.
type ServiceManager struct {
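	// services tracks the active config watch for each registered service ID.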
	services map[string]*serviceConfigWatch
	agent    *Agent

	lock sync.Mutex
}
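
// NewServiceManager returns a ServiceManager for the given agent with an empty
// set of service config watches.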
func NewServiceManager(agent *Agent) *ServiceManager {
	return &ServiceManager{
		services: make(map[string]*serviceConfigWatch),
		agent:    agent,
	}
}

// AddService starts a new serviceConfigWatch if the service has not been registered, and
// updates the existing registration if it has. For a new service, a call will also be made
// to fetch the merged global defaults that apply to the service in order to compose the
// initial registration.
func (s *ServiceManager) AddService(service *structs.NodeService, chkTypes []*structs.CheckType, persist bool, token string, source configSource) error {
	// For now only sidecar proxies have anything that can be configured
	// centrally. So bypass the whole manager for regular services.
	if !service.IsSidecarProxy() && !service.IsMeshGateway() {
		return s.agent.addServiceInternal(service, chkTypes, persist, token, false, source)
	}

	s.lock.Lock()
	defer s.lock.Unlock()

	reg := serviceRegistration{
		service:  service,
		chkTypes: chkTypes,
		persist:  persist,
		token:    token,
		source:   source,
	}

	// If a service watch already exists, update the registration. Otherwise,
	// start a new config watcher.
	watch, ok := s.services[service.ID]
	if ok {
		if err := watch.updateRegistration(&reg); err != nil {
			return err
		}
		s.agent.logger.Printf("[DEBUG] agent.manager: updated local registration for service %q", service.ID)
	} else {
		// This is a new entry, so get the existing global config and do the initial
		// registration with the merged config.
		watch := &serviceConfigWatch{
			registration: &reg,
			readyCh:      make(chan error),
			updateCh:     make(chan cache.UpdateEvent, 1),
			agent:        s.agent,
		}

		// Start the config watch, which starts a blocking query for the resolved service config
		// in the background.
		if err := watch.Start(); err != nil {
			return err
		}

		// Call ReadyWait to block until the cache has returned the initial config and the service
		// has been registered.
		if err := watch.ReadyWait(); err != nil {
			watch.Stop()
			return err
		}

		s.services[service.ID] = watch

		s.agent.logger.Printf("[DEBUG] agent.manager: added local registration for service %q", service.ID)
	}

	return nil
}
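
// RemoveService stops the config watch for the given service ID and removes it
// from the manager. It is a no-op if the service was never added.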
func (s *ServiceManager) RemoveService(serviceID string) {
	s.lock.Lock()
	defer s.lock.Unlock()

	serviceWatch, ok := s.services[serviceID]
	if !ok {
		return
	}

	serviceWatch.Stop()
	delete(s.services, serviceID)
}

// serviceRegistration represents a locally registered service.
type serviceRegistration struct {
	service  *structs.NodeService
	chkTypes []*structs.CheckType
	persist  bool
	token    string
	source   configSource
}

// serviceConfigWatch is a long running helper for composing the end config
// for a given service from both the local registration and the global
// service/proxy defaults.
type serviceConfigWatch struct {
	registration *serviceRegistration
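	// defaults stores the most recently resolved global service/proxy defaults
	// for this service, or nil until the first config response has been received.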
	defaults     *structs.ServiceConfigResponse

	agent *Agent

	// readyCh is used for ReadyWait in order to block until the first update
	// for the resolved service config is received from the cache.
	readyCh chan error

	// ctx and cancelFunc store the overall context that lives as long as the
	// Watch instance is needed, possibly spanning multiple cache.Notify
	// lifetimes.
	ctx        context.Context
	cancelFunc func()

	// cacheKey stores the key of the current request; when the registration
	// changes we check it to see if a new cache watch is needed.
	cacheKey string

	// updateCh receives changes from cache watchers or registration changes.
	updateCh chan cache.UpdateEvent

	// notifyCancel, if non-nil, is the cancel func that will stop the currently
	// active Notify loop. It does not cancel ctx and is used when we need to
	// switch to a new Notify call because the cache key changed.
	notifyCancel func()

	lock sync.Mutex
}

// Start starts the config watch and a goroutine to handle updates over
// the updateCh. This is not safe to call more than once.
func (s *serviceConfigWatch) Start() error {
	s.ctx, s.cancelFunc = context.WithCancel(context.Background())
	if err := s.ensureConfigWatch(); err != nil {
		return err
	}
	go s.runWatch()

	return nil
}
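
// Stop cancels the watch context, which stops both the active cache.Notify
// blocking query and the runWatch goroutine.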
func (s *serviceConfigWatch) Stop() {
	s.cancelFunc()
}

// ReadyWait blocks until the readyCh is closed, which means the initial
// registration of the service has been completed. If there was an error
// with the initial registration, it will be returned.
func (s *serviceConfigWatch) ReadyWait() error {
	err := <-s.readyCh
	return err
}

// runWatch handles any update events from the cache.Notify until the
// config watch is shut down.
func (s *serviceConfigWatch) runWatch() {
	firstRun := true
	for {
		select {
		case <-s.ctx.Done():
			return
		case event := <-s.updateCh:
			if err := s.handleUpdate(event, false, firstRun); err != nil {
				s.agent.logger.Printf("[ERR] agent.manager: error handling service update: %v", err)
			}
			firstRun = false
		}
	}
}

// handleUpdate receives an update event about either the service registration or the
// global config defaults, updates the local state and re-registers the service with
// the newly merged config. This function takes the serviceConfigWatch lock to ensure
// only one update can be happening at a time.
func (s *serviceConfigWatch) handleUpdate(event cache.UpdateEvent, locked, firstRun bool) error {
	// Take the agent state lock if needed. This is done before the local config watch
	// lock in order to prevent a race between this config watch and others - the config
	// watch lock is the inner lock and the agent stateLock is the outer lock. If this is the
	// first run we also don't need to take the stateLock, as this is being waited on
	// synchronously by a caller that already holds it.
	if !locked && !firstRun {
		s.agent.stateLock.Lock()
		defer s.agent.stateLock.Unlock()
	}
	s.lock.Lock()
	defer s.lock.Unlock()

	// If we got an error, log a warning if this is the first update; otherwise return the error.
	// We want the initial update to cause a service registration no matter what.
	if event.Err != nil {
		if firstRun {
			s.agent.logger.Printf("[WARN] could not retrieve initial service_defaults config for service %q: %v",
				s.registration.service.ID, event.Err)
		} else {
			return fmt.Errorf("error watching service config: %v", event.Err)
		}
	} else {
		switch res := event.Result.(type) {
		case *serviceRegistration:
			s.registration = res
			// We may need to restart the watch if upstreams changed.
			if err := s.ensureConfigWatch(); err != nil {
				return err
			}
		case *structs.ServiceConfigResponse:
			// Sanity check this even came from the currently active watch to ignore
			// rare races when switching cache keys.
			if event.CorrelationID != s.cacheKey {
				// It's a no-op. The new watcher will deliver (or may have already
				// delivered) the correct config so just ignore this old message.
				return nil
			}
			s.defaults = res
		default:
			return fmt.Errorf("unknown update event type: %T", event)
		}
	}

	// Merge the local registration with the central defaults and update this service
	// in the local state.
	service, err := s.mergeServiceConfig()
	if err != nil {
		return err
	}
	if err := s.updateAgentRegistration(service); err != nil {
		// If this is the initial registration, return the error through the readyCh
		// so it can be passed back to the original caller.
		if firstRun {
			s.readyCh <- err
		}
		return fmt.Errorf("error updating service registration: %v", err)
	}

	// If this is the first registration, set the ready status by closing the channel.
	if firstRun {
		close(s.readyCh)
	}

	return nil
}

// updateAgentRegistration updates the service (and its sidecar, if applicable) in the
// local state.
func (s *serviceConfigWatch) updateAgentRegistration(ns *structs.NodeService) error {
	return s.agent.addServiceInternal(ns, s.registration.chkTypes, s.registration.persist, s.registration.token, false, s.registration.source)
}

// ensureConfigWatch starts a cache.Notify goroutine to run a continuous
// blocking query on the resolved service config for this service. If the
// registration has changed in a way that requires a new blocking query, it will
// cancel any current watch and start a new one. It is a no-op if there is an
// existing watch that is sufficient for the current registration. It is not
// thread-safe and must only be called from the Start method (which is only safe
// to call once as documented) or from inside the run loop.
func (s *serviceConfigWatch) ensureConfigWatch() error {
	ns := s.registration.service
	name := ns.Service
	var upstreams []string

	// Note that only sidecar proxies should even make it here for now although
	// later that will change and this condition will need to be added.
	if ns.IsSidecarProxy() {
		// This is a sidecar proxy, ignore the proxy service's config since we are
		// managed by the target service config.
		name = ns.Proxy.DestinationServiceName

		// Also if we have any upstreams defined, add them to the request so we can
		// learn about their configs.
		for _, us := range ns.Proxy.Upstreams {
			if us.DestinationType == "" || us.DestinationType == structs.UpstreamDestTypeService {
				upstreams = append(upstreams, us.DestinationName)
			}
		}
	}

	req := &structs.ServiceConfigRequest{
		Name:         name,
		Datacenter:   s.agent.config.Datacenter,
		QueryOptions: structs.QueryOptions{Token: s.agent.config.ACLAgentToken},
		Upstreams:    upstreams,
	}
	if s.registration.token != "" {
		req.QueryOptions.Token = s.registration.token
	}

	// See if this request is different from the current one.
	cacheKey := req.CacheInfo().Key
	if cacheKey == s.cacheKey {
		return nil
	}

	// If there is an existing notify running, stop it first. This may leave a
	// blocking query running in the background but the Notify loop will swallow
	// the response and exit when it next unblocks so we can consider it stopped.
	if s.notifyCancel != nil {
		s.notifyCancel()
	}

	// Make a new context just for this Notify call.
	ctx, cancel := context.WithCancel(s.ctx)
	s.notifyCancel = cancel
	s.cacheKey = cacheKey
	// We use the cache key as the correlationID here. Notify in general will not
	// respond on the updateCh after the context is cancelled, however there could
	// possibly be a race where it has only just got an update and checked the
	// context before we cancel, and so might still deliver the old event. Using
	// the cacheKey allows us to ignore updates from the old cache watch and makes
	// even this rare edge case safe.
	err := s.agent.cache.Notify(ctx, cachetype.ResolvedServiceConfigName, req,
		s.cacheKey, s.updateCh)

	return err
}

// updateRegistration does a synchronous update of the local service registration and
// returns the result. The agent stateLock should be held when calling this function.
func (s *serviceConfigWatch) updateRegistration(registration *serviceRegistration) error {
	return s.handleUpdate(cache.UpdateEvent{
		Result: registration,
	}, true, false)
}

// mergeServiceConfig returns the final effective config for the watched service,
// including the latest known global defaults from the servers.
func (s *serviceConfigWatch) mergeServiceConfig() (*structs.NodeService, error) {
	if s.defaults == nil || (!s.registration.service.IsSidecarProxy() && !s.registration.service.IsMeshGateway()) {
		return s.registration.service, nil
	}

	// We don't want to change s.registration in place since it is our source of
	// truth about what was actually registered before defaults were applied. So
	// copy it first.
	nsRaw, err := copystructure.Copy(s.registration.service)
	if err != nil {
		return nil, err
	}

	// Merge proxy defaults.
	ns := nsRaw.(*structs.NodeService)

	if err := mergo.Merge(&ns.Proxy.Config, s.defaults.ProxyConfig); err != nil {
		return nil, err
	}

	if ns.Proxy.MeshGateway.Mode == structs.MeshGatewayModeDefault {
		ns.Proxy.MeshGateway.Mode = s.defaults.MeshGateway.Mode
	}

	// Merge upstream defaults if there were any returned.
	for i := range ns.Proxy.Upstreams {
		// Get a pointer, not a value copy, of the upstream struct.
		us := &ns.Proxy.Upstreams[i]
		if us.DestinationType != "" && us.DestinationType != structs.UpstreamDestTypeService {
			continue
		}

		// Default the upstream's gateway mode if it didn't specify one.
		if us.MeshGateway.Mode == structs.MeshGatewayModeDefault {
			us.MeshGateway.Mode = ns.Proxy.MeshGateway.Mode
		}

		usCfg, ok := s.defaults.UpstreamConfigs[us.DestinationName]
		if !ok {
			// No config defaults to merge.
			continue
		}
		if err := mergo.Merge(&us.Config, usCfg); err != nil {
			return nil, err
		}
	}
	return ns, err
}