diff --git a/agent/acl.go b/agent/acl.go index 2aee3faadc..4245c53a05 100644 --- a/agent/acl.go +++ b/agent/acl.go @@ -259,7 +259,7 @@ func (a *Agent) vetServiceRegister(token string, service *structs.NodeService) e } // Vet any service that might be getting overwritten. - services := a.state.Services() + services := a.State.Services() if existing, ok := services[service.ID]; ok { if !rule.ServiceWrite(existing.Service, nil) { return acl.ErrPermissionDenied @@ -282,7 +282,7 @@ func (a *Agent) vetServiceUpdate(token string, serviceID string) error { } // Vet any changes based on the existing service's info. - services := a.state.Services() + services := a.State.Services() if existing, ok := services[serviceID]; ok { if !rule.ServiceWrite(existing.Service, nil) { return acl.ErrPermissionDenied @@ -318,7 +318,7 @@ func (a *Agent) vetCheckRegister(token string, check *structs.HealthCheck) error } // Vet any check that might be getting overwritten. - checks := a.state.Checks() + checks := a.State.Checks() if existing, ok := checks[check.CheckID]; ok { if len(existing.ServiceName) > 0 { if !rule.ServiceWrite(existing.ServiceName, nil) { @@ -346,7 +346,7 @@ func (a *Agent) vetCheckUpdate(token string, checkID types.CheckID) error { } // Vet any changes based on the existing check's info. - checks := a.state.Checks() + checks := a.State.Checks() if existing, ok := checks[checkID]; ok { if len(existing.ServiceName) > 0 { if !rule.ServiceWrite(existing.ServiceName, nil) { diff --git a/agent/acl_test.go b/agent/acl_test.go index 42ddbcaa25..8b16269981 100644 --- a/agent/acl_test.go +++ b/agent/acl_test.go @@ -564,7 +564,7 @@ func TestACL_vetServiceRegister(t *testing.T) { // Try to register over a service without write privs to the existing // service. - a.state.AddService(&structs.NodeService{ + a.State.AddService(&structs.NodeService{ ID: "my-service", Service: "other", }, "") @@ -596,7 +596,7 @@ func TestACL_vetServiceUpdate(t *testing.T) { } // Update with write privs. - a.state.AddService(&structs.NodeService{ + a.State.AddService(&structs.NodeService{ ID: "my-service", Service: "service", }, "") @@ -662,11 +662,11 @@ func TestACL_vetCheckRegister(t *testing.T) { // Try to register over a service check without write privs to the // existing service. - a.state.AddService(&structs.NodeService{ + a.State.AddService(&structs.NodeService{ ID: "my-service", Service: "service", }, "") - a.state.AddCheck(&structs.HealthCheck{ + a.State.AddCheck(&structs.HealthCheck{ CheckID: types.CheckID("my-check"), ServiceID: "my-service", ServiceName: "other", @@ -681,7 +681,7 @@ } // Try to register over a node check without write privs to the node. - a.state.AddCheck(&structs.HealthCheck{ + a.State.AddCheck(&structs.HealthCheck{ CheckID: types.CheckID("my-node-check"), }, "") err = a.vetCheckRegister("service-rw", &structs.HealthCheck{ @@ -713,11 +713,11 @@ func TestACL_vetCheckUpdate(t *testing.T) { } // Update service check with write privs. - a.state.AddService(&structs.NodeService{ + a.State.AddService(&structs.NodeService{ ID: "my-service", Service: "service", }, "") - a.state.AddCheck(&structs.HealthCheck{ + a.State.AddCheck(&structs.HealthCheck{ CheckID: types.CheckID("my-service-check"), ServiceID: "my-service", ServiceName: "service", @@ -734,7 +734,7 @@ func TestACL_vetCheckUpdate(t *testing.T) { } // Update node check with write privs.
- a.state.AddCheck(&structs.HealthCheck{ + a.State.AddCheck(&structs.HealthCheck{ CheckID: types.CheckID("my-node-check"), }, "") err = a.vetCheckUpdate("node-rw", "my-node-check") diff --git a/agent/ae/ae.go b/agent/ae/ae.go new file mode 100644 index 0000000000..d177ee7763 --- /dev/null +++ b/agent/ae/ae.go @@ -0,0 +1,321 @@ +// Package ae provides tools to synchronize state between local and remote consul servers. +package ae + +import ( + "fmt" + "log" + "math" + "sync" + "time" + + "github.com/hashicorp/consul/lib" +) + +// scaleThreshold is the number of nodes after which regular sync runs are +// spread out farther apart. The value should be a power of 2 since the +// scale function uses log2. +// +// When set to 128 nodes the delay between regular runs is doubled when the +// cluster is larger than 128 nodes. It doubles again when it passes 256 +// nodes, and again at 512 nodes and so forth. At 8192 nodes, the delay +// factor is 8. +// +// If you update this, you may need to adjust the tuning of +// CoordinateUpdatePeriod and CoordinateUpdateMaxBatchSize. +const scaleThreshold = 128 + +// scaleFactor returns a factor by which the next sync run should be delayed to +// avoid saturation of the cluster. The larger the cluster grows the farther +// the sync runs should be spread apart. +// +// The current implementation uses a log2 scale which doubles the delay between +// runs every time the cluster doubles in size. +func scaleFactor(nodes int) int { + if nodes <= scaleThreshold { + return 1.0 + } + return int(math.Ceil(math.Log2(float64(nodes))-math.Log2(float64(scaleThreshold))) + 1.0) +} + +type SyncState interface { + SyncChanges() error + SyncFull() error +} + +// StateSyncer manages background synchronization of the given state. +// +// The state is synchronized on a regular basis or on demand when either +// the state has changed or a new Consul server has joined the cluster. +// +// The regular state synchronization provides a self-healing mechanism +// for the cluster which is also called anti-entropy. +type StateSyncer struct { + // State contains the data that needs to be synchronized. + State SyncState + + // Interval is the time between two full sync runs. + Interval time.Duration + + // ShutdownCh is closed when the application is shutting down. + ShutdownCh chan struct{} + + // Logger is the logger. + Logger *log.Logger + + // ClusterSize returns the number of members in the cluster to + // allow staggering the sync runs based on cluster size. + // This needs to be set before Run() is called. + ClusterSize func() int + + // SyncFull allows triggering an immediate but staggered full sync + // in a non-blocking way. + SyncFull *Trigger + + // SyncChanges allows triggering an immediate partial sync + // in a non-blocking way. + SyncChanges *Trigger + + // paused stores whether sync runs are temporarily disabled. + pauseLock sync.Mutex + paused int + + // serverUpInterval is the max time after which a full sync is + // performed when a server has been added to the cluster. + serverUpInterval time.Duration + + // retryFailInterval is the time after which a failed full sync is retried. + retryFailInterval time.Duration + + // stagger randomly picks a duration between 0s and the given duration. + stagger func(time.Duration) time.Duration + + // retrySyncFullEvent generates an event based on multiple conditions + // when the state machine is trying to retry a full state sync.
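(A quick aside on the arithmetic before the rest of the struct: the formula is easier to trust with worked numbers. The standalone sketch below is not part of the patch; it evaluates the same expression, and the results match the table in TestAE_scaleFactor in ae_test.go further down.)

	package main

	import (
		"fmt"
		"math"
	)

	const scaleThreshold = 128

	// Same formula as scaleFactor above: at or below the threshold the
	// factor is 1; afterwards it grows by one each time the size doubles.
	func scaleFactor(nodes int) int {
		if nodes <= scaleThreshold {
			return 1
		}
		return int(math.Ceil(math.Log2(float64(nodes))-math.Log2(float64(scaleThreshold))) + 1.0)
	}

	func main() {
		for _, n := range []int{100, 200, 1000, 10000} {
			fmt.Printf("%5d nodes -> factor %d\n", n, scaleFactor(n))
		}
		// Output:
		//   100 nodes -> factor 1
		//   200 nodes -> factor 2
		//  1000 nodes -> factor 4
		// 10000 nodes -> factor 8
	}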
+ retrySyncFullEvent func() event + + // syncChangesEvent generates an event based on multiple conditions + // when the state machine is performing partial state syncs. + syncChangesEvent func() event +} + +const ( + // serverUpIntv is the max time to wait before a sync is triggered + // when a consul server has been added to the cluster. + serverUpIntv = 3 * time.Second + + // retryFailIntv is the min time to wait before a failed sync is retried. + retryFailIntv = 15 * time.Second +) + +func NewStateSyncer(state SyncState, intv time.Duration, shutdownCh chan struct{}, logger *log.Logger) *StateSyncer { + s := &StateSyncer{ + State: state, + Interval: intv, + ShutdownCh: shutdownCh, + Logger: logger, + SyncFull: NewTrigger(), + SyncChanges: NewTrigger(), + serverUpInterval: serverUpIntv, + retryFailInterval: retryFailIntv, + } + + // retain these methods as member variables so that + // we can mock them for testing. + s.retrySyncFullEvent = s.retrySyncFullEventFn + s.syncChangesEvent = s.syncChangesEventFn + s.stagger = s.staggerFn + + return s +} + +// fsmState defines states for the state machine. +type fsmState string + +const ( + doneState fsmState = "done" + fullSyncState fsmState = "fullSync" + partialSyncState fsmState = "partialSync" + retryFullSyncState fsmState = "retryFullSync" +) + +// Run is the long-running method to perform state synchronization +// between local and remote servers. +func (s *StateSyncer) Run() { + if s.ClusterSize == nil { + panic("ClusterSize not set") + } + s.runFSM(fullSyncState, s.nextFSMState) +} + +// runFSM runs the state machine. +func (s *StateSyncer) runFSM(fs fsmState, next func(fsmState) fsmState) { + for { + if fs = next(fs); fs == doneState { + return + } + } +} + +// nextFSMState determines the next state based on the current state. +func (s *StateSyncer) nextFSMState(fs fsmState) fsmState { + switch fs { + case fullSyncState: + if s.Paused() { + return retryFullSyncState + } + + err := s.State.SyncFull() + if err != nil { + s.Logger.Printf("[ERR] agent: failed to sync remote state: %v", err) + return retryFullSyncState + } + + return partialSyncState + + case retryFullSyncState: + e := s.retrySyncFullEvent() + switch e { + case syncFullNotifEvent, syncFullTimerEvent: + return fullSyncState + case shutdownEvent: + return doneState + default: + panic(fmt.Sprintf("invalid event: %s", e)) + } + + case partialSyncState: + e := s.syncChangesEvent() + switch e { + case syncFullNotifEvent, syncFullTimerEvent: + return fullSyncState + + case syncChangesNotifEvent: + if s.Paused() { + return partialSyncState + } + + err := s.State.SyncChanges() + if err != nil { + s.Logger.Printf("[ERR] agent: failed to sync changes: %v", err) + } + return partialSyncState + + case shutdownEvent: + return doneState + + default: + panic(fmt.Sprintf("invalid event: %s", e)) + } + + default: + panic(fmt.Sprintf("invalid state: %s", fs)) + } +} + +// event defines a timing or notification event from multiple timers and +// channels. +type event string + +const ( + shutdownEvent event = "shutdown" + syncFullNotifEvent event = "syncFullNotif" + syncFullTimerEvent event = "syncFullTimer" + syncChangesNotifEvent event = "syncChangesNotif" +) + +// retrySyncFullEventFn waits for an event which triggers a retry +// of a full sync or a termination signal. This function should not be +// called directly but through s.retrySyncFullEvent to allow mocking for +// testing. +func (s *StateSyncer) retrySyncFullEventFn() event { + select { + // trigger a full sync immediately.
+ // this is usually called when a consul server was added to the cluster. + // stagger the delay to avoid a thundering herd. + case <-s.SyncFull.Notif(): + select { + case <-time.After(s.stagger(s.serverUpInterval)): + return syncFullNotifEvent + case <-s.ShutdownCh: + return shutdownEvent + } + + // retry full sync after some time + // todo(fs): why don't we use s.Interval here? + case <-time.After(s.retryFailInterval + s.stagger(s.retryFailInterval)): + return syncFullTimerEvent + + case <-s.ShutdownCh: + return shutdownEvent + } +} + +// syncChangesEventFn waits for an event which either triggers a full +// or a partial sync or a termination signal. This function should not +// be called directly but through s.syncChangesEvent to allow mocking +// for testing. +func (s *StateSyncer) syncChangesEventFn() event { + select { + // trigger a full sync immediately + // this is usually called when a consul server was added to the cluster. + // stagger the delay to avoid a thundering herd. + case <-s.SyncFull.Notif(): + select { + case <-time.After(s.stagger(s.serverUpInterval)): + return syncFullNotifEvent + case <-s.ShutdownCh: + return shutdownEvent + } + + // time for a full sync again + case <-time.After(s.Interval + s.stagger(s.Interval)): + return syncFullTimerEvent + + // do partial syncs on demand + case <-s.SyncChanges.Notif(): + return syncChangesNotifEvent + + case <-s.ShutdownCh: + return shutdownEvent + } +} + +// stubbed out for testing +var libRandomStagger = lib.RandomStagger + +// staggerFn returns a random duration which depends on the cluster size +// and a random factor which should provide some timely distribution of +// cluster-wide events. This function should not be called directly +// but through s.stagger to allow mocking for testing. +func (s *StateSyncer) staggerFn(d time.Duration) time.Duration { + f := scaleFactor(s.ClusterSize()) + return libRandomStagger(time.Duration(f) * d) +} + +// Pause temporarily disables sync runs. +func (s *StateSyncer) Pause() { + s.pauseLock.Lock() + s.paused++ + s.pauseLock.Unlock() +} + +// Paused returns whether sync runs are temporarily disabled. +func (s *StateSyncer) Paused() bool { + s.pauseLock.Lock() + defer s.pauseLock.Unlock() + return s.paused != 0 +} + +// Resume re-enables sync runs.
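Pause and Resume (whose body follows) form a counting pair rather than a boolean flag: nested Pause calls stack, an unbalanced Resume panics, and only the Resume that brings the counter back to zero fires SyncChanges, so changes made while paused are pushed promptly. A minimal usage sketch under those semantics, mirroring what Agent.PauseSync/ResumeSync do later in this diff (bulkUpdate is a hypothetical helper):

	// bulkUpdate pauses syncing around a batch of local state changes.
	func bulkUpdate(s *StateSyncer, mutate func()) {
		s.Pause() // paused == 1: sync runs are deferred from here on
		s.Pause() // paused == 2: nesting is fine

		mutate() // change local state without racing anti-entropy

		s.Resume() // paused == 1: no trigger yet
		s.Resume() // paused == 0: fires s.SyncChanges.Trigger()
	}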
+func (s *StateSyncer) Resume() { + s.pauseLock.Lock() + s.paused-- + if s.paused < 0 { + panic("unbalanced pause/resume") + } + trigger := s.paused == 0 + s.pauseLock.Unlock() + if trigger { + s.SyncChanges.Trigger() + } +} diff --git a/agent/ae/ae_test.go b/agent/ae/ae_test.go new file mode 100644 index 0000000000..a80532d80e --- /dev/null +++ b/agent/ae/ae_test.go @@ -0,0 +1,397 @@ +package ae + +import ( + "errors" + "fmt" + "log" + "os" + "reflect" + "sync" + "testing" + "time" + + "github.com/hashicorp/consul/lib" +) + +func TestAE_scaleFactor(t *testing.T) { + t.Parallel() + tests := []struct { + nodes int + scale int + }{ + {100, 1}, + {200, 2}, + {1000, 4}, + {10000, 8}, + } + for _, tt := range tests { + t.Run(fmt.Sprintf("%d nodes", tt.nodes), func(t *testing.T) { + if got, want := scaleFactor(tt.nodes), tt.scale; got != want { + t.Fatalf("got scale factor %d want %d", got, want) + } + }) + } +} + +func TestAE_Pause_nestedPauseResume(t *testing.T) { + t.Parallel() + l := NewStateSyncer(nil, 0, nil, nil) + if l.Paused() != false { + t.Fatal("syncer should be unpaused after init") + } + l.Pause() + if l.Paused() != true { + t.Fatal("syncer should be Paused after first call to Pause()") + } + l.Pause() + if l.Paused() != true { + t.Fatal("syncer should STILL be Paused after second call to Pause()") + } + l.Resume() + if l.Paused() != true { + t.Fatal("syncer should STILL be Paused after FIRST call to Resume()") + } + l.Resume() + if l.Paused() != false { + t.Fatal("syncer should NOT be Paused after SECOND call to Resume()") + } + + defer func() { + err := recover() + if err == nil { + t.Fatal("unbalanced Resume() should panic") + } + }() + l.Resume() +} + +func TestAE_Pause_ResumeTriggersSyncChanges(t *testing.T) { + l := NewStateSyncer(nil, 0, nil, nil) + l.Pause() + l.Resume() + select { + case <-l.SyncChanges.Notif(): + // expected + case <-l.SyncFull.Notif(): + t.Fatal("resume triggered SyncFull instead of SyncChanges") + default: + t.Fatal("resume did not trigger SyncChanges") + } +} + +func TestAE_staggerDependsOnClusterSize(t *testing.T) { + libRandomStagger = func(d time.Duration) time.Duration { return d } + defer func() { libRandomStagger = lib.RandomStagger }() + + l := testSyncer() + if got, want := l.staggerFn(10*time.Millisecond), 10*time.Millisecond; got != want { + t.Fatalf("got %v want %v", got, want) + } + l.ClusterSize = func() int { return 256 } + if got, want := l.staggerFn(10*time.Millisecond), 20*time.Millisecond; got != want { + t.Fatalf("got %v want %v", got, want) + } +} + +func TestAE_Run_SyncFullBeforeChanges(t *testing.T) { + shutdownCh := make(chan struct{}) + state := &mock{ + syncChanges: func() error { + close(shutdownCh) + return nil + }, + } + + // indicate that we have partial changes before starting Run + l := testSyncer() + l.State = state + l.ShutdownCh = shutdownCh + l.SyncChanges.Trigger() + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + l.Run() + }() + wg.Wait() + + if got, want := state.seq, []string{"full", "changes"}; !reflect.DeepEqual(got, want) { + t.Fatalf("got call sequence %v want %v", got, want) + } +} + +func TestAE_Run_Quit(t *testing.T) { + t.Run("Run panics without ClusterSize", func(t *testing.T) { + defer func() { + err := recover() + if err == nil { + t.Fatal("Run should panic") + } + }() + l := testSyncer() + l.ClusterSize = nil + l.Run() + }) + t.Run("runFSM quits", func(t *testing.T) { + // start timer which explodes if runFSM does not quit + tm := time.AfterFunc(time.Second, func() { panic("timeout")
}) + + l := testSyncer() + l.runFSM(fullSyncState, func(fsmState) fsmState { return doneState }) + // should just quit + tm.Stop() + }) +} + +func TestAE_FSM(t *testing.T) { + t.Run("fullSyncState", func(t *testing.T) { + t.Run("Paused -> retryFullSyncState", func(t *testing.T) { + l := testSyncer() + l.Pause() + fs := l.nextFSMState(fullSyncState) + if got, want := fs, retryFullSyncState; got != want { + t.Fatalf("got state %v want %v", got, want) + } + }) + t.Run("SyncFull() error -> retryFullSyncState", func(t *testing.T) { + l := testSyncer() + l.State = &mock{syncFull: func() error { return errors.New("boom") }} + fs := l.nextFSMState(fullSyncState) + if got, want := fs, retryFullSyncState; got != want { + t.Fatalf("got state %v want %v", got, want) + } + }) + t.Run("SyncFull() OK -> partialSyncState", func(t *testing.T) { + l := testSyncer() + l.State = &mock{} + fs := l.nextFSMState(fullSyncState) + if got, want := fs, partialSyncState; got != want { + t.Fatalf("got state %v want %v", got, want) + } + }) + }) + + t.Run("retryFullSyncState", func(t *testing.T) { + // helper for testing state transitions from retryFullSyncState + test := func(ev event, to fsmState) { + l := testSyncer() + l.retrySyncFullEvent = func() event { return ev } + fs := l.nextFSMState(retryFullSyncState) + if got, want := fs, to; got != want { + t.Fatalf("got state %v want %v", got, want) + } + } + t.Run("shutdownEvent -> doneState", func(t *testing.T) { + test(shutdownEvent, doneState) + }) + t.Run("syncFullNotifEvent -> fullSyncState", func(t *testing.T) { + test(syncFullNotifEvent, fullSyncState) + }) + t.Run("syncFullTimerEvent -> fullSyncState", func(t *testing.T) { + test(syncFullTimerEvent, fullSyncState) + }) + t.Run("invalid event -> panic ", func(t *testing.T) { + defer func() { + err := recover() + if err == nil { + t.Fatal("invalid event should panic") + } + }() + test(event("invalid"), fsmState("")) + }) + }) + + t.Run("partialSyncState", func(t *testing.T) { + // helper for testing state transitions from partialSyncState + test := func(ev event, to fsmState) { + l := testSyncer() + l.syncChangesEvent = func() event { return ev } + fs := l.nextFSMState(partialSyncState) + if got, want := fs, to; got != want { + t.Fatalf("got state %v want %v", got, want) + } + } + t.Run("shutdownEvent -> doneState", func(t *testing.T) { + test(shutdownEvent, doneState) + }) + t.Run("syncFullNotifEvent -> fullSyncState", func(t *testing.T) { + test(syncFullNotifEvent, fullSyncState) + }) + t.Run("syncFullTimerEvent -> fullSyncState", func(t *testing.T) { + test(syncFullTimerEvent, fullSyncState) + }) + t.Run("syncChangesEvent+Paused -> partialSyncState", func(t *testing.T) { + l := testSyncer() + l.Pause() + l.syncChangesEvent = func() event { return syncChangesNotifEvent } + fs := l.nextFSMState(partialSyncState) + if got, want := fs, partialSyncState; got != want { + t.Fatalf("got state %v want %v", got, want) + } + }) + t.Run("syncChangesEvent+SyncChanges() error -> partialSyncState", func(t *testing.T) { + l := testSyncer() + l.State = &mock{syncChanges: func() error { return errors.New("boom") }} + l.syncChangesEvent = func() event { return syncChangesNotifEvent } + fs := l.nextFSMState(partialSyncState) + if got, want := fs, partialSyncState; got != want { + t.Fatalf("got state %v want %v", got, want) + } + }) + t.Run("syncChangesEvent+SyncChanges() OK -> partialSyncState", func(t *testing.T) { + l := testSyncer() + l.State = &mock{} + l.syncChangesEvent = func() event { return syncChangesNotifEvent } + fs
:= l.nextFSMState(partialSyncState) + if got, want := fs, partialSyncState; got != want { + t.Fatalf("got state %v want %v", got, want) + } + }) + t.Run("invalid event -> panic ", func(t *testing.T) { + defer func() { + err := recover() + if err == nil { + t.Fatal("invalid event should panic") + } + }() + test(event("invalid"), fsmState("")) + }) + }) + t.Run("invalid state -> panic ", func(t *testing.T) { + defer func() { + err := recover() + if err == nil { + t.Fatal("invalid state should panic") + } + }() + l := testSyncer() + l.nextFSMState(fsmState("invalid")) + }) +} + +func TestAE_RetrySyncFullEvent(t *testing.T) { + t.Run("trigger shutdownEvent", func(t *testing.T) { + l := testSyncer() + l.ShutdownCh = make(chan struct{}) + evch := make(chan event) + go func() { evch <- l.retrySyncFullEvent() }() + close(l.ShutdownCh) + if got, want := <-evch, shutdownEvent; got != want { + t.Fatalf("got event %q want %q", got, want) + } + }) + t.Run("trigger shutdownEvent during FullNotif", func(t *testing.T) { + l := testSyncer() + l.ShutdownCh = make(chan struct{}) + evch := make(chan event) + go func() { evch <- l.retrySyncFullEvent() }() + l.SyncFull.Trigger() + time.Sleep(100 * time.Millisecond) + close(l.ShutdownCh) + if got, want := <-evch, shutdownEvent; got != want { + t.Fatalf("got event %q want %q", got, want) + } + }) + t.Run("trigger syncFullNotifEvent", func(t *testing.T) { + l := testSyncer() + l.serverUpInterval = 10 * time.Millisecond + evch := make(chan event) + go func() { evch <- l.retrySyncFullEvent() }() + l.SyncFull.Trigger() + if got, want := <-evch, syncFullNotifEvent; got != want { + t.Fatalf("got event %q want %q", got, want) + } + }) + t.Run("trigger syncFullTimerEvent", func(t *testing.T) { + l := testSyncer() + l.retryFailInterval = 10 * time.Millisecond + evch := make(chan event) + go func() { evch <- l.retrySyncFullEvent() }() + if got, want := <-evch, syncFullTimerEvent; got != want { + t.Fatalf("got event %q want %q", got, want) + } + }) +} + +func TestAE_SyncChangesEvent(t *testing.T) { + t.Run("trigger shutdownEvent", func(t *testing.T) { + l := testSyncer() + l.ShutdownCh = make(chan struct{}) + evch := make(chan event) + go func() { evch <- l.syncChangesEvent() }() + close(l.ShutdownCh) + if got, want := <-evch, shutdownEvent; got != want { + t.Fatalf("got event %q want %q", got, want) + } + }) + t.Run("trigger shutdownEvent during FullNotif", func(t *testing.T) { + l := testSyncer() + l.ShutdownCh = make(chan struct{}) + evch := make(chan event) + go func() { evch <- l.syncChangesEvent() }() + l.SyncFull.Trigger() + time.Sleep(100 * time.Millisecond) + close(l.ShutdownCh) + if got, want := <-evch, shutdownEvent; got != want { + t.Fatalf("got event %q want %q", got, want) + } + }) + t.Run("trigger syncFullNotifEvent", func(t *testing.T) { + l := testSyncer() + l.serverUpInterval = 10 * time.Millisecond + evch := make(chan event) + go func() { evch <- l.syncChangesEvent() }() + l.SyncFull.Trigger() + if got, want := <-evch, syncFullNotifEvent; got != want { + t.Fatalf("got event %q want %q", got, want) + } + }) + t.Run("trigger syncFullTimerEvent", func(t *testing.T) { + l := testSyncer() + l.Interval = 10 * time.Millisecond + evch := make(chan event) + go func() { evch <- l.syncChangesEvent() }() + if got, want := <-evch, syncFullTimerEvent; got != want { + t.Fatalf("got event %q want %q", got, want) + } + }) + t.Run("trigger syncChangesNotifEvent", func(t *testing.T) { + l := testSyncer() + evch := make(chan event) + go func() { evch <- l.syncChangesEvent() 
}() + l.SyncChanges.Trigger() + if got, want := <-evch, syncChangesNotifEvent; got != want { + t.Fatalf("got event %q want %q", got, want) + } + }) +} + +type mock struct { + seq []string + syncFull, syncChanges func() error +} + +func (m *mock) SyncFull() error { + m.seq = append(m.seq, "full") + if m.syncFull != nil { + return m.syncFull() + } + return nil +} + +func (m *mock) SyncChanges() error { + m.seq = append(m.seq, "changes") + if m.syncChanges != nil { + return m.syncChanges() + } + return nil +} + +func testSyncer() *StateSyncer { + logger := log.New(os.Stderr, "", 0) + l := NewStateSyncer(nil, time.Second, nil, logger) + l.stagger = func(d time.Duration) time.Duration { return d } + l.ClusterSize = func() int { return 1 } + return l +} diff --git a/agent/ae/trigger.go b/agent/ae/trigger.go new file mode 100644 index 0000000000..1aa5f45862 --- /dev/null +++ b/agent/ae/trigger.go @@ -0,0 +1,23 @@ +package ae + +// Trigger implements a non-blocking event notifier. Events can be +// triggered without blocking and notifications happen only when the +// previous event was consumed. +type Trigger struct { + ch chan struct{} +} + +func NewTrigger() *Trigger { + return &Trigger{make(chan struct{}, 1)} +} + +func (t Trigger) Trigger() { + select { + case t.ch <- struct{}{}: + default: + } +} + +func (t Trigger) Notif() <-chan struct{} { + return t.ch +} diff --git a/agent/agent.go b/agent/agent.go index c2abbb76dc..f40a027448 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -20,8 +20,10 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/consul/acl" + "github.com/hashicorp/consul/agent/ae" "github.com/hashicorp/consul/agent/config" "github.com/hashicorp/consul/agent/consul" + "github.com/hashicorp/consul/agent/local" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/systemd" "github.com/hashicorp/consul/agent/token" @@ -107,7 +109,11 @@ type Agent struct { // state stores a local representation of the node, // services and checks. Used for anti-entropy. - state *localState + State *local.State + + // sync manages the synchronization of the local + // and the remote state. + sync *ae.StateSyncer // checkReapAfter maps the check ID to a timeout after which we should // reap its associated service @@ -224,6 +230,22 @@ func New(c *config.RuntimeConfig) (*Agent, error) { return a, nil } +func LocalConfig(cfg *config.RuntimeConfig) local.Config { + lc := local.Config{ + AdvertiseAddr: cfg.AdvertiseAddrLAN.String(), + CheckUpdateInterval: cfg.CheckUpdateInterval, + Datacenter: cfg.Datacenter, + DiscardCheckOutput: cfg.DiscardCheckOutput, + NodeID: cfg.NodeID, + NodeName: cfg.NodeName, + TaggedAddresses: map[string]string{}, + } + for k, v := range cfg.TaggedAddresses { + lc.TaggedAddresses[k] = v + } + return lc +} + func (a *Agent) Start() error { c := a.config @@ -242,7 +264,11 @@ func (a *Agent) Start() error { } // create the local state - a.state = NewLocalState(c, a.logger, a.tokens) + a.State = local.NewState(LocalConfig(c), a.logger, a.tokens) + + // create the state synchronization manager which performs + // regular and on-demand state synchronizations (anti-entropy). 
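The Trigger type added in agent/ae/trigger.go above is what makes this wiring safe: its channel has capacity one, so any number of Trigger calls, from any goroutine, collapse into a single pending notification and never block the caller. A small sketch of those semantics (assuming the ae package as defined above):

	t := ae.NewTrigger()

	t.Trigger() // non-blocking
	t.Trigger() // coalesced with the first; still one pending notification

	select {
	case <-t.Notif():
		// the single pending notification is consumed; a later
		// Trigger() makes Notif() ready again
	default:
		// not reached in this sequence: a notification was pending
	}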
+ a.sync = ae.NewStateSyncer(a.State, c.AEInterval, a.shutdownCh, a.logger) // create the config for the rpc server/client consulCfg, err := a.consulConfig() @@ -250,8 +276,10 @@ func (a *Agent) Start() error { return err } - // link consul client/server with the state - consulCfg.ServerUp = a.state.ConsulServerUp + // ServerUp is used to inform that a new consul server is now + // up. This can be used to speed up the sync process if we are blocking + // waiting to discover a consul server + consulCfg.ServerUp = a.sync.SyncFull.Trigger // Setup either the client or the server. if c.ServerMode { @@ -259,19 +287,25 @@ func (a *Agent) Start() error { if err != nil { return fmt.Errorf("Failed to start Consul server: %v", err) } - a.delegate = server - a.state.delegate = server } else { client, err := consul.NewClientLogger(consulCfg, a.logger) if err != nil { return fmt.Errorf("Failed to start Consul client: %v", err) } - a.delegate = client - a.state.delegate = client } + // the staggering of the state syncing depends on the cluster size. + a.sync.ClusterSize = func() int { return len(a.delegate.LANMembers()) } + + // link the state with the consul server/client and the state syncer + // via callbacks. After several attempts this was easier than using + // channels since the event notification needs to be non-blocking + // and that should be hidden in the state syncer implementation. + a.State.Delegate = a.delegate + a.State.TriggerSyncChanges = a.sync.SyncChanges.Trigger + // Load checks/services/metadata. if err := a.loadServices(c); err != nil { return err @@ -1264,18 +1298,18 @@ func (a *Agent) WANMembers() []serf.Member { // StartSync is called once Services and Checks are registered. // This is called to prevent a race between clients and the anti-entropy routines func (a *Agent) StartSync() { - // Start the anti entropy routine - go a.state.antiEntropy(a.shutdownCh) + go a.sync.Run() + a.logger.Printf("[INFO] agent: started state syncer") } // PauseSync is used to pause anti-entropy while bulk changes are made func (a *Agent) PauseSync() { - a.state.Pause() + a.sync.Pause() } // ResumeSync is used to unpause anti-entropy after bulk changes are made func (a *Agent) ResumeSync() { - a.state.Resume() + a.sync.Resume() } // GetLANCoordinate returns the coordinates of this node in the local pools @@ -1339,29 +1373,31 @@ OUTER: // reapServicesInternal does a single pass, looking for services to reap. func (a *Agent) reapServicesInternal() { - reaped := make(map[string]struct{}) - for checkID, check := range a.state.CriticalChecks() { + reaped := make(map[string]bool) + for checkID, cs := range a.State.CriticalCheckStates() { + serviceID := cs.Check.ServiceID + // There's nothing to do if there's no service. - if check.Check.ServiceID == "" { + if serviceID == "" { continue } // There might be multiple checks for one service, so // we don't need to reap multiple times. - serviceID := check.Check.ServiceID - if _, ok := reaped[serviceID]; ok { + if reaped[serviceID] { continue } // See if there's a timeout. + // todo(fs): this looks fishy... why is there another data structure in the agent with its own lock? a.checkLock.Lock() - timeout, ok := a.checkReapAfter[checkID] + timeout := a.checkReapAfter[checkID] a.checkLock.Unlock() // Reap, if necessary. We keep track of which service // this is so that we won't try to remove it again.
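Before the reaper details below, the Start() wiring above is worth seeing in isolation. A self-contained sketch of the StateSyncer lifecycle (demoState is invented for illustration and merely satisfies ae.SyncState):

	package main

	import (
		"log"
		"os"
		"time"

		"github.com/hashicorp/consul/agent/ae"
	)

	type demoState struct{}

	func (demoState) SyncFull() error    { log.Println("full sync"); return nil }
	func (demoState) SyncChanges() error { log.Println("partial sync"); return nil }

	func main() {
		shutdownCh := make(chan struct{})
		s := ae.NewStateSyncer(demoState{}, time.Minute, shutdownCh, log.New(os.Stderr, "", 0))
		s.ClusterSize = func() int { return 1 } // Run panics if this is unset

		go s.Run() // performs a full sync first, then reacts to events

		s.SyncChanges.Trigger() // on-demand partial sync, as State.TriggerSyncChanges does
		s.SyncFull.Trigger()    // staggered full sync, as consulCfg.ServerUp does

		time.Sleep(100 * time.Millisecond)
		close(shutdownCh) // the FSM reaches doneState and Run returns
	}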
- if ok && check.CriticalFor > timeout { - reaped[serviceID] = struct{}{} + if timeout > 0 && cs.CriticalFor() > timeout { + reaped[serviceID] = true a.RemoveService(serviceID, true) a.logger.Printf("[INFO] agent: Check %q for service %q has been critical for too long; deregistered service", checkID, serviceID) @@ -1396,7 +1432,7 @@ func (a *Agent) persistService(service *structs.NodeService) error { svcPath := filepath.Join(a.config.DataDir, servicesDir, stringHash(service.ID)) wrapped := persistedService{ - Token: a.state.ServiceToken(service.ID), + Token: a.State.ServiceToken(service.ID), Service: service, } encoded, err := json.Marshal(wrapped) @@ -1424,7 +1460,7 @@ func (a *Agent) persistCheck(check *structs.HealthCheck, chkType *structs.CheckT wrapped := persistedCheck{ Check: check, ChkType: chkType, - Token: a.state.CheckToken(check.CheckID), + Token: a.State.CheckToken(check.CheckID), } encoded, err := json.Marshal(wrapped) @@ -1523,7 +1559,7 @@ func (a *Agent) AddService(service *structs.NodeService, chkTypes []*structs.Che defer a.restoreCheckState(snap) // Add the service - a.state.AddService(service, token) + a.State.AddService(service, token) // Persist the service to a file if persist && !a.config.DevMode { @@ -1573,7 +1609,7 @@ func (a *Agent) RemoveService(serviceID string, persist bool) error { } // Remove service immediately - if err := a.state.RemoveService(serviceID); err != nil { + if err := a.State.RemoveService(serviceID); err != nil { a.logger.Printf("[WARN] agent: Failed to deregister service %q: %s", serviceID, err) return nil } @@ -1586,8 +1622,8 @@ func (a *Agent) RemoveService(serviceID string, persist bool) error { } // Deregister any associated health checks - for checkID, health := range a.state.Checks() { - if health.ServiceID != serviceID { + for checkID, check := range a.State.Checks() { + if check.ServiceID != serviceID { continue } if err := a.RemoveCheck(checkID, persist); err != nil { @@ -1619,11 +1655,11 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType, } if check.ServiceID != "" { - svc, ok := a.state.Services()[check.ServiceID] - if !ok { + s := a.State.Service(check.ServiceID) + if s == nil { return fmt.Errorf("ServiceID %q does not exist", check.ServiceID) } - check.ServiceName = svc.Service + check.ServiceName = s.Service } a.checkLock.Lock() @@ -1640,7 +1676,7 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType, } ttl := &CheckTTL{ - Notify: a.state, + Notify: a.State, CheckID: check.CheckID, TTL: chkType.TTL, Logger: a.logger, @@ -1667,7 +1703,7 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType, } http := &CheckHTTP{ - Notify: a.state, + Notify: a.State, CheckID: check.CheckID, HTTP: chkType.HTTP, Header: chkType.Header, @@ -1692,7 +1728,7 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType, } tcp := &CheckTCP{ - Notify: a.state, + Notify: a.State, CheckID: check.CheckID, TCP: chkType.TCP, Interval: chkType.Interval, @@ -1729,7 +1765,7 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType, } dockerCheck := &CheckDocker{ - Notify: a.state, + Notify: a.State, CheckID: check.CheckID, DockerContainerID: chkType.DockerContainerID, Shell: chkType.Shell, @@ -1759,7 +1795,7 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType, } monitor := &CheckMonitor{ - Notify: a.state, + Notify: a.State, CheckID: check.CheckID, Script: chkType.Script, ScriptArgs: 
chkType.ScriptArgs, @@ -1788,7 +1824,7 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType, } // Add to the local state for anti-entropy - err := a.state.AddCheck(check, token) + err := a.State.AddCheck(check, token) if err != nil { a.cancelCheckMonitors(check.CheckID) return err @@ -1811,7 +1847,7 @@ func (a *Agent) RemoveCheck(checkID types.CheckID, persist bool) error { } // Add to the local state for anti-entropy - a.state.RemoveCheck(checkID) + a.State.RemoveCheck(checkID) a.checkLock.Lock() defer a.checkLock.Unlock() @@ -1971,15 +2007,13 @@ func (a *Agent) GossipEncrypted() bool { // Stats is used to get various debugging state from the sub-systems func (a *Agent) Stats() map[string]map[string]string { - toString := func(v uint64) string { - return strconv.FormatUint(v, 10) - } stats := a.delegate.Stats() stats["agent"] = map[string]string{ - "check_monitors": toString(uint64(len(a.checkMonitors))), - "check_ttls": toString(uint64(len(a.checkTTLs))), - "checks": toString(uint64(len(a.state.checks))), - "services": toString(uint64(len(a.state.services))), + "check_monitors": strconv.Itoa(len(a.checkMonitors)), + "check_ttls": strconv.Itoa(len(a.checkTTLs)), + } + for k, v := range a.State.Stats() { + stats["agent"][k] = v } revision := a.config.Revision @@ -2102,7 +2136,7 @@ func (a *Agent) loadServices(conf *config.RuntimeConfig) error { } serviceID := p.Service.ID - if _, ok := a.state.services[serviceID]; ok { + if a.State.Service(serviceID) != nil { // Purge previously persisted service. This allows config to be // preferred over services persisted from the API. a.logger.Printf("[DEBUG] agent: service %q exists, not restoring from %q", @@ -2122,15 +2156,13 @@ func (a *Agent) loadServices(conf *config.RuntimeConfig) error { return nil } -// unloadServices will deregister all services other than the 'consul' service -// known to the local agent. +// unloadServices will deregister all services. func (a *Agent) unloadServices() error { - for _, service := range a.state.Services() { - if err := a.RemoveService(service.ID, false); err != nil { - return fmt.Errorf("Failed deregistering service '%s': %v", service.ID, err) + for id := range a.State.Services() { + if err := a.RemoveService(id, false); err != nil { + return fmt.Errorf("Failed deregistering service '%s': %v", id, err) } } - return nil } @@ -2182,7 +2214,7 @@ func (a *Agent) loadChecks(conf *config.RuntimeConfig) error { } checkID := p.Check.CheckID - if _, ok := a.state.checks[checkID]; ok { + if a.State.Check(checkID) != nil { // Purge previously persisted check. This allows config to be // preferred over persisted checks from the API. a.logger.Printf("[DEBUG] agent: check %q exists, not restoring from %q", @@ -2213,12 +2245,11 @@ func (a *Agent) loadChecks(conf *config.RuntimeConfig) error { // unloadChecks will deregister all checks known to the local agent. func (a *Agent) unloadChecks() error { - for _, check := range a.state.Checks() { - if err := a.RemoveCheck(check.CheckID, false); err != nil { - return fmt.Errorf("Failed deregistering check '%s': %s", check.CheckID, err) + for id := range a.State.Checks() { + if err := a.RemoveCheck(id, false); err != nil { + return fmt.Errorf("Failed deregistering check '%s': %s", id, err) } } - return nil } @@ -2226,7 +2257,7 @@ func (a *Agent) unloadChecks() error { // checks. This is done before we reload our checks, so that we can properly // restore into the same state. 
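The two helpers that follow back a pattern already visible in AddService above: snapshot the current check state, mutate, then restore, so a re-registration does not flap existing checks back to critical. Paraphrased from the surrounding agent code (no new behavior):

	// Snapshot the current state of all health checks...
	snap := a.snapshotCheckState()
	// ...and restore it after the re-registration below, so the saved
	// Status/Output are pushed back via a.State.UpdateCheck.
	defer a.restoreCheckState(snap)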
func (a *Agent) snapshotCheckState() map[types.CheckID]*structs.HealthCheck { - return a.state.Checks() + return a.State.Checks() } // restoreCheckState is used to reset the health state based on a snapshot. @@ -2234,33 +2265,24 @@ func (a *Agent) snapshotCheckState() map[types.CheckID]*structs.HealthCheck { // in health state and potential session invalidations. func (a *Agent) restoreCheckState(snap map[types.CheckID]*structs.HealthCheck) { for id, check := range snap { - a.state.UpdateCheck(id, check.Status, check.Output) + a.State.UpdateCheck(id, check.Status, check.Output) } } // loadMetadata loads node metadata fields from the agent config and // updates them on the local agent. func (a *Agent) loadMetadata(conf *config.RuntimeConfig) error { - a.state.Lock() - defer a.state.Unlock() - - for key, value := range conf.NodeMeta { - a.state.metadata[key] = value + meta := map[string]string{} + for k, v := range conf.NodeMeta { + meta[k] = v } - - a.state.metadata[structs.MetaSegmentKey] = conf.SegmentName - - a.state.changeMade() - - return nil + meta[structs.MetaSegmentKey] = conf.SegmentName + return a.State.LoadMetadata(meta) } // unloadMetadata resets the local metadata state func (a *Agent) unloadMetadata() { - a.state.Lock() - defer a.state.Unlock() - - a.state.metadata = make(map[string]string) + a.State.UnloadMetadata() } // serviceMaintCheckID returns the ID of a given service's maintenance check @@ -2271,14 +2293,14 @@ func serviceMaintCheckID(serviceID string) types.CheckID { // EnableServiceMaintenance will register a false health check against the given // service ID with critical status. This will exclude the service from queries. func (a *Agent) EnableServiceMaintenance(serviceID, reason, token string) error { - service, ok := a.state.Services()[serviceID] + service, ok := a.State.Services()[serviceID] if !ok { return fmt.Errorf("No service registered with ID %q", serviceID) } // Check if maintenance mode is not already enabled checkID := serviceMaintCheckID(serviceID) - if _, ok := a.state.Checks()[checkID]; ok { + if _, ok := a.State.Checks()[checkID]; ok { return nil } @@ -2306,13 +2328,13 @@ func (a *Agent) EnableServiceMaintenance(serviceID, reason, token string) error // DisableServiceMaintenance will deregister the fake maintenance mode check // if the service has been marked as in maintenance. func (a *Agent) DisableServiceMaintenance(serviceID string) error { - if _, ok := a.state.Services()[serviceID]; !ok { + if _, ok := a.State.Services()[serviceID]; !ok { return fmt.Errorf("No service registered with ID %q", serviceID) } // Check if maintenance mode is enabled checkID := serviceMaintCheckID(serviceID) - if _, ok := a.state.Checks()[checkID]; !ok { + if _, ok := a.State.Checks()[checkID]; !ok { return nil } @@ -2326,7 +2348,7 @@ func (a *Agent) DisableServiceMaintenance(serviceID string) error { // EnableNodeMaintenance places a node into maintenance mode. 
func (a *Agent) EnableNodeMaintenance(reason, token string) { // Ensure node maintenance is not already enabled - if _, ok := a.state.Checks()[structs.NodeMaint]; ok { + if _, ok := a.State.Checks()[structs.NodeMaint]; ok { return } @@ -2349,7 +2371,7 @@ func (a *Agent) EnableNodeMaintenance(reason, token string) { // DisableNodeMaintenance removes a node from maintenance mode func (a *Agent) DisableNodeMaintenance() { - if _, ok := a.state.Checks()[structs.NodeMaint]; !ok { + if _, ok := a.State.Checks()[structs.NodeMaint]; !ok { return } a.RemoveCheck(structs.NodeMaint, true) @@ -2393,7 +2415,7 @@ func (a *Agent) ReloadConfig(newCfg *config.RuntimeConfig) error { // Update filtered metrics metrics.UpdateFilter(newCfg.TelemetryAllowedPrefixes, newCfg.TelemetryBlockedPrefixes) - a.state.SetDiscardCheckOutput(newCfg.DiscardCheckOutput) + a.State.SetDiscardCheckOutput(newCfg.DiscardCheckOutput) return nil } diff --git a/agent/agent_endpoint.go b/agent/agent_endpoint.go index 271f53da62..6f105b8b0e 100644 --- a/agent/agent_endpoint.go +++ b/agent/agent_endpoint.go @@ -72,7 +72,7 @@ func (s *HTTPServer) AgentSelf(resp http.ResponseWriter, req *http.Request) (int Coord: cs[s.agent.config.SegmentName], Member: s.agent.LocalMember(), Stats: s.agent.Stats(), - Meta: s.agent.state.Metadata(), + Meta: s.agent.State.Metadata(), }, nil } @@ -137,7 +137,7 @@ func (s *HTTPServer) AgentServices(resp http.ResponseWriter, req *http.Request) var token string s.parseToken(req, &token) - services := s.agent.state.Services() + services := s.agent.State.Services() if err := s.agent.filterServices(token, &services); err != nil { return nil, err } @@ -161,7 +161,7 @@ func (s *HTTPServer) AgentChecks(resp http.ResponseWriter, req *http.Request) (i var token string s.parseToken(req, &token) - checks := s.agent.state.Checks() + checks := s.agent.State.Checks() if err := s.agent.filterChecks(token, &checks); err != nil { return nil, err } @@ -304,7 +304,7 @@ func (s *HTTPServer) AgentForceLeave(resp http.ResponseWriter, req *http.Request // services and checks to the server. If the operation fails, we only // warn because the write did succeed and anti-entropy will sync later.
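The syncChanges helper that follows is the glue between HTTP writes and anti-entropy: the endpoints mutate local state first and then make a best-effort push to the servers. The shape of a typical caller, abbreviated and with hypothetical variable names (the AddService signature is assumed to match the agent code this diff touches):

	// Register against the local agent first; this write cannot be lost.
	if err := s.agent.AddService(ns, chkTypes, true, token); err != nil {
		return nil, err
	}
	// Then push the delta. A failure here is only logged, because the
	// local write succeeded and anti-entropy will reconcile later.
	s.syncChanges()
	return nil, nil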
func (s *HTTPServer) syncChanges() { - if err := s.agent.state.syncChanges(); err != nil { + if err := s.agent.State.SyncChanges(); err != nil { s.agent.logger.Printf("[ERR] agent: failed to sync changes: %v", err) } } diff --git a/agent/agent_endpoint_test.go b/agent/agent_endpoint_test.go index 7068eab2df..55fd2536ce 100644 --- a/agent/agent_endpoint_test.go +++ b/agent/agent_endpoint_test.go @@ -51,7 +51,7 @@ func TestAgent_Services(t *testing.T) { Tags: []string{"master"}, Port: 5000, } - a.state.AddService(srv1, "") + a.State.AddService(srv1, "") req, _ := http.NewRequest("GET", "/v1/agent/services", nil) obj, err := a.srv.AgentServices(nil, req) @@ -78,7 +78,7 @@ func TestAgent_Services_ACLFilter(t *testing.T) { Tags: []string{"master"}, Port: 5000, } - a.state.AddService(srv1, "") + a.State.AddService(srv1, "") t.Run("no token", func(t *testing.T) { req, _ := http.NewRequest("GET", "/v1/agent/services", nil) @@ -116,7 +116,7 @@ func TestAgent_Checks(t *testing.T) { Name: "mysql", Status: api.HealthPassing, } - a.state.AddCheck(chk1, "") + a.State.AddCheck(chk1, "") req, _ := http.NewRequest("GET", "/v1/agent/checks", nil) obj, err := a.srv.AgentChecks(nil, req) @@ -143,7 +143,7 @@ func TestAgent_Checks_ACLFilter(t *testing.T) { Name: "mysql", Status: api.HealthPassing, } - a.state.AddCheck(chk1, "") + a.State.AddCheck(chk1, "") t.Run("no token", func(t *testing.T) { req, _ := http.NewRequest("GET", "/v1/agent/checks", nil) @@ -283,8 +283,8 @@ func TestAgent_Reload(t *testing.T) { `) defer a.Shutdown() - if _, ok := a.state.services["redis"]; !ok { - t.Fatalf("missing redis service") + if a.State.Service("redis") == nil { + t.Fatal("missing redis service") } cfg2 := TestConfig(config.Source{ @@ -307,8 +307,8 @@ func TestAgent_Reload(t *testing.T) { if err := a.ReloadConfig(cfg2); err != nil { t.Fatalf("got error %v want nil", err) } - if _, ok := a.state.services["redis-reloaded"]; !ok { - t.Fatalf("missing redis-reloaded service") + if a.State.Service("redis-reloaded") == nil { + t.Fatal("missing redis-reloaded service") } for _, wp := range a.watchPlans { @@ -682,7 +682,7 @@ func TestAgent_RegisterCheck(t *testing.T) { // Ensure we have a check mapping checkID := types.CheckID("test") - if _, ok := a.state.Checks()[checkID]; !ok { + if _, ok := a.State.Checks()[checkID]; !ok { t.Fatalf("missing test check") } @@ -691,12 +691,12 @@ func TestAgent_RegisterCheck(t *testing.T) { } // Ensure the token was configured - if token := a.state.CheckToken(checkID); token == "" { + if token := a.State.CheckToken(checkID); token == "" { t.Fatalf("missing token") } // By default, checks start in critical state. 
- state := a.state.Checks()[checkID] + state := a.State.Checks()[checkID] if state.Status != api.HealthCritical { t.Fatalf("bad: %v", state) } @@ -817,7 +817,7 @@ func TestAgent_RegisterCheck_Passing(t *testing.T) { // Ensure we have a check mapping checkID := types.CheckID("test") - if _, ok := a.state.Checks()[checkID]; !ok { + if _, ok := a.State.Checks()[checkID]; !ok { t.Fatalf("missing test check") } @@ -825,7 +825,7 @@ func TestAgent_RegisterCheck_Passing(t *testing.T) { t.Fatalf("missing test check ttl") } - state := a.state.Checks()[checkID] + state := a.State.Checks()[checkID] if state.Status != api.HealthPassing { t.Fatalf("bad: %v", state) } @@ -896,7 +896,7 @@ func TestAgent_DeregisterCheck(t *testing.T) { } // Ensure we have a check mapping - if _, ok := a.state.Checks()["test"]; ok { + if _, ok := a.State.Checks()["test"]; ok { t.Fatalf("have test check") } } @@ -947,7 +947,7 @@ func TestAgent_PassCheck(t *testing.T) { } // Ensure we have a check mapping - state := a.state.Checks()["test"] + state := a.State.Checks()["test"] if state.Status != api.HealthPassing { t.Fatalf("bad: %v", state) } @@ -1000,7 +1000,7 @@ func TestAgent_WarnCheck(t *testing.T) { } // Ensure we have a check mapping - state := a.state.Checks()["test"] + state := a.State.Checks()["test"] if state.Status != api.HealthWarning { t.Fatalf("bad: %v", state) } @@ -1053,7 +1053,7 @@ func TestAgent_FailCheck(t *testing.T) { } // Ensure we have a check mapping - state := a.state.Checks()["test"] + state := a.State.Checks()["test"] if state.Status != api.HealthCritical { t.Fatalf("bad: %v", state) } @@ -1117,7 +1117,7 @@ func TestAgent_UpdateCheck(t *testing.T) { t.Fatalf("expected 200, got %d", resp.Code) } - state := a.state.Checks()["test"] + state := a.State.Checks()["test"] if state.Status != c.Status || state.Output != c.Output { t.Fatalf("bad: %v", state) } @@ -1145,7 +1145,7 @@ func TestAgent_UpdateCheck(t *testing.T) { // Since we append some notes about truncating, we just do a // rough check that the output buffer was cut down so this test // isn't super brittle. 
- state := a.state.Checks()["test"] + state := a.State.Checks()["test"] if state.Status != api.HealthPassing || len(state.Output) > 2*CheckBufSize { t.Fatalf("bad: %v", state) } @@ -1228,12 +1228,12 @@ func TestAgent_RegisterService(t *testing.T) { } // Ensure the servie - if _, ok := a.state.Services()["test"]; !ok { + if _, ok := a.State.Services()["test"]; !ok { t.Fatalf("missing test service") } // Ensure we have a check mapping - checks := a.state.Checks() + checks := a.State.Checks() if len(checks) != 3 { t.Fatalf("bad: %v", checks) } @@ -1243,7 +1243,7 @@ func TestAgent_RegisterService(t *testing.T) { } // Ensure the token was configured - if token := a.state.ServiceToken("test"); token == "" { + if token := a.State.ServiceToken("test"); token == "" { t.Fatalf("missing token") } } @@ -1271,7 +1271,7 @@ func TestAgent_RegisterService_TranslateKeys(t *testing.T) { EnableTagOverride: true, } - if got, want := a.state.Services()["test"], svc; !verify.Values(t, "", got, want) { + if got, want := a.State.Service("test"), svc; !verify.Values(t, "", got, want) { t.Fail() } } @@ -1364,11 +1364,11 @@ func TestAgent_DeregisterService(t *testing.T) { } // Ensure we have a check mapping - if _, ok := a.state.Services()["test"]; ok { + if _, ok := a.State.Services()["test"]; ok { t.Fatalf("have test service") } - if _, ok := a.state.Checks()["test"]; ok { + if _, ok := a.State.Checks()["test"]; ok { t.Fatalf("have test check") } } @@ -1466,13 +1466,13 @@ func TestAgent_ServiceMaintenance_Enable(t *testing.T) { // Ensure the maintenance check was registered checkID := serviceMaintCheckID("test") - check, ok := a.state.Checks()[checkID] + check, ok := a.State.Checks()[checkID] if !ok { t.Fatalf("should have registered maintenance check") } // Ensure the token was added - if token := a.state.CheckToken(checkID); token != "mytoken" { + if token := a.State.CheckToken(checkID); token != "mytoken" { t.Fatalf("expected 'mytoken', got '%s'", token) } @@ -1513,7 +1513,7 @@ func TestAgent_ServiceMaintenance_Disable(t *testing.T) { // Ensure the maintenance check was removed checkID := serviceMaintCheckID("test") - if _, ok := a.state.Checks()[checkID]; ok { + if _, ok := a.State.Checks()[checkID]; ok { t.Fatalf("should have removed maintenance check") } } @@ -1579,13 +1579,13 @@ func TestAgent_NodeMaintenance_Enable(t *testing.T) { } // Ensure the maintenance check was registered - check, ok := a.state.Checks()[structs.NodeMaint] + check, ok := a.State.Checks()[structs.NodeMaint] if !ok { t.Fatalf("should have registered maintenance check") } // Check that the token was used - if token := a.state.CheckToken(structs.NodeMaint); token != "mytoken" { + if token := a.State.CheckToken(structs.NodeMaint); token != "mytoken" { t.Fatalf("expected 'mytoken', got '%s'", token) } @@ -1614,7 +1614,7 @@ func TestAgent_NodeMaintenance_Disable(t *testing.T) { } // Ensure the maintenance check was removed - if _, ok := a.state.Checks()[structs.NodeMaint]; ok { + if _, ok := a.State.Checks()[structs.NodeMaint]; ok { t.Fatalf("should have removed maintenance check") } } @@ -1670,7 +1670,7 @@ func TestAgent_RegisterCheck_Service(t *testing.T) { } // Ensure we have a check mapping - result := a.state.Checks() + result := a.State.Checks() if _, ok := result["service:memcache"]; !ok { t.Fatalf("missing memcached check") } diff --git a/agent/agent_test.go b/agent/agent_test.go index 19acf63d6d..a883caf703 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -363,14 +363,14 @@ func TestAgent_AddService(t *testing.T) { 
t.Fatalf("err: %v", err) } - got, want := a.state.Services()[tt.srv.ID], tt.srv + got, want := a.State.Services()[tt.srv.ID], tt.srv verify.Values(t, "", got, want) }) // check the health checks for k, v := range tt.healthChks { t.Run(k, func(t *testing.T) { - got, want := a.state.Checks()[types.CheckID(k)], v + got, want := a.State.Checks()[types.CheckID(k)], v verify.Values(t, k, got, want) }) } @@ -437,10 +437,10 @@ func TestAgent_RemoveService(t *testing.T) { if err := a.RemoveService("memcache", false); err != nil { t.Fatalf("err: %s", err) } - if _, ok := a.state.Checks()["service:memcache"]; ok { + if _, ok := a.State.Checks()["service:memcache"]; ok { t.Fatalf("have memcache check") } - if _, ok := a.state.Checks()["check2"]; ok { + if _, ok := a.State.Checks()["check2"]; ok { t.Fatalf("have check2 check") } } @@ -466,15 +466,15 @@ func TestAgent_RemoveService(t *testing.T) { } // Ensure we have a state mapping - if _, ok := a.state.Services()["redis"]; ok { + if _, ok := a.State.Services()["redis"]; ok { t.Fatalf("have redis service") } // Ensure checks were removed - if _, ok := a.state.Checks()["service:redis:1"]; ok { + if _, ok := a.State.Checks()["service:redis:1"]; ok { t.Fatalf("check redis:1 should be removed") } - if _, ok := a.state.Checks()["service:redis:2"]; ok { + if _, ok := a.State.Checks()["service:redis:2"]; ok { t.Fatalf("check redis:2 should be removed") } @@ -507,7 +507,7 @@ func TestAgent_RemoveServiceRemovesAllChecks(t *testing.T) { } // verify chk1 exists - if a.state.Checks()["chk1"] == nil { + if a.State.Checks()["chk1"] == nil { t.Fatal("Could not find health check chk1") } @@ -517,10 +517,10 @@ func TestAgent_RemoveServiceRemovesAllChecks(t *testing.T) { } // check that both checks are there - if got, want := a.state.Checks()["chk1"], hchk1; !verify.Values(t, "", got, want) { + if got, want := a.State.Checks()["chk1"], hchk1; !verify.Values(t, "", got, want) { t.FailNow() } - if got, want := a.state.Checks()["chk2"], hchk2; !verify.Values(t, "", got, want) { + if got, want := a.State.Checks()["chk2"], hchk2; !verify.Values(t, "", got, want) { t.FailNow() } @@ -530,10 +530,10 @@ func TestAgent_RemoveServiceRemovesAllChecks(t *testing.T) { } // Check that both checks are gone - if a.state.Checks()["chk1"] != nil { + if a.State.Checks()["chk1"] != nil { t.Fatal("Found health check chk1 want nil") } - if a.state.Checks()["chk2"] != nil { + if a.State.Checks()["chk2"] != nil { t.Fatal("Found health check chk2 want nil") } } @@ -561,7 +561,7 @@ func TestAgent_AddCheck(t *testing.T) { } // Ensure we have a check mapping - sChk, ok := a.state.Checks()["mem"] + sChk, ok := a.State.Checks()["mem"] if !ok { t.Fatalf("missing mem check") } @@ -600,7 +600,7 @@ func TestAgent_AddCheck_StartPassing(t *testing.T) { } // Ensure we have a check mapping - sChk, ok := a.state.Checks()["mem"] + sChk, ok := a.State.Checks()["mem"] if !ok { t.Fatalf("missing mem check") } @@ -639,7 +639,7 @@ func TestAgent_AddCheck_MinInterval(t *testing.T) { } // Ensure we have a check mapping - if _, ok := a.state.Checks()["mem"]; !ok { + if _, ok := a.State.Checks()["mem"]; !ok { t.Fatalf("missing mem check") } @@ -704,7 +704,7 @@ func TestAgent_AddCheck_RestoreState(t *testing.T) { } // Ensure the check status was restored during registration - checks := a.state.Checks() + checks := a.State.Checks() check, ok := checks["baz"] if !ok { t.Fatalf("missing check") @@ -739,7 +739,7 @@ func TestAgent_AddCheck_ExecDisable(t *testing.T) { } // Ensure we don't have a check mapping - if memChk := 
a.state.Checks()["mem"]; memChk != nil { + if memChk := a.State.Checks()["mem"]; memChk != nil { t.Fatalf("should be missing mem check") } } @@ -782,7 +782,7 @@ func TestAgent_RemoveCheck(t *testing.T) { } // Ensure we have a check mapping - if _, ok := a.state.Checks()["mem"]; ok { + if _, ok := a.State.Checks()["mem"]; ok { t.Fatalf("have mem check") } @@ -817,7 +817,7 @@ func TestAgent_updateTTLCheck(t *testing.T) { } // Ensure we have a check mapping. - status := a.state.Checks()["mem"] + status := a.State.Checks()["mem"] if status.Status != api.HealthPassing { t.Fatalf("bad: %v", status) } @@ -904,15 +904,15 @@ func TestAgent_PersistService(t *testing.T) { a2.Start() defer a2.Shutdown() - restored, ok := a2.state.services[svc.ID] - if !ok { - t.Fatalf("bad: %#v", a2.state.services) + restored := a2.State.ServiceState(svc.ID) + if restored == nil { + t.Fatalf("service %q missing", svc.ID) } - if a2.state.serviceTokens[svc.ID] != "mytoken" { - t.Fatalf("bad: %#v", a2.state.services[svc.ID]) + if got, want := restored.Token, "mytoken"; got != want { + t.Fatalf("got token %q want %q", got, want) } - if restored.Port != 8001 { - t.Fatalf("bad: %#v", restored) + if got, want := restored.Service.Port, 8001; got != want { + t.Fatalf("got port %d want %d", got, want) } } @@ -951,7 +951,7 @@ func TestAgent_persistedService_compat(t *testing.T) { } // Ensure the service was restored - services := a.state.Services() + services := a.State.Services() result, ok := services["redis"] if !ok { t.Fatalf("missing service") @@ -1043,8 +1043,8 @@ func TestAgent_PurgeServiceOnDuplicate(t *testing.T) { if _, err := os.Stat(file); err == nil { t.Fatalf("should have removed persisted service") } - result, ok := a2.state.services["redis"] - if !ok { + result := a2.State.Service("redis") + if result == nil { t.Fatalf("missing service registration") } if !reflect.DeepEqual(result.Tags, []string{"bar"}) || result.Port != 9000 { @@ -1137,9 +1137,9 @@ func TestAgent_PersistCheck(t *testing.T) { a2.Start() defer a2.Shutdown() - result, ok := a2.state.checks[check.CheckID] - if !ok { - t.Fatalf("bad: %#v", a2.state.checks) + result := a2.State.Check(check.CheckID) + if result == nil { + t.Fatalf("bad: %#v", a2.State.Checks()) } if result.Status != api.HealthCritical { t.Fatalf("bad: %#v", result) @@ -1152,8 +1152,8 @@ func TestAgent_PersistCheck(t *testing.T) { if _, ok := a2.checkMonitors[check.CheckID]; !ok { t.Fatalf("bad: %#v", a2.checkMonitors) } - if a2.state.checkTokens[check.CheckID] != "mytoken" { - t.Fatalf("bad: %s", a2.state.checkTokens[check.CheckID]) + if a2.State.CheckState(check.CheckID).Token != "mytoken" { + t.Fatalf("bad: %s", a2.State.CheckState(check.CheckID).Token) } } @@ -1241,8 +1241,8 @@ func TestAgent_PurgeCheckOnDuplicate(t *testing.T) { if _, err := os.Stat(file); err == nil { t.Fatalf("should have removed persisted check") } - result, ok := a2.state.checks["mem"] - if !ok { + result := a2.State.Check("mem") + if result == nil { t.Fatalf("missing check registration") } expected := &structs.HealthCheck{ @@ -1269,11 +1269,11 @@ func TestAgent_loadChecks_token(t *testing.T) { `) defer a.Shutdown() - checks := a.state.Checks() + checks := a.State.Checks() if _, ok := checks["rabbitmq"]; !ok { t.Fatalf("missing check") } - if token := a.state.CheckToken("rabbitmq"); token != "abc123" { + if token := a.State.CheckToken("rabbitmq"); token != "abc123" { t.Fatalf("bad: %s", token) } } @@ -1307,7 +1307,7 @@ func TestAgent_unloadChecks(t *testing.T) { t.Fatalf("err: %s", err) } found := false - for 
check := range a.state.Checks() { + for check := range a.State.Checks() { if check == check1.CheckID { found = true break @@ -1323,7 +1323,7 @@ func TestAgent_unloadChecks(t *testing.T) { } // Make sure it was unloaded - for check := range a.state.Checks() { + for check := range a.State.Checks() { if check == check1.CheckID { t.Fatalf("should have unloaded checks") } @@ -1342,11 +1342,11 @@ func TestAgent_loadServices_token(t *testing.T) { `) defer a.Shutdown() - services := a.state.Services() + services := a.State.Services() if _, ok := services["rabbitmq"]; !ok { t.Fatalf("missing service") } - if token := a.state.ServiceToken("rabbitmq"); token != "abc123" { + if token := a.State.ServiceToken("rabbitmq"); token != "abc123" { t.Fatalf("bad: %s", token) } } @@ -1368,7 +1368,7 @@ func TestAgent_unloadServices(t *testing.T) { t.Fatalf("err: %v", err) } found := false - for id := range a.state.Services() { + for id := range a.State.Services() { if id == svc.ID { found = true break @@ -1382,7 +1382,7 @@ func TestAgent_unloadServices(t *testing.T) { if err := a.unloadServices(); err != nil { t.Fatalf("err: %s", err) } - if len(a.state.Services()) != 0 { + if len(a.State.Services()) != 0 { t.Fatalf("should have unloaded services") } } @@ -1411,13 +1411,13 @@ func TestAgent_Service_MaintenanceMode(t *testing.T) { // Make sure the critical health check was added checkID := serviceMaintCheckID("redis") - check, ok := a.state.Checks()[checkID] + check, ok := a.State.Checks()[checkID] if !ok { t.Fatalf("should have registered critical maintenance check") } // Check that the token was used to register the check - if token := a.state.CheckToken(checkID); token != "mytoken" { + if token := a.State.CheckToken(checkID); token != "mytoken" { t.Fatalf("expected 'mytoken', got: '%s'", token) } @@ -1432,7 +1432,7 @@ func TestAgent_Service_MaintenanceMode(t *testing.T) { } // Ensure the check was deregistered - if _, ok := a.state.Checks()[checkID]; ok { + if _, ok := a.State.Checks()[checkID]; ok { t.Fatalf("should have deregistered maintenance check") } @@ -1442,7 +1442,7 @@ func TestAgent_Service_MaintenanceMode(t *testing.T) { } // Ensure the check was registered with the default notes - check, ok = a.state.Checks()[checkID] + check, ok = a.State.Checks()[checkID] if !ok { t.Fatalf("should have registered critical check") } @@ -1479,19 +1479,19 @@ func TestAgent_Service_Reap(t *testing.T) { } // Make sure it's there and there's no critical check yet. - if _, ok := a.state.Services()["redis"]; !ok { + if _, ok := a.State.Services()["redis"]; !ok { t.Fatalf("should have redis service") } - if checks := a.state.CriticalChecks(); len(checks) > 0 { + if checks := a.State.CriticalCheckStates(); len(checks) > 0 { t.Fatalf("should not have critical checks") } // Wait for the check TTL to fail but before the check is reaped. 
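	// (The 100ms sleep below is tuned against this test's TTL and reap
	// window: long enough for the TTL to expire and the check to go
	// critical, but shorter than the 400ms reap wait used further down.)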
time.Sleep(100 * time.Millisecond) - if _, ok := a.state.Services()["redis"]; !ok { + if _, ok := a.State.Services()["redis"]; !ok { t.Fatalf("should have redis service") } - if checks := a.state.CriticalChecks(); len(checks) != 1 { + if checks := a.State.CriticalCheckStates(); len(checks) != 1 { t.Fatalf("should have a critical check") } @@ -1499,28 +1499,28 @@ func TestAgent_Service_Reap(t *testing.T) { if err := a.updateTTLCheck("service:redis", api.HealthPassing, "foo"); err != nil { t.Fatalf("err: %v", err) } - if _, ok := a.state.Services()["redis"]; !ok { + if _, ok := a.State.Services()["redis"]; !ok { t.Fatalf("should have redis service") } - if checks := a.state.CriticalChecks(); len(checks) > 0 { + if checks := a.State.CriticalCheckStates(); len(checks) > 0 { t.Fatalf("should not have critical checks") } // Wait for the check TTL to fail again. time.Sleep(100 * time.Millisecond) - if _, ok := a.state.Services()["redis"]; !ok { + if _, ok := a.State.Services()["redis"]; !ok { t.Fatalf("should have redis service") } - if checks := a.state.CriticalChecks(); len(checks) != 1 { + if checks := a.State.CriticalCheckStates(); len(checks) != 1 { t.Fatalf("should have a critical check") } // Wait for the reap. time.Sleep(400 * time.Millisecond) - if _, ok := a.state.Services()["redis"]; ok { + if _, ok := a.State.Services()["redis"]; ok { t.Fatalf("redis service should have been reaped") } - if checks := a.state.CriticalChecks(); len(checks) > 0 { + if checks := a.State.CriticalCheckStates(); len(checks) > 0 { t.Fatalf("should not have critical checks") } } @@ -1552,28 +1552,28 @@ func TestAgent_Service_NoReap(t *testing.T) { } // Make sure it's there and there's no critical check yet. - if _, ok := a.state.Services()["redis"]; !ok { + if _, ok := a.State.Services()["redis"]; !ok { t.Fatalf("should have redis service") } - if checks := a.state.CriticalChecks(); len(checks) > 0 { + if checks := a.State.CriticalCheckStates(); len(checks) > 0 { t.Fatalf("should not have critical checks") } // Wait for the check TTL to fail. time.Sleep(200 * time.Millisecond) - if _, ok := a.state.Services()["redis"]; !ok { + if _, ok := a.State.Services()["redis"]; !ok { t.Fatalf("should have redis service") } - if checks := a.state.CriticalChecks(); len(checks) != 1 { + if checks := a.State.CriticalCheckStates(); len(checks) != 1 { t.Fatalf("should have a critical check") } // Wait a while and make sure it doesn't reap. 
time.Sleep(200 * time.Millisecond) - if _, ok := a.state.Services()["redis"]; !ok { + if _, ok := a.State.Services()["redis"]; !ok { t.Fatalf("should have redis service") } - if checks := a.state.CriticalChecks(); len(checks) != 1 { + if checks := a.State.CriticalCheckStates(); len(checks) != 1 { t.Fatalf("should have a critical check") } } @@ -1612,7 +1612,7 @@ func TestAgent_addCheck_restoresSnapshot(t *testing.T) { if err := a.AddService(svc, chkTypes, false, ""); err != nil { t.Fatalf("err: %s", err) } - check, ok := a.state.Checks()["service:redis"] + check, ok := a.State.Checks()["service:redis"] if !ok { t.Fatalf("missing check") } @@ -1630,13 +1630,13 @@ func TestAgent_NodeMaintenanceMode(t *testing.T) { a.EnableNodeMaintenance("broken", "mytoken") // Make sure the critical health check was added - check, ok := a.state.Checks()[structs.NodeMaint] + check, ok := a.State.Checks()[structs.NodeMaint] if !ok { t.Fatalf("should have registered critical node check") } // Check that the token was used to register the check - if token := a.state.CheckToken(structs.NodeMaint); token != "mytoken" { + if token := a.State.CheckToken(structs.NodeMaint); token != "mytoken" { t.Fatalf("expected 'mytoken', got: '%s'", token) } @@ -1649,7 +1649,7 @@ func TestAgent_NodeMaintenanceMode(t *testing.T) { a.DisableNodeMaintenance() // Ensure the check was deregistered - if _, ok := a.state.Checks()[structs.NodeMaint]; ok { + if _, ok := a.State.Checks()[structs.NodeMaint]; ok { t.Fatalf("should have deregistered critical node check") } @@ -1657,7 +1657,7 @@ func TestAgent_NodeMaintenanceMode(t *testing.T) { a.EnableNodeMaintenance("", "") // Make sure the check was registered with the default note - check, ok = a.state.Checks()[structs.NodeMaint] + check, ok = a.State.Checks()[structs.NodeMaint] if !ok { t.Fatalf("should have registered critical node check") } @@ -1712,7 +1712,7 @@ func TestAgent_checkStateSnapshot(t *testing.T) { a.restoreCheckState(snap) // Search for the check - out, ok := a.state.Checks()[check1.CheckID] + out, ok := a.State.Checks()[check1.CheckID] if !ok { t.Fatalf("check should have been registered") } diff --git a/agent/catalog_endpoint_test.go b/agent/catalog_endpoint_test.go index 048800d44f..8459291173 100644 --- a/agent/catalog_endpoint_test.go +++ b/agent/catalog_endpoint_test.go @@ -12,45 +12,6 @@ import ( "github.com/hashicorp/serf/coordinate" ) -func TestCatalogRegister(t *testing.T) { - t.Parallel() - a := NewTestAgent(t.Name(), "") - defer a.Shutdown() - - // Register node - args := &structs.RegisterRequest{ - Node: "foo", - Address: "127.0.0.1", - } - req, _ := http.NewRequest("PUT", "/v1/catalog/register", jsonReader(args)) - obj, err := a.srv.CatalogRegister(nil, req) - if err != nil { - t.Fatalf("err: %v", err) - } - - res := obj.(bool) - if res != true { - t.Fatalf("bad: %v", res) - } - - // data race - func() { - a.state.Lock() - defer a.state.Unlock() - - // Service should be in sync - if err := a.state.syncService("foo"); err != nil { - t.Fatalf("err: %s", err) - } - if _, ok := a.state.serviceStatus["foo"]; !ok { - t.Fatalf("bad: %#v", a.state.serviceStatus) - } - if !a.state.serviceStatus["foo"].inSync { - t.Fatalf("should be in sync") - } - }() -} - func TestCatalogRegister_Service_InvalidAddress(t *testing.T) { t.Parallel() a := NewTestAgent(t.Name(), "") diff --git a/agent/local.go b/agent/local.go deleted file mode 100644 index 2a29a9c6c0..0000000000 --- a/agent/local.go +++ /dev/null @@ -1,822 +0,0 @@ -package agent - -import ( - "fmt" - "log" - "reflect" 
- "strings" - "sync" - "sync/atomic" - "time" - - "github.com/hashicorp/consul/acl" - "github.com/hashicorp/consul/agent/config" - "github.com/hashicorp/consul/agent/structs" - "github.com/hashicorp/consul/agent/token" - "github.com/hashicorp/consul/api" - "github.com/hashicorp/consul/lib" - "github.com/hashicorp/consul/types" -) - -const ( - syncStaggerIntv = 3 * time.Second - syncRetryIntv = 15 * time.Second -) - -// syncStatus is used to represent the difference between -// the local and remote state, and if action needs to be taken -type syncStatus struct { - inSync bool // Is this in sync with the server -} - -// localStateConfig is the configuration for the localState. It is -// populated during NewLocalAgent from the agent configuration to avoid -// race conditions with the agent configuration. -type localStateConfig struct { - AEInterval time.Duration - AdvertiseAddr string - CheckUpdateInterval time.Duration - Datacenter string - NodeID types.NodeID - NodeName string - TaggedAddresses map[string]string - Tokens *token.Store -} - -// localState is used to represent the node's services, -// and checks. We used it to perform anti-entropy with the -// catalog representation -type localState struct { - // paused is used to check if we are paused. Must be the first - // element due to a go bug. - paused int32 - - sync.RWMutex - logger *log.Logger - - // Config is the agent config - config localStateConfig - - // delegate is the consul interface to use for keeping in sync - delegate delegate - - // nodeInfoInSync tracks whether the server has our correct top-level - // node information in sync - nodeInfoInSync bool - - // Services tracks the local services - services map[string]*structs.NodeService - serviceStatus map[string]syncStatus - serviceTokens map[string]string - - // Checks tracks the local checks - checks map[types.CheckID]*structs.HealthCheck - checkStatus map[types.CheckID]syncStatus - checkTokens map[types.CheckID]string - checkCriticalTime map[types.CheckID]time.Time - - // Used to track checks that are being deferred - deferCheck map[types.CheckID]*time.Timer - - // metadata tracks the local metadata fields - metadata map[string]string - - // consulCh is used to inform of a change to the known - // consul nodes. This may be used to retry a sync run - consulCh chan struct{} - - // triggerCh is used to inform of a change to local state - // that requires anti-entropy with the server - triggerCh chan struct{} - - // discardCheckOutput stores whether the output of health checks - // is stored in the raft log. 
-	discardCheckOutput atomic.Value // bool
-}
-
-// NewLocalState is used to initialize the local state
-func NewLocalState(c *config.RuntimeConfig, lg *log.Logger, tokens *token.Store) *localState {
-	lc := localStateConfig{
-		AEInterval:          c.AEInterval,
-		AdvertiseAddr:       c.AdvertiseAddrLAN.String(),
-		CheckUpdateInterval: c.CheckUpdateInterval,
-		Datacenter:          c.Datacenter,
-		NodeID:              c.NodeID,
-		NodeName:            c.NodeName,
-		TaggedAddresses:     map[string]string{},
-		Tokens:              tokens,
-	}
-	for k, v := range c.TaggedAddresses {
-		lc.TaggedAddresses[k] = v
-	}
-
-	l := &localState{
-		config:            lc,
-		logger:            lg,
-		services:          make(map[string]*structs.NodeService),
-		serviceStatus:     make(map[string]syncStatus),
-		serviceTokens:     make(map[string]string),
-		checks:            make(map[types.CheckID]*structs.HealthCheck),
-		checkStatus:       make(map[types.CheckID]syncStatus),
-		checkTokens:       make(map[types.CheckID]string),
-		checkCriticalTime: make(map[types.CheckID]time.Time),
-		deferCheck:        make(map[types.CheckID]*time.Timer),
-		metadata:          make(map[string]string),
-		consulCh:          make(chan struct{}, 1),
-		triggerCh:         make(chan struct{}, 1),
-	}
-	l.discardCheckOutput.Store(c.DiscardCheckOutput)
-	return l
-}
-
-// changeMade is used to trigger an anti-entropy run
-func (l *localState) changeMade() {
-	select {
-	case l.triggerCh <- struct{}{}:
-	default:
-	}
-}
-
-// ConsulServerUp is used to inform that a new consul server is now
-// up. This can be used to speed up the sync process if we are blocking
-// waiting to discover a consul server
-func (l *localState) ConsulServerUp() {
-	select {
-	case l.consulCh <- struct{}{}:
-	default:
-	}
-}
-
-// Pause is used to pause state synchronization, this can be
-// used to make batch changes
-func (l *localState) Pause() {
-	atomic.AddInt32(&l.paused, 1)
-}
-
-// Resume is used to resume state synchronization
-func (l *localState) Resume() {
-	paused := atomic.AddInt32(&l.paused, -1)
-	if paused < 0 {
-		panic("unbalanced localState.Resume() detected")
-	}
-	l.changeMade()
-}
-
-// isPaused is used to check if we are paused
-func (l *localState) isPaused() bool {
-	return atomic.LoadInt32(&l.paused) > 0
-}
-
-func (l *localState) SetDiscardCheckOutput(b bool) {
-	l.discardCheckOutput.Store(b)
-}
-
-// ServiceToken returns the configured ACL token for the given
-// service ID. If none is present, the agent's token is returned.
-func (l *localState) ServiceToken(id string) string {
-	l.RLock()
-	defer l.RUnlock()
-	return l.serviceToken(id)
-}
-
-// serviceToken returns an ACL token associated with a service.
-func (l *localState) serviceToken(id string) string {
-	token := l.serviceTokens[id]
-	if token == "" {
-		token = l.config.Tokens.UserToken()
-	}
-	return token
-}
-
-// AddService is used to add a service entry to the local state.
-// This entry is persistent and the agent will make a best effort to
-// ensure it is registered
-func (l *localState) AddService(service *structs.NodeService, token string) {
-	// Assign the ID if none given
-	if service.ID == "" && service.Service != "" {
-		service.ID = service.Service
-	}
-
-	l.Lock()
-	defer l.Unlock()
-
-	l.services[service.ID] = service
-	l.serviceStatus[service.ID] = syncStatus{}
-	l.serviceTokens[service.ID] = token
-	l.changeMade()
-}
-
-// RemoveService is used to remove a service entry from the local state.
-// The agent will make a best effort to ensure it is deregistered
-func (l *localState) RemoveService(serviceID string) error {
-	l.Lock()
-	defer l.Unlock()
-
-	if _, ok := l.services[serviceID]; ok {
-		delete(l.services, serviceID)
-		// Leave the service token around, if any, until we successfully
-		// delete the service.
-		l.serviceStatus[serviceID] = syncStatus{inSync: false}
-		l.changeMade()
-	} else {
-		return fmt.Errorf("Service does not exist")
-	}
-
-	return nil
-}
-
-// Services returns the locally registered services that the
-// agent is aware of and are being kept in sync with the server
-func (l *localState) Services() map[string]*structs.NodeService {
-	services := make(map[string]*structs.NodeService)
-	l.RLock()
-	defer l.RUnlock()
-
-	for name, serv := range l.services {
-		services[name] = serv
-	}
-	return services
-}
-
-// CheckToken is used to return the configured health check token for a
-// Check, or if none is configured, the default agent ACL token.
-func (l *localState) CheckToken(checkID types.CheckID) string {
-	l.RLock()
-	defer l.RUnlock()
-	return l.checkToken(checkID)
-}
-
-// checkToken returns an ACL token associated with a check.
-func (l *localState) checkToken(checkID types.CheckID) string {
-	token := l.checkTokens[checkID]
-	if token == "" {
-		token = l.config.Tokens.UserToken()
-	}
-	return token
-}
-
-// AddCheck is used to add a health check to the local state.
-// This entry is persistent and the agent will make a best effort to
-// ensure it is registered
-func (l *localState) AddCheck(check *structs.HealthCheck, token string) error {
-	l.Lock()
-	defer l.Unlock()
-
-	// Set the node name
-	check.Node = l.config.NodeName
-
-	if l.discardCheckOutput.Load().(bool) {
-		check.Output = ""
-	}
-
-	// if there is a serviceID associated with the check, make sure it exists before adding it
-	// NOTE - This logic may be moved to be handled within the Agent's AddCheck method after a refactor
-	if check.ServiceID != "" && l.services[check.ServiceID] == nil {
-		return fmt.Errorf("ServiceID %q does not exist", check.ServiceID)
-	}
-
-	l.checks[check.CheckID] = check
-	l.checkStatus[check.CheckID] = syncStatus{}
-	l.checkTokens[check.CheckID] = token
-	delete(l.checkCriticalTime, check.CheckID)
-	l.changeMade()
-	return nil
-}
-
-// RemoveCheck is used to remove a health check from the local state.
-// The agent will make a best effort to ensure it is deregistered
-func (l *localState) RemoveCheck(checkID types.CheckID) {
-	l.Lock()
-	defer l.Unlock()
-
-	delete(l.checks, checkID)
-	// Leave the check token around, if any, until we successfully delete
-	// the check.
-	delete(l.checkCriticalTime, checkID)
-	l.checkStatus[checkID] = syncStatus{inSync: false}
-	l.changeMade()
-}
-
-// UpdateCheck is used to update the status of a check
-func (l *localState) UpdateCheck(checkID types.CheckID, status, output string) {
-	l.Lock()
-	defer l.Unlock()
-
-	check, ok := l.checks[checkID]
-	if !ok {
-		return
-	}
-
-	if l.discardCheckOutput.Load().(bool) {
-		output = ""
-	}
-
-	// Update the critical time tracking (this doesn't cause a server update
-	// so we can always keep this up to date).
-	if status == api.HealthCritical {
-		_, wasCritical := l.checkCriticalTime[checkID]
-		if !wasCritical {
-			l.checkCriticalTime[checkID] = time.Now()
-		}
-	} else {
-		delete(l.checkCriticalTime, checkID)
-	}
-
-	// Do nothing if update is idempotent
-	if check.Status == status && check.Output == output {
-		return
-	}
-
-	// Defer a sync if the output has changed.
This is an optimization around - // frequent updates of output. Instead, we update the output internally, - // and periodically do a write-back to the servers. If there is a status - // change we do the write immediately. - if l.config.CheckUpdateInterval > 0 && check.Status == status { - check.Output = output - if _, ok := l.deferCheck[checkID]; !ok { - intv := time.Duration(uint64(l.config.CheckUpdateInterval)/2) + lib.RandomStagger(l.config.CheckUpdateInterval) - deferSync := time.AfterFunc(intv, func() { - l.Lock() - if _, ok := l.checkStatus[checkID]; ok { - l.checkStatus[checkID] = syncStatus{inSync: false} - l.changeMade() - } - delete(l.deferCheck, checkID) - l.Unlock() - }) - l.deferCheck[checkID] = deferSync - } - return - } - - // Update status and mark out of sync - check.Status = status - check.Output = output - l.checkStatus[checkID] = syncStatus{inSync: false} - l.changeMade() -} - -// Checks returns the locally registered checks that the -// agent is aware of and are being kept in sync with the server -func (l *localState) Checks() map[types.CheckID]*structs.HealthCheck { - l.RLock() - defer l.RUnlock() - - checks := make(map[types.CheckID]*structs.HealthCheck) - for id, c := range l.checks { - c2 := new(structs.HealthCheck) - *c2 = *c - checks[id] = c2 - } - return checks -} - -// CriticalCheck is used to return the duration a check has been critical along -// with its associated health check. -type CriticalCheck struct { - CriticalFor time.Duration - Check *structs.HealthCheck -} - -// CriticalChecks returns locally registered health checks that the agent is -// aware of and are being kept in sync with the server, and that are in a -// critical state. This also returns information about how long each check has -// been critical. -func (l *localState) CriticalChecks() map[types.CheckID]CriticalCheck { - checks := make(map[types.CheckID]CriticalCheck) - - l.RLock() - defer l.RUnlock() - - now := time.Now() - for checkID, criticalTime := range l.checkCriticalTime { - checks[checkID] = CriticalCheck{ - CriticalFor: now.Sub(criticalTime), - Check: l.checks[checkID], - } - } - - return checks -} - -// Metadata returns the local node metadata fields that the -// agent is aware of and are being kept in sync with the server -func (l *localState) Metadata() map[string]string { - metadata := make(map[string]string) - l.RLock() - defer l.RUnlock() - - for key, value := range l.metadata { - metadata[key] = value - } - return metadata -} - -// antiEntropy is a long running method used to perform anti-entropy -// between local and remote state. 
-func (l *localState) antiEntropy(shutdownCh chan struct{}) {
-SYNC:
-	// Sync our state with the servers
-	for {
-		err := l.setSyncState()
-		if err == nil {
-			break
-		}
-		l.logger.Printf("[ERR] agent: failed to sync remote state: %v", err)
-		select {
-		case <-l.consulCh:
-			// Stagger the retry on leader election, avoid a thundering herd
-			select {
-			case <-time.After(lib.RandomStagger(aeScale(syncStaggerIntv, len(l.delegate.LANMembers())))):
-			case <-shutdownCh:
-				return
-			}
-		case <-time.After(syncRetryIntv + lib.RandomStagger(aeScale(syncRetryIntv, len(l.delegate.LANMembers())))):
-		case <-shutdownCh:
-			return
-		}
-	}
-
-	// Force-trigger AE to pick up any changes
-	l.changeMade()
-
-	// Schedule the next full sync, with a random stagger
-	aeIntv := aeScale(l.config.AEInterval, len(l.delegate.LANMembers()))
-	aeIntv = aeIntv + lib.RandomStagger(aeIntv)
-	aeTimer := time.After(aeIntv)
-
-	// Wait for sync events
-	for {
-		select {
-		case <-aeTimer:
-			goto SYNC
-		case <-l.triggerCh:
-			// Skip the sync if we are paused
-			if l.isPaused() {
-				continue
-			}
-			if err := l.syncChanges(); err != nil {
-				l.logger.Printf("[ERR] agent: failed to sync changes: %v", err)
-			}
-		case <-shutdownCh:
-			return
-		}
-	}
-}
-
-// setSyncState does a read of the server state, and updates
-// the local syncStatus as appropriate
-func (l *localState) setSyncState() error {
-	req := structs.NodeSpecificRequest{
-		Datacenter:   l.config.Datacenter,
-		Node:         l.config.NodeName,
-		QueryOptions: structs.QueryOptions{Token: l.config.Tokens.AgentToken()},
-	}
-	var out1 structs.IndexedNodeServices
-	var out2 structs.IndexedHealthChecks
-	if e := l.delegate.RPC("Catalog.NodeServices", &req, &out1); e != nil {
-		return e
-	}
-	if err := l.delegate.RPC("Health.NodeChecks", &req, &out2); err != nil {
-		return err
-	}
-	checks := out2.HealthChecks
-
-	l.Lock()
-	defer l.Unlock()
-
-	// Check the node info
-	if out1.NodeServices == nil || out1.NodeServices.Node == nil ||
-		out1.NodeServices.Node.ID != l.config.NodeID ||
-		!reflect.DeepEqual(out1.NodeServices.Node.TaggedAddresses, l.config.TaggedAddresses) ||
-		!reflect.DeepEqual(out1.NodeServices.Node.Meta, l.metadata) {
-		l.nodeInfoInSync = false
-	}
-
-	// Check all our services
-	services := make(map[string]*structs.NodeService)
-	if out1.NodeServices != nil {
-		services = out1.NodeServices.Services
-	}
-
-	for id := range l.services {
-		// If the local service doesn't exist remotely, then sync it
-		if _, ok := services[id]; !ok {
-			l.serviceStatus[id] = syncStatus{inSync: false}
-		}
-	}
-
-	for id, service := range services {
-		// If we don't have the service locally, deregister it
-		existing, ok := l.services[id]
-		if !ok {
-			// The consul service is created automatically, and does
-			// not need to be deregistered.
-			if id == structs.ConsulServiceID {
-				continue
-			}
-			l.serviceStatus[id] = syncStatus{inSync: false}
-			continue
-		}
-
-		// If our definition is different, we need to update it. Make a
-		// copy so that we don't retain a pointer to any actual state
-		// store info for in-memory RPCs.
-		if existing.EnableTagOverride {
-			existing.Tags = make([]string, len(service.Tags))
-			copy(existing.Tags, service.Tags)
-		}
-		equal := existing.IsSame(service)
-		l.serviceStatus[id] = syncStatus{inSync: equal}
-	}
-
-	// Index the remote health checks to improve efficiency
-	checkIndex := make(map[types.CheckID]*structs.HealthCheck, len(checks))
-	for _, check := range checks {
-		checkIndex[check.CheckID] = check
-	}
-
-	// Sync any check which doesn't exist on the remote side
-	for id := range l.checks {
-		if _, ok := checkIndex[id]; !ok {
-			l.checkStatus[id] = syncStatus{inSync: false}
-		}
-	}
-
-	for _, check := range checks {
-		// If we don't have the check locally, deregister it
-		id := check.CheckID
-		existing, ok := l.checks[id]
-		if !ok {
-			// The Serf check is created automatically, and does not
-			// need to be deregistered.
-			if id == structs.SerfCheckID {
-				continue
-			}
-			l.checkStatus[id] = syncStatus{inSync: false}
-			continue
-		}
-
-		// If our definition is different, we need to update it
-		var equal bool
-		if l.config.CheckUpdateInterval == 0 {
-			equal = existing.IsSame(check)
-		} else {
-			// Copy the existing check before potentially modifying
-			// it before the compare operation.
-			eCopy := existing.Clone()
-
-			// Copy the server's check before modifying, otherwise
-			// in-memory RPCs will have side effects.
-			cCopy := check.Clone()
-
-			// If there's a defer timer active then we've got a
-			// potentially spammy check so we don't sync the output
-			// during this sweep since the timer will mark the check
-			// out of sync for us. Otherwise, it is safe to sync the
-			// output now. This is especially important for checks
-			// that don't change state after they are created, in
-			// which case we'd never see their output synced back ever.
-			if _, ok := l.deferCheck[id]; ok {
-				eCopy.Output = ""
-				cCopy.Output = ""
-			}
-			equal = eCopy.IsSame(cCopy)
-		}
-
-		// Update the status
-		l.checkStatus[id] = syncStatus{inSync: equal}
-	}
-	return nil
-}
-
-// syncChanges is used to scan the status of our local services and checks
-// and update any that are out of sync with the server
-func (l *localState) syncChanges() error {
-	l.Lock()
-	defer l.Unlock()
-
-	// We will do node-level info syncing at the end, since it will get
-	// updated by a service or check sync anyway, given how the register
-	// API works.
-
-	// Sync the services
-	for id, status := range l.serviceStatus {
-		if _, ok := l.services[id]; !ok {
-			if err := l.deleteService(id); err != nil {
-				return err
-			}
-		} else if !status.inSync {
-			if err := l.syncService(id); err != nil {
-				return err
-			}
-		} else {
-			l.logger.Printf("[DEBUG] agent: Service '%s' in sync", id)
-		}
-	}
-
-	// Sync the checks
-	for id, status := range l.checkStatus {
-		if _, ok := l.checks[id]; !ok {
-			if err := l.deleteCheck(id); err != nil {
-				return err
-			}
-		} else if !status.inSync {
-			// Cancel a deferred sync
-			if timer := l.deferCheck[id]; timer != nil {
-				timer.Stop()
-				delete(l.deferCheck, id)
-			}
-
-			if err := l.syncCheck(id); err != nil {
-				return err
-			}
-		} else {
-			l.logger.Printf("[DEBUG] agent: Check '%s' in sync", id)
-		}
-	}
-
-	// Now sync the node level info if we need to, and didn't do any of
-	// the other sync operations.
- if !l.nodeInfoInSync { - if err := l.syncNodeInfo(); err != nil { - return err - } - } else { - l.logger.Printf("[DEBUG] agent: Node info in sync") - } - - return nil -} - -// deleteService is used to delete a service from the server -func (l *localState) deleteService(id string) error { - if id == "" { - return fmt.Errorf("ServiceID missing") - } - - req := structs.DeregisterRequest{ - Datacenter: l.config.Datacenter, - Node: l.config.NodeName, - ServiceID: id, - WriteRequest: structs.WriteRequest{Token: l.serviceToken(id)}, - } - var out struct{} - err := l.delegate.RPC("Catalog.Deregister", &req, &out) - if err == nil || strings.Contains(err.Error(), "Unknown service") { - delete(l.serviceStatus, id) - delete(l.serviceTokens, id) - l.logger.Printf("[INFO] agent: Deregistered service '%s'", id) - return nil - } else if acl.IsErrPermissionDenied(err) { - l.serviceStatus[id] = syncStatus{inSync: true} - l.logger.Printf("[WARN] agent: Service '%s' deregistration blocked by ACLs", id) - return nil - } - return err -} - -// deleteCheck is used to delete a check from the server -func (l *localState) deleteCheck(id types.CheckID) error { - if id == "" { - return fmt.Errorf("CheckID missing") - } - - req := structs.DeregisterRequest{ - Datacenter: l.config.Datacenter, - Node: l.config.NodeName, - CheckID: id, - WriteRequest: structs.WriteRequest{Token: l.checkToken(id)}, - } - var out struct{} - err := l.delegate.RPC("Catalog.Deregister", &req, &out) - if err == nil || strings.Contains(err.Error(), "Unknown check") { - delete(l.checkStatus, id) - delete(l.checkTokens, id) - l.logger.Printf("[INFO] agent: Deregistered check '%s'", id) - return nil - } else if acl.IsErrPermissionDenied(err) { - l.checkStatus[id] = syncStatus{inSync: true} - l.logger.Printf("[WARN] agent: Check '%s' deregistration blocked by ACLs", id) - return nil - } - return err -} - -// syncService is used to sync a service to the server -func (l *localState) syncService(id string) error { - req := structs.RegisterRequest{ - Datacenter: l.config.Datacenter, - ID: l.config.NodeID, - Node: l.config.NodeName, - Address: l.config.AdvertiseAddr, - TaggedAddresses: l.config.TaggedAddresses, - NodeMeta: l.metadata, - Service: l.services[id], - WriteRequest: structs.WriteRequest{Token: l.serviceToken(id)}, - } - - // If the service has associated checks that are out of sync, - // piggyback them on the service sync so they are part of the - // same transaction and are registered atomically. We only let - // checks ride on service registrations with the same token, - // otherwise we need to register them separately so they don't - // pick up privileges from the service token. - var checks structs.HealthChecks - for _, check := range l.checks { - if check.ServiceID == id && (l.serviceToken(id) == l.checkToken(check.CheckID)) { - if stat, ok := l.checkStatus[check.CheckID]; !ok || !stat.inSync { - checks = append(checks, check) - } - } - } - - // Backwards-compatibility for Consul < 0.5 - if len(checks) == 1 { - req.Check = checks[0] - } else { - req.Checks = checks - } - - var out struct{} - err := l.delegate.RPC("Catalog.Register", &req, &out) - if err == nil { - l.serviceStatus[id] = syncStatus{inSync: true} - // Given how the register API works, this info is also updated - // every time we sync a service. 
- l.nodeInfoInSync = true - l.logger.Printf("[INFO] agent: Synced service '%s'", id) - for _, check := range checks { - l.checkStatus[check.CheckID] = syncStatus{inSync: true} - } - } else if acl.IsErrPermissionDenied(err) { - l.serviceStatus[id] = syncStatus{inSync: true} - l.logger.Printf("[WARN] agent: Service '%s' registration blocked by ACLs", id) - for _, check := range checks { - l.checkStatus[check.CheckID] = syncStatus{inSync: true} - } - return nil - } - return err -} - -// syncCheck is used to sync a check to the server -func (l *localState) syncCheck(id types.CheckID) error { - // Pull in the associated service if any - check := l.checks[id] - var service *structs.NodeService - if check.ServiceID != "" { - if serv, ok := l.services[check.ServiceID]; ok { - service = serv - } - } - - req := structs.RegisterRequest{ - Datacenter: l.config.Datacenter, - ID: l.config.NodeID, - Node: l.config.NodeName, - Address: l.config.AdvertiseAddr, - TaggedAddresses: l.config.TaggedAddresses, - NodeMeta: l.metadata, - Service: service, - Check: l.checks[id], - WriteRequest: structs.WriteRequest{Token: l.checkToken(id)}, - } - var out struct{} - err := l.delegate.RPC("Catalog.Register", &req, &out) - if err == nil { - l.checkStatus[id] = syncStatus{inSync: true} - // Given how the register API works, this info is also updated - // every time we sync a check. - l.nodeInfoInSync = true - l.logger.Printf("[INFO] agent: Synced check '%s'", id) - } else if acl.IsErrPermissionDenied(err) { - l.checkStatus[id] = syncStatus{inSync: true} - l.logger.Printf("[WARN] agent: Check '%s' registration blocked by ACLs", id) - return nil - } - return err -} - -func (l *localState) syncNodeInfo() error { - req := structs.RegisterRequest{ - Datacenter: l.config.Datacenter, - ID: l.config.NodeID, - Node: l.config.NodeName, - Address: l.config.AdvertiseAddr, - TaggedAddresses: l.config.TaggedAddresses, - NodeMeta: l.metadata, - WriteRequest: structs.WriteRequest{Token: l.config.Tokens.AgentToken()}, - } - var out struct{} - err := l.delegate.RPC("Catalog.Register", &req, &out) - if err == nil { - l.nodeInfoInSync = true - l.logger.Printf("[INFO] agent: Synced node info") - } else if acl.IsErrPermissionDenied(err) { - l.nodeInfoInSync = true - l.logger.Printf("[WARN] agent: Node info update blocked by ACLs") - return nil - } - return err -} diff --git a/agent/local/state.go b/agent/local/state.go new file mode 100644 index 0000000000..b0adcd53d7 --- /dev/null +++ b/agent/local/state.go @@ -0,0 +1,1034 @@ +package local + +import ( + "fmt" + "log" + "reflect" + "strconv" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/hashicorp/consul/acl" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/agent/token" + "github.com/hashicorp/consul/api" + "github.com/hashicorp/consul/lib" + "github.com/hashicorp/consul/types" +) + +// Config is the configuration for the State. +type Config struct { + AdvertiseAddr string + CheckUpdateInterval time.Duration + Datacenter string + DiscardCheckOutput bool + NodeID types.NodeID + NodeName string + TaggedAddresses map[string]string +} + +// ServiceState describes the state of a service record. +type ServiceState struct { + // Service is the local copy of the service record. + Service *structs.NodeService + + // Token is the ACL to update or delete the service record on the + // server. + Token string + + // InSync contains whether the local state of the service record + // is in sync with the remote state on the server. 
+	InSync bool
+
+	// Deleted is true when the service record has been marked as deleted
+	// but has not been removed on the server yet.
+	Deleted bool
+}
+
+// Clone returns a shallow copy of the object. The service record still
+// points to the original service record and must not be modified.
+func (s *ServiceState) Clone() *ServiceState {
+	s2 := new(ServiceState)
+	*s2 = *s
+	return s2
+}
+
+// CheckState describes the state of a health check record.
+type CheckState struct {
+	// Check is the local copy of the health check record.
+	Check *structs.HealthCheck
+
+	// Token is the ACL token to update or delete the health check
+	// record on the server.
+	Token string
+
+	// CriticalTime is the last time the health check status went
+	// from non-critical to critical. When the health check is not
+	// in critical state the value is the zero value.
+	CriticalTime time.Time
+
+	// DeferCheck is used to delay the sync of a health check when
+	// only the output has changed. This rate limits changes which
+	// do not affect the state of the node and/or service.
+	DeferCheck *time.Timer
+
+	// InSync contains whether the local state of the health check
+	// record is in sync with the remote state on the server.
+	InSync bool
+
+	// Deleted is true when the health check record has been marked as
+	// deleted but has not been removed on the server yet.
+	Deleted bool
+}
+
+// Clone returns a shallow copy of the object. The check record and the
+// defer timer still point to the original values and must not be
+// modified.
+func (c *CheckState) Clone() *CheckState {
+	c2 := new(CheckState)
+	*c2 = *c
+	return c2
+}
+
+// Critical returns true when the health check is in critical state.
+func (c *CheckState) Critical() bool {
+	return !c.CriticalTime.IsZero()
+}
+
+// CriticalFor returns the amount of time the check has been in critical
+// state. Its value is undefined when the check is not in critical state.
+func (c *CheckState) CriticalFor() time.Duration {
+	return time.Since(c.CriticalTime)
+}
+
+type rpc interface {
+	RPC(method string, args interface{}, reply interface{}) error
+}
+
+// State is used to represent the node's services and checks. We use it
+// to perform anti-entropy with the catalog representation.
+type State struct {
+	sync.RWMutex
+
+	// Delegate is the RPC interface to the consul server or agent.
+	//
+	// It is set after both the state and the consul server/agent have
+	// been created.
+	Delegate rpc
+
+	// TriggerSyncChanges is used to notify the state syncer that a
+	// partial sync should be performed.
+	//
+	// It is set after both the state and the state syncer have been
+	// created.
+	TriggerSyncChanges func()
+
+	logger *log.Logger
+
+	// Config is the agent config
+	config Config
+
+	// nodeInfoInSync tracks whether the server has our correct top-level
+	// node information in sync
+	nodeInfoInSync bool
+
+	// Services tracks the local services
+	services map[string]*ServiceState
+
+	// Checks tracks the local checks
+	checks map[types.CheckID]*CheckState
+
+	// metadata tracks the node metadata fields
+	metadata map[string]string
+
+	// discardCheckOutput stores whether the output of health checks
+	// is stored in the raft log.
+	discardCheckOutput atomic.Value // bool
+
+	// tokens contains the ACL tokens
+	tokens *token.Store
+}
+
+// NewState creates a new local state for the agent.
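+//
+// A minimal construction sketch (the logger, token store, and delegate
+// names here are illustrative, not part of this change):
+//
+//	l := local.NewState(local.Config{NodeName: "node1"}, logger, tokens)
+//	l.TriggerSyncChanges = func() {} // wired to the real state syncer later
+//	l.Delegate = srv                 // RPC delegate, set after creation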
+func NewState(c Config, lg *log.Logger, tokens *token.Store) *State {
+	l := &State{
+		config:   c,
+		logger:   lg,
+		services: make(map[string]*ServiceState),
+		checks:   make(map[types.CheckID]*CheckState),
+		metadata: make(map[string]string),
+		tokens:   tokens,
+	}
+	l.SetDiscardCheckOutput(c.DiscardCheckOutput)
+	return l
+}
+
+// SetDiscardCheckOutput configures whether the check output
+// is discarded. This can be changed at runtime.
+func (l *State) SetDiscardCheckOutput(b bool) {
+	l.discardCheckOutput.Store(b)
+}
+
+// ServiceToken returns the configured ACL token for the given
+// service ID. If none is present, the agent's token is returned.
+func (l *State) ServiceToken(id string) string {
+	l.RLock()
+	defer l.RUnlock()
+	return l.serviceToken(id)
+}
+
+// serviceToken returns an ACL token associated with a service.
+// This method is not synchronized and the lock must already be held.
+func (l *State) serviceToken(id string) string {
+	var token string
+	if s := l.services[id]; s != nil {
+		token = s.Token
+	}
+	if token == "" {
+		token = l.tokens.UserToken()
+	}
+	return token
+}
+
+// AddService is used to add a service entry to the local state.
+// This entry is persistent and the agent will make a best effort to
+// ensure it is registered
+func (l *State) AddService(service *structs.NodeService, token string) error {
+	if service == nil {
+		return fmt.Errorf("no service")
+	}
+
+	// use the service name as id if the id was omitted
+	if service.ID == "" {
+		service.ID = service.Service
+	}
+
+	l.SetServiceState(&ServiceState{
+		Service: service,
+		Token:   token,
+	})
+	return nil
+}
+
+// RemoveService is used to remove a service entry from the local state.
+// The agent will make a best effort to ensure it is deregistered.
+func (l *State) RemoveService(id string) error {
+	l.Lock()
+	defer l.Unlock()
+
+	s := l.services[id]
+	if s == nil || s.Deleted {
+		return fmt.Errorf("Service %q does not exist", id)
+	}
+
+	// To remove the service on the server we need the token.
+	// Therefore, we mark the service as deleted and keep the
+	// entry around until it is actually removed.
+	s.InSync = false
+	s.Deleted = true
+	l.TriggerSyncChanges()
+
+	return nil
+}
+
+// Service returns the locally registered service that the
+// agent is aware of and is being kept in sync with the server
+func (l *State) Service(id string) *structs.NodeService {
+	l.RLock()
+	defer l.RUnlock()
+
+	s := l.services[id]
+	if s == nil || s.Deleted {
+		return nil
+	}
+	return s.Service
+}
+
+// Services returns the locally registered services that the
+// agent is aware of and are being kept in sync with the server
+func (l *State) Services() map[string]*structs.NodeService {
+	l.RLock()
+	defer l.RUnlock()
+
+	m := make(map[string]*structs.NodeService)
+	for id, s := range l.services {
+		if s.Deleted {
+			continue
+		}
+		m[id] = s.Service
+	}
+	return m
+}
+
+// ServiceState returns a shallow copy of the current service state
+// record. The service record still points to the original service
+// record and must not be modified.
+func (l *State) ServiceState(id string) *ServiceState {
+	l.RLock()
+	defer l.RUnlock()
+
+	s := l.services[id]
+	if s == nil || s.Deleted {
+		return nil
+	}
+	return s.Clone()
+}
+
+// SetServiceState is used to overwrite a raw service state with the given
+// state. This method is safe to be called concurrently but should only be used
+// during testing. You should most likely call AddService instead.
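+//
+// For example, a test can seed a service that is already marked in sync,
+// mirroring how the tests in this change use it:
+//
+//	l.SetServiceState(&local.ServiceState{
+//		Service: &structs.NodeService{ID: "cache", Service: "cache"},
+//		InSync:  true,
+//	})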
+func (l *State) SetServiceState(s *ServiceState) {
+	l.Lock()
+	defer l.Unlock()
+
+	l.services[s.Service.ID] = s
+	l.TriggerSyncChanges()
+}
+
+// ServiceStates returns a shallow copy of all service state records.
+// The service record still points to the original service record and
+// must not be modified.
+func (l *State) ServiceStates() map[string]*ServiceState {
+	l.RLock()
+	defer l.RUnlock()
+
+	m := make(map[string]*ServiceState)
+	for id, s := range l.services {
+		if s.Deleted {
+			continue
+		}
+		m[id] = s.Clone()
+	}
+	return m
+}
+
+// CheckToken is used to return the configured health check token for a
+// Check, or if none is configured, the default agent ACL token.
+func (l *State) CheckToken(checkID types.CheckID) string {
+	l.RLock()
+	defer l.RUnlock()
+	return l.checkToken(checkID)
+}
+
+// checkToken returns an ACL token associated with a check.
+// This method is not synchronized and the lock must already be held.
+func (l *State) checkToken(id types.CheckID) string {
+	var token string
+	c := l.checks[id]
+	if c != nil {
+		token = c.Token
+	}
+	if token == "" {
+		token = l.tokens.UserToken()
+	}
+	return token
+}
+
+// AddCheck is used to add a health check to the local state.
+// This entry is persistent and the agent will make a best effort to
+// ensure it is registered
+func (l *State) AddCheck(check *structs.HealthCheck, token string) error {
+	if check == nil {
+		return fmt.Errorf("no check")
+	}
+
+	// clone the check since we will be modifying it.
+	check = check.Clone()
+
+	if l.discardCheckOutput.Load().(bool) {
+		check.Output = ""
+	}
+
+	// if there is a serviceID associated with the check, make sure it exists before adding it
+	// NOTE - This logic may be moved to be handled within the Agent's AddCheck method after a refactor
+	if check.ServiceID != "" && l.Service(check.ServiceID) == nil {
+		return fmt.Errorf("Check %q refers to non-existent service %q", check.CheckID, check.ServiceID)
+	}
+
+	// hard-set the node name
+	check.Node = l.config.NodeName
+
+	l.SetCheckState(&CheckState{
+		Check: check,
+		Token: token,
+	})
+	return nil
+}
+
+// RemoveCheck is used to remove a health check from the local state.
+// The agent will make a best effort to ensure it is deregistered
+// todo(fs): RemoveService returns an error for a non-existent service. RemoveCheck should as well.
+// todo(fs): Check code that calls this to handle the error.
+func (l *State) RemoveCheck(id types.CheckID) error {
+	l.Lock()
+	defer l.Unlock()
+
+	c := l.checks[id]
+	if c == nil || c.Deleted {
+		return fmt.Errorf("Check %q does not exist", id)
+	}
+
+	// To remove the check on the server we need the token.
+	// Therefore, we mark the check as deleted and keep the
+	// entry around until it is actually removed.
+	c.InSync = false
+	c.Deleted = true
+	l.TriggerSyncChanges()
+
+	return nil
+}
+
+// UpdateCheck is used to update the status of a check
+func (l *State) UpdateCheck(id types.CheckID, status, output string) {
+	l.Lock()
+	defer l.Unlock()
+
+	c := l.checks[id]
+	if c == nil || c.Deleted {
+		return
+	}
+
+	if l.discardCheckOutput.Load().(bool) {
+		output = ""
+	}
+
+	// Update the critical time tracking (this doesn't cause a server update
+	// so we can always keep this up to date).
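+	// (CriticalTime is only set on the first transition into the critical
+	// state and cleared on any non-critical status, so CriticalFor()
+	// reports the duration of the whole critical episode.)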
+	if status == api.HealthCritical {
+		if !c.Critical() {
+			c.CriticalTime = time.Now()
+		}
+	} else {
+		c.CriticalTime = time.Time{}
+	}
+
+	// Do nothing if update is idempotent
+	if c.Check.Status == status && c.Check.Output == output {
+		return
+	}
+
+	// Defer a sync if the output has changed. This is an optimization around
+	// frequent updates of output. Instead, we update the output internally,
+	// and periodically do a write-back to the servers. If there is a status
+	// change we do the write immediately.
+	if l.config.CheckUpdateInterval > 0 && c.Check.Status == status {
+		c.Check.Output = output
+		if c.DeferCheck == nil {
+			d := l.config.CheckUpdateInterval
+			intv := time.Duration(uint64(d)/2) + lib.RandomStagger(d)
+			c.DeferCheck = time.AfterFunc(intv, func() {
+				l.Lock()
+				defer l.Unlock()
+
+				c := l.checks[id]
+				if c == nil {
+					return
+				}
+				c.DeferCheck = nil
+				if c.Deleted {
+					return
+				}
+				c.InSync = false
+				l.TriggerSyncChanges()
+			})
+		}
+		return
+	}
+
+	// Update status and mark out of sync
+	c.Check.Status = status
+	c.Check.Output = output
+	c.InSync = false
+	l.TriggerSyncChanges()
+}
+
+// Check returns the locally registered check that the
+// agent is aware of and is being kept in sync with the server
+func (l *State) Check(id types.CheckID) *structs.HealthCheck {
+	l.RLock()
+	defer l.RUnlock()
+
+	c := l.checks[id]
+	if c == nil || c.Deleted {
+		return nil
+	}
+	return c.Check
+}
+
+// Checks returns the locally registered checks that the
+// agent is aware of and are being kept in sync with the server
+func (l *State) Checks() map[types.CheckID]*structs.HealthCheck {
+	m := make(map[types.CheckID]*structs.HealthCheck)
+	for id, c := range l.CheckStates() {
+		m[id] = c.Check
+	}
+	return m
+}
+
+// CheckState returns a shallow copy of the current health check state
+// record. The health check record and the deferred check still point to
+// the original values and must not be modified.
+func (l *State) CheckState(id types.CheckID) *CheckState {
+	l.RLock()
+	defer l.RUnlock()
+
+	c := l.checks[id]
+	if c == nil || c.Deleted {
+		return nil
+	}
+	return c.Clone()
+}
+
+// SetCheckState is used to overwrite a raw check state with the given
+// state. This method is safe to be called concurrently but should only be used
+// during testing. You should most likely call AddCheck instead.
+func (l *State) SetCheckState(c *CheckState) {
+	l.Lock()
+	defer l.Unlock()
+
+	l.checks[c.Check.CheckID] = c
+	l.TriggerSyncChanges()
+}
+
+// CheckStates returns a shallow copy of all health check state records.
+// The health check records and the deferred checks still point to
+// the original values and must not be modified.
+func (l *State) CheckStates() map[types.CheckID]*CheckState {
+	l.RLock()
+	defer l.RUnlock()
+
+	m := make(map[types.CheckID]*CheckState)
+	for id, c := range l.checks {
+		if c.Deleted {
+			continue
+		}
+		m[id] = c.Clone()
+	}
+	return m
+}
+
+// CriticalCheckStates returns the locally registered checks that the
+// agent is aware of and are being kept in sync with the server.
+// The map contains a shallow copy of the current check states but
+// references to the actual check definition which must not be
+// modified.
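+//
+// For example, a caller can report how long each check has been failing
+// (names here are illustrative):
+//
+//	for id, cs := range l.CriticalCheckStates() {
+//		fmt.Printf("%s critical for %s\n", id, cs.CriticalFor())
+//	}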
+func (l *State) CriticalCheckStates() map[types.CheckID]*CheckState { + l.RLock() + defer l.RUnlock() + + m := make(map[types.CheckID]*CheckState) + for id, c := range l.checks { + if c.Deleted || !c.Critical() { + continue + } + m[id] = c.Clone() + } + return m +} + +// Metadata returns the local node metadata fields that the +// agent is aware of and are being kept in sync with the server +func (l *State) Metadata() map[string]string { + l.RLock() + defer l.RUnlock() + + m := make(map[string]string) + for k, v := range l.metadata { + m[k] = v + } + return m +} + +// LoadMetadata loads node metadata fields from the agent config and +// updates them on the local agent. +func (l *State) LoadMetadata(data map[string]string) error { + l.Lock() + defer l.Unlock() + + for k, v := range data { + l.metadata[k] = v + } + l.TriggerSyncChanges() + return nil +} + +// UnloadMetadata resets the local metadata state +func (l *State) UnloadMetadata() { + l.Lock() + defer l.Unlock() + l.metadata = make(map[string]string) +} + +// Stats is used to get various debugging state from the sub-systems +func (l *State) Stats() map[string]string { + l.RLock() + defer l.RUnlock() + + services := 0 + for _, s := range l.services { + if s.Deleted { + continue + } + services++ + } + + checks := 0 + for _, c := range l.checks { + if c.Deleted { + continue + } + checks++ + } + + return map[string]string{ + "services": strconv.Itoa(services), + "checks": strconv.Itoa(checks), + } +} + +// updateSyncState does a read of the server state, and updates +// the local sync status as appropriate +func (l *State) updateSyncState() error { + // Get all checks and services from the master + req := structs.NodeSpecificRequest{ + Datacenter: l.config.Datacenter, + Node: l.config.NodeName, + QueryOptions: structs.QueryOptions{Token: l.tokens.AgentToken()}, + } + + var out1 structs.IndexedNodeServices + if err := l.Delegate.RPC("Catalog.NodeServices", &req, &out1); err != nil { + return err + } + + var out2 structs.IndexedHealthChecks + if err := l.Delegate.RPC("Health.NodeChecks", &req, &out2); err != nil { + return err + } + + // Create useful data structures for traversal + remoteServices := make(map[string]*structs.NodeService) + if out1.NodeServices != nil { + remoteServices = out1.NodeServices.Services + } + + remoteChecks := make(map[types.CheckID]*structs.HealthCheck, len(out2.HealthChecks)) + for _, rc := range out2.HealthChecks { + remoteChecks[rc.CheckID] = rc + } + + // Traverse all checks, services and the node info to determine + // which entries need to be updated on or removed from the server + + l.Lock() + defer l.Unlock() + + // Check if node info needs syncing + if out1.NodeServices == nil || out1.NodeServices.Node == nil || + out1.NodeServices.Node.ID != l.config.NodeID || + !reflect.DeepEqual(out1.NodeServices.Node.TaggedAddresses, l.config.TaggedAddresses) || + !reflect.DeepEqual(out1.NodeServices.Node.Meta, l.metadata) { + l.nodeInfoInSync = false + } + + // Check which services need syncing + + // Look for local services that do not exist remotely and mark them for + // syncing so that they will be pushed to the server later + for id, s := range l.services { + if remoteServices[id] == nil { + s.InSync = false + } + } + + // Traverse the list of services from the server. + // Remote services which do not exist locally have been deregistered. + // Otherwise, check whether the two definitions are still in sync. 
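+	// Each remote service therefore falls into one of three cases below:
+	// no local counterpart (mark it Deleted so the server copy is removed),
+	// already scheduled for removal (skip), or present locally (compare via
+	// IsSame to decide whether it must be re-registered).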
+	for id, rs := range remoteServices {
+		ls := l.services[id]
+		if ls == nil {
+			// The consul service is managed automatically and does
+			// not need to be deregistered
+			if id == structs.ConsulServiceID {
+				continue
+			}
+
+			// Mark a remote service that does not exist locally as deleted so
+			// that it will be removed on the server later.
+			l.services[id] = &ServiceState{Deleted: true}
+			continue
+		}
+
+		// If the service is already scheduled for removal, skip it
+		if ls.Deleted {
+			continue
+		}
+
+		// If our definition is different, we need to update it. Make a
+		// copy so that we don't retain a pointer to any actual state
+		// store info for in-memory RPCs.
+		if ls.Service.EnableTagOverride {
+			ls.Service.Tags = make([]string, len(rs.Tags))
+			copy(ls.Service.Tags, rs.Tags)
+		}
+		ls.InSync = ls.Service.IsSame(rs)
+	}
+
+	// Check which checks need syncing
+
+	// Look for local checks that do not exist remotely and mark them for
+	// syncing so that they will be pushed to the server later
+	for id, c := range l.checks {
+		if remoteChecks[id] == nil {
+			c.InSync = false
+		}
+	}
+
+	// Traverse the list of checks from the server.
+	// Remote checks which do not exist locally have been deregistered.
+	// Otherwise, check whether the two definitions are still in sync.
+	for id, rc := range remoteChecks {
+		lc := l.checks[id]
+
+		if lc == nil {
+			// The Serf check is created automatically and does not
+			// need to be deregistered.
+			if id == structs.SerfCheckID {
+				l.logger.Printf("[DEBUG] Skipping remote check %q since it is managed automatically", id)
+				continue
+			}
+
+			// Mark a remote check that does not exist locally as deleted so
+			// that it will be removed on the server later.
+			l.checks[id] = &CheckState{Deleted: true}
+			continue
+		}
+
+		// If the check is already scheduled for removal, skip it.
+		if lc.Deleted {
+			continue
+		}
+
+		// If our definition is different, we need to update it
+		if l.config.CheckUpdateInterval == 0 {
+			lc.InSync = lc.Check.IsSame(rc)
+			continue
+		}
+
+		// Copy the existing check before potentially modifying
+		// it before the compare operation.
+		lcCopy := lc.Check.Clone()
+
+		// Copy the server's check before modifying, otherwise
+		// in-memory RPCs will have side effects.
+		rcCopy := rc.Clone()
+
+		// If there's a defer timer active then we've got a
+		// potentially spammy check so we don't sync the output
+		// during this sweep since the timer will mark the check
+		// out of sync for us. Otherwise, it is safe to sync the
+		// output now. This is especially important for checks
+		// that don't change state after they are created, in
+		// which case we'd never see their output synced back ever.
+		if lc.DeferCheck != nil {
+			lcCopy.Output = ""
+			rcCopy.Output = ""
+		}
+		lc.InSync = lcCopy.IsSame(rcCopy)
+	}
+	return nil
+}
+
+// SyncFull determines the delta between the local and remote state
+// and synchronizes the changes.
+func (l *State) SyncFull() error {
+	// note that we do not acquire the lock here since the methods
+	// we are calling will do that themselves.
+	//
+	// Also note that we don't hold the lock for the entire operation
+	// but release it between the two calls. This is not an issue since
+	// the algorithm is best-effort to achieve eventual consistency.
+	// SyncChanges will sync whatever updateSyncState() has determined
+	// needs updating.
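+	//
+	// The tests in this change drive it directly, e.g.:
+	//
+	//	if err := a.State.SyncFull(); err != nil {
+	//		t.Fatalf("err: %v", err)
+	//	}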
+
+	if err := l.updateSyncState(); err != nil {
+		return err
+	}
+	return l.SyncChanges()
+}
+
+// SyncChanges pushes any services, checks, and node info that have been
+// marked out of sync or deleted to the server.
+func (l *State) SyncChanges() error {
+	l.Lock()
+	defer l.Unlock()
+
+	// We will do node-level info syncing at the end, since it will get
+	// updated by a service or check sync anyway, given how the register
+	// API works.
+
+	// Sync the services
+	// (logging happens in the helper methods)
+	for id, s := range l.services {
+		var err error
+		switch {
+		case s.Deleted:
+			err = l.deleteService(id)
+		case !s.InSync:
+			err = l.syncService(id)
+		default:
+			l.logger.Printf("[DEBUG] agent: Service %q in sync", id)
+		}
+		if err != nil {
+			return err
+		}
+	}
+
+	// Sync the checks
+	// (logging happens in the helper methods)
+	for id, c := range l.checks {
+		var err error
+		switch {
+		case c.Deleted:
+			err = l.deleteCheck(id)
+		case !c.InSync:
+			if c.DeferCheck != nil {
+				c.DeferCheck.Stop()
+				c.DeferCheck = nil
+			}
+			err = l.syncCheck(id)
+		default:
+			l.logger.Printf("[DEBUG] agent: Check %q in sync", id)
+		}
+		if err != nil {
+			return err
+		}
+	}
+
+	// Now sync the node-level info if it is still out of sync. A
+	// successful service or check sync above already marks it in sync,
+	// given how the register API works.
+	if l.nodeInfoInSync {
+		l.logger.Printf("[DEBUG] agent: Node info in sync")
+		return nil
+	}
+	return l.syncNodeInfo()
+}
+
+// deleteService is used to delete a service from the server
+func (l *State) deleteService(id string) error {
+	if id == "" {
+		return fmt.Errorf("ServiceID missing")
+	}
+
+	req := structs.DeregisterRequest{
+		Datacenter:   l.config.Datacenter,
+		Node:         l.config.NodeName,
+		ServiceID:    id,
+		WriteRequest: structs.WriteRequest{Token: l.serviceToken(id)},
+	}
+	var out struct{}
+	err := l.Delegate.RPC("Catalog.Deregister", &req, &out)
+	switch {
+	case err == nil || strings.Contains(err.Error(), "Unknown service"):
+		delete(l.services, id)
+		l.logger.Printf("[INFO] agent: Deregistered service %q", id)
+		return nil
+
+	case acl.IsErrPermissionDenied(err):
+		// todo(fs): mark the service to be in sync to prevent excessive retrying before next full sync
+		// todo(fs): some backoff strategy might be a better solution
+		l.services[id].InSync = true
+		l.logger.Printf("[WARN] agent: Service %q deregistration blocked by ACLs", id)
+		return nil
+
+	default:
+		l.logger.Printf("[WARN] agent: Deregistering service %q failed. %s", id, err)
+		return err
+	}
+}
+
+// deleteCheck is used to delete a check from the server
+func (l *State) deleteCheck(id types.CheckID) error {
+	if id == "" {
+		return fmt.Errorf("CheckID missing")
+	}
+
+	req := structs.DeregisterRequest{
+		Datacenter:   l.config.Datacenter,
+		Node:         l.config.NodeName,
+		CheckID:      id,
+		WriteRequest: structs.WriteRequest{Token: l.checkToken(id)},
+	}
+	var out struct{}
+	err := l.Delegate.RPC("Catalog.Deregister", &req, &out)
+	switch {
+	case err == nil || strings.Contains(err.Error(), "Unknown check"):
+		c := l.checks[id]
+		if c != nil && c.DeferCheck != nil {
+			c.DeferCheck.Stop()
+		}
+		delete(l.checks, id)
+		l.logger.Printf("[INFO] agent: Deregistered check %q", id)
+		return nil
+
+	case acl.IsErrPermissionDenied(err):
+		// todo(fs): mark the check to be in sync to prevent excessive retrying before next full sync
+		// todo(fs): some backoff strategy might be a better solution
+		l.checks[id].InSync = true
+		l.logger.Printf("[WARN] agent: Check %q deregistration blocked by ACLs", id)
+		return nil
+
+	default:
+		l.logger.Printf("[WARN] agent: Deregistering check %q failed. %s", id, err)
+		return err
+	}
+}
+
+// syncService is used to sync a service to the server
+func (l *State) syncService(id string) error {
+	// If the service has associated checks that are out of sync,
+	// piggyback them on the service sync so they are part of the
+	// same transaction and are registered atomically. We only let
+	// checks ride on service registrations with the same token,
+	// otherwise we need to register them separately so they don't
+	// pick up privileges from the service token.
+	var checks structs.HealthChecks
+	for checkID, c := range l.checks {
+		if c.Deleted || c.InSync {
+			continue
+		}
+		if c.Check.ServiceID != id {
+			continue
+		}
+		if l.serviceToken(id) != l.checkToken(checkID) {
+			continue
+		}
+		checks = append(checks, c.Check)
+	}
+
+	req := structs.RegisterRequest{
+		Datacenter:      l.config.Datacenter,
+		ID:              l.config.NodeID,
+		Node:            l.config.NodeName,
+		Address:         l.config.AdvertiseAddr,
+		TaggedAddresses: l.config.TaggedAddresses,
+		NodeMeta:        l.metadata,
+		Service:         l.services[id].Service,
+		WriteRequest:    structs.WriteRequest{Token: l.serviceToken(id)},
+	}
+
+	// Backwards-compatibility for Consul < 0.5
+	if len(checks) == 1 {
+		req.Check = checks[0]
+	} else {
+		req.Checks = checks
+	}
+
+	var out struct{}
+	err := l.Delegate.RPC("Catalog.Register", &req, &out)
+	switch {
+	case err == nil:
+		l.services[id].InSync = true
+		// Given how the register API works, this info is also updated
+		// every time we sync a service.
+		l.nodeInfoInSync = true
+		for _, check := range checks {
+			l.checks[check.CheckID].InSync = true
+		}
+		l.logger.Printf("[INFO] agent: Synced service %q", id)
+		return nil
+
+	case acl.IsErrPermissionDenied(err):
+		// todo(fs): mark the service and the checks to be in sync to prevent excessive retrying before next full sync
+		// todo(fs): some backoff strategy might be a better solution
+		l.services[id].InSync = true
+		for _, check := range checks {
+			l.checks[check.CheckID].InSync = true
+		}
+		l.logger.Printf("[WARN] agent: Service %q registration blocked by ACLs", id)
+		return nil
+
+	default:
+		l.logger.Printf("[WARN] agent: Syncing service %q failed. %s", id, err)
+		return err
+	}
+}
+
+// syncCheck is used to sync a check to the server
+func (l *State) syncCheck(id types.CheckID) error {
+	c := l.checks[id]
+
+	req := structs.RegisterRequest{
+		Datacenter:      l.config.Datacenter,
+		ID:              l.config.NodeID,
+		Node:            l.config.NodeName,
+		Address:         l.config.AdvertiseAddr,
+		TaggedAddresses: l.config.TaggedAddresses,
+		NodeMeta:        l.metadata,
+		Check:           c.Check,
+		WriteRequest:    structs.WriteRequest{Token: l.checkToken(id)},
+	}
+
+	// Pull in the associated service if any
+	s := l.services[c.Check.ServiceID]
+	if s != nil && !s.Deleted {
+		req.Service = s.Service
+	}
+
+	var out struct{}
+	err := l.Delegate.RPC("Catalog.Register", &req, &out)
+	switch {
+	case err == nil:
+		l.checks[id].InSync = true
+		// Given how the register API works, this info is also updated
+		// every time we sync a check.
+		l.nodeInfoInSync = true
+		l.logger.Printf("[INFO] agent: Synced check %q", id)
+		return nil
+
+	case acl.IsErrPermissionDenied(err):
+		// todo(fs): mark the check to be in sync to prevent excessive retrying before next full sync
+		// todo(fs): some backoff strategy might be a better solution
+		l.checks[id].InSync = true
+		l.logger.Printf("[WARN] agent: Check %q registration blocked by ACLs", id)
+		return nil
+
+	default:
+		l.logger.Printf("[WARN] agent: Syncing check %q failed. %s", id, err)
+		return err
+	}
+}
+
+func (l *State) syncNodeInfo() error {
+	req := structs.RegisterRequest{
+		Datacenter:      l.config.Datacenter,
+		ID:              l.config.NodeID,
+		Node:            l.config.NodeName,
+		Address:         l.config.AdvertiseAddr,
+		TaggedAddresses: l.config.TaggedAddresses,
+		NodeMeta:        l.metadata,
+		WriteRequest:    structs.WriteRequest{Token: l.tokens.AgentToken()},
+	}
+	var out struct{}
+	err := l.Delegate.RPC("Catalog.Register", &req, &out)
+	switch {
+	case err == nil:
+		l.nodeInfoInSync = true
+		l.logger.Printf("[INFO] agent: Synced node info")
+		return nil
+
+	case acl.IsErrPermissionDenied(err):
+		// todo(fs): mark the node info to be in sync to prevent excessive retrying before next full sync
+		// todo(fs): some backoff strategy might be a better solution
+		l.nodeInfoInSync = true
+		l.logger.Printf("[WARN] agent: Node info update blocked by ACLs")
+		return nil
+
+	default:
+		l.logger.Printf("[WARN] agent: Syncing node info failed. 
%s", err) + return err + } +} diff --git a/agent/local_test.go b/agent/local/state_test.go similarity index 57% rename from agent/local_test.go rename to agent/local/state_test.go index 2d0d2fb022..166b8155e9 100644 --- a/agent/local_test.go +++ b/agent/local/state_test.go @@ -1,11 +1,15 @@ -package agent +package local_test import ( + "errors" + "fmt" "reflect" "testing" "time" + "github.com/hashicorp/consul/agent" "github.com/hashicorp/consul/agent/config" + "github.com/hashicorp/consul/agent/local" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/token" "github.com/hashicorp/consul/api" @@ -16,7 +20,7 @@ import ( func TestAgentAntiEntropy_Services(t *testing.T) { t.Parallel() - a := &TestAgent{Name: t.Name(), NoInitialSync: true} + a := &agent.TestAgent{Name: t.Name()} a.Start() defer a.Shutdown() @@ -35,7 +39,7 @@ func TestAgentAntiEntropy_Services(t *testing.T) { Tags: []string{"master"}, Port: 5000, } - a.state.AddService(srv1, "") + a.State.AddService(srv1, "") args.Service = srv1 if err := a.RPC("Catalog.Register", args, &out); err != nil { t.Fatalf("err: %v", err) @@ -48,7 +52,7 @@ func TestAgentAntiEntropy_Services(t *testing.T) { Tags: []string{}, Port: 8000, } - a.state.AddService(srv2, "") + a.State.AddService(srv2, "") srv2_mod := new(structs.NodeService) *srv2_mod = *srv2 @@ -65,7 +69,7 @@ func TestAgentAntiEntropy_Services(t *testing.T) { Tags: []string{}, Port: 80, } - a.state.AddService(srv3, "") + a.State.AddService(srv3, "") // Exists remote (delete) srv4 := &structs.NodeService{ @@ -87,7 +91,7 @@ func TestAgentAntiEntropy_Services(t *testing.T) { Address: "127.0.0.10", Port: 8000, } - a.state.AddService(srv5, "") + a.State.AddService(srv5, "") srv5_mod := new(structs.NodeService) *srv5_mod = *srv5 @@ -104,15 +108,14 @@ func TestAgentAntiEntropy_Services(t *testing.T) { Tags: []string{}, Port: 11211, } - a.state.AddService(srv6, "") + a.State.SetServiceState(&local.ServiceState{ + Service: srv6, + InSync: true, + }) - // todo(fs): data race - a.state.Lock() - a.state.serviceStatus["cache"] = syncStatus{inSync: true} - a.state.Unlock() - - // Trigger anti-entropy run and wait - a.StartSync() + if err := a.State.SyncFull(); err != nil { + t.Fatalf("err: %v", err) + } var services structs.IndexedNodeServices req := structs.NodeSpecificRequest{ @@ -120,139 +123,110 @@ func TestAgentAntiEntropy_Services(t *testing.T) { Node: a.Config.NodeName, } - retry.Run(t, func(r *retry.R) { - if err := a.RPC("Catalog.NodeServices", &req, &services); err != nil { - r.Fatalf("err: %v", err) - } + if err := a.RPC("Catalog.NodeServices", &req, &services); err != nil { + t.Fatalf("err: %v", err) + } - // Make sure we sent along our node info when we synced. - id := services.NodeServices.Node.ID - addrs := services.NodeServices.Node.TaggedAddresses - meta := services.NodeServices.Node.Meta - delete(meta, structs.MetaSegmentKey) // Added later, not in config. - verify.Values(r, "node id", id, a.config.NodeID) - verify.Values(r, "tagged addrs", addrs, a.config.TaggedAddresses) - verify.Values(r, "node meta", meta, a.config.NodeMeta) + // Make sure we sent along our node info when we synced. + id := services.NodeServices.Node.ID + addrs := services.NodeServices.Node.TaggedAddresses + meta := services.NodeServices.Node.Meta + delete(meta, structs.MetaSegmentKey) // Added later, not in config. 
+ verify.Values(t, "node id", id, a.Config.NodeID) + verify.Values(t, "tagged addrs", addrs, a.Config.TaggedAddresses) + verify.Values(t, "node meta", meta, a.Config.NodeMeta) - // We should have 6 services (consul included) - if len(services.NodeServices.Services) != 6 { - r.Fatalf("bad: %v", services.NodeServices.Services) - } + // We should have 6 services (consul included) + if len(services.NodeServices.Services) != 6 { + t.Fatalf("bad: %v", services.NodeServices.Services) + } - // All the services should match - for id, serv := range services.NodeServices.Services { - serv.CreateIndex, serv.ModifyIndex = 0, 0 - switch id { - case "mysql": - if !reflect.DeepEqual(serv, srv1) { - r.Fatalf("bad: %v %v", serv, srv1) - } - case "redis": - if !reflect.DeepEqual(serv, srv2) { - r.Fatalf("bad: %#v %#v", serv, srv2) - } - case "web": - if !reflect.DeepEqual(serv, srv3) { - r.Fatalf("bad: %v %v", serv, srv3) - } - case "api": - if !reflect.DeepEqual(serv, srv5) { - r.Fatalf("bad: %v %v", serv, srv5) - } - case "cache": - if !reflect.DeepEqual(serv, srv6) { - r.Fatalf("bad: %v %v", serv, srv6) - } - case structs.ConsulServiceID: - // ignore - default: - r.Fatalf("unexpected service: %v", id) + // All the services should match + for id, serv := range services.NodeServices.Services { + serv.CreateIndex, serv.ModifyIndex = 0, 0 + switch id { + case "mysql": + if !reflect.DeepEqual(serv, srv1) { + t.Fatalf("bad: %v %v", serv, srv1) } - } - - // todo(fs): data race - a.state.RLock() - defer a.state.RUnlock() - - // Check the local state - if len(a.state.services) != 5 { - r.Fatalf("bad: %v", a.state.services) - } - if len(a.state.serviceStatus) != 5 { - r.Fatalf("bad: %v", a.state.serviceStatus) - } - for name, status := range a.state.serviceStatus { - if !status.inSync { - r.Fatalf("should be in sync: %v %v", name, status) + case "redis": + if !reflect.DeepEqual(serv, srv2) { + t.Fatalf("bad: %#v %#v", serv, srv2) } + case "web": + if !reflect.DeepEqual(serv, srv3) { + t.Fatalf("bad: %v %v", serv, srv3) + } + case "api": + if !reflect.DeepEqual(serv, srv5) { + t.Fatalf("bad: %v %v", serv, srv5) + } + case "cache": + if !reflect.DeepEqual(serv, srv6) { + t.Fatalf("bad: %v %v", serv, srv6) + } + case structs.ConsulServiceID: + // ignore + default: + t.Fatalf("unexpected service: %v", id) } - }) + } + + if err := servicesInSync(a.State, 5); err != nil { + t.Fatal(err) + } // Remove one of the services - a.state.RemoveService("api") + a.State.RemoveService("api") - // Trigger anti-entropy run and wait - a.StartSync() + if err := a.State.SyncFull(); err != nil { + t.Fatalf("err: %v", err) + } - retry.Run(t, func(r *retry.R) { - if err := a.RPC("Catalog.NodeServices", &req, &services); err != nil { - r.Fatalf("err: %v", err) - } + if err := a.RPC("Catalog.NodeServices", &req, &services); err != nil { + t.Fatalf("err: %v", err) + } - // We should have 5 services (consul included) - if len(services.NodeServices.Services) != 5 { - r.Fatalf("bad: %v", services.NodeServices.Services) - } + // We should have 5 services (consul included) + if len(services.NodeServices.Services) != 5 { + t.Fatalf("bad: %v", services.NodeServices.Services) + } - // All the services should match - for id, serv := range services.NodeServices.Services { - serv.CreateIndex, serv.ModifyIndex = 0, 0 - switch id { - case "mysql": - if !reflect.DeepEqual(serv, srv1) { - r.Fatalf("bad: %v %v", serv, srv1) - } - case "redis": - if !reflect.DeepEqual(serv, srv2) { - r.Fatalf("bad: %#v %#v", serv, srv2) - } - case "web": - if 
!reflect.DeepEqual(serv, srv3) { - r.Fatalf("bad: %v %v", serv, srv3) - } - case "cache": - if !reflect.DeepEqual(serv, srv6) { - r.Fatalf("bad: %v %v", serv, srv6) - } - case structs.ConsulServiceID: - // ignore - default: - r.Fatalf("unexpected service: %v", id) + // All the services should match + for id, serv := range services.NodeServices.Services { + serv.CreateIndex, serv.ModifyIndex = 0, 0 + switch id { + case "mysql": + if !reflect.DeepEqual(serv, srv1) { + t.Fatalf("bad: %v %v", serv, srv1) } - } - - // todo(fs): data race - a.state.RLock() - defer a.state.RUnlock() - - // Check the local state - if len(a.state.services) != 4 { - r.Fatalf("bad: %v", a.state.services) - } - if len(a.state.serviceStatus) != 4 { - r.Fatalf("bad: %v", a.state.serviceStatus) - } - for name, status := range a.state.serviceStatus { - if !status.inSync { - r.Fatalf("should be in sync: %v %v", name, status) + case "redis": + if !reflect.DeepEqual(serv, srv2) { + t.Fatalf("bad: %#v %#v", serv, srv2) } + case "web": + if !reflect.DeepEqual(serv, srv3) { + t.Fatalf("bad: %v %v", serv, srv3) + } + case "cache": + if !reflect.DeepEqual(serv, srv6) { + t.Fatalf("bad: %v %v", serv, srv6) + } + case structs.ConsulServiceID: + // ignore + default: + t.Fatalf("unexpected service: %v", id) } - }) + } + + if err := servicesInSync(a.State, 4); err != nil { + t.Fatal(err) + } } func TestAgentAntiEntropy_EnableTagOverride(t *testing.T) { t.Parallel() - a := &TestAgent{Name: t.Name(), NoInitialSync: true} + a := &agent.TestAgent{Name: t.Name()} a.Start() defer a.Shutdown() @@ -263,7 +237,7 @@ func TestAgentAntiEntropy_EnableTagOverride(t *testing.T) { } var out struct{} - // EnableTagOverride = true + // register a local service with tag override enabled srv1 := &structs.NodeService{ ID: "svc_id1", Service: "svc1", @@ -271,17 +245,9 @@ func TestAgentAntiEntropy_EnableTagOverride(t *testing.T) { Port: 6100, EnableTagOverride: true, } - a.state.AddService(srv1, "") - srv1_mod := new(structs.NodeService) - *srv1_mod = *srv1 - srv1_mod.Port = 7100 - srv1_mod.Tags = []string{"tag1_mod"} - args.Service = srv1_mod - if err := a.RPC("Catalog.Register", args, &out); err != nil { - t.Fatalf("err: %v", err) - } + a.State.AddService(srv1, "") - // EnableTagOverride = false + // register a local service with tag override disabled srv2 := &structs.NodeService{ ID: "svc_id2", Service: "svc2", @@ -289,18 +255,42 @@ func TestAgentAntiEntropy_EnableTagOverride(t *testing.T) { Port: 6200, EnableTagOverride: false, } - a.state.AddService(srv2, "") - srv2_mod := new(structs.NodeService) - *srv2_mod = *srv2 - srv2_mod.Port = 7200 - srv2_mod.Tags = []string{"tag2_mod"} - args.Service = srv2_mod + a.State.AddService(srv2, "") + + // make sure they are both in the catalog + if err := a.State.SyncChanges(); err != nil { + t.Fatalf("err: %v", err) + } + + // update the services in the catalog and change the tags and port. + // Only tag changes should be propagated for services where tag + // override is enabled. 
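Before the catalog updates in the hunk below, here is a self-contained sketch of the EnableTagOverride contract this test pins down; the `service` struct is a simplified stand-in for `structs.NodeService`, with values mirroring the test's:

```go
package main

import "fmt"

// Simplified stand-in illustrating what the anti-entropy sync is expected
// to do when tags were edited directly in the catalog: keep the catalog's
// tags if EnableTagOverride is true, but reset every other field (here
// the port) to the agent's local definition.
type service struct {
	Tags              []string
	Port              int
	EnableTagOverride bool
}

func main() {
	local := service{Tags: []string{"tag1"}, Port: 6100, EnableTagOverride: true}
	catalog := service{Tags: []string{"tag1_mod"}, Port: 7100}

	synced := local // start from the local definition
	if local.EnableTagOverride {
		synced.Tags = catalog.Tags // only the tags survive
	}
	_ = catalog.Port // 7100 is discarded by the sync

	fmt.Printf("%+v\n", synced) // {Tags:[tag1_mod] Port:6100 EnableTagOverride:true}
}
```

With EnableTagOverride disabled (svc_id2 below), even the tag edit is reverted, which is why the test expects srv2 back unchanged.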
+ args.Service = &structs.NodeService{ + ID: srv1.ID, + Service: srv1.Service, + Tags: []string{"tag1_mod"}, + Port: 7100, + EnableTagOverride: true, + } if err := a.RPC("Catalog.Register", args, &out); err != nil { t.Fatalf("err: %v", err) } - // Trigger anti-entropy run and wait - a.StartSync() + args.Service = &structs.NodeService{ + ID: srv2.ID, + Service: srv2.Service, + Tags: []string{"tag2_mod"}, + Port: 7200, + EnableTagOverride: false, + } + if err := a.RPC("Catalog.Register", args, &out); err != nil { + t.Fatalf("err: %v", err) + } + + // sync catalog and local state + if err := a.State.SyncFull(); err != nil { + t.Fatalf("err: %v", err) + } req := structs.NodeSpecificRequest{ Datacenter: "dc1", @@ -308,55 +298,47 @@ func TestAgentAntiEntropy_EnableTagOverride(t *testing.T) { } var services structs.IndexedNodeServices - retry.Run(t, func(r *retry.R) { - // runtime.Gosched() - if err := a.RPC("Catalog.NodeServices", &req, &services); err != nil { - r.Fatalf("err: %v", err) - } + if err := a.RPC("Catalog.NodeServices", &req, &services); err != nil { + t.Fatalf("err: %v", err) + } - a.state.RLock() - defer a.state.RUnlock() - - // All the services should match - for id, serv := range services.NodeServices.Services { - serv.CreateIndex, serv.ModifyIndex = 0, 0 - switch id { - case "svc_id1": - if serv.ID != "svc_id1" || - serv.Service != "svc1" || - serv.Port != 6100 || - !reflect.DeepEqual(serv.Tags, []string{"tag1_mod"}) { - r.Fatalf("bad: %v %v", serv, srv1) - } - case "svc_id2": - if serv.ID != "svc_id2" || - serv.Service != "svc2" || - serv.Port != 6200 || - !reflect.DeepEqual(serv.Tags, []string{"tag2"}) { - r.Fatalf("bad: %v %v", serv, srv2) - } - case structs.ConsulServiceID: - // ignore - default: - r.Fatalf("unexpected service: %v", id) + // All the services should match + for id, serv := range services.NodeServices.Services { + serv.CreateIndex, serv.ModifyIndex = 0, 0 + switch id { + case "svc_id1": + // tags should be modified but not the port + got := serv + want := &structs.NodeService{ + ID: "svc_id1", + Service: "svc1", + Tags: []string{"tag1_mod"}, + Port: 6100, + EnableTagOverride: true, } - } - - // todo(fs): data race - a.state.RLock() - defer a.state.RUnlock() - - for name, status := range a.state.serviceStatus { - if !status.inSync { - r.Fatalf("should be in sync: %v %v", name, status) + if !verify.Values(t, "", got, want) { + t.FailNow() } + case "svc_id2": + got, want := serv, srv2 + if !verify.Values(t, "", got, want) { + t.FailNow() + } + case structs.ConsulServiceID: + // ignore + default: + t.Fatalf("unexpected service: %v", id) } - }) + } + + if err := servicesInSync(a.State, 2); err != nil { + t.Fatal(err) + } } func TestAgentAntiEntropy_Services_WithChecks(t *testing.T) { t.Parallel() - a := NewTestAgent(t.Name(), "") + a := agent.NewTestAgent(t.Name(), "") defer a.Shutdown() { @@ -367,7 +349,7 @@ func TestAgentAntiEntropy_Services_WithChecks(t *testing.T) { Tags: []string{"master"}, Port: 5000, } - a.state.AddService(srv, "") + a.State.AddService(srv, "") chk := &structs.HealthCheck{ Node: a.Config.NodeName, @@ -376,18 +358,11 @@ func TestAgentAntiEntropy_Services_WithChecks(t *testing.T) { ServiceID: "mysql", Status: api.HealthPassing, } - a.state.AddCheck(chk, "") + a.State.AddCheck(chk, "") - // todo(fs): data race - func() { - a.state.RLock() - defer a.state.RUnlock() - - // Sync the service once - if err := a.state.syncService("mysql"); err != nil { - t.Fatalf("err: %s", err) - } - }() + if err := a.State.SyncFull(); err != nil { + t.Fatal("sync 
failed: ", err) + } // We should have 2 services (consul included) svcReq := structs.NodeSpecificRequest{ @@ -424,7 +399,7 @@ func TestAgentAntiEntropy_Services_WithChecks(t *testing.T) { Tags: []string{"master"}, Port: 5000, } - a.state.AddService(srv, "") + a.State.AddService(srv, "") chk1 := &structs.HealthCheck{ Node: a.Config.NodeName, @@ -433,7 +408,7 @@ func TestAgentAntiEntropy_Services_WithChecks(t *testing.T) { ServiceID: "redis", Status: api.HealthPassing, } - a.state.AddCheck(chk1, "") + a.State.AddCheck(chk1, "") chk2 := &structs.HealthCheck{ Node: a.Config.NodeName, @@ -442,18 +417,11 @@ func TestAgentAntiEntropy_Services_WithChecks(t *testing.T) { ServiceID: "redis", Status: api.HealthPassing, } - a.state.AddCheck(chk2, "") + a.State.AddCheck(chk2, "") - // todo(fs): data race - func() { - a.state.RLock() - defer a.state.RUnlock() - - // Sync the service once - if err := a.state.syncService("redis"); err != nil { - t.Fatalf("err: %s", err) - } - }() + if err := a.State.SyncFull(); err != nil { + t.Fatal("sync failed: ", err) + } // We should have 3 services (consul included) svcReq := structs.NodeSpecificRequest{ @@ -499,12 +467,11 @@ var testRegisterRules = ` func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) { t.Parallel() - a := &TestAgent{Name: t.Name(), HCL: ` + a := &agent.TestAgent{Name: t.Name(), HCL: ` acl_datacenter = "dc1" acl_master_token = "root" acl_default_policy = "deny" - acl_enforce_version_8 = true - `, NoInitialSync: true} + acl_enforce_version_8 = true`} a.Start() defer a.Shutdown() @@ -533,7 +500,7 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) { Tags: []string{"master"}, Port: 5000, } - a.state.AddService(srv1, token) + a.State.AddService(srv1, token) // Create service (allowed) srv2 := &structs.NodeService{ @@ -542,11 +509,11 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) { Tags: []string{"foo"}, Port: 5001, } - a.state.AddService(srv2, token) + a.State.AddService(srv2, token) - // Trigger anti-entropy run and wait - a.StartSync() - time.Sleep(200 * time.Millisecond) + if err := a.State.SyncFull(); err != nil { + t.Fatalf("err: %v", err) + } // Verify that we are in sync { @@ -584,30 +551,16 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) { } } - // todo(fs): data race - func() { - a.state.RLock() - defer a.state.RUnlock() - - // Check the local state - if len(a.state.services) != 2 { - t.Fatalf("bad: %v", a.state.services) - } - if len(a.state.serviceStatus) != 2 { - t.Fatalf("bad: %v", a.state.serviceStatus) - } - for name, status := range a.state.serviceStatus { - if !status.inSync { - t.Fatalf("should be in sync: %v %v", name, status) - } - } - }() + if err := servicesInSync(a.State, 2); err != nil { + t.Fatal(err) + } } // Now remove the service and re-sync - a.state.RemoveService("api") - a.StartSync() - time.Sleep(200 * time.Millisecond) + a.State.RemoveService("api") + if err := a.State.SyncFull(); err != nil { + t.Fatalf("err: %v", err) + } // Verify that we are in sync { @@ -643,35 +596,20 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) { } } - // todo(fs): data race - func() { - a.state.RLock() - defer a.state.RUnlock() - - // Check the local state - if len(a.state.services) != 1 { - t.Fatalf("bad: %v", a.state.services) - } - if len(a.state.serviceStatus) != 1 { - t.Fatalf("bad: %v", a.state.serviceStatus) - } - for name, status := range a.state.serviceStatus { - if !status.inSync { - t.Fatalf("should be in sync: %v %v", name, status) - } - } - }() + if err := servicesInSync(a.State, 
1); err != nil { + t.Fatal(err) + } } // Make sure the token got cleaned up. - if token := a.state.ServiceToken("api"); token != "" { + if token := a.State.ServiceToken("api"); token != "" { t.Fatalf("bad: %s", token) } } func TestAgentAntiEntropy_Checks(t *testing.T) { t.Parallel() - a := &TestAgent{Name: t.Name(), NoInitialSync: true} + a := &agent.TestAgent{Name: t.Name()} a.Start() defer a.Shutdown() @@ -690,7 +628,7 @@ func TestAgentAntiEntropy_Checks(t *testing.T) { Name: "mysql", Status: api.HealthPassing, } - a.state.AddCheck(chk1, "") + a.State.AddCheck(chk1, "") args.Check = chk1 if err := a.RPC("Catalog.Register", args, &out); err != nil { t.Fatalf("err: %v", err) @@ -703,7 +641,7 @@ func TestAgentAntiEntropy_Checks(t *testing.T) { Name: "redis", Status: api.HealthPassing, } - a.state.AddCheck(chk2, "") + a.State.AddCheck(chk2, "") chk2_mod := new(structs.HealthCheck) *chk2_mod = *chk2 @@ -720,7 +658,7 @@ func TestAgentAntiEntropy_Checks(t *testing.T) { Name: "web", Status: api.HealthPassing, } - a.state.AddCheck(chk3, "") + a.State.AddCheck(chk3, "") // Exists remote (delete) chk4 := &structs.HealthCheck{ @@ -741,15 +679,14 @@ func TestAgentAntiEntropy_Checks(t *testing.T) { Name: "cache", Status: api.HealthPassing, } - a.state.AddCheck(chk5, "") + a.State.SetCheckState(&local.CheckState{ + Check: chk5, + InSync: true, + }) - // todo(fs): data race - a.state.Lock() - a.state.checkStatus["cache"] = syncStatus{inSync: true} - a.state.Unlock() - - // Trigger anti-entropy run and wait - a.StartSync() + if err := a.State.SyncFull(); err != nil { + t.Fatalf("err: %v", err) + } req := structs.NodeSpecificRequest{ Datacenter: "dc1", @@ -758,62 +695,45 @@ func TestAgentAntiEntropy_Checks(t *testing.T) { var checks structs.IndexedHealthChecks // Verify that we are in sync - retry.Run(t, func(r *retry.R) { - if err := a.RPC("Health.NodeChecks", &req, &checks); err != nil { - r.Fatalf("err: %v", err) - } + if err := a.RPC("Health.NodeChecks", &req, &checks); err != nil { + t.Fatalf("err: %v", err) + } - // We should have 5 checks (serf included) - if len(checks.HealthChecks) != 5 { - r.Fatalf("bad: %v", checks) - } + // We should have 5 checks (serf included) + if len(checks.HealthChecks) != 5 { + t.Fatalf("bad: %v", checks) + } - // All the checks should match - for _, chk := range checks.HealthChecks { - chk.CreateIndex, chk.ModifyIndex = 0, 0 - switch chk.CheckID { - case "mysql": - if !reflect.DeepEqual(chk, chk1) { - r.Fatalf("bad: %v %v", chk, chk1) - } - case "redis": - if !reflect.DeepEqual(chk, chk2) { - r.Fatalf("bad: %v %v", chk, chk2) - } - case "web": - if !reflect.DeepEqual(chk, chk3) { - r.Fatalf("bad: %v %v", chk, chk3) - } - case "cache": - if !reflect.DeepEqual(chk, chk5) { - r.Fatalf("bad: %v %v", chk, chk5) - } - case "serfHealth": - // ignore - default: - r.Fatalf("unexpected check: %v", chk) + // All the checks should match + for _, chk := range checks.HealthChecks { + chk.CreateIndex, chk.ModifyIndex = 0, 0 + switch chk.CheckID { + case "mysql": + if !reflect.DeepEqual(chk, chk1) { + t.Fatalf("bad: %v %v", chk, chk1) } - } - }) - - // todo(fs): data race - func() { - a.state.RLock() - defer a.state.RUnlock() - - // Check the local state - if len(a.state.checks) != 4 { - t.Fatalf("bad: %v", a.state.checks) - } - if len(a.state.checkStatus) != 4 { - t.Fatalf("bad: %v", a.state.checkStatus) - } - for name, status := range a.state.checkStatus { - if !status.inSync { - t.Fatalf("should be in sync: %v %v", name, status) + case "redis": + if !reflect.DeepEqual(chk, chk2) { 
+ t.Fatalf("bad: %v %v", chk, chk2) } + case "web": + if !reflect.DeepEqual(chk, chk3) { + t.Fatalf("bad: %v %v", chk, chk3) + } + case "cache": + if !reflect.DeepEqual(chk, chk5) { + t.Fatalf("bad: %v %v", chk, chk5) + } + case "serfHealth": + // ignore + default: + t.Fatalf("unexpected check: %v", chk) } - }() + } + + if err := checksInSync(a.State, 4); err != nil { + t.Fatal(err) + } // Make sure we sent along our node info addresses when we synced. { @@ -830,80 +750,63 @@ func TestAgentAntiEntropy_Checks(t *testing.T) { addrs := services.NodeServices.Node.TaggedAddresses meta := services.NodeServices.Node.Meta delete(meta, structs.MetaSegmentKey) // Added later, not in config. - verify.Values(t, "node id", id, a.config.NodeID) - verify.Values(t, "tagged addrs", addrs, a.config.TaggedAddresses) - verify.Values(t, "node meta", meta, a.config.NodeMeta) + verify.Values(t, "node id", id, a.Config.NodeID) + verify.Values(t, "tagged addrs", addrs, a.Config.TaggedAddresses) + verify.Values(t, "node meta", meta, a.Config.NodeMeta) } // Remove one of the checks - a.state.RemoveCheck("redis") + a.State.RemoveCheck("redis") - // Trigger anti-entropy run and wait - a.StartSync() + if err := a.State.SyncFull(); err != nil { + t.Fatalf("err: %v", err) + } // Verify that we are in sync - retry.Run(t, func(r *retry.R) { - if err := a.RPC("Health.NodeChecks", &req, &checks); err != nil { - r.Fatalf("err: %v", err) - } + if err := a.RPC("Health.NodeChecks", &req, &checks); err != nil { + t.Fatalf("err: %v", err) + } - // We should have 5 checks (serf included) - if len(checks.HealthChecks) != 4 { - r.Fatalf("bad: %v", checks) - } + // We should have 5 checks (serf included) + if len(checks.HealthChecks) != 4 { + t.Fatalf("bad: %v", checks) + } - // All the checks should match - for _, chk := range checks.HealthChecks { - chk.CreateIndex, chk.ModifyIndex = 0, 0 - switch chk.CheckID { - case "mysql": - if !reflect.DeepEqual(chk, chk1) { - r.Fatalf("bad: %v %v", chk, chk1) - } - case "web": - if !reflect.DeepEqual(chk, chk3) { - r.Fatalf("bad: %v %v", chk, chk3) - } - case "cache": - if !reflect.DeepEqual(chk, chk5) { - r.Fatalf("bad: %v %v", chk, chk5) - } - case "serfHealth": - // ignore - default: - r.Fatalf("unexpected check: %v", chk) + // All the checks should match + for _, chk := range checks.HealthChecks { + chk.CreateIndex, chk.ModifyIndex = 0, 0 + switch chk.CheckID { + case "mysql": + if !reflect.DeepEqual(chk, chk1) { + t.Fatalf("bad: %v %v", chk, chk1) } - } - }) - - // todo(fs): data race - func() { - a.state.RLock() - defer a.state.RUnlock() - - // Check the local state - if len(a.state.checks) != 3 { - t.Fatalf("bad: %v", a.state.checks) - } - if len(a.state.checkStatus) != 3 { - t.Fatalf("bad: %v", a.state.checkStatus) - } - for name, status := range a.state.checkStatus { - if !status.inSync { - t.Fatalf("should be in sync: %v %v", name, status) + case "web": + if !reflect.DeepEqual(chk, chk3) { + t.Fatalf("bad: %v %v", chk, chk3) } + case "cache": + if !reflect.DeepEqual(chk, chk5) { + t.Fatalf("bad: %v %v", chk, chk5) + } + case "serfHealth": + // ignore + default: + t.Fatalf("unexpected check: %v", chk) } - }() + } + + if err := checksInSync(a.State, 3); err != nil { + t.Fatal(err) + } } func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) { t.Parallel() - a := &TestAgent{Name: t.Name(), HCL: ` + a := &agent.TestAgent{Name: t.Name(), HCL: ` acl_datacenter = "dc1" acl_master_token = "root" acl_default_policy = "deny" - acl_enforce_version_8 = true - `, NoInitialSync: true} + 
acl_enforce_version_8 = true`} a.Start() defer a.Shutdown() @@ -932,18 +835,18 @@ func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) { Tags: []string{"master"}, Port: 5000, } - a.state.AddService(srv1, "root") + a.State.AddService(srv1, "root") srv2 := &structs.NodeService{ ID: "api", Service: "api", Tags: []string{"foo"}, Port: 5001, } - a.state.AddService(srv2, "root") + a.State.AddService(srv2, "root") - // Trigger anti-entropy run and wait - a.StartSync() - time.Sleep(200 * time.Millisecond) + if err := a.State.SyncFull(); err != nil { + t.Fatalf("err: %v", err) + } // Verify that we are in sync { @@ -983,24 +886,9 @@ func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) { } } - // todo(fs): data race - func() { - a.state.RLock() - defer a.state.RUnlock() - - // Check the local state - if len(a.state.services) != 2 { - t.Fatalf("bad: %v", a.state.services) - } - if len(a.state.serviceStatus) != 2 { - t.Fatalf("bad: %v", a.state.serviceStatus) - } - for name, status := range a.state.serviceStatus { - if !status.inSync { - t.Fatalf("should be in sync: %v %v", name, status) - } - } - }() + if err := servicesInSync(a.State, 2); err != nil { + t.Fatal(err) + } } // This check won't be allowed. @@ -1013,7 +901,7 @@ func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) { Name: "mysql", Status: api.HealthPassing, } - a.state.AddCheck(chk1, token) + a.State.AddCheck(chk1, token) // This one will be allowed. chk2 := &structs.HealthCheck{ @@ -1025,14 +913,59 @@ func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) { Name: "api", Status: api.HealthPassing, } - a.state.AddCheck(chk2, token) + a.State.AddCheck(chk2, token) - // Trigger anti-entropy run and wait. - a.StartSync() - time.Sleep(200 * time.Millisecond) + if err := a.State.SyncFull(); err != nil { + t.Fatalf("err: %v", err) + } // Verify that we are in sync - retry.Run(t, func(r *retry.R) { + req := structs.NodeSpecificRequest{ + Datacenter: "dc1", + Node: a.Config.NodeName, + QueryOptions: structs.QueryOptions{ + Token: "root", + }, + } + var checks structs.IndexedHealthChecks + if err := a.RPC("Health.NodeChecks", &req, &checks); err != nil { + t.Fatalf("err: %v", err) + } + + // We should have 2 checks (serf included) + if len(checks.HealthChecks) != 2 { + t.Fatalf("bad: %v", checks) + } + + // All the checks should match + for _, chk := range checks.HealthChecks { + chk.CreateIndex, chk.ModifyIndex = 0, 0 + switch chk.CheckID { + case "mysql-check": + t.Fatalf("should not be permitted") + case "api-check": + if !reflect.DeepEqual(chk, chk2) { + t.Fatalf("bad: %v %v", chk, chk2) + } + case "serfHealth": + // ignore + default: + t.Fatalf("unexpected check: %v", chk) + } + } + + if err := checksInSync(a.State, 2); err != nil { + t.Fatal(err) + } + + // Now delete the check and wait for sync. 
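Context for the mysql-check assertions above: per the todo(fs) comments in the new state.go, a registration or deregistration rejected by ACLs is logged and the entry is marked InSync so it is not retried until the next full sync. A runnable sketch of that switch, where `errPermissionDenied`, `entry`, and `trySync` are stand-in names for this illustration:

```go
package main

import (
	"errors"
	"fmt"
)

// errPermissionDenied stands in for acl.ErrPermissionDenied; the switch
// mirrors the one in syncCheck/deleteCheck above: a denied write is not
// treated as a sync failure, it is parked until the next full sync.
var errPermissionDenied = errors.New("Permission denied")

type entry struct{ InSync bool }

func trySync(e *entry, rpc func() error) error {
	err := rpc()
	switch {
	case err == nil:
		e.InSync = true
		return nil
	case errors.Is(err, errPermissionDenied):
		e.InSync = true // blocked by ACLs: give up until the next full sync
		return nil
	default:
		return err // real errors still abort the sync run
	}
}

func main() {
	e := &entry{}
	if err := trySync(e, func() error { return errPermissionDenied }); err != nil {
		fmt.Println("unexpected:", err)
	}
	fmt.Println(e.InSync) // true: the denied entry is not retried
}
```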
+ a.State.RemoveCheck("api-check") + if err := a.State.SyncFull(); err != nil { + t.Fatalf("err: %v", err) + } + + // Verify that we are in sync + { req := structs.NodeSpecificRequest{ Datacenter: "dc1", Node: a.Config.NodeName, @@ -1042,12 +975,12 @@ func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) { } var checks structs.IndexedHealthChecks if err := a.RPC("Health.NodeChecks", &req, &checks); err != nil { - r.Fatalf("err: %v", err) + t.Fatalf("err: %v", err) } - // We should have 2 checks (serf included) - if len(checks.HealthChecks) != 2 { - r.Fatalf("bad: %v", checks) + // We should have 1 check (just serf) + if len(checks.HealthChecks) != 1 { + t.Fatalf("bad: %v", checks) } // All the checks should match @@ -1057,112 +990,39 @@ func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) { case "mysql-check": t.Fatalf("should not be permitted") case "api-check": - if !reflect.DeepEqual(chk, chk2) { - r.Fatalf("bad: %v %v", chk, chk2) - } + t.Fatalf("should be deleted") case "serfHealth": // ignore default: - r.Fatalf("unexpected check: %v", chk) + t.Fatalf("unexpected check: %v", chk) } } - }) + } - // todo(fs): data race - func() { - a.state.RLock() - defer a.state.RUnlock() - - // Check the local state. - if len(a.state.checks) != 2 { - t.Fatalf("bad: %v", a.state.checks) - } - if len(a.state.checkStatus) != 2 { - t.Fatalf("bad: %v", a.state.checkStatus) - } - for name, status := range a.state.checkStatus { - if !status.inSync { - t.Fatalf("should be in sync: %v %v", name, status) - } - } - }() - - // Now delete the check and wait for sync. - a.state.RemoveCheck("api-check") - a.StartSync() - time.Sleep(200 * time.Millisecond) - // Verify that we are in sync - retry.Run(t, func(r *retry.R) { - req := structs.NodeSpecificRequest{ - Datacenter: "dc1", - Node: a.Config.NodeName, - QueryOptions: structs.QueryOptions{ - Token: "root", - }, - } - var checks structs.IndexedHealthChecks - if err := a.RPC("Health.NodeChecks", &req, &checks); err != nil { - r.Fatalf("err: %v", err) - } - - // We should have 1 check (just serf) - if len(checks.HealthChecks) != 1 { - r.Fatalf("bad: %v", checks) - } - - // All the checks should match - for _, chk := range checks.HealthChecks { - chk.CreateIndex, chk.ModifyIndex = 0, 0 - switch chk.CheckID { - case "mysql-check": - r.Fatalf("should not be permitted") - case "api-check": - r.Fatalf("should be deleted") - case "serfHealth": - // ignore - default: - r.Fatalf("unexpected check: %v", chk) - } - } - }) - - // todo(fs): data race - func() { - a.state.RLock() - defer a.state.RUnlock() - - // Check the local state. - if len(a.state.checks) != 1 { - t.Fatalf("bad: %v", a.state.checks) - } - if len(a.state.checkStatus) != 1 { - t.Fatalf("bad: %v", a.state.checkStatus) - } - for name, status := range a.state.checkStatus { - if !status.inSync { - t.Fatalf("should be in sync: %v %v", name, status) - } - } - }() + if err := checksInSync(a.State, 1); err != nil { + t.Fatal(err) + } // Make sure the token got cleaned up. 
- if token := a.state.CheckToken("api-check"); token != "" { + if token := a.State.CheckToken("api-check"); token != "" { t.Fatalf("bad: %s", token) } } func TestAgent_UpdateCheck_DiscardOutput(t *testing.T) { t.Parallel() - a := NewTestAgent(t.Name(), ` + a := agent.NewTestAgent(t.Name(), ` discard_check_output = true check_update_interval = "0s" # set to "0s" since otherwise output checks are deferred `) defer a.Shutdown() inSync := func(id string) bool { - a.state.Lock() - defer a.state.Unlock() - return a.state.checkStatus[types.CheckID(id)].inSync + s := a.State.CheckState(types.CheckID(id)) + if s == nil { + return false + } + return s.InSync } // register a check @@ -1173,29 +1033,27 @@ func TestAgent_UpdateCheck_DiscardOutput(t *testing.T) { Status: api.HealthPassing, Output: "first output", } - if err := a.state.AddCheck(check, ""); err != nil { + if err := a.State.AddCheck(check, ""); err != nil { t.Fatalf("bad: %s", err) } - - // wait until the check is in sync - retry.Run(t, func(r *retry.R) { - if inSync("web") { - return - } - r.FailNow() - }) + if err := a.State.SyncFull(); err != nil { + t.Fatalf("bad: %s", err) + } + if !inSync("web") { + t.Fatal("check should be in sync") + } // update the check with the same status but different output // and the check should still be in sync. - a.state.UpdateCheck(check.CheckID, api.HealthPassing, "second output") + a.State.UpdateCheck(check.CheckID, api.HealthPassing, "second output") if !inSync("web") { t.Fatal("check should be in sync") } // disable discarding of check output and update the check again with different // output. Then the check should be out of sync. - a.state.SetDiscardCheckOutput(false) - a.state.UpdateCheck(check.CheckID, api.HealthPassing, "third output") + a.State.SetDiscardCheckOutput(false) + a.State.UpdateCheck(check.CheckID, api.HealthPassing, "third output") if inSync("web") { t.Fatal("check should be out of sync") } @@ -1203,9 +1061,9 @@ func TestAgent_UpdateCheck_DiscardOutput(t *testing.T) { func TestAgentAntiEntropy_Check_DeferSync(t *testing.T) { t.Parallel() - a := &TestAgent{Name: t.Name(), HCL: ` + a := &agent.TestAgent{Name: t.Name(), HCL: ` check_update_interval = "500ms" - `, NoInitialSync: true} + `} a.Start() defer a.Shutdown() @@ -1217,10 +1075,11 @@ func TestAgentAntiEntropy_Check_DeferSync(t *testing.T) { Status: api.HealthPassing, Output: "", } - a.state.AddCheck(check, "") + a.State.AddCheck(check, "") - // Trigger anti-entropy run and wait - a.StartSync() + if err := a.State.SyncFull(); err != nil { + t.Fatalf("err: %v", err) + } // Verify that we are in sync req := structs.NodeSpecificRequest{ @@ -1238,7 +1097,7 @@ func TestAgentAntiEntropy_Check_DeferSync(t *testing.T) { }) // Update the check output! Should be deferred - a.state.UpdateCheck("web", api.HealthPassing, "output") + a.State.UpdateCheck("web", api.HealthPassing, "output") // Should not update for 500 milliseconds time.Sleep(250 * time.Millisecond) @@ -1301,9 +1160,9 @@ func TestAgentAntiEntropy_Check_DeferSync(t *testing.T) { } } - // Trigger anti-entropy run and wait. - a.StartSync() - time.Sleep(200 * time.Millisecond) + if err := a.State.SyncFull(); err != nil { + t.Fatalf("err: %v", err) + } // Verify that the output was synced back to the agent's value. if err := a.RPC("Health.NodeChecks", &req, &checks); err != nil { @@ -1337,11 +1196,11 @@ func TestAgentAntiEntropy_Check_DeferSync(t *testing.T) { } // Now make an update that should be deferred. 
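A sketch of the deferral behavior exercised by the update below, assuming `check_update_interval = "500ms"` as configured above: an output-only change keeps the check's status and is queued behind a timer rather than synced immediately, so the catalog lags by up to the interval. Names here are illustrative, not the agent's internals:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	interval := 500 * time.Millisecond
	updated := make(chan string, 1)

	// Stand-in for UpdateCheck("web", api.HealthPassing, "deferred"):
	// same status, new output -> schedule the sync instead of doing it now.
	time.AfterFunc(interval, func() { updated <- "deferred" })

	select {
	case out := <-updated:
		fmt.Println("unexpected early sync:", out)
	case <-time.After(250 * time.Millisecond):
		fmt.Println("catalog still has the old output") // expected at 250ms
	}
	fmt.Println("synced output:", <-updated) // arrives once the timer fires
}
```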
- a.state.UpdateCheck("web", api.HealthPassing, "deferred") + a.State.UpdateCheck("web", api.HealthPassing, "deferred") - // Trigger anti-entropy run and wait. - a.StartSync() - time.Sleep(200 * time.Millisecond) + if err := a.State.SyncFull(); err != nil { + t.Fatalf("err: %v", err) + } // Verify that the output is still out of sync since there's a deferred // update pending. @@ -1381,12 +1240,11 @@ func TestAgentAntiEntropy_NodeInfo(t *testing.T) { nodeMeta := map[string]string{ "somekey": "somevalue", } - a := &TestAgent{Name: t.Name(), HCL: ` + a := &agent.TestAgent{Name: t.Name(), HCL: ` node_id = "40e4a748-2192-161a-0510-9bf59fe950b5" node_meta { somekey = "somevalue" - } - `, NoInitialSync: true} + }`} a.Start() defer a.Shutdown() @@ -1401,92 +1259,66 @@ func TestAgentAntiEntropy_NodeInfo(t *testing.T) { t.Fatalf("err: %v", err) } - // Trigger anti-entropy run and wait - a.StartSync() + if err := a.State.SyncFull(); err != nil { + t.Fatalf("err: %v", err) + } req := structs.NodeSpecificRequest{ Datacenter: "dc1", Node: a.Config.NodeName, } var services structs.IndexedNodeServices - // Wait for the sync - retry.Run(t, func(r *retry.R) { - if err := a.RPC("Catalog.NodeServices", &req, &services); err != nil { - r.Fatalf("err: %v", err) - } + if err := a.RPC("Catalog.NodeServices", &req, &services); err != nil { + t.Fatalf("err: %v", err) + } - // Make sure we synced our node info - this should have ridden on the - // "consul" service sync - id := services.NodeServices.Node.ID - addrs := services.NodeServices.Node.TaggedAddresses - meta := services.NodeServices.Node.Meta - delete(meta, structs.MetaSegmentKey) // Added later, not in config. - if id != nodeID || - !reflect.DeepEqual(addrs, a.config.TaggedAddresses) || - !reflect.DeepEqual(meta, nodeMeta) { - r.Fatalf("bad: %v", services.NodeServices.Node) - } - }) + id := services.NodeServices.Node.ID + addrs := services.NodeServices.Node.TaggedAddresses + meta := services.NodeServices.Node.Meta + delete(meta, structs.MetaSegmentKey) // Added later, not in config. + if id != a.Config.NodeID || + !reflect.DeepEqual(addrs, a.Config.TaggedAddresses) || + !reflect.DeepEqual(meta, a.Config.NodeMeta) { + t.Fatalf("bad: %v", services.NodeServices.Node) + } // Blow away the catalog version of the node info if err := a.RPC("Catalog.Register", args, &out); err != nil { t.Fatalf("err: %v", err) } - // Trigger anti-entropy run and wait - a.StartSync() - // Wait for the sync - this should have been a sync of just the node info - retry.Run(t, func(r *retry.R) { - if err := a.RPC("Catalog.NodeServices", &req, &services); err != nil { - r.Fatalf("err: %v", err) - } + if err := a.State.SyncFull(); err != nil { + t.Fatalf("err: %v", err) + } + // Wait for the sync - this should have been a sync of just the node info + if err := a.RPC("Catalog.NodeServices", &req, &services); err != nil { + t.Fatalf("err: %v", err) + } + + { id := services.NodeServices.Node.ID addrs := services.NodeServices.Node.TaggedAddresses meta := services.NodeServices.Node.Meta delete(meta, structs.MetaSegmentKey) // Added later, not in config. 
if id != nodeID || - !reflect.DeepEqual(addrs, a.config.TaggedAddresses) || + !reflect.DeepEqual(addrs, a.Config.TaggedAddresses) || !reflect.DeepEqual(meta, nodeMeta) { - r.Fatalf("bad: %v", services.NodeServices.Node) + t.Fatalf("bad: %v", services.NodeServices.Node) } - }) -} - -func TestAgentAntiEntropy_deleteService_fails(t *testing.T) { - t.Parallel() - l := new(localState) - - // todo(fs): data race - l.Lock() - defer l.Unlock() - if err := l.deleteService(""); err == nil { - t.Fatalf("should have failed") } } -func TestAgentAntiEntropy_deleteCheck_fails(t *testing.T) { - t.Parallel() - l := new(localState) - - // todo(fs): data race - l.Lock() - defer l.Unlock() - if err := l.deleteCheck(""); err == nil { - t.Fatalf("should have errored") - } -} - -func TestAgent_serviceTokens(t *testing.T) { +func TestAgent_ServiceTokens(t *testing.T) { t.Parallel() tokens := new(token.Store) tokens.UpdateUserToken("default") - l := NewLocalState(config.DefaultRuntimeConfig(`bind_addr = "127.0.0.1" data_dir = "dummy"`), nil, tokens) + cfg := config.DefaultRuntimeConfig(`bind_addr = "127.0.0.1" data_dir = "dummy"`) + l := local.NewState(agent.LocalConfig(cfg), nil, tokens) + l.TriggerSyncChanges = func() {} - l.AddService(&structs.NodeService{ - ID: "redis", - }, "") + l.AddService(&structs.NodeService{ID: "redis"}, "") // Returns default when no token is set if token := l.ServiceToken("redis"); token != "default" { @@ -1494,7 +1326,7 @@ func TestAgent_serviceTokens(t *testing.T) { } // Returns configured token - l.serviceTokens["redis"] = "abc123" + l.AddService(&structs.NodeService{ID: "redis"}, "abc123") if token := l.ServiceToken("redis"); token != "abc123" { t.Fatalf("bad: %s", token) } @@ -1506,20 +1338,23 @@ func TestAgent_serviceTokens(t *testing.T) { } } -func TestAgent_checkTokens(t *testing.T) { +func TestAgent_CheckTokens(t *testing.T) { t.Parallel() tokens := new(token.Store) tokens.UpdateUserToken("default") - l := NewLocalState(config.DefaultRuntimeConfig(`bind_addr = "127.0.0.1" data_dir = "dummy"`), nil, tokens) + cfg := config.DefaultRuntimeConfig(`bind_addr = "127.0.0.1" data_dir = "dummy"`) + l := local.NewState(agent.LocalConfig(cfg), nil, tokens) + l.TriggerSyncChanges = func() {} // Returns default when no token is set + l.AddCheck(&structs.HealthCheck{CheckID: types.CheckID("mem")}, "") if token := l.CheckToken("mem"); token != "default" { t.Fatalf("bad: %s", token) } // Returns configured token - l.checkTokens["mem"] = "abc123" + l.AddCheck(&structs.HealthCheck{CheckID: types.CheckID("mem")}, "abc123") if token := l.CheckToken("mem"); token != "abc123" { t.Fatalf("bad: %s", token) } @@ -1531,9 +1366,11 @@ func TestAgent_checkTokens(t *testing.T) { } } -func TestAgent_checkCriticalTime(t *testing.T) { +func TestAgent_CheckCriticalTime(t *testing.T) { t.Parallel() - l := NewLocalState(config.DefaultRuntimeConfig(`bind_addr = "127.0.0.1" data_dir = "dummy"`), nil, new(token.Store)) + cfg := config.DefaultRuntimeConfig(`bind_addr = "127.0.0.1" data_dir = "dummy"`) + l := local.NewState(agent.LocalConfig(cfg), nil, new(token.Store)) + l.TriggerSyncChanges = func() {} svc := &structs.NodeService{ID: "redis", Service: "redis", Port: 8000} l.AddService(svc, "") @@ -1548,54 +1385,56 @@ func TestAgent_checkCriticalTime(t *testing.T) { Status: api.HealthPassing, } l.AddCheck(chk, "") - if checks := l.CriticalChecks(); len(checks) > 0 { + if checks := l.CriticalCheckStates(); len(checks) > 0 { t.Fatalf("should not have any critical checks") } // Set it to warning and make sure that 
doesn't show up as critical. l.UpdateCheck(checkID, api.HealthWarning, "") - if checks := l.CriticalChecks(); len(checks) > 0 { + if checks := l.CriticalCheckStates(); len(checks) > 0 { t.Fatalf("should not have any critical checks") } // Fail the check and make sure the time looks reasonable. l.UpdateCheck(checkID, api.HealthCritical, "") - if crit, ok := l.CriticalChecks()[checkID]; !ok { + if c, ok := l.CriticalCheckStates()[checkID]; !ok { t.Fatalf("should have a critical check") - } else if crit.CriticalFor > time.Millisecond { - t.Fatalf("bad: %#v", crit) + } else if c.CriticalFor() > time.Millisecond { + t.Fatalf("bad: %#v", c) } // Wait a while, then fail it again and make sure the time keeps track // of the initial failure, and doesn't reset here. time.Sleep(50 * time.Millisecond) l.UpdateCheck(chk.CheckID, api.HealthCritical, "") - if crit, ok := l.CriticalChecks()[checkID]; !ok { + if c, ok := l.CriticalCheckStates()[checkID]; !ok { t.Fatalf("should have a critical check") - } else if crit.CriticalFor < 25*time.Millisecond || - crit.CriticalFor > 75*time.Millisecond { - t.Fatalf("bad: %#v", crit) + } else if c.CriticalFor() < 25*time.Millisecond || + c.CriticalFor() > 75*time.Millisecond { + t.Fatalf("bad: %#v", c) } // Set it passing again. l.UpdateCheck(checkID, api.HealthPassing, "") - if checks := l.CriticalChecks(); len(checks) > 0 { + if checks := l.CriticalCheckStates(); len(checks) > 0 { t.Fatalf("should not have any critical checks") } // Fail the check and make sure the time looks like it started again // from the latest failure, not the original one. l.UpdateCheck(checkID, api.HealthCritical, "") - if crit, ok := l.CriticalChecks()[checkID]; !ok { + if c, ok := l.CriticalCheckStates()[checkID]; !ok { t.Fatalf("should have a critical check") - } else if crit.CriticalFor > time.Millisecond { - t.Fatalf("bad: %#v", crit) + } else if c.CriticalFor() > time.Millisecond { + t.Fatalf("bad: %#v", c) } } func TestAgent_AddCheckFailure(t *testing.T) { t.Parallel() - l := NewLocalState(config.DefaultRuntimeConfig(`bind_addr = "127.0.0.1" data_dir = "dummy"`), nil, new(token.Store)) + cfg := config.DefaultRuntimeConfig(`bind_addr = "127.0.0.1" data_dir = "dummy"`) + l := local.NewState(agent.LocalConfig(cfg), nil, new(token.Store)) + l.TriggerSyncChanges = func() {} // Add a check for a service that does not exist and verify that it fails checkID := types.CheckID("redis:1") @@ -1606,48 +1445,15 @@ func TestAgent_AddCheckFailure(t *testing.T) { ServiceID: "redis", Status: api.HealthPassing, } - expectedErr := "ServiceID \"redis\" does not exist" - if err := l.AddCheck(chk, ""); err == nil || expectedErr != err.Error() { - t.Fatalf("Expected error when adding a check for a non-existent service but got %v", err) + wantErr := errors.New(`Check "redis:1" refers to non-existent service "redis"`) + if got, want := l.AddCheck(chk, ""), wantErr; !reflect.DeepEqual(got, want) { + t.Fatalf("got error %q want %q", got, want) } - -} - -func TestAgent_nestedPauseResume(t *testing.T) { - t.Parallel() - l := new(localState) - if l.isPaused() != false { - t.Fatal("localState should be unPaused after init") - } - l.Pause() - if l.isPaused() != true { - t.Fatal("localState should be Paused after first call to Pause()") - } - l.Pause() - if l.isPaused() != true { - t.Fatal("localState should STILL be Paused after second call to Pause()") - } - l.Resume() - if l.isPaused() != true { - t.Fatal("localState should STILL be Paused after FIRST call to Resume()") - } - l.Resume() - if l.isPaused() != false 
{ - t.Fatal("localState should NOT be Paused after SECOND call to Resume()") - } - - defer func() { - err := recover() - if err == nil { - t.Fatal("unbalanced Resume() should cause a panic()") - } - }() - l.Resume() } func TestAgent_sendCoordinate(t *testing.T) { t.Parallel() - a := NewTestAgent(t.Name(), ` + a := agent.NewTestAgent(t.Name(), ` sync_coordinate_interval_min = "1ms" sync_coordinate_rate_target = 10.0 consul = { @@ -1660,8 +1466,10 @@ func TestAgent_sendCoordinate(t *testing.T) { `) defer a.Shutdown() - t.Logf("%d %d %s", a.consulConfig().CoordinateUpdateBatchSize, a.consulConfig().CoordinateUpdateMaxBatches, - a.consulConfig().CoordinateUpdatePeriod.String()) + t.Logf("%d %d %s", + a.Config.ConsulCoordinateUpdateBatchSize, + a.Config.ConsulCoordinateUpdateMaxBatches, + a.Config.ConsulCoordinateUpdatePeriod.String()) // Make sure the coordinate is present. req := structs.DCSpecificRequest{ @@ -1681,3 +1489,29 @@ func TestAgent_sendCoordinate(t *testing.T) { } }) } + +func servicesInSync(state *local.State, wantServices int) error { + services := state.ServiceStates() + if got, want := len(services), wantServices; got != want { + return fmt.Errorf("got %d services want %d", got, want) + } + for id, s := range services { + if !s.InSync { + return fmt.Errorf("service %q should be in sync", id) + } + } + return nil +} + +func checksInSync(state *local.State, wantChecks int) error { + checks := state.CheckStates() + if got, want := len(checks), wantChecks; got != want { + return fmt.Errorf("got %d checks want %d", got, want) + } + for id, c := range checks { + if !c.InSync { + return fmt.Errorf("check %q should be in sync", id) + } + } + return nil +} diff --git a/agent/testagent.go b/agent/testagent.go index 9b99cd6471..1c4806d36c 100644 --- a/agent/testagent.go +++ b/agent/testagent.go @@ -65,10 +65,6 @@ type TestAgent struct { // Key is the optional encryption key for the LAN and WAN keyring. Key string - // NoInitialSync determines whether an anti-entropy run - // will be scheduled after the agent started. - NoInitialSync bool - // dns is a reference to the first started DNS endpoint. // It is valid after Start(). 
dns *DNSServer @@ -175,9 +171,9 @@ func (a *TestAgent) Start() *TestAgent { } } } - if !a.NoInitialSync { - a.Agent.StartSync() - } + + // Start the anti-entropy syncer + a.Agent.StartSync() var out structs.IndexedNodes retry.Run(&panicFailer{}, func(r *retry.R) { @@ -200,7 +196,7 @@ func (a *TestAgent) Start() *TestAgent { r.Fatal(a.Name, "No leader") } if out.Index == 0 { - r.Fatal(a.Name, "Consul index is 0") + r.Fatal(a.Name, ": Consul index is 0") } } else { req, _ := http.NewRequest("GET", "/v1/agent/self", nil) diff --git a/agent/user_event.go b/agent/user_event.go index a83cb7fdce..24d4bb03d5 100644 --- a/agent/user_event.go +++ b/agent/user_event.go @@ -173,7 +173,7 @@ func (a *Agent) shouldProcessUserEvent(msg *UserEvent) bool { } // Scan for a match - services := a.state.Services() + services := a.State.Services() found := false OUTER: for name, info := range services { diff --git a/agent/user_event_test.go b/agent/user_event_test.go index 0f1e59d00b..1f79145798 100644 --- a/agent/user_event_test.go +++ b/agent/user_event_test.go @@ -57,7 +57,7 @@ func TestShouldProcessUserEvent(t *testing.T) { Tags: []string{"test", "foo", "bar", "master"}, Port: 5000, } - a.state.AddService(srv1, "") + a.State.AddService(srv1, "") p := &UserEvent{} if !a.shouldProcessUserEvent(p) { @@ -157,7 +157,7 @@ func TestFireReceiveEvent(t *testing.T) { Tags: []string{"test", "foo", "bar", "master"}, Port: 5000, } - a.state.AddService(srv1, "") + a.State.AddService(srv1, "") p1 := &UserEvent{Name: "deploy", ServiceFilter: "web"} err := a.UserEvent("dc1", "root", p1) diff --git a/agent/util.go b/agent/util.go index 3a13c2172a..ae7c3c154f 100644 --- a/agent/util.go +++ b/agent/util.go @@ -4,28 +4,16 @@ import ( "bytes" "crypto/md5" "fmt" - "math" "os" "os/exec" "os/signal" osuser "os/user" "strconv" - "time" "github.com/hashicorp/consul/types" "github.com/hashicorp/go-msgpack/codec" ) -const ( - // This scale factor means we will add a minute after we cross 128 nodes, - // another at 256, another at 512, etc. By 8192 nodes, we will scale up - // by a factor of 8. - // - // If you update this, you may need to adjust the tuning of - // CoordinateUpdatePeriod and CoordinateUpdateMaxBatchSize. - aeScaleThreshold = 128 -) - // msgpackHandle is a shared handle for encoding/decoding of // messages var msgpackHandle = &codec.MsgpackHandle{ @@ -33,18 +21,6 @@ var msgpackHandle = &codec.MsgpackHandle{ WriteExt: true, } -// aeScale is used to scale the time interval at which anti-entropy updates take -// place. It is used to prevent saturation as the cluster size grows. 
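For reference while reviewing the removal below: the deleted aeScale helper (whose expectations also disappear from util_test.go further down) implemented the log2 delay curve described in the comment above. A standalone sketch that reproduces those deleted test values:

```go
package main

import (
	"fmt"
	"math"
	"time"
)

// aeScaleSketch reproduces the curve of the removed aeScale helper: no
// scaling up to 128 nodes, then the interval doubles each time the
// cluster size doubles (2x past 128, 4x past 512, 8x by 8192 nodes).
func aeScaleSketch(interval time.Duration, nodes int) time.Duration {
	const threshold = 128
	if nodes <= threshold {
		return interval
	}
	m := math.Ceil(math.Log2(float64(nodes))-math.Log2(threshold)) + 1
	return time.Duration(m) * interval
}

func main() {
	for _, n := range []int{100, 200, 1000, 10000} {
		fmt.Println(n, aeScaleSketch(time.Minute, n)) // 1m0s, 2m0s, 4m0s, 8m0s
	}
}
```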
-func aeScale(interval time.Duration, n int) time.Duration { - // Don't scale until we cross the threshold - if n <= aeScaleThreshold { - return interval - } - - multiplier := math.Ceil(math.Log2(float64(n))-math.Log2(aeScaleThreshold)) + 1.0 - return time.Duration(multiplier) * interval -} - // decodeMsgPack is used to decode a MsgPack encoded object func decodeMsgPack(buf []byte, out interface{}) error { return codec.NewDecoder(bytes.NewReader(buf), msgpackHandle).Decode(out) diff --git a/agent/util_test.go b/agent/util_test.go index ea12c708db..b10f274c02 100644 --- a/agent/util_test.go +++ b/agent/util_test.go @@ -4,28 +4,10 @@ import ( "os" "runtime" "testing" - "time" "github.com/hashicorp/consul/testutil" ) -func TestAEScale(t *testing.T) { - t.Parallel() - intv := time.Minute - if v := aeScale(intv, 100); v != intv { - t.Fatalf("Bad: %v", v) - } - if v := aeScale(intv, 200); v != 2*intv { - t.Fatalf("Bad: %v", v) - } - if v := aeScale(intv, 1000); v != 4*intv { - t.Fatalf("Bad: %v", v) - } - if v := aeScale(intv, 10000); v != 8*intv { - t.Fatalf("Bad: %v", v) - } -} - func TestStringHash(t *testing.T) { t.Parallel() in := "hello world"