diff --git a/.changelog/8747.txt b/.changelog/8747.txt new file mode 100644 index 0000000000..4ec3a40b4c --- /dev/null +++ b/.changelog/8747.txt @@ -0,0 +1,3 @@ +```release-note:bug +agent: when enable_central_service_config is enabled ensure agent reload doesn't revert check state to critical +``` diff --git a/agent/agent.go b/agent/agent.go index 1239c8d5f8..66827a8821 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -1840,7 +1840,8 @@ func (a *Agent) AddServiceAndReplaceChecks(service *structs.NodeService, chkType token: token, replaceExistingChecks: true, source: source, - }, a.snapshotCheckState()) + snap: a.snapshotCheckState(), + }) } // AddService is used to add a service entry. @@ -1859,12 +1860,13 @@ func (a *Agent) AddService(service *structs.NodeService, chkTypes []*structs.Che token: token, replaceExistingChecks: false, source: source, - }, a.snapshotCheckState()) + snap: a.snapshotCheckState(), + }) } // addServiceLocked adds a service entry to the service manager if enabled, or directly // to the local state if it is not. This function assumes the state lock is already held. -func (a *Agent) addServiceLocked(req *addServiceRequest, snap map[structs.CheckID]*structs.HealthCheck) error { +func (a *Agent) addServiceLocked(req *addServiceRequest) error { req.fixupForAddServiceLocked() req.service.EnterpriseMeta.Normalize() @@ -1882,7 +1884,7 @@ func (a *Agent) addServiceLocked(req *addServiceRequest, snap map[structs.CheckI req.persistDefaults = nil req.persistServiceConfig = false - return a.addServiceInternal(req, snap) + return a.addServiceInternal(req) } // addServiceRequest is the union of arguments for calling both @@ -1907,6 +1909,7 @@ type addServiceRequest struct { token string replaceExistingChecks bool source configSource + snap map[structs.CheckID]*structs.HealthCheck } func (r *addServiceRequest) fixupForAddServiceLocked() { @@ -1920,7 +1923,7 @@ func (r *addServiceRequest) fixupForAddServiceInternal() { } // addServiceInternal adds the given service and checks to the local state. -func (a *Agent) addServiceInternal(req *addServiceRequest, snap map[structs.CheckID]*structs.HealthCheck) error { +func (a *Agent) addServiceInternal(req *addServiceRequest) error { req.fixupForAddServiceInternal() var ( service = req.service @@ -1932,6 +1935,7 @@ func (a *Agent) addServiceInternal(req *addServiceRequest, snap map[structs.Chec token = req.token replaceExistingChecks = req.replaceExistingChecks source = req.source + snap = req.snap ) // Pause the service syncs during modification @@ -3066,7 +3070,8 @@ func (a *Agent) loadServices(conf *config.RuntimeConfig, snap map[structs.CheckI token: service.Token, replaceExistingChecks: false, // do default behavior source: ConfigSourceLocal, - }, snap) + snap: snap, + }) if err != nil { return fmt.Errorf("Failed to register service %q: %v", service.Name, err) } @@ -3084,7 +3089,8 @@ func (a *Agent) loadServices(conf *config.RuntimeConfig, snap map[structs.CheckI token: sidecarToken, replaceExistingChecks: false, // do default behavior source: ConfigSourceLocal, - }, snap) + snap: snap, + }) if err != nil { return fmt.Errorf("Failed to register sidecar for service %q: %v", service.Name, err) } @@ -3176,7 +3182,8 @@ func (a *Agent) loadServices(conf *config.RuntimeConfig, snap map[structs.CheckI token: p.Token, replaceExistingChecks: false, // do default behavior source: source, - }, snap) + snap: snap, + }) if err != nil { return fmt.Errorf("failed adding service %q: %s", serviceID, err) } diff --git a/agent/agent_test.go b/agent/agent_test.go index e058e244bf..283e90c147 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -3397,14 +3397,24 @@ func TestAgent_ReloadConfigOutgoingRPCConfig(t *testing.T) { } func TestAgent_ReloadConfigAndKeepChecksStatus(t *testing.T) { - t.Parallel() + t.Run("normal", func(t *testing.T) { + t.Parallel() + testAgent_ReloadConfigAndKeepChecksStatus(t, "") + }) + t.Run("service manager", func(t *testing.T) { + t.Parallel() + testAgent_ReloadConfigAndKeepChecksStatus(t, "enable_central_service_config = true") + }) +} + +func testAgent_ReloadConfigAndKeepChecksStatus(t *testing.T, extraHCL string) { dataDir := testutil.TempDir(t, "agent") // we manage the data dir hcl := `data_dir = "` + dataDir + `" enable_local_script_checks=true services=[{ name="webserver1", check{id="check1", ttl="30s"} - }]` + }] ` + extraHCL a := NewTestAgent(t, hcl) defer a.Shutdown() @@ -3419,6 +3429,7 @@ func TestAgent_ReloadConfigAndKeepChecksStatus(t *testing.T) { c := TestConfig(testutil.Logger(t), config.FileSource{Name: t.Name(), Format: "hcl", Data: hcl}) require.NoError(t, a.reloadConfigInternal(c)) + // After reload, should be passing directly (no critical state) for id, check := range a.State.Checks(nil) { require.Equal(t, "passing", check.Status, "check %q is wrong", id) diff --git a/agent/service_manager.go b/agent/service_manager.go index a20a00d198..d6874adee4 100644 --- a/agent/service_manager.go +++ b/agent/service_manager.go @@ -87,7 +87,11 @@ func (s *ServiceManager) registerOnce(args *addServiceRequest) error { s.agent.stateLock.Lock() defer s.agent.stateLock.Unlock() - err := s.agent.addServiceInternal(args, s.agent.snapshotCheckState()) + if args.snap == nil { + args.snap = s.agent.snapshotCheckState() + } + + err := s.agent.addServiceInternal(args) if err != nil { return fmt.Errorf("error updating service registration: %v", err) } @@ -128,7 +132,7 @@ func (s *ServiceManager) AddService(req *addServiceRequest) error { req.persistService = nil req.persistDefaults = nil req.persistServiceConfig = false - return s.agent.addServiceInternal(req, s.agent.snapshotCheckState()) + return s.agent.addServiceInternal(req) } var ( @@ -273,7 +277,8 @@ func (w *serviceConfigWatch) RegisterAndStart( token: w.registration.token, replaceExistingChecks: w.registration.replaceExistingChecks, source: w.registration.source, - }, w.agent.snapshotCheckState()) + snap: w.agent.snapshotCheckState(), + }) if err != nil { return fmt.Errorf("error updating service registration: %v", err) }