diff --git a/.changelog/8747.txt b/.changelog/8747.txt new file mode 100644 index 0000000000..4ec3a40b4c --- /dev/null +++ b/.changelog/8747.txt @@ -0,0 +1,3 @@ +```release-note:bug +agent: when enable_central_service_config is enabled ensure agent reload doesn't revert check state to critical +``` diff --git a/agent/agent.go b/agent/agent.go index 99fd80b976..d85583f9e6 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -1893,7 +1893,8 @@ func (a *Agent) AddServiceAndReplaceChecks(service *structs.NodeService, chkType token: token, replaceExistingChecks: true, source: source, - }, a.snapshotCheckState()) + snap: a.snapshotCheckState(), + }) } // AddService is used to add a service entry. @@ -1912,12 +1913,13 @@ func (a *Agent) AddService(service *structs.NodeService, chkTypes []*structs.Che token: token, replaceExistingChecks: false, source: source, - }, a.snapshotCheckState()) + snap: a.snapshotCheckState(), + }) } // addServiceLocked adds a service entry to the service manager if enabled, or directly // to the local state if it is not. This function assumes the state lock is already held. -func (a *Agent) addServiceLocked(req *addServiceRequest, snap map[structs.CheckID]*structs.HealthCheck) error { +func (a *Agent) addServiceLocked(req *addServiceRequest) error { req.fixupForAddServiceLocked() req.service.EnterpriseMeta.Normalize() @@ -1935,7 +1937,7 @@ func (a *Agent) addServiceLocked(req *addServiceRequest, snap map[structs.CheckI req.persistDefaults = nil req.persistServiceConfig = false - return a.addServiceInternal(req, snap) + return a.addServiceInternal(req) } // addServiceRequest is the union of arguments for calling both @@ -1960,6 +1962,7 @@ type addServiceRequest struct { token string replaceExistingChecks bool source configSource + snap map[structs.CheckID]*structs.HealthCheck } func (r *addServiceRequest) fixupForAddServiceLocked() { @@ -1973,7 +1976,7 @@ func (r *addServiceRequest) fixupForAddServiceInternal() { } // addServiceInternal adds the given service and checks to the local state. -func (a *Agent) addServiceInternal(req *addServiceRequest, snap map[structs.CheckID]*structs.HealthCheck) error { +func (a *Agent) addServiceInternal(req *addServiceRequest) error { req.fixupForAddServiceInternal() var ( service = req.service @@ -1985,6 +1988,7 @@ func (a *Agent) addServiceInternal(req *addServiceRequest, snap map[structs.Chec token = req.token replaceExistingChecks = req.replaceExistingChecks source = req.source + snap = req.snap ) // Pause the service syncs during modification @@ -3119,7 +3123,8 @@ func (a *Agent) loadServices(conf *config.RuntimeConfig, snap map[structs.CheckI token: service.Token, replaceExistingChecks: false, // do default behavior source: ConfigSourceLocal, - }, snap) + snap: snap, + }) if err != nil { return fmt.Errorf("Failed to register service %q: %v", service.Name, err) } @@ -3137,7 +3142,8 @@ func (a *Agent) loadServices(conf *config.RuntimeConfig, snap map[structs.CheckI token: sidecarToken, replaceExistingChecks: false, // do default behavior source: ConfigSourceLocal, - }, snap) + snap: snap, + }) if err != nil { return fmt.Errorf("Failed to register sidecar for service %q: %v", service.Name, err) } @@ -3229,7 +3235,8 @@ func (a *Agent) loadServices(conf *config.RuntimeConfig, snap map[structs.CheckI token: p.Token, replaceExistingChecks: false, // do default behavior source: source, - }, snap) + snap: snap, + }) if err != nil { return fmt.Errorf("failed adding service %q: %s", serviceID, err) } diff --git a/agent/agent_test.go b/agent/agent_test.go index d87f66187d..0851c0c9c7 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -3559,7 +3559,17 @@ func TestAgent_ReloadConfigOutgoingRPCConfig(t *testing.T) { } func TestAgent_ReloadConfigAndKeepChecksStatus(t *testing.T) { - t.Parallel() + t.Run("normal", func(t *testing.T) { + t.Parallel() + testAgent_ReloadConfigAndKeepChecksStatus(t, "") + }) + t.Run("service manager", func(t *testing.T) { + t.Parallel() + testAgent_ReloadConfigAndKeepChecksStatus(t, "enable_central_service_config = true") + }) +} + +func testAgent_ReloadConfigAndKeepChecksStatus(t *testing.T, extraHCL string) { dataDir := testutil.TempDir(t, "agent") // we manage the data dir defer os.RemoveAll(dataDir) hcl := `data_dir = "` + dataDir + `" @@ -3567,7 +3577,7 @@ func TestAgent_ReloadConfigAndKeepChecksStatus(t *testing.T) { services=[{ name="webserver1", check{id="check1", ttl="30s"} - }]` + }] ` + extraHCL a := NewTestAgent(t, hcl) defer a.Shutdown() @@ -3582,6 +3592,7 @@ func TestAgent_ReloadConfigAndKeepChecksStatus(t *testing.T) { c := TestConfig(testutil.Logger(t), config.FileSource{Name: t.Name(), Format: "hcl", Data: hcl}) require.NoError(t, a.reloadConfigInternal(c)) + // After reload, should be passing directly (no critical state) for id, check := range a.State.Checks(nil) { require.Equal(t, "passing", check.Status, "check %q is wrong", id) diff --git a/agent/service_manager.go b/agent/service_manager.go index 5a163f3596..a523116457 100644 --- a/agent/service_manager.go +++ b/agent/service_manager.go @@ -86,7 +86,11 @@ func (s *ServiceManager) registerOnce(args *addServiceRequest) error { s.agent.stateLock.Lock() defer s.agent.stateLock.Unlock() - err := s.agent.addServiceInternal(args, s.agent.snapshotCheckState()) + if args.snap == nil { + args.snap = s.agent.snapshotCheckState() + } + + err := s.agent.addServiceInternal(args) if err != nil { return fmt.Errorf("error updating service registration: %v", err) } @@ -127,7 +131,7 @@ func (s *ServiceManager) AddService(req *addServiceRequest) error { req.persistService = nil req.persistDefaults = nil req.persistServiceConfig = false - return s.agent.addServiceInternal(req, s.agent.snapshotCheckState()) + return s.agent.addServiceInternal(req) } var ( @@ -279,7 +283,8 @@ func (w *serviceConfigWatch) RegisterAndStart( token: w.registration.token, replaceExistingChecks: w.registration.replaceExistingChecks, source: w.registration.source, - }, w.agent.snapshotCheckState()) + snap: w.agent.snapshotCheckState(), + }) if err != nil { return fmt.Errorf("error updating service registration: %v", err) }