diff --git a/command/agent/agent.go b/command/agent/agent.go
index 88e26488c5..9bcc8be6c6 100644
--- a/command/agent/agent.go
+++ b/command/agent/agent.go
@@ -1032,9 +1032,16 @@ func (a *Agent) loadChecks(conf *Config) error {
 			// services into the active pool
 			p.Check.Status = structs.HealthCritical
 
+			if err := a.AddCheck(p.Check, p.ChkType, false); err != nil {
+				// Purge the check if it is unable to be restored.
+				a.logger.Printf("[WARN] agent: Failed to restore check %q: %s",
+					p.Check.CheckID, err)
+				return a.purgeCheck(p.Check.CheckID)
+			}
+
 			a.logger.Printf("[DEBUG] agent: restored health check %q from %q",
 				p.Check.CheckID, filePath)
-			return a.AddCheck(p.Check, p.ChkType, false)
+			return nil
 		}
 	})
 
diff --git a/command/agent/agent_test.go b/command/agent/agent_test.go
index b718674811..33db4f6e2b 100644
--- a/command/agent/agent_test.go
+++ b/command/agent/agent_test.go
@@ -1065,3 +1065,43 @@ func TestAgent_checkStateSnapshot(t *testing.T) {
 		t.Fatalf("should have restored check state")
 	}
 }
+
+// TestAgent_loadChecks_checkFails persists a health check with an invalid
+// service ID, then verifies that loadChecks returns no error and that the
+// persisted check file has been removed from the data directory.
+func TestAgent_loadChecks_checkFails(t *testing.T) {
+	config := nextConfig()
+	dir, agent := makeAgent(t, config)
+	defer os.RemoveAll(dir)
+	defer agent.Shutdown()
+
+	// Persist a health check with an invalid service ID
+	check := &structs.HealthCheck{
+		Node:      config.NodeName,
+		CheckID:   "service:redis",
+		Name:      "redischeck",
+		Status:    structs.HealthPassing,
+		ServiceID: "nope",
+	}
+	if err := agent.persistCheck(check, nil); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+
+	// Check to make sure the check was persisted
+	checkHash := stringHash(check.CheckID)
+	checkPath := filepath.Join(config.DataDir, checksDir, checkHash)
+	if _, err := os.Stat(checkPath); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+
+	// Try loading the checks from the persisted files
+	if err := agent.loadChecks(config); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+
+	// Ensure the erroneous check was purged. Only a not-exist error proves the
+	// file is gone; any other Stat failure must not be mistaken for a purge.
+	if _, err := os.Stat(checkPath); !os.IsNotExist(err) {
+		t.Fatalf("should have purged check")
+	}
+}