From 4e325a6b8fc3d34050804f1957d2832324261024 Mon Sep 17 00:00:00 2001 From: Kyle Havlovitz Date: Fri, 19 Jan 2018 14:07:36 -0800 Subject: [PATCH 1/2] Add graceful handling of malformed persisted service/check files. A previous change made the file writes atomic, but that is not enough to cover something like an OS crash, which can still leave an empty or malformed file behind. Empty files are now removed with a warning, and files that fail to decode are logged and skipped instead of aborting the load. Fixes #1221. --- agent/agent.go | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index 933700f0da..50848f30f5 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -2150,12 +2150,23 @@ func (a *Agent) loadServices(conf *config.RuntimeConfig) error { return fmt.Errorf("failed reading service file %q: %s", file, err) } + // If the file ended up empty as a result of something like an OS crash, remove + // it for convenience and log about it. + if len(buf) == 0 { + a.logger.Printf("[WARN] Removing leftover empty service file %q", file) + if err := os.Remove(file); err != nil { + a.logger.Printf("[WARN] Error removing leftover empty service file %q: %v", file, err) + } + continue + } + // Try decoding the service definition var p persistedService if err := json.Unmarshal(buf, &p); err != nil { // Backwards-compatibility for pre-0.5.1 persisted services if err := json.Unmarshal(buf, &p.Service); err != nil { - return fmt.Errorf("failed decoding service file %q: %s", file, err) + a.logger.Printf("[WARN] Failed decoding service file %q: %s", file, err) + continue } } serviceID := p.Service.ID @@ -2231,10 +2242,21 @@ func (a *Agent) loadChecks(conf *config.RuntimeConfig) error { return fmt.Errorf("failed reading check file %q: %s", file, err) } + // If the file ended up empty as a result of something like an OS crash, remove + // it for convenience and log about it. 
+ if len(buf) == 0 { + a.logger.Printf("[WARN] Removing leftover empty check file %q", file) + if err := os.Remove(file); err != nil { + a.logger.Printf("[WARN] Error removing leftover empty check file %q: %v", file, err) + } + continue + } + // Decode the check var p persistedCheck if err := json.Unmarshal(buf, &p); err != nil { - return fmt.Errorf("Failed decoding check file %q: %s", file, err) + a.logger.Printf("[WARN] Failed decoding check file %q: %s", file, err) + continue } checkID := p.Check.CheckID From 68ae92cb8cc4e59aaf1bf79832bfc3c34366ce00 Mon Sep 17 00:00:00 2001 From: Kyle Havlovitz Date: Fri, 19 Jan 2018 14:25:51 -0800 Subject: [PATCH 2/2] Don't remove the files, just log an error --- agent/agent.go | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index 50848f30f5..b078ee8836 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -2150,22 +2150,12 @@ func (a *Agent) loadServices(conf *config.RuntimeConfig) error { return fmt.Errorf("failed reading service file %q: %s", file, err) } - // If the file ended up empty as a result of something like an OS crash, remove - // it for convenience and log about it. 
- if len(buf) == 0 { - a.logger.Printf("[WARN] Removing leftover empty service file %q", file) - if err := os.Remove(file); err != nil { - a.logger.Printf("[WARN] Error removing leftover empty service file %q: %v", file, err) - } - continue - } - // Try decoding the service definition var p persistedService if err := json.Unmarshal(buf, &p); err != nil { // Backwards-compatibility for pre-0.5.1 persisted services if err := json.Unmarshal(buf, &p.Service); err != nil { - a.logger.Printf("[WARN] Failed decoding service file %q: %s", file, err) + a.logger.Printf("[ERR] Failed decoding service file %q: %s", file, err) continue } } @@ -2242,20 +2232,10 @@ func (a *Agent) loadChecks(conf *config.RuntimeConfig) error { return fmt.Errorf("failed reading check file %q: %s", file, err) } - // If the file ended up empty as a result of something like an OS crash, remove - // it for convenience and log about it. - if len(buf) == 0 { - a.logger.Printf("[WARN] Removing leftover empty check file %q", file) - if err := os.Remove(file); err != nil { - a.logger.Printf("[WARN] Error removing leftover empty check file %q: %v", file, err) - } - continue - } - // Decode the check var p persistedCheck if err := json.Unmarshal(buf, &p); err != nil { - a.logger.Printf("[WARN] Failed decoding check file %q: %s", file, err) + a.logger.Printf("[ERR] Failed decoding check file %q: %s", file, err) continue } checkID := p.Check.CheckID