From ffd284de368cae7a3de91ecf549868c3a235ae41 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Wed, 25 Apr 2018 17:39:32 -0700 Subject: [PATCH] agent/proxy: exponential backoff on restarts --- agent/proxy/daemon.go | 40 ++++++++++++++++++++++++++++++++++++++++ agent/proxy/manager.go | 1 - 2 files changed, 40 insertions(+), 1 deletion(-) delete mode 100644 agent/proxy/manager.go diff --git a/agent/proxy/daemon.go b/agent/proxy/daemon.go index 74fa62d440..3a8c1b11b2 100644 --- a/agent/proxy/daemon.go +++ b/agent/proxy/daemon.go @@ -6,6 +6,16 @@ import ( "os" "os/exec" "sync" + "time" +) + +// Constants related to restart timers with the daemon mode proxies. At some +// point we will probably want to expose these knobs to an end user, but +// reasonable defaults are chosen. +const ( + DaemonRestartHealthy = 10 * time.Second // time before considering healthy + DaemonRestartBackoffMin = 3 // 3 attempts before backing off + DaemonRestartMaxWait = 1 * time.Minute // maximum backoff wait time ) // Daemon is a long-running proxy process. It is expected to keep running @@ -68,8 +78,38 @@ func (p *Daemon) keepAlive(stopCh chan struct{}) { process := p.process p.lock.Unlock() + // attemptsDeadline is the time at which we consider the daemon to have + // been alive long enough that we can reset the attempt counter. + // + // attempts keeps track of the number of restart attempts we've had and + // is used to calculate the wait time using an exponential backoff. 
+ var attemptsDeadline time.Time + var attempts uint + for { if process == nil { + // If we're past the attempt deadline then reset the attempts + if !attemptsDeadline.IsZero() && time.Now().After(attemptsDeadline) { + attempts = 0 + } + attemptsDeadline = time.Now().Add(DaemonRestartHealthy) + attempts++ + + // Calculate the exponential backoff and wait if we have to + if attempts > DaemonRestartBackoffMin { + waitTime := (1 << (attempts - DaemonRestartBackoffMin)) * time.Second + if waitTime > DaemonRestartMaxWait { + waitTime = DaemonRestartMaxWait + } + + if waitTime > 0 { + p.Logger.Printf( + "[WARN] agent/proxy: waiting %s before restarting daemon", + waitTime) + time.Sleep(waitTime) + } + } + p.lock.Lock() // If we gracefully stopped (stopCh is closed) then don't restart. We diff --git a/agent/proxy/manager.go b/agent/proxy/manager.go deleted file mode 100644 index 943b369ffe..0000000000 --- a/agent/proxy/manager.go +++ /dev/null @@ -1 +0,0 @@ -package proxy