From 60dacecb39a44e5e9e914d1498843763a08e1980 Mon Sep 17 00:00:00 2001 From: Ryan Breen Date: Wed, 17 Dec 2014 21:44:12 -0500 Subject: [PATCH 1/4] Add a randomized start before running CheckMonitors. --- command/agent/check.go | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/command/agent/check.go b/command/agent/check.go index 180f6db13a..82b8c84f61 100644 --- a/command/agent/check.go +++ b/command/agent/check.go @@ -5,6 +5,7 @@ import ( "github.com/armon/circbuf" "github.com/hashicorp/consul/consul/structs" "log" + "math/rand" "os/exec" "sync" "syscall" @@ -91,9 +92,30 @@ func (c *CheckMonitor) Stop() { } } +// getInitialPauseTime returns the random duration we should wait before starting this CheckMonitor, +// preventing potentially large numbers of checks from firing concurrently by staggering their starts. +func (c *CheckMonitor) getInitialPauseTime() time.Duration { + var initialPauseTime time.Duration + intervalSeconds := int(c.Interval.Seconds()) + if intervalSeconds > 0 { + // If the check interval is greater than 500ms, as it will be in all real-world cases due to the + // application of MinInterval, start after some random number of seconds between 0 and c.Interval + initialPauseTime = time.Duration(rand.Intn(intervalSeconds)) * time.Second + } else { + // Test cases may use sub-second intervals. In this case, return 0 as the pause duration. + initialPauseTime = time.Duration(0) + } + return initialPauseTime +} + // run is invoked by a goroutine to run until Stop() is called func (c *CheckMonitor) run() { - next := time.After(0) + + // Get the randomized initial pause time + initialPauseTime := c.getInitialPauseTime() + + c.Logger.Printf("[DEBUG] agent: pausing %ds before first invocation of %s", initialPauseTime, c.Script) + next := time.After(initialPauseTime) for { select { case <-next: From 2e58f54a263a741dbdd6bd959e608f463186a82e Mon Sep 17 00:00:00 2001 From: Ryan Breen Date: Wed, 17 Dec 2014 22:24:41 -0500 Subject: [PATCH 2/4] Well, that sure is cleaner. --- command/agent/check.go | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/command/agent/check.go b/command/agent/check.go index 82b8c84f61..ad2c35dbd3 100644 --- a/command/agent/check.go +++ b/command/agent/check.go @@ -5,7 +5,6 @@ import ( "github.com/armon/circbuf" "github.com/hashicorp/consul/consul/structs" "log" - "math/rand" "os/exec" "sync" "syscall" @@ -92,29 +91,11 @@ func (c *CheckMonitor) Stop() { } } -// getInitialPauseTime returns the random duration we should wait before starting this CheckMonitor, -// preventing potentially large numbers of checks from firing concurrently by staggering their starts. -func (c *CheckMonitor) getInitialPauseTime() time.Duration { - var initialPauseTime time.Duration - intervalSeconds := int(c.Interval.Seconds()) - if intervalSeconds > 0 { - // If the check interval is greater than 500ms, as it will be in all real-world cases due to the - // application of MinInterval, start after some random number of seconds between 0 and c.Interval - initialPauseTime = time.Duration(rand.Intn(intervalSeconds)) * time.Second - } else { - // Test cases may use sub-second intervals. In this case, return 0 as the pause duration. - initialPauseTime = time.Duration(0) - } - return initialPauseTime -} - // run is invoked by a goroutine to run until Stop() is called func (c *CheckMonitor) run() { - // Get the randomized initial pause time - initialPauseTime := c.getInitialPauseTime() - - c.Logger.Printf("[DEBUG] agent: pausing %ds before first invocation of %s", initialPauseTime, c.Script) + initialPauseTime := randomStagger(c.Interval) + c.Logger.Printf("[DEBUG] agent: pausing %ds before first invocation of %s", int(initialPauseTime.Seconds()), c.Script) next := time.After(initialPauseTime) for { select { From d82ef7bb8c69acfbea054939dd52fde4b7b3388c Mon Sep 17 00:00:00 2001 From: Ryan Breen Date: Wed, 17 Dec 2014 22:39:11 -0500 Subject: [PATCH 3/4] Test that staggered checks run within the defined interval. --- command/agent/check_test.go | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/command/agent/check_test.go b/command/agent/check_test.go index 19e087c67e..62cbafb7c4 100644 --- a/command/agent/check_test.go +++ b/command/agent/check_test.go @@ -65,6 +65,34 @@ func TestCheckMonitor_BadCmd(t *testing.T) { expectStatus(t, "foobarbaz", structs.HealthCritical) } +func TestCheckMonitor_RandomStagger(t *testing.T) { + mock := &MockNotify{ + state: make(map[string]string), + updates: make(map[string]int), + output: make(map[string]string), + } + check := &CheckMonitor{ + Notify: mock, + CheckID: "foo", + Script: "exit 0", + Interval: 5 * time.Second, + Logger: log.New(os.Stderr, "", log.LstdFlags), + } + check.Start() + defer check.Stop() + + time.Sleep(6 * time.Second) + + // Should have at least 1 update + if mock.updates["foo"] < 1 { + t.Fatalf("should have 1 updates %v", mock.updates) + } + + if mock.state["foo"] != structs.HealthPassing { + t.Fatalf("should be %v %v", structs.HealthPassing, mock.state) + } +} + func TestCheckMonitor_LimitOutput(t *testing.T) { mock := &MockNotify{ state: make(map[string]string), From f2bd6414023f1e8c033b84b6545a898100a967ff Mon Sep 17 00:00:00 2001 From: Ryan Breen Date: Thu, 18 Dec 2014 09:00:51 -0500 Subject: [PATCH 4/4] Clean up log line and reduce test time. --- command/agent/check.go | 2 +- command/agent/check_test.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/command/agent/check.go b/command/agent/check.go index ad2c35dbd3..17e2fb5f0a 100644 --- a/command/agent/check.go +++ b/command/agent/check.go @@ -95,7 +95,7 @@ func (c *CheckMonitor) Stop() { func (c *CheckMonitor) run() { // Get the randomized initial pause time initialPauseTime := randomStagger(c.Interval) - c.Logger.Printf("[DEBUG] agent: pausing %ds before first invocation of %s", int(initialPauseTime.Seconds()), c.Script) + c.Logger.Printf("[DEBUG] agent: pausing %v before first invocation of %s", initialPauseTime, c.Script) next := time.After(initialPauseTime) for { select { diff --git a/command/agent/check_test.go b/command/agent/check_test.go index 62cbafb7c4..6a6f80afc3 100644 --- a/command/agent/check_test.go +++ b/command/agent/check_test.go @@ -75,17 +75,17 @@ func TestCheckMonitor_RandomStagger(t *testing.T) { Notify: mock, CheckID: "foo", Script: "exit 0", - Interval: 5 * time.Second, + Interval: 25 * time.Millisecond, Logger: log.New(os.Stderr, "", log.LstdFlags), } check.Start() defer check.Stop() - time.Sleep(6 * time.Second) + time.Sleep(50 * time.Millisecond) // Should have at least 1 update if mock.updates["foo"] < 1 { - t.Fatalf("should have 1 updates %v", mock.updates) + t.Fatalf("should have 1 or more updates %v", mock.updates) } if mock.state["foo"] != structs.HealthPassing {