go-watchdog/watchdog_test.go

275 lines
6.6 KiB
Go
Raw Normal View History

2020-12-02 00:03:20 +00:00
package watchdog
import (
"fmt"
"io/ioutil"
"log"
"os"
"path/filepath"
2020-12-02 00:03:20 +00:00
"runtime"
"runtime/debug"
"strconv"
2020-12-02 00:03:20 +00:00
"testing"
"time"
"github.com/elastic/gosigar"
2020-12-02 00:03:20 +00:00
"github.com/raulk/clock"
"github.com/stretchr/testify/require"
)
// Names of the environment variables through which the test runner
// configures this test binary.
const (
	// EnvTestIsolated is the env variable the runner sets to confirm that
	// the test is executing in isolation, i.e. in a dedicated process.
	EnvTestIsolated = "TEST_ISOLATED"

	// EnvTestDockerMemLimit is the env variable that carries the memory
	// limit applied to the docker container running the tests.
	EnvTestDockerMemLimit = "TEST_DOCKER_MEMLIMIT"
)
// DockerMemLimit is the docker container memory limit, in bytes. The init()
// function populates it from the EnvTestDockerMemLimit env variable; it stays
// at zero when that variable is not set.
var DockerMemLimit int
func init() {
Logger = &stdlog{log: log.New(os.Stdout, "[watchdog test] ", log.LstdFlags|log.Lmsgprefix), debug: true}
if l := os.Getenv(EnvTestDockerMemLimit); l != "" {
l, err := strconv.Atoi(l)
if err != nil {
panic(err)
}
DockerMemLimit = l
}
2020-12-02 00:03:20 +00:00
}
// skipIfNotIsolated skips the calling test unless the EnvTestIsolated env
// variable is set to "1".
func skipIfNotIsolated(t *testing.T) {
	// Mark this function as a helper so the skip is reported against the
	// calling test's line rather than this one.
	t.Helper()

	if os.Getenv(EnvTestIsolated) != "1" {
		t.Skip("skipping test in non-isolated mode")
	}
}
// limit64MiB is a 64MiB memory limit, expressed in bytes.
var limit64MiB = uint64(64 * 1024 * 1024)
func TestControl_Isolated(t *testing.T) {
skipIfNotIsolated(t)
debug.SetGCPercent(100)
rounds := 100
if DockerMemLimit != 0 {
rounds /= int(float64(DockerMemLimit)*0.8) / 1024 / 1024
}
// retain 1MiB every iteration.
var retained [][]byte
for i := 0; i < rounds; i++ {
b := make([]byte, 1*1024*1024)
for i := range b {
b[i] = byte(i)
}
retained = append(retained, b)
}
2020-12-02 00:03:20 +00:00
for _, b := range retained {
for i := range b {
b[i] = byte(i)
}
}
var ms runtime.MemStats
runtime.ReadMemStats(&ms)
require.NotZero(t, ms.NumGC) // GCs have taken place, but...
require.Zero(t, ms.NumForcedGC) // ... no forced GCs beyond our initial one.
2020-12-02 00:03:20 +00:00
}
func TestHeapDriven_Isolated(t *testing.T) {
skipIfNotIsolated(t)
// we can't mock ReadMemStats, because we're relying on the go runtime to
// enforce the GC run, and the go runtime won't use our mock. Therefore, we
// need to do the actual thing.
debug.SetGCPercent(100)
2020-12-02 00:03:20 +00:00
clk := clock.NewMock()
Clock = clk
observations := make([]*runtime.MemStats, 0, 100)
NotifyGC = func() {
var ms runtime.MemStats
runtime.ReadMemStats(&ms)
observations = append(observations, &ms)
}
2020-12-02 00:03:20 +00:00
// limit is 64MiB.
err, stopFn := HeapDriven(limit64MiB, NewAdaptivePolicy(0.5))
2020-12-02 00:03:20 +00:00
require.NoError(t, err)
defer stopFn()
2020-12-02 00:03:20 +00:00
time.Sleep(500 * time.Millisecond) // give time for the watchdog to init.
2020-12-02 00:03:20 +00:00
// retain 1MiB every iteration, up to 100MiB (beyond heap limit!).
var retained [][]byte
for i := 0; i < 100; i++ {
retained = append(retained, make([]byte, 1*1024*1024))
}
2020-12-02 00:03:20 +00:00
for _, o := range observations {
fmt.Println("heap alloc:", o.HeapAlloc, "next gc:", o.NextGC, "gc count:", o.NumGC, "forced gc:", o.NumForcedGC)
}
2020-12-02 00:03:20 +00:00
var ms runtime.MemStats
runtime.ReadMemStats(&ms)
require.GreaterOrEqual(t, ms.NumGC, uint32(5)) // over 5 GCs should've taken place.
}
2020-12-02 00:03:20 +00:00
func TestSystemDriven_Isolated(t *testing.T) {
skipIfNotIsolated(t)
debug.SetGCPercent(100)
2020-12-02 00:03:20 +00:00
clk := clock.NewMock()
Clock = clk
2020-12-02 00:03:20 +00:00
// mock the system reporting.
var actualUsed uint64
sysmemFn = func(g *gosigar.Mem) error {
g.ActualUsed = actualUsed
return nil
}
2020-12-02 00:03:20 +00:00
// limit is 64MiB.
err, stopFn := SystemDriven(limit64MiB, 5*time.Second, NewAdaptivePolicy(0.5))
require.NoError(t, err)
defer stopFn()
2020-12-02 00:03:20 +00:00
time.Sleep(200 * time.Millisecond) // give time for the watchdog to init.
2020-12-02 00:03:20 +00:00
notifyCh := make(chan struct{}, 1)
NotifyGC = func() {
notifyCh <- struct{}{}
}
// first tick; used = 0.
clk.Add(5 * time.Second)
time.Sleep(200 * time.Millisecond)
require.Len(t, notifyCh, 0) // no GC has taken place.
// second tick; used = just over 50%; will trigger GC.
actualUsed = (limit64MiB / 2) + 1
clk.Add(5 * time.Second)
time.Sleep(200 * time.Millisecond)
require.Len(t, notifyCh, 1)
<-notifyCh
2020-12-02 00:03:20 +00:00
// third tick; just below 75%; no GC.
actualUsed = uint64(float64(limit64MiB)*0.75) - 1
clk.Add(5 * time.Second)
time.Sleep(200 * time.Millisecond)
require.Len(t, notifyCh, 0)
2020-12-02 00:03:20 +00:00
// fourth tick; 75% exactly; will trigger GC.
actualUsed = uint64(float64(limit64MiB)*0.75) + 1
clk.Add(5 * time.Second)
time.Sleep(200 * time.Millisecond)
require.Len(t, notifyCh, 1)
<-notifyCh
var ms runtime.MemStats
runtime.ReadMemStats(&ms)
require.GreaterOrEqual(t, ms.NumForcedGC, uint32(2))
2020-12-02 00:03:20 +00:00
}
// TestHeapdumpCapture tests that heap dumps are captured appropriately: one
// dump per episode of usage rising above HeapdumpThreshold, no further dump
// while usage stays above it, and never more than HeapdumpMaxCaptures dumps
// in total. It drives a system-driven watchdog with a mock clock and a mocked
// system memory reading, and counts the files written to a temp directory.
func TestHeapdumpCapture(t *testing.T) {
	debug.SetGCPercent(100)

	dir, err := ioutil.TempDir("", "")
	require.NoError(t, err)
	t.Cleanup(func() {
		_ = os.RemoveAll(dir) // best-effort removal of the temp dir.
	})

	// This test does not run isolated, so put back every package-level
	// setting it overrides; otherwise later tests in the same process would
	// inherit the mocks and a HeapdumpDir that no longer exists. Cleanups run
	// after the deferred stopFn below, i.e. once the watchdog was stopped.
	prevDir, prevThreshold, prevMaxCaptures := HeapdumpDir, HeapdumpThreshold, HeapdumpMaxCaptures
	prevClock, prevSysmemFn := Clock, sysmemFn
	t.Cleanup(func() {
		HeapdumpDir, HeapdumpThreshold, HeapdumpMaxCaptures = prevDir, prevThreshold, prevMaxCaptures
		Clock, sysmemFn = prevClock, prevSysmemFn
	})

	// assertFileCount requires that dir holds exactly the expected number of
	// entries.
	assertFileCount := func(expected int) {
		t.Helper()
		glob, err := filepath.Glob(filepath.Join(dir, "*"))
		require.NoError(t, err)
		require.Len(t, glob, expected)
	}

	HeapdumpDir = dir
	HeapdumpThreshold = 0.5
	HeapdumpMaxCaptures = 5

	// mock clock.
	clk := clock.NewMock()
	Clock = clk

	// mock the system reporting.
	var actualUsed uint64
	sysmemFn = func(g *gosigar.Mem) error {
		g.ActualUsed = actualUsed
		return nil
	}

	const (
		frequency = 5 * time.Second
		settle    = 200 * time.Millisecond
	)

	// init a system driven watchdog.
	err, stopFn := SystemDriven(limit64MiB, frequency, NewAdaptivePolicy(0.5))
	require.NoError(t, err)
	defer stopFn()

	time.Sleep(settle) // give time for the watchdog to init.

	// tick reports the given usage, advances the mock clock by one polling
	// period, and then waits for the watchdog to react.
	tick := func(used uint64) {
		actualUsed = used
		clk.Add(frequency)
		time.Sleep(settle)
	}

	// first tick; used = 0.
	tick(0)
	assertFileCount(0)

	// second tick; used = just over 50%; will trigger a heapdump.
	tick((limit64MiB / 2) + 1)
	assertFileCount(1)

	// third tick; continues above 50%; same episode, no heapdump.
	tick((limit64MiB / 2) + 10)
	assertFileCount(1)

	// fourth tick; below 50%; this resets the episodic flag.
	tick(limit64MiB / 3)
	assertFileCount(1)

	// fifth tick; above 50%; this triggers a new heapdump.
	tick((limit64MiB / 2) + 1)
	assertFileCount(2)

	for i := 0; i < 20; i++ {
		// below 50%; this resets the episodic flag.
		tick(limit64MiB / 3)

		// above 50%; this triggers a new heapdump.
		tick((limit64MiB / 2) + 1)
	}

	assertFileCount(5) // we only generated 5 heap dumps even though we had more episodes.

	// verify that heap dump file sizes aren't zero.
	glob, err := filepath.Glob(filepath.Join(dir, "*"))
	require.NoError(t, err)
	for _, f := range glob {
		fi, err := os.Stat(f)
		require.NoError(t, err)
		require.NotZero(t, fi.Size())
	}
}