introduce cgroup-driven watchdog; refactor.
This commit introduces the cgroup-driven watchdog, initialized by calling watchdog.CgroupDriven(). This watchdog infers the memory limit from the process's cgroup, derived either from /proc/self/cgroup or, when the PID == 1 (i.e. the process is running in a container), from the root cgroup. Tests have been added and refactored to accommodate running both locally and in a Docker container. Certain test cases must now be isolated from one another, to prevent side effects from dirty Go runtimes. A Makefile has been introduced to run all tests.
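For orientation, here is a minimal, hedged initialization sketch using the API this commit introduces (the 0.5 adaptive-policy factor and the 5s polling frequency are illustrative choices, not prescribed defaults):

```go
package main

import (
	"log"
	"time"

	"github.com/raulk/go-watchdog"
)

func main() {
	// CgroupDriven polls the process's cgroup stats at the given frequency
	// and evaluates the policy against the discovered memory limit. Note
	// the (err, stopFn) return order used throughout this library.
	err, stopFn := watchdog.CgroupDriven(5*time.Second, watchdog.NewAdaptivePolicy(0.5))
	if err != nil {
		log.Printf("cgroup-driven watchdog failed to start: %s", err)
		return
	}
	defer stopFn()

	// ... application workload runs here ...
}
```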
parent 903e001223 · commit 8676adea5c
.circleci/config.yml
@ -8,6 +8,4 @@ jobs:
    working_directory: /go/src/github.com/{{ORG_NAME}}/{{REPO_NAME}}
    steps:
      - checkout

      - run: go get -v -t -d ./...
      - run: go test -v ./...
      - run: make
@ -0,0 +1 @@
Makefile
@ -0,0 +1,22 @@
|
|||
## This Dockerfile compiles the watchdog with delve support. It enables the tests
|
||||
## to be debugged inside a container.
|
||||
##
|
||||
## Run with:
|
||||
## docker run --memory=64MiB --memory-swap=64MiB -p 2345:2345 <image> \
|
||||
## --listen=:2345 --headless=true --log=true \
|
||||
## --log-output=debugger,debuglineerr,gdbwire,lldbout,rpc \
|
||||
## --accept-multiclient --api-version=2 exec /root/watchdog.test
|
||||
##
|
||||
FROM golang:1.15.5
|
||||
WORKDIR /watchdog
|
||||
COPY . .
|
||||
RUN CGO_ENABLED=0 go get -ldflags "-s -w -extldflags '-static'" github.com/go-delve/delve/cmd/dlv
|
||||
RUN CGO_ENABLED=0 go test -gcflags "all=-N -l" -c -o ./watchdog.test
|
||||
|
||||
FROM alpine:latest
|
||||
RUN apk --no-cache add ca-certificates
|
||||
WORKDIR /root/
|
||||
COPY --from=0 /go/bin/dlv /dlv
|
||||
COPY --from=0 /watchdog/watchdog.test .
|
||||
ENTRYPOINT [ "/dlv" ]
|
||||
EXPOSE 2345
|
|
Dockerfile.test
@ -0,0 +1,10 @@
FROM golang:1.15.5
WORKDIR /watchdog
COPY . .
RUN CGO_ENABLED=0 GOOS=linux go test -c -o watchdog.test

FROM alpine:latest
RUN apk --no-cache add ca-certificates
WORKDIR /root/
COPY --from=0 /watchdog/watchdog.test .
CMD ["/root/watchdog.test", "-test.v"]
Makefile
@ -0,0 +1,30 @@
SHELL = /bin/bash

.PHONY: test

# these tests run in isolation by calling go test -run=... or the equivalent.
ISOLATED_TESTS = TestControl_Isolated \
	TestSystemDriven_Isolated \
	TestHeapDriven_Isolated \
	TestCgroupsDriven_Create_Isolated \
	TestCgroupsDriven_Docker_Isolated

test: test-binary test-docker

test-binary:
	go test -v ./... # run all the non-isolated tests.
	# foreach does not execute each iteration; it expands into a single block
	# of text that is executed all at once. That's why we append && true: to
	# short-circuit if a test fails.
	$(foreach name,$(ISOLATED_TESTS),TEST_ISOLATED=1 go test -v -test.run=$(name) ./... && ) true

test-docker: docker
	docker run --memory=32MiB --memory-swap=32MiB -e TEST_DOCKER_MEMLIMIT=33554432 raulk/watchdog:latest
	$(foreach name,$(ISOLATED_TESTS),docker run \
		--memory=32MiB --memory-swap=32MiB \
		-e TEST_ISOLATED=1 \
		-e TEST_DOCKER_MEMLIMIT=33554432 \
		raulk/watchdog:latest /root/watchdog.test -test.v -test.run=$(name) ./... && ) true

docker:
	docker build -f ./Dockerfile.test -t raulk/watchdog:latest .
README.md
@ -5,27 +5,59 @@
[![godocs](https://img.shields.io/badge/godoc-reference-5272B4.svg?style=flat-square)](https://godoc.org/github.com/raulk/go-watchdog)
[![build status](https://circleci.com/gh/raulk/go-watchdog.svg?style=svg)](https://circleci.com/gh/raulk/go-watchdog)

go-watchdog runs a singleton memory watchdog in the process, which watches
memory utilization and forces Go GC in accordance with a user-defined policy.
Package watchdog runs a singleton memory watchdog in the process, which
watches memory utilization and forces Go GC in accordance with a
user-defined policy.

There are two kinds of watchdog so far:
There are three kinds of watchdogs:

* **heap-driven:** applies a limit to the heap, and obtains current usage through
  `runtime.ReadMemStats()`.
* **system-driven:** applies a limit to the total system memory used, and obtains
  current usage through [`elastic/go-sigar`](https://github.com/elastic/gosigar).
1. heap-driven (`watchdog.HeapDriven()`): applies a heap limit, adjusting GOGC
   dynamically in accordance with the policy.
2. system-driven (`watchdog.SystemDriven()`): applies a limit to the total
   system memory used, obtaining the current usage through elastic/go-sigar.
3. cgroups-driven (`watchdog.CgroupDriven()`): discovers the memory limit from
   the cgroup of the process (derived from /proc/self/cgroup), or from the
   root cgroup path if the PID == 1 (which indicates that the process is
   running in a container). It uses the cgroup stats to obtain the
   current usage.

A third process-driven watchdog that uses cgroups is underway.
The watchdog's behaviour is controlled by the policy, a pluggable function
that determines when to trigger GC based on the current utilization. This
library ships with two policies:

This library ships with two policies out of the box:
1. watermarks policy (`watchdog.NewWatermarkPolicy()`): runs GC at configured
   watermarks of memory utilisation.
2. adaptive policy (`watchdog.NewAdaptivePolicy()`): runs GC when the current
   usage surpasses a dynamically-set threshold.

* watermarks policy: runs GC at configured watermarks of system or heap memory
  utilisation.
* adaptive policy: runs GC when the current usage surpasses a dynamically-set
  threshold.
You can easily write a custom policy tailored to the allocation patterns of
your program.

You can easily build a custom policy tailored to the allocation patterns of your
program.
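The Policy surface appears in this diff only indirectly: a `PolicyCtor` is invoked with the discovered limit, and `Evaluate(scope, used)` returns the next trigger threshold in bytes. Under those assumptions, a custom policy is a short, hedged sketch:

```go
package mypolicy

import "github.com/raulk/go-watchdog"

// fixedFraction is a hypothetical custom policy: it always places the next
// GC trigger threshold at a fixed fraction of the limit, regardless of the
// current usage.
type fixedFraction struct {
	threshold uint64
}

// Evaluate assumes the signature suggested by this diff: it receives the
// utilization scope and the current usage, and returns the next threshold.
func (p *fixedFraction) Evaluate(scope watchdog.UtilizationType, used uint64) (next uint64) {
	return p.threshold
}

// NewFixedFraction returns a watchdog.PolicyCtor; the watchdog calls it with
// the limit it discovered (heap, system, or cgroup, depending on the mode).
func NewFixedFraction(fraction float64) watchdog.PolicyCtor {
	return func(limit uint64) (watchdog.Policy, error) {
		return &fixedFraction{threshold: uint64(float64(limit) * fraction)}, nil
	}
}
```

Such a constructor can then be passed anywhere a `PolicyCtor` is accepted, e.g. `watchdog.CgroupDriven(5*time.Second, mypolicy.NewFixedFraction(0.75))`.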
## Recommended way to set up the watchdog

The recommended way to set up the watchdog is as follows, in descending order
of precedence. This logic assumes that the library supports setting a heap
limit through an environment variable (e.g. MYAPP_HEAP_MAX) or config key.
A sketch of this order follows the list.

1. If a heap limit is set and legal, initialize a heap-driven watchdog.
2. Otherwise, try to use the cgroup-driven watchdog. If it succeeds, return.
3. Otherwise, try to initialize a system-driven watchdog. If it succeeds, return.
4. Watchdog initialization failed. Log a warning to inform the user that
   they're flying solo.
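A minimal sketch of that precedence order, assuming a hypothetical `MYAPP_HEAP_MAX` environment variable (not part of this library) and using the adaptive policy purely as an example:

```go
package main

import (
	"log"
	"os"
	"strconv"
	"time"

	"github.com/raulk/go-watchdog"
)

// initWatchdog applies the recommended precedence: heap-driven if a heap
// limit is configured, else cgroup-driven, else system-driven, else log a
// warning. It returns a stop function (a no-op if nothing started).
func initWatchdog() (stop func()) {
	policy := watchdog.NewAdaptivePolicy(0.5)

	// 1. heap-driven, if MYAPP_HEAP_MAX (hypothetical) is set and legal.
	if s := os.Getenv("MYAPP_HEAP_MAX"); s != "" {
		if limit, err := strconv.ParseUint(s, 10, 64); err == nil && limit > 0 {
			if err, stop := watchdog.HeapDriven(limit, policy); err == nil {
				return stop
			}
		}
	}
	// 2. cgroup-driven.
	if err, stop := watchdog.CgroupDriven(5*time.Second, policy); err == nil {
		return stop
	}
	// 3. system-driven; a zero limit means "use total system memory".
	if err, stop := watchdog.SystemDriven(0, 5*time.Second, policy); err == nil {
		return stop
	}
	// 4. nothing started.
	log.Println("watchdog initialization failed; flying solo")
	return func() {}
}
```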
## Running the tests

Given the low-level nature of this component, some tests need to run in
isolation, so that they don't carry over Go runtime metrics. For completeness,
this module uses a Docker image for testing, so we can simulate cgroup memory
limits.

The test execution and docker builds have been conveniently packaged in a
Makefile. Run with:

```shell
$ make
```

## Why is this even needed?
@ -0,0 +1,40 @@
// Package watchdog runs a singleton memory watchdog in the process, which
// watches memory utilization and forces Go GC in accordance with a
// user-defined policy.
//
// There are three kinds of watchdogs:
//
// 1. heap-driven (watchdog.HeapDriven()): applies a heap limit, adjusting GOGC
//    dynamically in accordance with the policy.
// 2. system-driven (watchdog.SystemDriven()): applies a limit to the total
//    system memory used, obtaining the current usage through elastic/go-sigar.
// 3. cgroups-driven (watchdog.CgroupDriven()): discovers the memory limit from
//    the cgroup of the process (derived from /proc/self/cgroup), or from the
//    root cgroup path if the PID == 1 (which indicates that the process is
//    running in a container). It uses the cgroup stats to obtain the
//    current usage.
//
// The watchdog's behaviour is controlled by the policy, a pluggable function
// that determines when to trigger GC based on the current utilization. This
// library ships with two policies:
//
// 1. watermarks policy (watchdog.NewWatermarkPolicy()): runs GC at configured
//    watermarks of memory utilisation.
// 2. adaptive policy (watchdog.NewAdaptivePolicy()): runs GC when the current
//    usage surpasses a dynamically-set threshold.
//
// You can easily write a custom policy tailored to the allocation patterns of
// your program.
//
// Recommended way to set up the watchdog
//
// The recommended way to set up the watchdog is as follows, in descending order
// of precedence. This logic assumes that the library supports setting a heap
// limit through an environment variable (e.g. MYAPP_HEAP_MAX) or config key.
//
// 1. If a heap limit is set and legal, initialize a heap-driven watchdog.
// 2. Otherwise, try to use the cgroup-driven watchdog. If it succeeds, return.
// 3. Otherwise, try to initialize a system-driven watchdog. If it succeeds, return.
// 4. Watchdog initialization failed. Log a warning to inform the user that
//    they're flying solo.
package watchdog
go.mod
@ -6,6 +6,7 @@ require (
	github.com/containerd/cgroups v0.0.0-20201119153540-4cbc285b3327
	github.com/elastic/gosigar v0.12.0
	github.com/kr/pretty v0.1.0 // indirect
	github.com/opencontainers/runtime-spec v1.0.2
	github.com/raulk/clock v1.1.0
	github.com/stretchr/testify v1.4.0
	gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect
sys_linux.go
@ -1,26 +0,0 @@
package watchdog

import (
	"os"

	"github.com/containerd/cgroups"
)

func ProcessMemoryLimit() uint64 {
	var (
		pid          = os.Getpid()
		memSubsystem = cgroups.SingleSubsystem(cgroups.V1, cgroups.Memory)
	)
	cgroup, err := cgroups.Load(memSubsystem, cgroups.PidPath(pid))
	if err != nil {
		return 0
	}
	metrics, err := cgroup.Stat()
	if err != nil {
		return 0
	}
	if metrics.Memory == nil {
		return 0
	}
	return metrics.Memory.HierarchicalMemoryLimit
}
@ -1,7 +0,0 @@
// +build !linux

package watchdog

func ProcessMemoryLimit() uint64 {
	return 0
}
watchdog.go
@ -1,6 +1,7 @@
package watchdog

import (
	"errors"
	"fmt"
	"log"
	"math"

@ -13,6 +14,10 @@ import (
	"github.com/raulk/clock"
)

// ErrNotSupported is returned when the watchdog does not support the requested
// run mode in the current OS/arch.
var ErrNotSupported = errors.New("watchdog run mode not supported")

// PolicyTempDisabled is a marker value for policies to signal that the policy
// is temporarily disabled. Use it when all hope is lost to turn around from
// significant memory pressure (such as when above an "extreme" watermark).

@ -28,9 +33,8 @@ var (
	// Clock can be used to inject a mock clock for testing.
	Clock = clock.New()

	// NotifyFired, if non-nil, will be called when the policy has fired,
	// prior to calling GC, even if GC is disabled.
	NotifyFired func() = func() {}
	// NotifyGC, if non-nil, will be called when a GC has happened.
	NotifyGC func() = func() {}
)

var (

@ -104,6 +108,8 @@ const (
	// UtilizationSystem specifies that the policy compares against actual used
	// system memory.
	UtilizationSystem UtilizationType = iota
	// UtilizationProcess specifies that the watchdog is using process limits.
	UtilizationProcess
	// UtilizationHeap specifies that the policy compares against heap used.
	UtilizationHeap
)

@ -126,13 +132,6 @@ type Policy interface {
//
// A zero-valued limit will error.
func HeapDriven(limit uint64, policyCtor PolicyCtor) (err error, stopFn func()) {
	_watchdog.lk.Lock()
	defer _watchdog.lk.Unlock()

	if _watchdog.state != stateUnstarted {
		return ErrAlreadyStarted, nil
	}

	if limit == 0 {
		return fmt.Errorf("cannot use zero limit for heap-driven watchdog"), nil
	}

@ -142,9 +141,9 @@ func HeapDriven(limit uint64, policyCtor PolicyCtor) (err error, stopFn func())
		return fmt.Errorf("failed to construct policy with limit %d: %w", limit, err), nil
	}

	_watchdog.state = stateRunning
	_watchdog.scope = UtilizationHeap
	_watchdog.closing = make(chan struct{})
	if err := start(UtilizationHeap); err != nil {
		return err, nil
	}

	gcTriggered := make(chan struct{}, 16)
	setupGCSentinel(gcTriggered)

@ -163,7 +162,7 @@ func HeapDriven(limit uint64, policyCtor PolicyCtor) (err error, stopFn func())
	for {
		select {
		case <-gcTriggered:
			NotifyFired()
			NotifyGC()

		case <-_watchdog.closing:
			return
@ -218,18 +217,12 @@
// This threshold is calculated by querying the policy every time that GC runs,
// either triggered by the runtime, or forced by us.
func SystemDriven(limit uint64, frequency time.Duration, policyCtor PolicyCtor) (err error, stopFn func()) {
	_watchdog.lk.Lock()
	defer _watchdog.lk.Unlock()

	if _watchdog.state != stateUnstarted {
		return ErrAlreadyStarted, nil
	}

	if limit == 0 {
		limit, err = determineLimit(false)
		if err != nil {
			return err, nil
		var sysmem gosigar.Mem
		if err := sysmemFn(&sysmem); err != nil {
			return fmt.Errorf("failed to get system memory stats: %w", err), nil
		}
		limit = sysmem.Total
	}

	policy, err := policyCtor(limit)

@ -237,31 +230,47 @@
		return fmt.Errorf("failed to construct policy with limit %d: %w", limit, err), nil
	}

	_watchdog.state = stateRunning
	_watchdog.scope = UtilizationSystem
	_watchdog.closing = make(chan struct{})
	if err := start(UtilizationSystem); err != nil {
		return err, nil
	}

	_watchdog.wg.Add(1)
	var sysmem gosigar.Mem
	go pollingWatchdog(policy, frequency, func() (uint64, error) {
		if err := sysmemFn(&sysmem); err != nil {
			return 0, err
		}
		return sysmem.ActualUsed, nil
	})

	return nil, stop
}

// pollingWatchdog starts a polling watchdog with the provided policy, using
// the supplied polling frequency. On every tick, it calls usageFn and, if the
// usage is greater or equal to the threshold at the time, it forces GC.
// usageFn is guaranteed to be called serially, so no locking should be
// necessary.
func pollingWatchdog(policy Policy, frequency time.Duration, usageFn func() (uint64, error)) {
	defer _watchdog.wg.Done()

	gcTriggered := make(chan struct{}, 16)
	setupGCSentinel(gcTriggered)

	_watchdog.wg.Add(1)
	go func() {
		defer _watchdog.wg.Done()

		var (
			memstats  runtime.MemStats
			sysmem    gosigar.Mem
			threshold uint64
		)

		renewThreshold := func() {
			// get the current usage.
			if err := sysmemFn(&sysmem); err != nil {
				Logger.Warnf("failed to obtain system memory stats; err: %s", err)
			usage, err := usageFn()
			if err != nil {
				Logger.Warnf("failed to obtain memory utilization stats; err: %s", err)
				return
			}
			// calculate the threshold.
			threshold = policy.Evaluate(UtilizationSystem, sysmem.ActualUsed)
			threshold = policy.Evaluate(_watchdog.scope, usage)
		}

		// initialize the threshold.

@ -281,22 +290,22 @@
		select {
		case <-timer.C:
			// get the current usage.
			if err := sysmemFn(&sysmem); err != nil {
				Logger.Warnf("failed to obtain system memory stats; err: %s", err)
			usage, err := usageFn()
			if err != nil {
				Logger.Warnf("failed to obtain memory utilization stats; err: %s", err)
				continue
			}
			actual := sysmem.ActualUsed
			if actual < threshold {
			if usage < threshold {
				// nothing to do.
				continue
			}
			// trigger GC; this will emit a gcTriggered event which we'll
			// consume next to readjust the threshold.
			Logger.Warnf("system-driven watchdog triggering GC; %d/%d bytes (used/threshold)", actual, threshold)
			Logger.Warnf("system-driven watchdog triggering GC; %d/%d bytes (used/threshold)", usage, threshold)
			forceGC(&memstats)

		case <-gcTriggered:
			NotifyFired()
			NotifyGC()

			renewThreshold()

@ -307,27 +316,6 @@
			return
		}
	}
	}()

	return nil, stop
}

func determineLimit(restrictByProcess bool) (uint64, error) {
	// TODO.
	// if restrictByProcess {
	// 	if pmem := ProcessMemoryLimit(); pmem > 0 {
	// 		Logger.Infof("watchdog using process limit: %d bytes", pmem)
	// 		return pmem, nil
	// 	}
	// 	Logger.Infof("watchdog was unable to determine process limit; falling back to total system memory")
	// }

	// populate initial utilisation and system stats.
	var sysmem gosigar.Mem
	if err := sysmemFn(&sysmem); err != nil {
		return 0, fmt.Errorf("failed to get system memory stats: %w", err)
	}
	return sysmem.Total, nil
}

// forceGC forces a manual GC.

@ -379,6 +367,20 @@ func setupGCSentinel(gcTriggered chan struct{}) {
	runtime.SetFinalizer(&sentinel{}, finalizer) // start the flywheel.
}

func start(scope UtilizationType) error {
	_watchdog.lk.Lock()
	defer _watchdog.lk.Unlock()

	if _watchdog.state != stateUnstarted {
		return ErrAlreadyStarted
	}

	_watchdog.state = stateRunning
	_watchdog.scope = scope
	_watchdog.closing = make(chan struct{})
	return nil
}

func stop() {
	_watchdog.lk.Lock()
	defer _watchdog.lk.Unlock()
@ -0,0 +1,73 @@
package watchdog

import (
	"fmt"
	"os"
	"time"

	"github.com/containerd/cgroups"
)

var (
	pid          = os.Getpid()
	memSubsystem = cgroups.SingleSubsystem(cgroups.V1, cgroups.Memory)
)

// CgroupDriven initializes a cgroups-driven watchdog. It will try to discover
// the memory limit from the cgroup of the process (derived from /proc/self/cgroup),
// or from the root cgroup path if the PID == 1 (which indicates that the process
// is running in a container).
//
// Memory usage is calculated by querying the cgroup stats.
//
// This function will return an error immediately if the OS does not support cgroups,
// or if another error occurs during initialization. The caller can then safely fall
// back to the system-driven watchdog.
func CgroupDriven(frequency time.Duration, policyCtor PolicyCtor) (err error, stopFn func()) {
	// use self path unless our PID is 1, in which case we're running inside
	// a container and our limits are in the root path.
	path := cgroups.NestedPath("")
	if pid := os.Getpid(); pid == 1 {
		path = cgroups.RootPath
	}

	cgroup, err := cgroups.Load(memSubsystem, path)
	if err != nil {
		return fmt.Errorf("failed to load cgroup for process: %w", err), nil
	}

	var limit uint64
	if stat, err := cgroup.Stat(); err != nil {
		return fmt.Errorf("failed to load memory cgroup stats: %w", err), nil
	} else if stat.Memory == nil || stat.Memory.Usage == nil {
		return fmt.Errorf("cgroup memory stats are nil; aborting"), nil
	} else {
		limit = stat.Memory.Usage.Limit
	}

	if limit == 0 {
		return fmt.Errorf("cgroup limit is 0; refusing to start memory watchdog"), nil
	}

	policy, err := policyCtor(limit)
	if err != nil {
		return fmt.Errorf("failed to construct policy with limit %d: %w", limit, err), nil
	}

	if err := start(UtilizationProcess); err != nil {
		return err, nil
	}

	_watchdog.wg.Add(1)
	go pollingWatchdog(policy, frequency, func() (uint64, error) {
		stat, err := cgroup.Stat()
		if err != nil {
			return 0, err
		} else if stat.Memory == nil || stat.Memory.Usage == nil {
			return 0, fmt.Errorf("cgroup memory stats are nil; aborting")
		}
		return stat.Memory.Usage.Usage, nil
	})

	return nil, stop
}
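Since CgroupDriven fails fast when cgroups are unavailable (the non-Linux stub below returns an error wrapping ErrNotSupported), a caller can detect that case explicitly. A hedged sketch of the fallback the doc comment describes:

```go
package main

import (
	"errors"
	"log"
	"time"

	"github.com/raulk/go-watchdog"
)

// startWithFallback prefers the cgroup-driven watchdog; per the doc comment
// above, any initialization error makes it safe to fall back to the
// system-driven watchdog.
func startWithFallback(policy watchdog.PolicyCtor) (error, func()) {
	err, stop := watchdog.CgroupDriven(5*time.Second, policy)
	if err == nil {
		return nil, stop
	}
	if errors.Is(err, watchdog.ErrNotSupported) {
		// non-Linux platform: cgroups are simply unavailable here.
		log.Println("no cgroup support on this platform; using system memory")
	} else {
		log.Printf("cgroup watchdog failed to start (%s); using system memory", err)
	}
	// a zero limit means "use total system memory".
	return watchdog.SystemDriven(0, 5*time.Second, policy)
}
```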
@ -0,0 +1,125 @@
package watchdog

import (
	"fmt"
	"log"
	"os"
	"runtime"
	"runtime/debug"
	"testing"
	"time"

	"github.com/containerd/cgroups"
	"github.com/opencontainers/runtime-spec/specs-go"
	"github.com/raulk/clock"
	"github.com/stretchr/testify/require"
)

// retained will hoard unreclaimable byte buffers in the heap.
var retained [][]byte

func TestCgroupsDriven_Create_Isolated(t *testing.T) {
	skipIfNotIsolated(t)

	if os.Getpid() == 1 {
		// we are running in Docker and cannot create a cgroup.
		t.Skipf("cannot create a cgroup while running in non-privileged docker")
	}

	// new cgroup limit.
	var limit = uint64(32 << 20) // 32MiB.
	createMemoryCgroup(t, limit)

	testCgroupsWatchdog(t, limit)
}

func TestCgroupsDriven_Docker_Isolated(t *testing.T) {
	skipIfNotIsolated(t)

	testCgroupsWatchdog(t, uint64(DockerMemLimit))
}

func testCgroupsWatchdog(t *testing.T, limit uint64) {
	t.Cleanup(func() {
		retained = nil
	})

	runtime.GC()                  // first GC to clear any junk from other tests.
	debug.SetGCPercent(100000000) // disable GC.

	clk := clock.NewMock()
	Clock = clk

	notifyCh := make(chan struct{}, 1)
	NotifyGC = func() {
		notifyCh <- struct{}{}
	}

	err, stopFn := CgroupDriven(5*time.Second, NewAdaptivePolicy(0.5))
	require.NoError(t, err)
	defer stopFn()

	time.Sleep(200 * time.Millisecond) // give time for the watchdog to init.

	maxSlabs := limit / (1 << 20) // number of 1MiB slabs to take up the entire limit.

	// first tick; nothing should happen.
	clk.Add(5 * time.Second)
	time.Sleep(200 * time.Millisecond)
	require.Len(t, notifyCh, 0) // no GC has taken place.

	// allocate 50% of limit in heap (to be added to other mem usage).
	for i := 0; i < (int(maxSlabs))/2; i++ {
		retained = append(retained, func() []byte {
			b := make([]byte, 1*1024*1024)
			for i := range b {
				b[i] = 0xff
			}
			return b
		}())
	}

	// second tick; used = just over 50%; will trigger GC.
	clk.Add(5 * time.Second)
	time.Sleep(200 * time.Millisecond)
	require.NotNil(t, <-notifyCh)

	var memstats runtime.MemStats
	runtime.ReadMemStats(&memstats)
	require.EqualValues(t, 2, memstats.NumForcedGC)
}

// createMemoryCgroup creates a memory cgroup to restrict the memory available
// to this test.
func createMemoryCgroup(t *testing.T, limit uint64) {
	l := int64(limit)
	path := cgroups.NestedPath(fmt.Sprintf("/%d", time.Now().UnixNano()))
	cgroup, err := cgroups.New(cgroups.V1, path, &specs.LinuxResources{
		Memory: &specs.LinuxMemory{
			Limit: &l,
			Swap:  &l,
		},
	})

	require.NoError(t, err, "failed to create a cgroup")
	t.Cleanup(func() {
		root, err := cgroups.Load(cgroups.V1, cgroups.RootPath)
		if err != nil {
			t.Logf("failed to resolve root cgroup: %s", err)
			return
		}
		if err = root.Add(cgroups.Process{Pid: pid}); err != nil {
			t.Logf("failed to move process to root cgroup: %s", err)
			return
		}
		if err = cgroup.Delete(); err != nil {
			t.Logf("failed to clean up temp cgroup: %s", err)
		}
	})

	log.Printf("cgroup created")

	// add process to cgroup.
	err = cgroup.Add(cgroups.Process{Pid: pid})
	require.NoError(t, err)
}
@ -0,0 +1,13 @@
// +build !linux

package watchdog

import (
	"fmt"
	"time"
)

// CgroupDriven is only available on Linux. This method will error.
func CgroupDriven(frequency time.Duration, policyCtor PolicyCtor) (err error, stopFn func()) {
	return fmt.Errorf("cgroups-driven watchdog: %w", ErrNotSupported), nil
}
@ -0,0 +1,15 @@
// +build !linux

package watchdog

import "testing"

func TestCgroupsDriven_Create_Isolated(t *testing.T) {
	// this test only runs on linux.
	t.Skip("test only valid on linux")
}

func TestCgroupsDriven_Docker_Isolated(t *testing.T) {
	// this test only runs on linux.
	t.Skip("test only valid on linux")
}
watchdog_test.go
@ -6,6 +6,7 @@ import (
	"os"
	"runtime"
	"runtime/debug"
	"strconv"
	"testing"
	"time"

@ -14,29 +15,54 @@ import (
	"github.com/stretchr/testify/require"
)

// These integration tests are hugely non-deterministic, but necessary to get
// good coverage and confidence. The Go runtime makes its own pacing decisions,
// and those may vary based on machine, OS, kernel memory management, other
// running programs, exogenous memory pressure, and Go runtime versions.
//
// The assertions we use here are lax, but should be sufficient to serve as a
// reasonable litmus test of whether the watchdog is doing what it's supposed
// to or not.
const (
	// EnvTestIsolated is a marker property for the runner to confirm that this
	// test is running in isolation (i.e. a dedicated process).
	EnvTestIsolated = "TEST_ISOLATED"

	// EnvTestDockerMemLimit is the memory limit applied in a docker container.
	EnvTestDockerMemLimit = "TEST_DOCKER_MEMLIMIT"
)

// DockerMemLimit is initialized in the init() function from the
// EnvTestDockerMemLimit env variable.
var DockerMemLimit int // bytes

func init() {
	Logger = &stdlog{log: log.New(os.Stdout, "[watchdog test] ", log.LstdFlags|log.Lmsgprefix), debug: true}

	if l := os.Getenv(EnvTestDockerMemLimit); l != "" {
		l, err := strconv.Atoi(l)
		if err != nil {
			panic(err)
		}
		DockerMemLimit = l
	}
}

func skipIfNotIsolated(t *testing.T) {
	if os.Getenv(EnvTestIsolated) != "1" {
		t.Skipf("skipping test in non-isolated mode")
	}
}

var (
	limit uint64 = 64 << 20 // 64MiB.
)

func init() {
	Logger = &stdlog{log: log.New(os.Stdout, "[watchdog test] ", log.LstdFlags|log.Lmsgprefix), debug: true}
}

func TestControl_Isolated(t *testing.T) {
	skipIfNotIsolated(t)

func TestControl(t *testing.T) {
	debug.SetGCPercent(100)

	// retain 1MiB every iteration, up to 100MiB (beyond heap limit!).
	rounds := 100
	if DockerMemLimit != 0 {
		rounds /= int(float64(DockerMemLimit)*0.8) / 1024 / 1024
	}

	// retain 1MiB every iteration.
	var retained [][]byte
	for i := 0; i < 100; i++ {
	for i := 0; i < rounds; i++ {
		b := make([]byte, 1*1024*1024)
		for i := range b {
			b[i] = byte(i)

@ -52,11 +78,13 @@ func TestControl(t *testing.T) {

	var ms runtime.MemStats
	runtime.ReadMemStats(&ms)
	require.LessOrEqual(t, ms.NumGC, uint32(5)) // a maximum of 5 GCs should've happened.
	require.Zero(t, ms.NumForcedGC)             // no forced GCs.
	require.NotZero(t, ms.NumGC)    // GCs have taken place, but...
	require.Zero(t, ms.NumForcedGC) // ... no forced GCs beyond our initial one.
}

func TestHeapDriven(t *testing.T) {
func TestHeapDriven_Isolated(t *testing.T) {
	skipIfNotIsolated(t)

	// we can't mock ReadMemStats, because we're relying on the go runtime to
	// enforce the GC run, and the go runtime won't use our mock. Therefore, we
	// need to do the actual thing.

@ -66,7 +94,7 @@ func TestHeapDriven(t *testing.T) {
	Clock = clk

	observations := make([]*runtime.MemStats, 0, 100)
	NotifyFired = func() {
	NotifyGC = func() {
		var ms runtime.MemStats
		runtime.ReadMemStats(&ms)
		observations = append(observations, &ms)

@ -94,7 +122,9 @@ func TestHeapDriven(t *testing.T) {
	require.GreaterOrEqual(t, ms.NumGC, uint32(9)) // at least 9 GCs should've taken place.
}

func TestSystemDriven(t *testing.T) {
func TestSystemDriven_Isolated(t *testing.T) {
	skipIfNotIsolated(t)

	debug.SetGCPercent(100)

	clk := clock.NewMock()

@ -115,7 +145,7 @@ func TestSystemDriven(t *testing.T) {
	time.Sleep(200 * time.Millisecond) // give time for the watchdog to init.

	notifyCh := make(chan struct{}, 1)
	NotifyFired = func() {
	NotifyGC = func() {
		notifyCh <- struct{}{}
	}