remove 'immediate' flag in policies.

This commit is contained in:
Raúl Kripalani 2020-12-09 15:35:23 +00:00
parent 12a1d3f053
commit 5f00469e3a
6 changed files with 50 additions and 84 deletions

View File

@ -3,8 +3,7 @@ package watchdog
// NewAdaptivePolicy creates a policy that forces GC when the usage surpasses a
// user-configured percentage (factor) of the available memory.
//
// This policy recalculates the next target as usage+(limit-usage)*factor, and
// forces immediate GC when used >= limit.
// This policy recalculates the next target as usage+(limit-usage)*factor.
func NewAdaptivePolicy(factor float64) PolicyCtor {
return func(limit uint64) (Policy, error) {
return &adaptivePolicy{
@ -21,12 +20,12 @@ type adaptivePolicy struct {
var _ Policy = (*adaptivePolicy)(nil)
func (p *adaptivePolicy) Evaluate(_ UtilizationType, used uint64) (next uint64, immediate bool) {
func (p *adaptivePolicy) Evaluate(_ UtilizationType, used uint64) (next uint64) {
if used >= p.limit {
return used, true
return used
}
available := float64(p.limit) - float64(used)
next = used + uint64(available*p.factor)
return next, false
return next
}

View File

@ -15,17 +15,14 @@ func TestAdaptivePolicy(t *testing.T) {
require.NoError(t, err)
// at zero; next = 50%.
next, immediate := p.Evaluate(UtilizationSystem, 0)
require.False(t, immediate)
next := p.Evaluate(UtilizationSystem, 0)
require.EqualValues(t, limit/2, next)
// at half; next = 75%.
next, immediate = p.Evaluate(UtilizationSystem, limit/2)
require.False(t, immediate)
next = p.Evaluate(UtilizationSystem, limit/2)
require.EqualValues(t, 3*(limit/4), next)
// at limit; immediate = true.
next, immediate = p.Evaluate(UtilizationSystem, limit)
require.True(t, immediate)
// at limit.
next = p.Evaluate(UtilizationSystem, limit)
require.EqualValues(t, limit, next)
}

View File

@ -3,6 +3,7 @@ package watchdog
import (
"fmt"
"log"
"math"
"runtime"
"runtime/debug"
"sync"
@ -12,6 +13,11 @@ import (
"github.com/raulk/clock"
)
// PolicyTempDisabled is a marker value for policies to signal that the policy
// is temporarily disabled. Use it when all hope is lost to turn around from
// significant memory pressure (such as when above an "extreme" watermark).
const PolicyTempDisabled uint64 = math.MaxUint64
// The watchdog is designed to be used as a singleton; global vars are OK for
// that reason.
var (
@ -110,24 +116,15 @@ type PolicyCtor func(limit uint64) (Policy, error)
type Policy interface {
// Evaluate determines when the next GC should take place. It receives the
// current usage, and it returns the next usage at which to trigger GC.
//
// The policy can request immediate GC, in which case next should match the
// used memory.
Evaluate(scope UtilizationType, used uint64) (next uint64, immediate bool)
Evaluate(scope UtilizationType, used uint64) (next uint64)
}
// HeapDriven starts a singleton heap-driven watchdog.
//
// The heap-driven watchdog adjusts GOGC dynamically after every GC, to honour
// the policy. When an immediate GC is requested, runtime.GC() is called, and
// the policy is re-evaluated at the end of GC.
// the policy requirements.
//
// It is entirely possible for the policy to keep requesting immediate GC
// repeatedly. This usually signals an emergency situation, and won't prevent
// the program from making progress, since Go's garbage collection is not
// stop-the-world (for the major part).
//
// A limit value of 0 will error.
// A zero-valued limit will error.
func HeapDriven(limit uint64, policyCtor PolicyCtor) (err error, stopFn func()) {
_watchdog.lk.Lock()
defer _watchdog.lk.Unlock()
@ -185,24 +182,11 @@ func HeapDriven(limit uint64, policyCtor PolicyCtor) (err error, stopFn func())
}
// evaluate the policy.
next, immediate := policy.Evaluate(UtilizationHeap, memstats.HeapAlloc)
if immediate {
// trigger a forced GC; because we're not making the finalizer
// skip sending to the trigger channel, we will get fired again.
// at this stage, the program is under significant pressure, and
// given that Go GC is not STW for the largest part, the worse
// thing that could happen from infinitely GC'ing is that the
// program will run in a degrated state for longer, possibly
// long enough for an operator to intervene.
Logger.Warnf("heap-driven watchdog requested immediate GC; " +
"system is probably under significant pressure; " +
"performance compromised")
forceGC(&memstats)
continue
}
next := policy.Evaluate(UtilizationHeap, memstats.HeapAlloc)
// calculate how much to set GOGC to honour the next trigger point.
// next=PolicyTempDisabled value would make currGOGC extremely high,
// greater than originalGOGC, and therefore we'd restore originalGOGC.
currGOGC = int(((float64(next) / float64(heapMarked)) - float64(1)) * 100)
if currGOGC >= originalGOGC {
Logger.Debugf("heap watchdog: requested GOGC percent higher than default; capping at default; requested: %d; default: %d", currGOGC, originalGOGC)
@ -270,15 +254,19 @@ func SystemDriven(limit uint64, frequency time.Duration, policyCtor PolicyCtor)
threshold uint64
)
// initialize the threshold.
threshold, immediate := policy.Evaluate(UtilizationSystem, sysmem.ActualUsed)
if immediate {
Logger.Warnf("system-driven watchdog requested immediate GC upon startup; " +
"policy is probably misconfigured; " +
"performance compromised")
forceGC(&memstats)
renewThreshold := func() {
// get the current usage.
if err := sysmemFn(&sysmem); err != nil {
Logger.Warnf("failed to obtain system memory stats; err: %s", err)
return
}
// calculate the threshold.
threshold = policy.Evaluate(UtilizationSystem, sysmem.ActualUsed)
}
// initialize the threshold.
renewThreshold()
for {
select {
case <-Clock.After(frequency):
@ -300,18 +288,7 @@ func SystemDriven(limit uint64, frequency time.Duration, policyCtor PolicyCtor)
case <-gcTriggered:
NotifyFired()
// get the current usage.
if err := sysmemFn(&sysmem); err != nil {
Logger.Warnf("failed to obtain system memory stats; err: %s", err)
continue
}
// adjust the threshold.
threshold, immediate = policy.Evaluate(UtilizationSystem, sysmem.ActualUsed)
if immediate {
Logger.Warnf("system-driven watchdog triggering immediate GC; %d used bytes", sysmem.ActualUsed)
forceGC(&memstats)
}
renewThreshold()
case <-_watchdog.closing:
return

View File

@ -91,8 +91,7 @@ func TestHeapDriven(t *testing.T) {
var ms runtime.MemStats
runtime.ReadMemStats(&ms)
require.GreaterOrEqual(t, ms.NumGC, uint32(12)) // over 12 GCs should've taken place.
require.GreaterOrEqual(t, ms.NumForcedGC, uint32(5)) // at least 5 forced GCs.
require.GreaterOrEqual(t, ms.NumGC, uint32(10)) // at least 10 GCs should've taken place.
}
func TestSystemDriven(t *testing.T) {

View File

@ -3,7 +3,8 @@ package watchdog
// NewWatermarkPolicy creates a watchdog policy that schedules GC at concrete
// watermarks. When queried, it will determine the next trigger point based
// on the current utilisation. If the last watermark is surpassed,
// the policy will request immediate GC.
// the policy will be disarmed. It is recommended to set an extreme watermark
// as the last element (e.g. 0.99) to prevent the policy from disarming too soon.
func NewWatermarkPolicy(watermarks ...float64) PolicyCtor {
return func(limit uint64) (Policy, error) {
p := new(watermarkPolicy)
@ -27,15 +28,15 @@ type watermarkPolicy struct {
var _ Policy = (*watermarkPolicy)(nil)
func (w *watermarkPolicy) Evaluate(_ UtilizationType, used uint64) (next uint64, immediate bool) {
func (w *watermarkPolicy) Evaluate(_ UtilizationType, used uint64) (next uint64) {
Logger.Debugf("watermark policy: evaluating; utilization: %d/%d (used/limit)", used, w.limit)
var i int
for ; i < len(w.thresholds); i++ {
t := w.thresholds[i]
if used < t {
return t, false
return t
}
}
// we reached the maximum threshold, so fire immediately.
return used, true
// we reached the maximum threshold, so we disable this policy.
return PolicyTempDisabled
}

View File

@ -26,36 +26,29 @@ func TestProgressiveWatermarks(t *testing.T) {
require.NoError(t, err)
// at zero
next, immediate := p.Evaluate(UtilizationSystem, uint64(0))
require.False(t, immediate)
next := p.Evaluate(UtilizationSystem, uint64(0))
require.EqualValues(t, thresholds[0], next)
// before the watermark.
next, immediate = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[0])-1)
require.False(t, immediate)
next = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[0])-1)
require.EqualValues(t, thresholds[0], next)
// exactly at the watermark; gives us the next watermark, as the watchdog would've
// taken care of triggering the first watermark.
next, immediate = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[0]))
require.False(t, immediate)
next = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[0]))
require.EqualValues(t, thresholds[1], next)
// after the watermark gives us the next watermark.
next, immediate = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[0])+1)
require.False(t, immediate)
next = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[0])+1)
require.EqualValues(t, thresholds[1], next)
// last watermark; always triggers.
next, immediate = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[2]))
require.True(t, immediate)
require.EqualValues(t, uint64(float64(limit)*watermarks[2]), next)
// last watermark; disable the policy.
next = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[2]))
require.EqualValues(t, PolicyTempDisabled, next)
next, immediate = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[2]+1))
require.True(t, immediate)
require.EqualValues(t, uint64(float64(limit)*watermarks[2])+1, next)
next = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[2]+1))
require.EqualValues(t, PolicyTempDisabled, next)
next, immediate = p.Evaluate(UtilizationSystem, limit)
require.True(t, immediate)
require.EqualValues(t, limit, next)
next = p.Evaluate(UtilizationSystem, limit)
require.EqualValues(t, PolicyTempDisabled, next)
}