remove 'immediate' flag in policies.
This commit is contained in:
parent
12a1d3f053
commit
5f00469e3a
|
@ -3,8 +3,7 @@ package watchdog
|
|||
// NewAdaptivePolicy creates a policy that forces GC when the usage surpasses a
|
||||
// user-configured percentage (factor) of the available memory.
|
||||
//
|
||||
// This policy recalculates the next target as usage+(limit-usage)*factor, and
|
||||
// forces immediate GC when used >= limit.
|
||||
// This policy recalculates the next target as usage+(limit-usage)*factor.
|
||||
func NewAdaptivePolicy(factor float64) PolicyCtor {
|
||||
return func(limit uint64) (Policy, error) {
|
||||
return &adaptivePolicy{
|
||||
|
@ -21,12 +20,12 @@ type adaptivePolicy struct {
|
|||
|
||||
var _ Policy = (*adaptivePolicy)(nil)
|
||||
|
||||
func (p *adaptivePolicy) Evaluate(_ UtilizationType, used uint64) (next uint64, immediate bool) {
|
||||
func (p *adaptivePolicy) Evaluate(_ UtilizationType, used uint64) (next uint64) {
|
||||
if used >= p.limit {
|
||||
return used, true
|
||||
return used
|
||||
}
|
||||
|
||||
available := float64(p.limit) - float64(used)
|
||||
next = used + uint64(available*p.factor)
|
||||
return next, false
|
||||
return next
|
||||
}
|
||||
|
|
|
@ -15,17 +15,14 @@ func TestAdaptivePolicy(t *testing.T) {
|
|||
require.NoError(t, err)
|
||||
|
||||
// at zero; next = 50%.
|
||||
next, immediate := p.Evaluate(UtilizationSystem, 0)
|
||||
require.False(t, immediate)
|
||||
next := p.Evaluate(UtilizationSystem, 0)
|
||||
require.EqualValues(t, limit/2, next)
|
||||
|
||||
// at half; next = 75%.
|
||||
next, immediate = p.Evaluate(UtilizationSystem, limit/2)
|
||||
require.False(t, immediate)
|
||||
next = p.Evaluate(UtilizationSystem, limit/2)
|
||||
require.EqualValues(t, 3*(limit/4), next)
|
||||
|
||||
// at limit; immediate = true.
|
||||
next, immediate = p.Evaluate(UtilizationSystem, limit)
|
||||
require.True(t, immediate)
|
||||
// at limit.
|
||||
next = p.Evaluate(UtilizationSystem, limit)
|
||||
require.EqualValues(t, limit, next)
|
||||
}
|
||||
|
|
71
watchdog.go
71
watchdog.go
|
@ -3,6 +3,7 @@ package watchdog
|
|||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"math"
|
||||
"runtime"
|
||||
"runtime/debug"
|
||||
"sync"
|
||||
|
@ -12,6 +13,11 @@ import (
|
|||
"github.com/raulk/clock"
|
||||
)
|
||||
|
||||
// PolicyTempDisabled is a marker value for policies to signal that the policy
|
||||
// is temporarily disabled. Use it when all hope is lost to turn around from
|
||||
// significant memory pressure (such as when above an "extreme" watermark).
|
||||
const PolicyTempDisabled uint64 = math.MaxUint64
|
||||
|
||||
// The watchdog is designed to be used as a singleton; global vars are OK for
|
||||
// that reason.
|
||||
var (
|
||||
|
@ -110,24 +116,15 @@ type PolicyCtor func(limit uint64) (Policy, error)
|
|||
type Policy interface {
|
||||
// Evaluate determines when the next GC should take place. It receives the
|
||||
// current usage, and it returns the next usage at which to trigger GC.
|
||||
//
|
||||
// The policy can request immediate GC, in which case next should match the
|
||||
// used memory.
|
||||
Evaluate(scope UtilizationType, used uint64) (next uint64, immediate bool)
|
||||
Evaluate(scope UtilizationType, used uint64) (next uint64)
|
||||
}
|
||||
|
||||
// HeapDriven starts a singleton heap-driven watchdog.
|
||||
//
|
||||
// The heap-driven watchdog adjusts GOGC dynamically after every GC, to honour
|
||||
// the policy. When an immediate GC is requested, runtime.GC() is called, and
|
||||
// the policy is re-evaluated at the end of GC.
|
||||
// the policy requirements.
|
||||
//
|
||||
// It is entirely possible for the policy to keep requesting immediate GC
|
||||
// repeatedly. This usually signals an emergency situation, and won't prevent
|
||||
// the program from making progress, since Go's garbage collection is not
|
||||
// stop-the-world (for the major part).
|
||||
//
|
||||
// A limit value of 0 will error.
|
||||
// A zero-valued limit will error.
|
||||
func HeapDriven(limit uint64, policyCtor PolicyCtor) (err error, stopFn func()) {
|
||||
_watchdog.lk.Lock()
|
||||
defer _watchdog.lk.Unlock()
|
||||
|
@ -185,24 +182,11 @@ func HeapDriven(limit uint64, policyCtor PolicyCtor) (err error, stopFn func())
|
|||
}
|
||||
|
||||
// evaluate the policy.
|
||||
next, immediate := policy.Evaluate(UtilizationHeap, memstats.HeapAlloc)
|
||||
|
||||
if immediate {
|
||||
// trigger a forced GC; because we're not making the finalizer
|
||||
// skip sending to the trigger channel, we will get fired again.
|
||||
// at this stage, the program is under significant pressure, and
|
||||
// given that Go GC is not STW for the largest part, the worst
|
||||
// thing that could happen from infinitely GC'ing is that the
|
||||
// program will run in a degraded state for longer, possibly
|
||||
// long enough for an operator to intervene.
|
||||
Logger.Warnf("heap-driven watchdog requested immediate GC; " +
|
||||
"system is probably under significant pressure; " +
|
||||
"performance compromised")
|
||||
forceGC(&memstats)
|
||||
continue
|
||||
}
|
||||
next := policy.Evaluate(UtilizationHeap, memstats.HeapAlloc)
|
||||
|
||||
// calculate how much to set GOGC to honour the next trigger point.
|
||||
// next=PolicyTempDisabled value would make currGOGC extremely high,
|
||||
// greater than originalGOGC, and therefore we'd restore originalGOGC.
|
||||
currGOGC = int(((float64(next) / float64(heapMarked)) - float64(1)) * 100)
|
||||
if currGOGC >= originalGOGC {
|
||||
Logger.Debugf("heap watchdog: requested GOGC percent higher than default; capping at default; requested: %d; default: %d", currGOGC, originalGOGC)
|
||||
|
@ -270,15 +254,19 @@ func SystemDriven(limit uint64, frequency time.Duration, policyCtor PolicyCtor)
|
|||
threshold uint64
|
||||
)
|
||||
|
||||
// initialize the threshold.
|
||||
threshold, immediate := policy.Evaluate(UtilizationSystem, sysmem.ActualUsed)
|
||||
if immediate {
|
||||
Logger.Warnf("system-driven watchdog requested immediate GC upon startup; " +
|
||||
"policy is probably misconfigured; " +
|
||||
"performance compromised")
|
||||
forceGC(&memstats)
|
||||
renewThreshold := func() {
|
||||
// get the current usage.
|
||||
if err := sysmemFn(&sysmem); err != nil {
|
||||
Logger.Warnf("failed to obtain system memory stats; err: %s", err)
|
||||
return
|
||||
}
|
||||
// calculate the threshold.
|
||||
threshold = policy.Evaluate(UtilizationSystem, sysmem.ActualUsed)
|
||||
}
|
||||
|
||||
// initialize the threshold.
|
||||
renewThreshold()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-Clock.After(frequency):
|
||||
|
@ -300,18 +288,7 @@ func SystemDriven(limit uint64, frequency time.Duration, policyCtor PolicyCtor)
|
|||
case <-gcTriggered:
|
||||
NotifyFired()
|
||||
|
||||
// get the current usage.
|
||||
if err := sysmemFn(&sysmem); err != nil {
|
||||
Logger.Warnf("failed to obtain system memory stats; err: %s", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// adjust the threshold.
|
||||
threshold, immediate = policy.Evaluate(UtilizationSystem, sysmem.ActualUsed)
|
||||
if immediate {
|
||||
Logger.Warnf("system-driven watchdog triggering immediate GC; %d used bytes", sysmem.ActualUsed)
|
||||
forceGC(&memstats)
|
||||
}
|
||||
renewThreshold()
|
||||
|
||||
case <-_watchdog.closing:
|
||||
return
|
||||
|
|
|
@ -91,8 +91,7 @@ func TestHeapDriven(t *testing.T) {
|
|||
|
||||
var ms runtime.MemStats
|
||||
runtime.ReadMemStats(&ms)
|
||||
require.GreaterOrEqual(t, ms.NumGC, uint32(12)) // over 12 GCs should've taken place.
|
||||
require.GreaterOrEqual(t, ms.NumForcedGC, uint32(5)) // at least 5 forced GCs.
|
||||
require.GreaterOrEqual(t, ms.NumGC, uint32(10)) // at least 10 GCs should've taken place.
|
||||
}
|
||||
|
||||
func TestSystemDriven(t *testing.T) {
|
||||
|
|
|
@ -3,7 +3,8 @@ package watchdog
|
|||
// NewWatermarkPolicy creates a watchdog policy that schedules GC at concrete
|
||||
// watermarks. When queried, it will determine the next trigger point based
|
||||
// on the current utilisation. If the last watermark is surpassed,
|
||||
// the policy will request immediate GC.
|
||||
// the policy will be disarmed. It is recommended to set an extreme watermark
|
||||
// as the last element (e.g. 0.99) to prevent the policy from disarming too soon.
|
||||
func NewWatermarkPolicy(watermarks ...float64) PolicyCtor {
|
||||
return func(limit uint64) (Policy, error) {
|
||||
p := new(watermarkPolicy)
|
||||
|
@ -27,15 +28,15 @@ type watermarkPolicy struct {
|
|||
|
||||
var _ Policy = (*watermarkPolicy)(nil)
|
||||
|
||||
func (w *watermarkPolicy) Evaluate(_ UtilizationType, used uint64) (next uint64, immediate bool) {
|
||||
func (w *watermarkPolicy) Evaluate(_ UtilizationType, used uint64) (next uint64) {
|
||||
Logger.Debugf("watermark policy: evaluating; utilization: %d/%d (used/limit)", used, w.limit)
|
||||
var i int
|
||||
for ; i < len(w.thresholds); i++ {
|
||||
t := w.thresholds[i]
|
||||
if used < t {
|
||||
return t, false
|
||||
return t
|
||||
}
|
||||
}
|
||||
// we reached the maximum threshold, so fire immediately.
|
||||
return used, true
|
||||
// we reached the maximum threshold, so we disable this policy.
|
||||
return PolicyTempDisabled
|
||||
}
|
||||
|
|
|
@ -26,36 +26,29 @@ func TestProgressiveWatermarks(t *testing.T) {
|
|||
require.NoError(t, err)
|
||||
|
||||
// at zero
|
||||
next, immediate := p.Evaluate(UtilizationSystem, uint64(0))
|
||||
require.False(t, immediate)
|
||||
next := p.Evaluate(UtilizationSystem, uint64(0))
|
||||
require.EqualValues(t, thresholds[0], next)
|
||||
|
||||
// before the watermark.
|
||||
next, immediate = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[0])-1)
|
||||
require.False(t, immediate)
|
||||
next = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[0])-1)
|
||||
require.EqualValues(t, thresholds[0], next)
|
||||
|
||||
// exactly at the watermark; gives us the next watermark, as the watchdog would've
|
||||
// taken care of triggering the first watermark.
|
||||
next, immediate = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[0]))
|
||||
require.False(t, immediate)
|
||||
next = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[0]))
|
||||
require.EqualValues(t, thresholds[1], next)
|
||||
|
||||
// after the watermark gives us the next watermark.
|
||||
next, immediate = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[0])+1)
|
||||
require.False(t, immediate)
|
||||
next = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[0])+1)
|
||||
require.EqualValues(t, thresholds[1], next)
|
||||
|
||||
// last watermark; always triggers.
|
||||
next, immediate = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[2]))
|
||||
require.True(t, immediate)
|
||||
require.EqualValues(t, uint64(float64(limit)*watermarks[2]), next)
|
||||
// last watermark; disable the policy.
|
||||
next = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[2]))
|
||||
require.EqualValues(t, PolicyTempDisabled, next)
|
||||
|
||||
next, immediate = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[2]+1))
|
||||
require.True(t, immediate)
|
||||
require.EqualValues(t, uint64(float64(limit)*watermarks[2])+1, next)
|
||||
next = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[2]+1))
|
||||
require.EqualValues(t, PolicyTempDisabled, next)
|
||||
|
||||
next, immediate = p.Evaluate(UtilizationSystem, limit)
|
||||
require.True(t, immediate)
|
||||
require.EqualValues(t, limit, next)
|
||||
next = p.Evaluate(UtilizationSystem, limit)
|
||||
require.EqualValues(t, PolicyTempDisabled, next)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue