remove 'immediate' flag in policies.

2025-02-12 09:17:41 +00:00 · 2020-12-09 15:35:23 +00:00 · 2020-12-09 15:35:23 +00:00 · 5f00469e3a
commit 5f00469e3a
parent 12a1d3f053
6 changed files with 50 additions and 84 deletions
--- a/adaptive.go
+++ b/adaptive.go
@ -3,8 +3,7 @@ package watchdog
 // NewAdaptivePolicy creates a policy that forces GC when the usage surpasses a
 // user-configured percentage (factor) of the available memory.
 //
-// This policy recalculates the next target as usage+(limit-usage)*factor, and
-// forces immediate GC when used >= limit.
+// This policy recalculates the next target as usage+(limit-usage)*factor.
 func NewAdaptivePolicy(factor float64) PolicyCtor {
 	return func(limit uint64) (Policy, error) {
 		return &adaptivePolicy{
@ -21,12 +20,12 @@ type adaptivePolicy struct {

 var _ Policy = (*adaptivePolicy)(nil)

-func (p *adaptivePolicy) Evaluate(_ UtilizationType, used uint64) (next uint64, immediate bool) {
+func (p *adaptivePolicy) Evaluate(_ UtilizationType, used uint64) (next uint64) {
 	if used >= p.limit {
-		return used, true
+		return used
 	}

 	available := float64(p.limit) - float64(used)
 	next = used + uint64(available*p.factor)
-	return next, false
+	return next
 }
--- a/adaptive_test.go
+++ b/adaptive_test.go
@ -15,17 +15,14 @@ func TestAdaptivePolicy(t *testing.T) {
 	require.NoError(t, err)

 	// at zero; next = 50%.
-	next, immediate := p.Evaluate(UtilizationSystem, 0)
-	require.False(t, immediate)
+	next := p.Evaluate(UtilizationSystem, 0)
 	require.EqualValues(t, limit/2, next)

 	// at half; next = 75%.
-	next, immediate = p.Evaluate(UtilizationSystem, limit/2)
-	require.False(t, immediate)
+	next = p.Evaluate(UtilizationSystem, limit/2)
 	require.EqualValues(t, 3*(limit/4), next)

-	// at limit; immediate = true.
-	next, immediate = p.Evaluate(UtilizationSystem, limit)
-	require.True(t, immediate)
+	// at limit.
+	next = p.Evaluate(UtilizationSystem, limit)
 	require.EqualValues(t, limit, next)
 }
--- a/watchdog.go
+++ b/watchdog.go
@ -3,6 +3,7 @@ package watchdog
 import (
 	"fmt"
 	"log"
+	"math"
 	"runtime"
 	"runtime/debug"
 	"sync"
@ -12,6 +13,11 @@ import (
 	"github.com/raulk/clock"
 )

+// PolicyTempDisabled is a marker value for policies to signal that the policy
+// is temporarily disabled. Use it when there is no hope of recovering from
+// significant memory pressure (such as when above an "extreme" watermark).
+const PolicyTempDisabled uint64 = math.MaxUint64
+
 // The watchdog is designed to be used as a singleton; global vars are OK for
 // that reason.
 var (
@ -110,24 +116,15 @@ type PolicyCtor func(limit uint64) (Policy, error)
 type Policy interface {
 	// Evaluate determines when the next GC should take place. It receives the
 	// current usage, and it returns the next usage at which to trigger GC.
-	//
-	// The policy can request immediate GC, in which case next should match the
-	// used memory.
-	Evaluate(scope UtilizationType, used uint64) (next uint64, immediate bool)
+	Evaluate(scope UtilizationType, used uint64) (next uint64)
 }

 // HeapDriven starts a singleton heap-driven watchdog.
 //
 // The heap-driven watchdog adjusts GOGC dynamically after every GC, to honour
-// the policy. When an immediate GC is requested, runtime.GC() is called, and
-// the policy is re-evaluated at the end of GC.
+// the policy requirements.
 //
-// It is entirely possible for the policy to keep requesting immediate GC
-// repeateadly. This usually signals an emergency situation, and won't prevent
-// the program from making progress, since the Go's garbage collection is not
-// stop-the-world (for the major part).
-//
-// A limit value of 0 will error.
+// A zero-valued limit will error.
 func HeapDriven(limit uint64, policyCtor PolicyCtor) (err error, stopFn func()) {
 	_watchdog.lk.Lock()
 	defer _watchdog.lk.Unlock()
@ -185,24 +182,11 @@ func HeapDriven(limit uint64, policyCtor PolicyCtor) (err error, stopFn func())
 			}

 			// evaluate the policy.
-			next, immediate := policy.Evaluate(UtilizationHeap, memstats.HeapAlloc)
-
-			if immediate {
-				// trigger a forced GC; because we're not making the finalizer
-				// skip sending to the trigger channel, we will get fired again.
-				// at this stage, the program is under significant pressure, and
-				// given that Go GC is not STW for the largest part, the worse
-				// thing that could happen from infinitely GC'ing is that the
-				// program will run in a degrated state for longer, possibly
-				// long enough for an operator to intervene.
-				Logger.Warnf("heap-driven watchdog requested immediate GC; " +
-					"system is probably under significant pressure; " +
-					"performance compromised")
-				forceGC(&memstats)
-				continue
-			}
+			next := policy.Evaluate(UtilizationHeap, memstats.HeapAlloc)

 			// calculate how much to set GOGC to honour the next trigger point.
+			// A next value of PolicyTempDisabled makes currGOGC extremely high
+			// (greater than originalGOGC), so it gets capped at originalGOGC below.
 			currGOGC = int(((float64(next) / float64(heapMarked)) - float64(1)) * 100)
 			if currGOGC >= originalGOGC {
 				Logger.Debugf("heap watchdog: requested GOGC percent higher than default; capping at default; requested: %d; default: %d", currGOGC, originalGOGC)
@ -270,15 +254,19 @@ func SystemDriven(limit uint64, frequency time.Duration, policyCtor PolicyCtor)
 			threshold uint64
 		)

-		// initialize the threshold.
-		threshold, immediate := policy.Evaluate(UtilizationSystem, sysmem.ActualUsed)
-		if immediate {
-			Logger.Warnf("system-driven watchdog requested immediate GC upon startup; " +
-				"policy is probably misconfigured; " +
-				"performance compromised")
-			forceGC(&memstats)
+		renewThreshold := func() {
+			// get the current usage.
+			if err := sysmemFn(&sysmem); err != nil {
+				Logger.Warnf("failed to obtain system memory stats; err: %s", err)
+				return
+			}
+			// calculate the threshold.
+			threshold = policy.Evaluate(UtilizationSystem, sysmem.ActualUsed)
 		}

+		// initialize the threshold.
+		renewThreshold()
+
 		for {
 			select {
 			case <-Clock.After(frequency):
@ -300,18 +288,7 @@ func SystemDriven(limit uint64, frequency time.Duration, policyCtor PolicyCtor)
 			case <-gcTriggered:
 				NotifyFired()

-				// get the current usage.
-				if err := sysmemFn(&sysmem); err != nil {
-					Logger.Warnf("failed to obtain system memory stats; err: %s", err)
-					continue
-				}
-
-				// adjust the threshold.
-				threshold, immediate = policy.Evaluate(UtilizationSystem, sysmem.ActualUsed)
-				if immediate {
-					Logger.Warnf("system-driven watchdog triggering immediate GC; %d used bytes", sysmem.ActualUsed)
-					forceGC(&memstats)
-				}
+				renewThreshold()

 			case <-_watchdog.closing:
 				return
--- a/watchdog_test.go
+++ b/watchdog_test.go
@ -91,8 +91,7 @@ func TestHeapDriven(t *testing.T) {

 	var ms runtime.MemStats
 	runtime.ReadMemStats(&ms)
-	require.GreaterOrEqual(t, ms.NumGC, uint32(12))      // over 12 GCs should've taken place.
-	require.GreaterOrEqual(t, ms.NumForcedGC, uint32(5)) // at least 5 forced GCs.
+	require.GreaterOrEqual(t, ms.NumGC, uint32(10)) // at least 10 GCs should've taken place.
 }

 func TestSystemDriven(t *testing.T) {
--- a/watermarks.go
+++ b/watermarks.go
@ -3,7 +3,8 @@ package watchdog
 // NewWatermarkPolicy creates a watchdog policy that schedules GC at concrete
 // watermarks. When queried, it will determine the next trigger point based
 // on the current utilisation. If the last watermark is surpassed,
-// the policy will request immediate GC.
+// the policy will be disarmed. It is recommended to set an extreme watermark
+// as the last element (e.g. 0.99) to prevent the policy from disarming too soon.
 func NewWatermarkPolicy(watermarks ...float64) PolicyCtor {
 	return func(limit uint64) (Policy, error) {
 		p := new(watermarkPolicy)
@ -27,15 +28,15 @@ type watermarkPolicy struct {

 var _ Policy = (*watermarkPolicy)(nil)

-func (w *watermarkPolicy) Evaluate(_ UtilizationType, used uint64) (next uint64, immediate bool) {
+func (w *watermarkPolicy) Evaluate(_ UtilizationType, used uint64) (next uint64) {
 	Logger.Debugf("watermark policy: evaluating; utilization: %d/%d (used/limit)", used, w.limit)
 	var i int
 	for ; i < len(w.thresholds); i++ {
 		t := w.thresholds[i]
 		if used < t {
-			return t, false
+			return t
 		}
 	}
-	// we reached the maximum threshold, so fire immediately.
-	return used, true
+	// we reached the maximum threshold, so we disable this policy.
+	return PolicyTempDisabled
 }
--- a/watermarks_test.go
+++ b/watermarks_test.go
@ -26,36 +26,29 @@ func TestProgressiveWatermarks(t *testing.T) {
 	require.NoError(t, err)

 	// at zero
-	next, immediate := p.Evaluate(UtilizationSystem, uint64(0))
-	require.False(t, immediate)
+	next := p.Evaluate(UtilizationSystem, uint64(0))
 	require.EqualValues(t, thresholds[0], next)

 	// before the watermark.
-	next, immediate = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[0])-1)
-	require.False(t, immediate)
+	next = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[0])-1)
 	require.EqualValues(t, thresholds[0], next)

 	// exactly at the watermark; gives us the next watermark, as the watchdodg would've
 	// taken care of triggering the first watermark.
-	next, immediate = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[0]))
-	require.False(t, immediate)
+	next = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[0]))
 	require.EqualValues(t, thresholds[1], next)

 	// after the watermark gives us the next watermark.
-	next, immediate = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[0])+1)
-	require.False(t, immediate)
+	next = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[0])+1)
 	require.EqualValues(t, thresholds[1], next)

-	// last watermark; always triggers.
-	next, immediate = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[2]))
-	require.True(t, immediate)
-	require.EqualValues(t, uint64(float64(limit)*watermarks[2]), next)
+	// last watermark; disable the policy.
+	next = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[2]))
+	require.EqualValues(t, PolicyTempDisabled, next)

-	next, immediate = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[2]+1))
-	require.True(t, immediate)
-	require.EqualValues(t, uint64(float64(limit)*watermarks[2])+1, next)
+	next = p.Evaluate(UtilizationSystem, uint64(float64(limit)*watermarks[2]+1))
+	require.EqualValues(t, PolicyTempDisabled, next)

-	next, immediate = p.Evaluate(UtilizationSystem, limit)
-	require.True(t, immediate)
-	require.EqualValues(t, limit, next)
+	next = p.Evaluate(UtilizationSystem, limit)
+	require.EqualValues(t, PolicyTempDisabled, next)
 }