143 lines
4.9 KiB
Go
143 lines
4.9 KiB
Go
package watchdog
|
|
|
|
import (
|
|
"math"
|
|
"time"
|
|
)
|
|
|
|
// WatermarkPolicy is a watchdog firing policy that triggers when watermarks are
|
|
// surpassed in the increasing direction.
|
|
//
|
|
// For example, a policy configured with the watermarks 0.50, 0.75, 0.80, and
|
|
// 0.99 will trigger at most once, and only once, each time that a watermark
|
|
// is surpassed upwards.
|
|
//
|
|
// Even if utilisation pierces through several watermarks at once between two
|
|
// subsequent calls to Evaluate, the policy will fire only once.
|
|
//
|
|
// It is possible to suppress the watermark policy from firing too often by
|
|
// setting a Silence period. When non-zero, if a watermark is surpassed within
|
|
// the Silence window (using the last GC timestamp as basis), that event will
|
|
// not immediately trigger firing. Instead, the policy will wait until the
|
|
// silence period is over, and only then will it fire if utilisation is still
|
|
// beyond that watermark.
|
|
//
|
|
// At last, if an EmergencyWatermark is set, when utilisation is above that
|
|
// level, the Silence period will be ignored and the policy will fire
|
|
// persistenly, as long as utilisation stays above that watermark.
|
|
type WatermarkPolicy struct {
|
|
// Watermarks are the percentual amounts of limit. The policy will panic if
|
|
// Watermarks is zero length.
|
|
Watermarks []float64
|
|
|
|
// EmergencyWatermark is a watermark that, when surpassed, puts this
|
|
// watchdog in emergency mode. During emergency mode, the system is
|
|
// considered to be under significant memory pressure, and the Quiesce
|
|
// period is not honoured.
|
|
EmergencyWatermark float64
|
|
|
|
// Silence is the quiet period the watchdog will honour except when in
|
|
// emergency mode.
|
|
Silence time.Duration
|
|
|
|
// internal state.
|
|
thresholds []uint64
|
|
currIdx int // idx of the current watermark.
|
|
lastIdx int
|
|
firedLast bool
|
|
silenceNs int64
|
|
initialized bool
|
|
}
|
|
|
|
var _ Policy = (*WatermarkPolicy)(nil)
|
|
|
|
func (w *WatermarkPolicy) Evaluate(input PolicyInput) (trigger bool) {
|
|
if !w.initialized {
|
|
w.thresholds = make([]uint64, 0, len(w.Watermarks))
|
|
for _, m := range w.Watermarks {
|
|
w.thresholds = append(w.thresholds, uint64(float64(input.Limit)*m))
|
|
}
|
|
w.silenceNs = w.Silence.Nanoseconds()
|
|
w.lastIdx = len(w.Watermarks) - 1
|
|
w.initialized = true
|
|
input.Logger.Infof("initialized watermark watchdog policy; watermarks: %v; emergency watermark: %f, thresholds: %v; silence period: %s",
|
|
w.Watermarks, w.EmergencyWatermark, w.thresholds, w.Silence)
|
|
}
|
|
|
|
// determine the value to compare utilisation against.
|
|
var actual uint64
|
|
switch input.Scope {
|
|
case ScopeSystem:
|
|
actual = input.SysStats.ActualUsed
|
|
case ScopeHeap:
|
|
actual = input.MemStats.HeapAlloc
|
|
}
|
|
|
|
input.Logger.Debugf("watermark policy: evaluating; curr_watermark: %f, utilization: %d/%d/%d (used/curr_threshold/limit)",
|
|
w.Watermarks[w.currIdx], actual, w.thresholds[w.currIdx], input.Limit)
|
|
|
|
// determine whether we're past the emergency watermark; if we are, we fire
|
|
// unconditionally. Disabled if 0.
|
|
if w.EmergencyWatermark > 0 {
|
|
currPercentage := math.Round((float64(actual)/float64(input.Limit))*DecimalPrecision) / DecimalPrecision // round float.
|
|
if pastEmergency := currPercentage >= w.EmergencyWatermark; pastEmergency {
|
|
emergencyThreshold := uint64(float64(input.Limit) / w.EmergencyWatermark)
|
|
input.Logger.Infof("watermark policy: emergency trigger; perc: %f/%f (%% used/%% emergency), utilization: %d/%d/%d (used/emergency/limit), used: %d, limit: %d, ",
|
|
currPercentage, w.EmergencyWatermark, actual, emergencyThreshold, input.Limit)
|
|
return true
|
|
}
|
|
}
|
|
|
|
// short-circuit if within the silencing period.
|
|
if silencing := w.silenceNs > 0; silencing {
|
|
now := Clock.Now().UnixNano()
|
|
if elapsed := now - int64(input.MemStats.LastGC); elapsed < w.silenceNs {
|
|
input.Logger.Debugf("watermark policy: silenced")
|
|
return false
|
|
}
|
|
}
|
|
|
|
// check if the the utilisation is below our current threshold; try
|
|
// to downscale before returning false.
|
|
if actual < w.thresholds[w.currIdx] {
|
|
for w.currIdx > 0 {
|
|
if actual >= w.thresholds[w.currIdx-1] {
|
|
break
|
|
}
|
|
w.firedLast = false
|
|
w.currIdx--
|
|
}
|
|
return false
|
|
}
|
|
|
|
// we are above our current watermark, but if this is the last watermark and
|
|
// we've already fired, suppress the firing.
|
|
if w.currIdx == w.lastIdx && w.firedLast {
|
|
input.Logger.Debugf("watermark policy: last watermark already fired; skipping")
|
|
return false
|
|
}
|
|
|
|
// if our value is above the last threshold, record the last threshold as
|
|
// our current watermark, and also the fact that we've already fired for
|
|
// it.
|
|
if actual >= w.thresholds[w.lastIdx] {
|
|
w.currIdx = w.lastIdx
|
|
w.firedLast = true
|
|
input.Logger.Infof("watermark policy triggering: %f watermark surpassed", w.Watermarks[w.currIdx])
|
|
return true
|
|
}
|
|
|
|
input.Logger.Infof("watermark policy triggering: %f watermark surpassed", w.Watermarks[w.currIdx])
|
|
|
|
// we are triggering; update the current watermark, upscaling it to the
|
|
// next threshold.
|
|
for w.currIdx < w.lastIdx {
|
|
w.currIdx++
|
|
if actual < w.thresholds[w.currIdx] {
|
|
break
|
|
}
|
|
}
|
|
|
|
return true
|
|
}
|