consul/lib/retry/retry.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package retry

import (
	"context"
	"fmt"
	"math/rand"
	"time"
)

// Jitter is a function that receives a base wait duration and returns the
// duration that should actually be waited. An implementation may add or remove
// some time to create randomness in the wait time.
type Jitter func(baseTime time.Duration) time.Duration

// NewJitter returns a new random Jitter that is up to percent longer than the
// original wait time. A negative percent is treated as 0, which disables
// jitter entirely.
//
// The returned Jitter never shortens the wait and never panics: the base time
// is returned unchanged when it is not positive, when it is too small for
// percent of it to amount to at least one nanosecond, or when computing the
// jitter window would overflow. If adding the jitter would overflow, the
// result saturates at the largest representable duration.
func NewJitter(percent int64) Jitter {
	if percent < 0 {
		percent = 0
	}

	return func(baseTime time.Duration) time.Duration {
		if percent == 0 || baseTime <= 0 {
			return baseTime
		}

		const maxDuration = time.Duration(1<<63 - 1)

		// Detect overflow of baseTime * percent before multiplying; a wrapped
		// product is not guaranteed to be negative.
		if int64(baseTime) > int64(maxDuration)/percent {
			return baseTime
		}

		limit := (int64(baseTime) * percent) / 100
		if limit <= 0 {
			// rand.Int63n panics for n <= 0, which happens when the base is
			// so small that the jitter window truncates to zero.
			return baseTime
		}

		extra := time.Duration(rand.Int63n(limit))
		if extra > maxDuration-baseTime {
			return maxDuration
		}
		return baseTime + extra
	}
}

// Waiter records the number of failures and performs exponential backoff when
// there are consecutive failures.
//
// The exported fields configure the backoff; the failure count itself is
// advanced by Wait and cleared by Reset.
type Waiter struct {
	// MinFailures before exponential backoff starts. While the failure count
	// has not exceeded MinFailures, the wait time is MinWait.
	MinFailures uint
	// MinWait is the wait time used until MinFailures is exceeded. It is also
	// the lower bound for every wait time computed afterwards.
	MinWait time.Duration
	// MaxWait time applied before Jitter. Note that the actual maximum wait time
	// is MaxWait + MaxWait * Jitter. The computed wait is only clamped to
	// MaxWait when MaxWait is non-zero.
	MaxWait time.Duration
	// Jitter to add to each wait time. The Jitter is applied after MaxWait, which
	// may cause the actual wait time to exceed MaxWait. A nil Jitter leaves the
	// wait time unchanged.
	Jitter Jitter
	// Factor is the multiplier to use when calculating the delay: once
	// MinFailures is exceeded, the un-jittered wait starts at Factor and
	// doubles with each further failure. A zero Factor defaults to 1 second.
	Factor   time.Duration
	failures uint // number of Wait calls since the last Reset
}

// delay calculates the time to wait based on the number of failures.
//
// While the failure count has not exceeded MinFailures the result is MinWait.
// After that the un-jittered wait starts at Factor (1 second when Factor is
// zero) and doubles with every further failure. The doubling is overflow-safe:
// once the value can no longer be represented it saturates at MaxWait, or at
// the largest representable duration when MaxWait is zero. Jitter, if set, is
// applied after the MaxWait cap, and the final result is never below MinWait.
func (w *Waiter) delay() time.Duration {
	if w.failures <= w.MinFailures {
		return w.MinWait
	}
	factor := w.Factor
	if factor == 0 {
		factor = time.Second
	}

	const maxDuration = time.Duration(1<<63 - 1)

	// ceiling is the value used when the exponential term cannot be
	// represented; a zero MaxWait means "no cap".
	ceiling := w.MaxWait
	if ceiling == 0 {
		ceiling = maxDuration
	}

	shift := w.failures - w.MinFailures - 1
	waitTime := ceiling
	switch {
	case factor < 0:
		// A negative Factor cannot produce a meaningful backoff; keep it
		// negative so the MinWait floor below takes over.
		waitTime = factor
	case shift < 63 && factor <= maxDuration>>shift:
		// factor * 2^shift fits in a Duration without overflowing.
		waitTime = factor << shift
	}
	// apply MaxWait before jitter so that multiple waiters with the same MaxWait
	// do not converge when they hit their max.
	if w.MaxWait != 0 && waitTime > w.MaxWait {
		waitTime = w.MaxWait
	}
	if w.Jitter != nil {
		waitTime = w.Jitter(waitTime)
	}
	if waitTime < w.MinWait {
		return w.MinWait
	}
	return waitTime
}

// Reset sets the failure count back to 0, so the next wait starts again from
// the beginning of the backoff schedule.
// Reset must be called if the operation done after Wait did not fail.
func (w *Waiter) Reset() {
	w.failures = 0
}

// Failures returns the count of consecutive failures, i.e. the number of
// times Wait has been called since the last Reset.
func (w *Waiter) Failures() int {
	return int(w.failures)
}

// Wait records one more failure and then blocks until either the backoff
// period for the new failure count has elapsed or ctx is done, whichever
// happens first.
//
// Because every call bumps the failure count, the wait grows exponentially
// across consecutive calls; callers must invoke Reset after an attempt that
// succeeded.
//
// Wait returns nil once the backoff period has elapsed. The only non-nil error
// it returns is ctx.Err(), for example when the context is canceled, which
// makes it suitable for long-running routines that do not get re-initialized,
// such as replication.
func (w *Waiter) Wait(ctx context.Context) error {
	w.failures++

	backoff := time.NewTimer(w.delay())
	// Stopping a timer that has already fired is harmless, so a single deferred
	// Stop covers both exits.
	defer backoff.Stop()

	select {
	case <-backoff.C:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

// NextWait returns the period the next call to Wait will block for, assuming
// its context is not cancelled. It's useful for informing a user how long
// it will be before the next attempt is made.
//
// Wait increments the failure count before computing its delay, so the
// estimate is computed for the failure count that Wait will see. The Waiter
// itself is not modified. When a random Jitter is configured the value is only
// an estimate, because Wait draws its own jitter.
func (w *Waiter) NextWait() time.Duration {
	// Work on a copy so previewing the delay does not disturb the real count.
	next := *w
	next.failures++
	return next.delay()
}

// RetryLoop retries an operation until either operation completes without error
// or ctx is done.
//
// Every attempt, including the first one, is preceded by a call to Wait. On
// success the failure count is reset and nil is returned. When ctx ends the
// loop, the returned error wraps the error of the most recent failed attempt;
// if no attempt has failed yet, it wraps the context error instead.
func (w *Waiter) RetryLoop(ctx context.Context, operation func() error) error {
	var lastError error
	for {
		if err := w.Wait(ctx); err != nil {
			// The error will only be non-nil if the context is done.
			if lastError == nil {
				// No attempt has failed yet; report why the loop stopped
				// rather than wrapping a nil error.
				lastError = err
			}
			return fmt.Errorf("could not retry operation: %w", lastError)
		}

		err := operation()
		if err == nil {
			// Reset the failure count seen by the waiter if there was no error.
			w.Reset()
			return nil
		}
		lastError = err
	}
}
Copyright headers for missing files/folders (#16708) * copyright headers for agent folder 2023-03-28 22:48:58 +00:00			`// Copyright (c) HashiCorp, Inc.`
			`// SPDX-License-Identifier: MPL-2.0`

lib/retry: extract a new package from lib 2020-10-01 23:02:32 +00:00			`package retry`
Implement config entry replication (#5706) 2019-04-26 17:38:39 +00:00
			`import (`
lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`"context"`
Support auth method with snapshot agent [ENT] (#15020) Port of hashicorp/consul-enterprise#3303 2022-10-17 21:57:48 +00:00			`"fmt"`
lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`"math/rand"`
Implement config entry replication (#5706) 2019-04-26 17:38:39 +00:00			`"time"`
			`)`

lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`// Jitter should return a new wait duration optionally with some time added or`
			`// removed to create some randomness in wait time.`
			`type Jitter func(baseTime time.Duration) time.Duration`
Implement config entry replication (#5706) 2019-04-26 17:38:39 +00:00
lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`// NewJitter returns a new random Jitter that is up to percent longer than the`
			`// original wait time.`
			`func NewJitter(percent int64) Jitter {`
Implement config entry replication (#5706) 2019-04-26 17:38:39 +00:00			`if percent < 0 {`
			`percent = 0`
			`}`

lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`return func(baseTime time.Duration) time.Duration {`
			`if percent == 0 {`
			`return baseTime`
			`}`
			`max := (int64(baseTime) * percent) / 100`
			`if max < 0 { // overflow`
			`return baseTime`
			`}`
			`return baseTime + time.Duration(rand.Int63n(max))`
Implement config entry replication (#5706) 2019-04-26 17:38:39 +00:00			`}`
			`}`

lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`// Waiter records the number of failures and performs exponential backoff when`
Support auth method with snapshot agent [ENT] (#15020) Port of hashicorp/consul-enterprise#3303 2022-10-17 21:57:48 +00:00			`// there are consecutive failures.`
lib/retry: extract a new package from lib 2020-10-01 23:02:32 +00:00			`type Waiter struct {`
lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`// MinFailures before exponential backoff starts. Any failures before`
			`// MinFailures is reached will wait MinWait time.`
lib/retry: export fields The fields are only ever read by Waiter, and setting the fields makes the calling code read much better without having to create a bunch of constants that only ever get used once. 2020-10-01 23:03:44 +00:00			`MinFailures uint`
lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`// MinWait time. Returned after the first failure.`
			`MinWait time.Duration`
lib/retry: allow jitter to exceed max wait. I changed this in https://github.com/hashicorp/consul/pull/8802#pullrequestreview-500779357 because exceeding the MaxWait seemed wrong, but as other have pointed out, that behaviour is probably correct. When multiple waiters hit the max value, we don't want them to converge, so restore the behaviour of allowing jitter to exceed max, and document it. 2021-04-07 22:33:11 +00:00			`// MaxWait time applied before Jitter. Note that the actual maximum wait time`
			`// is MaxWait + MaxWait * Jitter.`
lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`MaxWait time.Duration`
lib/retry: allow jitter to exceed max wait. I changed this in https://github.com/hashicorp/consul/pull/8802#pullrequestreview-500779357 because exceeding the MaxWait seemed wrong, but as other have pointed out, that behaviour is probably correct. When multiple waiters hit the max value, we don't want them to converge, so restore the behaviour of allowing jitter to exceed max, and document it. 2021-04-07 22:33:11 +00:00			`// Jitter to add to each wait time. The Jitter is applied after MaxWait, which`
			`// may cause the actual wait time to exceed MaxWait.`
lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`Jitter Jitter`
			`// Factor is the multiplier to use when calculating the delay. Defaults to`
			`// 1 second.`
			`Factor time.Duration`
			`failures uint`
Implement config entry replication (#5706) 2019-04-26 17:38:39 +00:00			`}`

lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`// delay calculates the time to wait based on the number of failures`
			`func (w *Waiter) delay() time.Duration {`
			`if w.failures <= w.MinFailures {`
			`return w.MinWait`
Implement config entry replication (#5706) 2019-04-26 17:38:39 +00:00			`}`
lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`factor := w.Factor`
			`if factor == 0 {`
			`factor = time.Second`
Implement config entry replication (#5706) 2019-04-26 17:38:39 +00:00			`}`

lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`shift := w.failures - w.MinFailures - 1`
			`waitTime := w.MaxWait`
			`if shift < 31 {`
			`waitTime = (1 << shift) * factor`
Implement config entry replication (#5706) 2019-04-26 17:38:39 +00:00			`}`
lib/retry: allow jitter to exceed max wait. I changed this in https://github.com/hashicorp/consul/pull/8802#pullrequestreview-500779357 because exceeding the MaxWait seemed wrong, but as other have pointed out, that behaviour is probably correct. When multiple waiters hit the max value, we don't want them to converge, so restore the behaviour of allowing jitter to exceed max, and document it. 2021-04-07 22:33:11 +00:00			`// apply MaxWait before jitter so that multiple waiters with the same MaxWait`
			`// do not converge when they hit their max.`
			`if w.MaxWait != 0 && waitTime > w.MaxWait {`
			`waitTime = w.MaxWait`
			`}`
lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`if w.Jitter != nil {`
			`waitTime = w.Jitter(waitTime)`
Implement config entry replication (#5706) 2019-04-26 17:38:39 +00:00			`}`
lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`if waitTime < w.MinWait {`
			`return w.MinWait`
Implement config entry replication (#5706) 2019-04-26 17:38:39 +00:00			`}`
			`return waitTime`
			`}`

lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`// Reset the failure count to 0.`
Add server certificate manager This certificate manager will request a leaf certificate for server agents and then keep them up to date. 2022-09-09 19:06:48 +00:00			`// Reset must be called if the operation done after Wait did not fail.`
lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`func (w *Waiter) Reset() {`
			`w.failures = 0`
Implement config entry replication (#5706) 2019-04-26 17:38:39 +00:00			`}`

lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`// Failures returns the count of consecutive failures.`
			`func (w *Waiter) Failures() int {`
			`return int(w.failures)`
Implement config entry replication (#5706) 2019-04-26 17:38:39 +00:00			`}`

Add server certificate manager This certificate manager will request a leaf certificate for server agents and then keep them up to date. 2022-09-09 19:06:48 +00:00			`// Wait increases the number of failures by one, and then blocks until the context`
lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`// is cancelled, or until the wait time is reached.`
Add server certificate manager This certificate manager will request a leaf certificate for server agents and then keep them up to date. 2022-09-09 19:06:48 +00:00			`//`
lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`// The wait time increases exponentially as the number of failures increases.`
Add server certificate manager This certificate manager will request a leaf certificate for server agents and then keep them up to date. 2022-09-09 19:06:48 +00:00			`// Every call to Wait increments the failures count, so Reset must be called`
			`// after Wait when there wasn't a failure.`
			`//`
Share mgw addrs in peering stream if needed This commit adds handling so that the replication stream considers whether the user intends to peer through mesh gateways. The subscription will return server or mesh gateway addresses depending on the mesh configuration setting. These watches can be updated at runtime by modifying the mesh config entry. 2022-09-21 15:55:19 +00:00			`// The only non-nil error that Wait returns will come from ctx.Err(),`
			`// such as when the context is canceled. This makes it suitable for`
			`// long-running routines that do not get re-initialized, such as replication.`
lib/retry: Refactor to reduce the interface surface Reduce Jitter to one function Rename NewRetryWaiter Fix a bug in calculateWait where maxWait was applied before jitter, which would make it possible to wait longer than maxWait. 2020-10-01 05:14:21 +00:00			`func (w *Waiter) Wait(ctx context.Context) error {`
			`w.failures++`
			`timer := time.NewTimer(w.delay())`
			`select {`
			`case <-ctx.Done():`
			`timer.Stop()`
			`return ctx.Err()`
			`case <-timer.C:`
			`return nil`
Implement config entry replication (#5706) 2019-04-26 17:38:39 +00:00			`}`
			`}`
add HCP integration component (#14723) * add HCP integration * lint: use non-deprecated logging interface 2022-09-26 18:58:15 +00:00
			`// NextWait returns the period the next call to Wait with block for assuming`
			`// it's context is not cancelled. It's useful for informing a user how long`
			`// it will be before the next attempt is made.`
			`func (w *Waiter) NextWait() time.Duration {`
			`return w.delay()`
			`}`
Support auth method with snapshot agent [ENT] (#15020) Port of hashicorp/consul-enterprise#3303 2022-10-17 21:57:48 +00:00
			`// RetryLoop retries an operation until either operation completes without error`
			`// or Waiter's context is canceled.`
			`func (w *Waiter) RetryLoop(ctx context.Context, operation func() error) error {`
			`var lastError error`
			`for {`
			`if err := w.Wait(ctx); err != nil {`
			`// The error will only be non-nil if the context is canceled.`
			`return fmt.Errorf("could not retry operation: %w", lastError)`
			`}`

			`if err := operation(); err == nil {`
			`// Reset the failure count seen by the waiter if there was no error.`
			`w.Reset()`
			`return nil`
			`} else {`
			`lastError = err`
			`}`
			`}`
			`}`