fix(wallet)_: fix circuit breaker to use health checks for fallback

functions as well. Add tests, fix tests failing under stress due to name clashes
2024-07-01 17:46:57 +02:00 · 2024-07-01 17:46:57 +02:00 · 0e10b38e4b
parent 23b185ae2b
commit 0e10b38e4b
2 changed files with 218 additions and 92 deletions
--- a/circuitbreaker/circuit_breaker.go
+++ b/circuitbreaker/circuit_breaker.go
@ -1,8 +1,8 @@
 package circuitbreaker
 import (
 	"context"
 	"fmt"
 	"strings"
 	"github.com/afex/hystrix-go/hystrix"
 )
@ -23,11 +23,14 @@ func (cr CommandResult) Error() error {
 }
 type Command struct {
 	ctx      context.Context
 	functors []*Functor
 	cancel   bool
 }
-func NewCommand(functors []*Functor) *Command {
+func NewCommand(ctx context.Context, functors []*Functor) *Command {
 	return &Command{
 		ctx:      ctx,
 		functors: functors,
 	}
 }
@ -40,8 +43,11 @@ func (cmd *Command) IsEmpty() bool {
 	return len(cmd.functors) == 0
 }
 func (cmd *Command) Cancel() {
 	cmd.cancel = true
 }
 type Config struct {
 	CommandName            string
 	Timeout                int
 	MaxConcurrentRequests  int
 	RequestVolumeThreshold int
@ -60,76 +66,72 @@ func NewCircuitBreaker(config Config) *CircuitBreaker {
 }
 type Functor struct {
-	Exec FallbackFunc
+	exec        FallbackFunc
 	circuitName string
 }
-func NewFunctor(exec FallbackFunc) *Functor {
+func NewFunctor(exec FallbackFunc, circuitName string) *Functor {
 	return &Functor{
-		Exec: exec,
+		exec:        exec,
 		circuitName: circuitName,
 	}
 }
-// This a blocking function
+// Executes the command in its circuit if set.
-func (eh *CircuitBreaker) Execute(cmd Command) CommandResult {
+// If the command's circuit is not configured, the circuit of the CircuitBreaker is used.
-	resultChan := make(chan CommandResult, 1)
+// This is a blocking function.
-	var result CommandResult
+func (cb *CircuitBreaker) Execute(cmd *Command) CommandResult {
 	if cmd == nil || cmd.IsEmpty() {
 		return CommandResult{err: fmt.Errorf("command is nil or empty")}
 	}
-	for i := 0; i < len(cmd.functors); i += 2 {
+	var result CommandResult
-		f1 := cmd.functors[i]
+	ctx := cmd.ctx
-		var f2 *Functor
+	if ctx == nil {
-		if i+1 < len(cmd.functors) {
+		ctx = context.Background()
-			f2 = cmd.functors[i+1]
+	}
 	for _, f := range cmd.functors {
 		if cmd.cancel {
 			break
 		}
-		circuitName := fmt.Sprintf("%s_%d", eh.config.CommandName, i)
+		if hystrix.GetCircuitSettings()[f.circuitName] == nil {
-		if hystrix.GetCircuitSettings()[circuitName] == nil {
+			hystrix.ConfigureCommand(f.circuitName, hystrix.CommandConfig{
-			hystrix.ConfigureCommand(circuitName, hystrix.CommandConfig{
+				Timeout:                cb.config.Timeout,
-				Timeout:                eh.config.Timeout,
+				MaxConcurrentRequests:  cb.config.MaxConcurrentRequests,
-				MaxConcurrentRequests:  eh.config.MaxConcurrentRequests,
+				RequestVolumeThreshold: cb.config.RequestVolumeThreshold,
-				RequestVolumeThreshold: eh.config.RequestVolumeThreshold,
+				SleepWindow:            cb.config.SleepWindow,
-				SleepWindow:            eh.config.SleepWindow,
+				ErrorPercentThreshold:  cb.config.ErrorPercentThreshold,
 				ErrorPercentThreshold:  eh.config.ErrorPercentThreshold,
 			})
 		}
-		// If circuit is the same for all functions, in case of len(cmd.functors) > 2,
+		err := hystrix.DoC(ctx, f.circuitName, func(ctx context.Context) error {
-		// main and fallback providers are different next run if first two fail,
+			res, err := f.exec()
-		// which causes health issues for both main and fallback and ErrorPercentThreshold
+			// Write to result only if success
 		// is reached faster than it should be.
 		errChan := hystrix.Go(circuitName, func() error {
 			res, err := f1.Exec()
 			// Write to resultChan only if success
 			if err == nil {
-				resultChan <- CommandResult{res: res, err: err}
+				result = CommandResult{res: res}
 			}
 			return err
-		}, func(err error) error {
+		}, nil)
 			// In case of concurrency, we should not execute the fallback
 			if f2 == nil || err == hystrix.ErrMaxConcurrency {
 				return err
 			}
 			res, err := f2.Exec()
 			if err == nil {
 				resultChan <- CommandResult{res: res, err: err}
 			}
 			return err
 		})
-		select {
+		if err == nil {
-		case result = <-resultChan:
+			break
 			if result.err == nil {
 				return result
 			}
 		case err := <-errChan:
 			result = CommandResult{err: err}
 			// In case of max concurrency, we should delay the execution and stop iterating over fallbacks
 			// No error unwrapping here, so use strings.Contains
 			if strings.Contains(err.Error(), hystrix.ErrMaxConcurrency.Error()) {
 				return result
 			}
 		}
 		// Accumulate errors
 		if result.err != nil {
 			result.err = fmt.Errorf("%w, %s.error: %w", result.err, f.circuitName, err)
 		} else {
 			result.err = fmt.Errorf("%s.error: %w", f.circuitName, err)
 		}
 		// Lets abuse every provider with the same amount of MaxConcurrentRequests,
 		// keep iterating even in case of ErrMaxConcurrency error
 	}
 	return result
 }
 func (cb *CircuitBreaker) Config() Config {
 	return cb.config
 }
--- a/circuitbreaker/circuit_breaker_test.go
+++ b/circuitbreaker/circuit_breaker_test.go
@ -1,10 +1,14 @@
 package circuitbreaker
 import (
 	"context"
 	"errors"
 	"fmt"
 	"testing"
 	"time"
 	"github.com/afex/hystrix-go/hystrix"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
@ -12,7 +16,6 @@ const success = "Success"
 func TestCircuitBreaker_ExecuteSuccessSingle(t *testing.T) {
 	cb := NewCircuitBreaker(Config{
 		CommandName:            "SuccessSingle", // unique name to avoid conflicts with go tests `-count` option
 		Timeout:                1000,
 		MaxConcurrentRequests:  100,
 		RequestVolumeThreshold: 10,
@ -21,13 +24,12 @@ func TestCircuitBreaker_ExecuteSuccessSingle(t *testing.T) {
 	})
 	expectedResult := success
-	cmd := Command{
+	circuitName := "SuccessSingle"
-		functors: []*Functor{
+	cmd := NewCommand(context.TODO(), []*Functor{
-			NewFunctor(func() ([]interface{}, error) {
+		NewFunctor(func() ([]interface{}, error) {
-				return []any{expectedResult}, nil
+			return []any{expectedResult}, nil
-			}),
+		}, circuitName)},
-		},
+	)
 	}
 	result := cb.Execute(cmd)
 	require.NoError(t, result.Error())
@ -36,7 +38,6 @@ func TestCircuitBreaker_ExecuteSuccessSingle(t *testing.T) {
 func TestCircuitBreaker_ExecuteMultipleFallbacksFail(t *testing.T) {
 	cb := NewCircuitBreaker(Config{
 		CommandName:            "MultipleFail", // unique name to avoid conflicts with go tests `-count` option
 		Timeout:                10,
 		MaxConcurrentRequests:  100,
 		RequestVolumeThreshold: 10,
@ -44,28 +45,31 @@ func TestCircuitBreaker_ExecuteMultipleFallbacksFail(t *testing.T) {
 		ErrorPercentThreshold:  10,
 	})
-	cmd := Command{
+	circuitName := ""
-		functors: []*Functor{
+	errSecProvFailed := errors.New("provider 2 failed")
-			NewFunctor(func() ([]interface{}, error) {
+	errThirdProvFailed := errors.New("provider 3 failed")
-				time.Sleep(100 * time.Millisecond) // will cause hystrix: timeout
+	cmd := NewCommand(context.TODO(), []*Functor{
-				return []any{success}, nil
+		NewFunctor(func() ([]interface{}, error) {
-			}),
+			time.Sleep(100 * time.Millisecond) // will cause hystrix: timeout
-			NewFunctor(func() ([]interface{}, error) {
+			return []any{success}, nil
-				return nil, errors.New("provider 2 failed")
+		}, circuitName),
-			}),
+		NewFunctor(func() ([]interface{}, error) {
-			NewFunctor(func() ([]interface{}, error) {
+			return nil, errSecProvFailed
-				return nil, errors.New("provider 3 failed")
+		}, circuitName),
-			}),
+		NewFunctor(func() ([]interface{}, error) {
-		},
+			return nil, errThirdProvFailed
-	}
+		}, circuitName),
 	})
 	result := cb.Execute(cmd)
 	require.Error(t, result.Error())
 	assert.True(t, errors.Is(result.Error(), hystrix.ErrTimeout))
 	assert.True(t, errors.Is(result.Error(), errSecProvFailed))
 	assert.True(t, errors.Is(result.Error(), errThirdProvFailed))
 }
 func TestCircuitBreaker_ExecuteMultipleFallbacksFailButLastSuccessStress(t *testing.T) {
 	cb := NewCircuitBreaker(Config{
 		CommandName:            "LastSuccessStress", // unique name to avoid conflicts with go tests `-count` option
 		Timeout:                10,
 		MaxConcurrentRequests:  100,
 		RequestVolumeThreshold: 10,
@ -74,26 +78,146 @@ func TestCircuitBreaker_ExecuteMultipleFallbacksFailButLastSuccessStress(t *test
 	})
 	expectedResult := success
 	circuitName := fmt.Sprintf("LastSuccessStress_%d", time.Now().Nanosecond()) // unique name to avoid conflicts with go tests `-count` option
 	// These are executed sequentially, but I had an issue with the test failing
 	// because of the open circuit
 	for i := 0; i < 1000; i++ {
-		cmd := Command{
+		cmd := NewCommand(context.TODO(), []*Functor{
-			functors: []*Functor{
+			NewFunctor(func() ([]interface{}, error) {
-				NewFunctor(func() ([]interface{}, error) {
+				return nil, errors.New("provider 1 failed")
-					return nil, errors.New("provider 1 failed")
+			}, circuitName+"1"),
-				}),
+			NewFunctor(func() ([]interface{}, error) {
-				NewFunctor(func() ([]interface{}, error) {
+				return nil, errors.New("provider 2 failed")
-					return nil, errors.New("provider 2 failed")
+			}, circuitName+"2"),
-				}),
+			NewFunctor(func() ([]interface{}, error) {
-				NewFunctor(func() ([]interface{}, error) {
+				return []any{expectedResult}, nil
-					return []any{expectedResult}, nil
+			}, circuitName+"3"),
-				}),
+		},
-			},
+		)
 		}
 		result := cb.Execute(cmd)
 		require.NoError(t, result.Error())
 		require.Equal(t, expectedResult, result.Result()[0].(string))
 	}
 }
 func TestCircuitBreaker_ExecuteSwitchToWorkingProviderOnVolumeThresholdReached(t *testing.T) {
 	cb := NewCircuitBreaker(Config{
 		RequestVolumeThreshold: 10,
 	})
 	expectedResult := success
 	circuitName := fmt.Sprintf("SwitchToWorkingProviderOnVolumeThresholdReached_%d", time.Now().Nanosecond()) // unique name to avoid conflicts with go tests `-count` option
 	prov1Called := 0
 	prov2Called := 0
 	prov3Called := 0
 	// These are executed sequentially
 	for i := 0; i < 20; i++ {
 		cmd := NewCommand(context.TODO(), []*Functor{
 			NewFunctor(func() ([]interface{}, error) {
 				prov1Called++
 				return nil, errors.New("provider 1 failed")
 			}, circuitName+"1"),
 			NewFunctor(func() ([]interface{}, error) {
 				prov2Called++
 				return nil, errors.New("provider 2 failed")
 			}, circuitName+"2"),
 			NewFunctor(func() ([]interface{}, error) {
 				prov3Called++
 				return []any{expectedResult}, nil
 			}, circuitName+"3"),
 		})
 		result := cb.Execute(cmd)
 		require.NoError(t, result.Error())
 		require.Equal(t, expectedResult, result.Result()[0].(string))
 	}
 	assert.Equal(t, 10, prov1Called)
 	assert.Equal(t, 10, prov2Called)
 	assert.Equal(t, 20, prov3Called)
 }
 func TestCircuitBreaker_ExecuteHealthCheckOnWindowTimeout(t *testing.T) {
 	sleepWindow := 10
 	cb := NewCircuitBreaker(Config{
 		RequestVolumeThreshold: 1, // 1 failed request is enough to trip the circuit
 		SleepWindow:            sleepWindow,
 		ErrorPercentThreshold:  1, // Trip on first error
 	})
 	expectedResult := success
 	circuitName := fmt.Sprintf("SwitchToWorkingProviderOnWindowTimeout_%d", time.Now().Nanosecond()) // unique name to avoid conflicts with go tests `-count` option
 	prov1Called := 0
 	prov2Called := 0
 	// These are executed sequentially
 	for i := 0; i < 10; i++ {
 		cmd := NewCommand(context.TODO(), []*Functor{
 			NewFunctor(func() ([]interface{}, error) {
 				prov1Called++
 				return nil, errors.New("provider 1 failed")
 			}, circuitName+"1"),
 			NewFunctor(func() ([]interface{}, error) {
 				prov2Called++
 				return []any{expectedResult}, nil
 			}, circuitName+"2"),
 		})
 		result := cb.Execute(cmd)
 		require.NoError(t, result.Error())
 		require.Equal(t, expectedResult, result.Result()[0].(string))
 	}
 	assert.Equal(t, 1, prov1Called)
 	assert.Equal(t, 10, prov2Called)
 	// Wait for the sleep window to expire
 	time.Sleep(time.Duration(sleepWindow+1) * time.Millisecond)
 	cmd := NewCommand(context.TODO(), []*Functor{
 		NewFunctor(func() ([]interface{}, error) {
 			prov1Called++
 			return []any{expectedResult}, nil // Now it is working
 		}, circuitName+"1"),
 		NewFunctor(func() ([]interface{}, error) {
 			prov2Called++
 			return []any{expectedResult}, nil
 		}, circuitName+"2"),
 	})
 	result := cb.Execute(cmd)
 	require.NoError(t, result.Error())
 	assert.Equal(t, 2, prov1Called)
 	assert.Equal(t, 10, prov2Called)
 }
 func TestCircuitBreaker_CommandCancel(t *testing.T) {
 	cb := NewCircuitBreaker(Config{})
 	circuitName := fmt.Sprintf("CommandCancel_%d", time.Now().Nanosecond()) // unique name to avoid conflicts with go tests `-count` option
 	prov1Called := 0
 	prov2Called := 0
 	expectedErr := errors.New("provider 1 failed")
 	// These are executed sequentially
 	cmd := NewCommand(context.TODO(), nil)
 	cmd.Add(NewFunctor(func() ([]interface{}, error) {
 		prov1Called++
 		cmd.Cancel()
 		return nil, expectedErr
 	}, circuitName+"1"))
 	cmd.Add(NewFunctor(func() ([]interface{}, error) {
 		prov2Called++
 		return nil, errors.New("provider 2 failed")
 	}, circuitName+"2"))
 	result := cb.Execute(cmd)
 	t.Log(result.Error())
 	require.True(t, errors.Is(result.Error(), expectedErr))
 	assert.Equal(t, 1, prov1Called)
 	assert.Equal(t, 0, prov2Called)
 }