mirror of https://github.com/status-im/consul.git
retry locks on network errors (#3553)
* retry locks on network errors

  When communicating with a local agent and watching a lock, a dropped
  connection between the agent and server shows up as a server error and is
  immediately retried. However, if the client is connected to a remote server,
  a dropped connection immediately aborts the lock.

* Updates the comment to note that this kind of retry is unsafe for writes.
parent 2e229617ce
commit fea32fb7d7
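The behavior described in the commit message reaches users of this client library through the lock's monitor retry options. Below is a minimal usage sketch, assuming the standard github.com/hashicorp/consul/api import path; the key name, retry count, and retry interval are illustrative and not taken from this commit.

package main

import (
	"log"
	"time"

	"github.com/hashicorp/consul/api"
)

func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	// Illustrative values only; the key, retry count, and retry interval are
	// not part of this commit.
	lock, err := client.LockOpts(&api.LockOptions{
		Key:              "service/my-app/leader",
		MonitorRetries:   5,
		MonitorRetryTime: 2 * time.Second,
	})
	if err != nil {
		log.Fatal(err)
	}

	stopCh := make(chan struct{})
	lostCh, err := lock.Lock(stopCh)
	if err != nil {
		log.Fatal(err)
	}
	defer lock.Unlock()

	// With this change, a dropped connection to a remote server while
	// monitoring the lock is retried instead of closing lostCh right away.
	<-lostCh
}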
api/api.go | 13

api/api.go
@@ -555,13 +555,20 @@ func durToMsec(dur time.Duration) string {
 
 // serverError is a string we look for to detect 500 errors.
 const serverError = "Unexpected response code: 500"
 
-// IsServerError returns true for 500 errors from the Consul servers, these are
-// usually retryable at a later time.
-func IsServerError(err error) bool {
+// IsRetryableError returns true for 500 errors from the Consul servers, and
+// network connection errors. These are usually retryable at a later time.
+// This applies to reads but NOT to writes. This may return true for errors
+// on writes that may have still gone through, so do not use this to retry
+// any write operations.
+func IsRetryableError(err error) bool {
+	if err == nil {
+		return false
+	}
+
+	if _, ok := err.(net.Error); ok {
+		return true
+	}
+
 	// TODO (slackpad) - Make a real error type here instead of using
 	// a string check.
 	return strings.Contains(err.Error(), serverError)
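The new doc comment stresses that IsRetryableError applies to reads only. As a rough sketch of the intended caller-side pattern (not code from this commit; the helper name, retry count, and backoff are made up for illustration):

package example

import (
	"time"

	"github.com/hashicorp/consul/api"
)

// getWithRetry is a hypothetical helper, not part of this commit. It retries
// a KV read while IsRetryableError reports the failure as transient. Writes
// should NOT be retried this way, per the comment on IsRetryableError.
func getWithRetry(kv *api.KV, key string, attempts int, backoff time.Duration) (*api.KVPair, error) {
	var lastErr error
	for i := 0; i < attempts; i++ {
		pair, _, err := kv.Get(key, nil)
		if err == nil {
			return pair, nil
		}
		if !api.IsRetryableError(err) {
			return nil, err // not a 500 or a network error: fail immediately
		}
		lastErr = err
		time.Sleep(backoff)
	}
	return nil, lastErr
}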
api/api_test.go
@@ -4,6 +4,7 @@ import (
 	crand "crypto/rand"
 	"crypto/tls"
 	"fmt"
+	"net"
 	"net/http"
 	"os"
 	"path/filepath"
@@ -529,17 +530,21 @@ func TestAPI_durToMsec(t *testing.T) {
 	}
 }
 
-func TestAPI_IsServerError(t *testing.T) {
+func TestAPI_IsRetryableError(t *testing.T) {
 	t.Parallel()
-	if IsServerError(nil) {
-		t.Fatalf("should not be a server error")
+	if IsRetryableError(nil) {
+		t.Fatal("should not be a retryable error")
 	}
 
-	if IsServerError(fmt.Errorf("not the error you are looking for")) {
-		t.Fatalf("should not be a server error")
+	if IsRetryableError(fmt.Errorf("not the error you are looking for")) {
+		t.Fatal("should not be a retryable error")
 	}
 
-	if !IsServerError(fmt.Errorf(serverError)) {
-		t.Fatalf("should be a server error")
+	if !IsRetryableError(fmt.Errorf(serverError)) {
+		t.Fatal("should be a retryable error")
 	}
+
+	if !IsRetryableError(&net.OpError{Err: fmt.Errorf("network conn error")}) {
+		t.Fatal("should be a retryable error")
+	}
 }
api/lock.go
@@ -370,7 +370,7 @@ RETRY:
 		// by doing retries. Note that we have to attempt the retry in a non-
 		// blocking fashion so that we have a clean place to reset the retry
 		// counter if service is restored.
-		if retries > 0 && IsServerError(err) {
+		if retries > 0 && IsRetryableError(err) {
 			time.Sleep(l.opts.MonitorRetryTime)
 			retries--
 			opts.WaitIndex = 0
api/semaphore.go
@@ -492,7 +492,7 @@ RETRY:
 		// by doing retries. Note that we have to attempt the retry in a non-
 		// blocking fashion so that we have a clean place to reset the retry
 		// counter if service is restored.
-		if retries > 0 && IsServerError(err) {
+		if retries > 0 && IsRetryableError(err) {
 			time.Sleep(s.opts.MonitorRetryTime)
 			retries--
 			opts.WaitIndex = 0
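The semaphore monitor gains the same retry behavior as the lock monitor. A brief usage sketch follows, again assuming the standard github.com/hashicorp/consul/api import path; the prefix, limit, and retry settings are illustrative and not part of this change.

package main

import (
	"log"
	"time"

	"github.com/hashicorp/consul/api"
)

func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	// Illustrative values only; the prefix, limit, and retry settings are not
	// taken from this commit.
	sema, err := client.SemaphoreOpts(&api.SemaphoreOptions{
		Prefix:           "service/my-app/sema",
		Limit:            3,
		MonitorRetries:   5,
		MonitorRetryTime: 2 * time.Second,
	})
	if err != nil {
		log.Fatal(err)
	}

	stopCh := make(chan struct{})
	lostCh, err := sema.Acquire(stopCh)
	if err != nil {
		log.Fatal(err)
	}
	defer sema.Release()

	// As with the lock, transient network errors while monitoring the
	// semaphore are now retried instead of immediately signaling loss.
	<-lostCh
}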