retry locks on network errors (#3553)

* retry locks on network errors

When communicating with a local agent and watching a lock, a dropped
connection between the agent and server will show up as a server error
and immediately be retried. However if the client is connected to a
remote server, a dropped connection immediately aborts the lock.

* Updates comment about it being unsafe for writes.
This commit is contained in:
James Bardin 2017-10-11 10:42:10 -04:00 committed by James Phillips
parent 2e229617ce
commit fea32fb7d7
4 changed files with 24 additions and 12 deletions

View File

@ -555,13 +555,20 @@ func durToMsec(dur time.Duration) string {
// serverError is a string we look for to detect 500 errors.
const serverError = "Unexpected response code: 500"
// IsServerError returns true for 500 errors from the Consul servers, these are
// usually retryable at a later time.
func IsServerError(err error) bool {
// IsRetryableError returns true for 500 errors from the Consul servers, and
// network connection errors. These are usually retryable at a later time.
// This applies to reads but NOT to writes. This may return true for errors
// on writes that may have still gone through, so do not use this to retry
// any write operations.
func IsRetryableError(err error) bool {
if err == nil {
return false
}
if _, ok := err.(net.Error); ok {
return true
}
// TODO (slackpad) - Make a real error type here instead of using
// a string check.
return strings.Contains(err.Error(), serverError)

View File

@ -4,6 +4,7 @@ import (
crand "crypto/rand"
"crypto/tls"
"fmt"
"net"
"net/http"
"os"
"path/filepath"
@ -529,17 +530,21 @@ func TestAPI_durToMsec(t *testing.T) {
}
}
func TestAPI_IsServerError(t *testing.T) {
func TestAPI_IsRetryableError(t *testing.T) {
t.Parallel()
if IsServerError(nil) {
t.Fatalf("should not be a server error")
if IsRetryableError(nil) {
t.Fatal("should not be a retryable error")
}
if IsServerError(fmt.Errorf("not the error you are looking for")) {
t.Fatalf("should not be a server error")
if IsRetryableError(fmt.Errorf("not the error you are looking for")) {
t.Fatal("should not be a retryable error")
}
if !IsServerError(fmt.Errorf(serverError)) {
t.Fatalf("should be a server error")
if !IsRetryableError(fmt.Errorf(serverError)) {
t.Fatal("should be a retryable error")
}
if !IsRetryableError(&net.OpError{Err: fmt.Errorf("network conn error")}) {
t.Fatal("should be a retryable error")
}
}

View File

@ -370,7 +370,7 @@ RETRY:
// by doing retries. Note that we have to attempt the retry in a non-
// blocking fashion so that we have a clean place to reset the retry
// counter if service is restored.
if retries > 0 && IsServerError(err) {
if retries > 0 && IsRetryableError(err) {
time.Sleep(l.opts.MonitorRetryTime)
retries--
opts.WaitIndex = 0

View File

@ -492,7 +492,7 @@ RETRY:
// by doing retries. Note that we have to attempt the retry in a non-
// blocking fashion so that we have a clean place to reset the retry
// counter if service is restored.
if retries > 0 && IsServerError(err) {
if retries > 0 && IsRetryableError(err) {
time.Sleep(s.opts.MonitorRetryTime)
retries--
opts.WaitIndex = 0