diff --git a/command/agent/agent_test.go b/command/agent/agent_test.go
index 32e5fc4a53..baf5c5dc53 100644
--- a/command/agent/agent_test.go
+++ b/command/agent/agent_test.go
@@ -194,20 +194,20 @@ func TestAgent_ReconnectConfigSettings(t *testing.T) {
}
}()
- c.ReconnectTimeoutLan = 2 * time.Hour
- c.ReconnectTimeoutWan = 3 * time.Hour
+ c.ReconnectTimeoutLan = 24 * time.Hour
+ c.ReconnectTimeoutWan = 36 * time.Hour
func() {
dir, agent := makeAgent(t, c)
defer os.RemoveAll(dir)
defer agent.Shutdown()
lan := agent.consulConfig().SerfLANConfig.ReconnectTimeout
- if lan != 2*time.Hour {
+ if lan != 24*time.Hour {
t.Fatalf("bad: %s", lan.String())
}
wan := agent.consulConfig().SerfWANConfig.ReconnectTimeout
- if wan != 3*time.Hour {
+ if wan != 36*time.Hour {
t.Fatalf("bad: %s", wan.String())
}
}()
diff --git a/command/agent/config.go b/command/agent/config.go
index ce8287ce95..21a3fa454b 100644
--- a/command/agent/config.go
+++ b/command/agent/config.go
@@ -786,19 +786,25 @@ func DecodeConfig(r io.Reader) (*Config, error) {
result.RetryIntervalWan = dur
}
+ const reconnectTimeoutMin = 8 * time.Hour
if raw := result.ReconnectTimeoutLanRaw; raw != "" {
dur, err := time.ParseDuration(raw)
if err != nil {
return nil, fmt.Errorf("ReconnectTimeoutLan invalid: %v", err)
}
+ if dur < reconnectTimeoutMin {
+ return nil, fmt.Errorf("ReconnectTimeoutLan must be >= %s", reconnectTimeoutMin.String())
+ }
result.ReconnectTimeoutLan = dur
}
-
if raw := result.ReconnectTimeoutWanRaw; raw != "" {
dur, err := time.ParseDuration(raw)
if err != nil {
return nil, fmt.Errorf("ReconnectTimeoutWan invalid: %v", err)
}
+ if dur < reconnectTimeoutMin {
+ return nil, fmt.Errorf("ReconnectTimeoutWan must be >= %s", reconnectTimeoutMin.String())
+ }
result.ReconnectTimeoutWan = dur
}
diff --git a/command/agent/config_test.go b/command/agent/config_test.go
index 09dad36b8b..3fc0a6880d 100644
--- a/command/agent/config_test.go
+++ b/command/agent/config_test.go
@@ -463,17 +463,27 @@ func TestDecodeConfig(t *testing.T) {
}
// Reconnect timeout LAN and WAN
- input = `{"reconnect_timeout": "1m", "reconnect_timeout_wan": "2m"}`
+ input = `{"reconnect_timeout": "8h", "reconnect_timeout_wan": "10h"}`
config, err = DecodeConfig(bytes.NewReader([]byte(input)))
if err != nil {
t.Fatalf("err: %s", err)
}
- if config.ReconnectTimeoutLanRaw != "1m" ||
- config.ReconnectTimeoutLan.String() != "1m0s" ||
- config.ReconnectTimeoutWanRaw != "2m" ||
- config.ReconnectTimeoutWan.String() != "2m0s" {
+ if config.ReconnectTimeoutLanRaw != "8h" ||
+ config.ReconnectTimeoutLan.String() != "8h0m0s" ||
+ config.ReconnectTimeoutWanRaw != "10h" ||
+ config.ReconnectTimeoutWan.String() != "10h0m0s" {
t.Fatalf("bad: %#v", config)
}
+ input = `{"reconnect_timeout": "7h"}`
+ config, err = DecodeConfig(bytes.NewReader([]byte(input)))
+ if err == nil {
+ t.Fatalf("decode should have failed")
+ }
+ input = `{"reconnect_timeout_wan": "7h"}`
+ config, err = DecodeConfig(bytes.NewReader([]byte(input)))
+ if err == nil {
+ t.Fatalf("decode should have failed")
+ }
// Static UI server
input = `{"ui": true}`
@@ -1364,10 +1374,10 @@ func TestMergeConfig(t *testing.T) {
RetryJoinWan: []string{"1.1.1.1"},
RetryIntervalWanRaw: "10s",
RetryIntervalWan: 10 * time.Second,
- ReconnectTimeoutLanRaw: "1s",
- ReconnectTimeoutLan: 1 * time.Second,
- ReconnectTimeoutWanRaw: "2s",
- ReconnectTimeoutWan: 2 * time.Second,
+ ReconnectTimeoutLanRaw: "24h",
+ ReconnectTimeoutLan: 24 * time.Hour,
+ ReconnectTimeoutWanRaw: "36h",
+ ReconnectTimeoutWan: 36 * time.Hour,
CheckUpdateInterval: 8 * time.Minute,
CheckUpdateIntervalRaw: "8m",
ACLToken: "1234",
diff --git a/website/source/docs/agent/options.html.markdown b/website/source/docs/agent/options.html.markdown
index fc21a7e2e8..06c1e404d0 100644
--- a/website/source/docs/agent/options.html.markdown
+++ b/website/source/docs/agent/options.html.markdown
@@ -583,13 +583,15 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass
* `reconnect_timeout` This controls
how long it takes for a failed node to be completely removed from the cluster. This defaults to
72 hours and it is recommended that this is set to at least double the maximum expected recoverable
- outage time for a node or network partition. The value is a time with a unit suffix, which can be
- "s", "m", "h" for seconds, minutes, or hours.
+ outage time for a node or network partition. WARNING: Setting this time too low could cause Consul
+ servers to be removed from quorum during an extended node failure or partition, which could complicate
+ recovery of the cluster. The value is a time with a unit suffix, which can be "s", "m", "h" for seconds,
+ minutes, or hours. The value must be >= 8 hours.
* `reconnect_timeout_wan` This
is the WAN equivalent of the `reconnect_timeout` parameter, which
controls how long it takes for a failed server to be completely removed from the WAN pool. This also
- defaults to 72 hours.
+ defaults to 72 hours, and must be >= 8 hours.
* `recursor` Provides a single recursor address.
This has been deprecated, and the value is appended to the [`recursors`](#recursors) list for