Sets an anti-footgun floor for the configurable reap time.

This commit is contained in:
James Phillips 2016-04-10 23:31:16 -07:00
parent 7f55f9825f
commit cf00c11221
4 changed files with 35 additions and 17 deletions

View File

@ -194,20 +194,20 @@ func TestAgent_ReconnectConfigSettings(t *testing.T) {
}
}()
c.ReconnectTimeoutLan = 2 * time.Hour
c.ReconnectTimeoutWan = 3 * time.Hour
c.ReconnectTimeoutLan = 24 * time.Hour
c.ReconnectTimeoutWan = 36 * time.Hour
func() {
dir, agent := makeAgent(t, c)
defer os.RemoveAll(dir)
defer agent.Shutdown()
lan := agent.consulConfig().SerfLANConfig.ReconnectTimeout
if lan != 2*time.Hour {
if lan != 24*time.Hour {
t.Fatalf("bad: %s", lan.String())
}
wan := agent.consulConfig().SerfWANConfig.ReconnectTimeout
if wan != 3*time.Hour {
if wan != 36*time.Hour {
t.Fatalf("bad: %s", wan.String())
}
}()

View File

@ -786,19 +786,25 @@ func DecodeConfig(r io.Reader) (*Config, error) {
result.RetryIntervalWan = dur
}
const reconnectTimeoutMin = 8 * time.Hour
if raw := result.ReconnectTimeoutLanRaw; raw != "" {
dur, err := time.ParseDuration(raw)
if err != nil {
return nil, fmt.Errorf("ReconnectTimeoutLan invalid: %v", err)
}
if dur < reconnectTimeoutMin {
return nil, fmt.Errorf("ReconnectTimeoutLan must be >= %s", reconnectTimeoutMin.String())
}
result.ReconnectTimeoutLan = dur
}
if raw := result.ReconnectTimeoutWanRaw; raw != "" {
dur, err := time.ParseDuration(raw)
if err != nil {
return nil, fmt.Errorf("ReconnectTimeoutWan invalid: %v", err)
}
if dur < reconnectTimeoutMin {
return nil, fmt.Errorf("ReconnectTimeoutWan must be >= %s", reconnectTimeoutMin.String())
}
result.ReconnectTimeoutWan = dur
}

View File

@ -463,17 +463,27 @@ func TestDecodeConfig(t *testing.T) {
}
// Reconnect timeout LAN and WAN
input = `{"reconnect_timeout": "1m", "reconnect_timeout_wan": "2m"}`
input = `{"reconnect_timeout": "8h", "reconnect_timeout_wan": "10h"}`
config, err = DecodeConfig(bytes.NewReader([]byte(input)))
if err != nil {
t.Fatalf("err: %s", err)
}
if config.ReconnectTimeoutLanRaw != "1m" ||
config.ReconnectTimeoutLan.String() != "1m0s" ||
config.ReconnectTimeoutWanRaw != "2m" ||
config.ReconnectTimeoutWan.String() != "2m0s" {
if config.ReconnectTimeoutLanRaw != "8h" ||
config.ReconnectTimeoutLan.String() != "8h0m0s" ||
config.ReconnectTimeoutWanRaw != "10h" ||
config.ReconnectTimeoutWan.String() != "10h0m0s" {
t.Fatalf("bad: %#v", config)
}
input = `{"reconnect_timeout": "7h"}`
config, err = DecodeConfig(bytes.NewReader([]byte(input)))
if err == nil {
t.Fatalf("decode should have failed")
}
input = `{"reconnect_timeout_wan": "7h"}`
config, err = DecodeConfig(bytes.NewReader([]byte(input)))
if err == nil {
t.Fatalf("decode should have failed")
}
// Static UI server
input = `{"ui": true}`
@ -1364,10 +1374,10 @@ func TestMergeConfig(t *testing.T) {
RetryJoinWan: []string{"1.1.1.1"},
RetryIntervalWanRaw: "10s",
RetryIntervalWan: 10 * time.Second,
ReconnectTimeoutLanRaw: "1s",
ReconnectTimeoutLan: 1 * time.Second,
ReconnectTimeoutWanRaw: "2s",
ReconnectTimeoutWan: 2 * time.Second,
ReconnectTimeoutLanRaw: "24h",
ReconnectTimeoutLan: 24 * time.Hour,
ReconnectTimeoutWanRaw: "36h",
ReconnectTimeoutWan: 36 * time.Hour,
CheckUpdateInterval: 8 * time.Minute,
CheckUpdateIntervalRaw: "8m",
ACLToken: "1234",

View File

@ -583,13 +583,15 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass
* <a name="reconnect_timeout"></a><a href="#reconnect_timeout">`reconnect_timeout`</a> This controls
how long it takes for a failed node to be completely removed from the cluster. This defaults to
72 hours, and it is recommended that this be set to at least double the maximum expected recoverable
outage time for a node or network partition. The value is a time with a unit suffix, which can be
"s", "m", "h" for seconds, minutes, or hours.
outage time for a node or network partition. WARNING: Setting this time too low could cause Consul
servers to be removed from quorum during an extended node failure or partition, which could complicate
recovery of the cluster. The value is a time with a unit suffix, which can be "s", "m", or "h" for seconds,
minutes, or hours, respectively. The value must be >= 8 hours.
* <a name="reconnect_timeout_wan"></a><a href="#reconnect_timeout_wan">`reconnect_timeout_wan`</a> This
is the WAN equivalent of the <a href="#reconnect_timeout">`reconnect_timeout`</a> parameter, which
controls how long it takes for a failed server to be completely removed from the WAN pool. This also
defaults to 72 hours.
defaults to 72 hours, and must be >= 8 hours.
* <a name="recursor"></a><a href="#recursor">`recursor`</a> Provides a single recursor address.
This has been deprecated, and the value is appended to the [`recursors`](#recursors) list for