Makes reap time configurable for LAN and WAN.

This commit is contained in:
James Phillips 2016-04-10 22:46:07 -07:00
parent 529b24adbf
commit eedeba682b
5 changed files with 103 additions and 0 deletions

View File

@ -299,6 +299,12 @@ func (a *Agent) consulConfig() *consul.Config {
base.SerfWANConfig.MemberlistConfig.AdvertiseAddr = a.config.AdvertiseAddrs.SerfWan.IP.String() base.SerfWANConfig.MemberlistConfig.AdvertiseAddr = a.config.AdvertiseAddrs.SerfWan.IP.String()
base.SerfWANConfig.MemberlistConfig.AdvertisePort = a.config.AdvertiseAddrs.SerfWan.Port base.SerfWANConfig.MemberlistConfig.AdvertisePort = a.config.AdvertiseAddrs.SerfWan.Port
} }
if a.config.ReconnectTimeoutLan != 0 {
base.SerfLANConfig.ReconnectTimeout = a.config.ReconnectTimeoutLan
}
if a.config.ReconnectTimeoutWan != 0 {
base.SerfWANConfig.ReconnectTimeout = a.config.ReconnectTimeoutWan
}
if a.config.AdvertiseAddrs.RPC != nil { if a.config.AdvertiseAddrs.RPC != nil {
base.RPCAdvertise = a.config.AdvertiseAddrs.RPC base.RPCAdvertise = a.config.AdvertiseAddrs.RPC
} }

View File

@ -176,6 +176,43 @@ func TestAgent_CheckAdvertiseAddrsSettings(t *testing.T) {
} }
} }
// TestAgent_ReconnectConfigSettings checks the Serf LAN and WAN reconnect
// timeouts reported by consulConfig. With the config returned by nextConfig
// both are expected to be 72 hours; after ReconnectTimeoutLan and
// ReconnectTimeoutWan are set on that config, each is expected to match the
// value that was set.
func TestAgent_ReconnectConfigSettings(t *testing.T) {
	conf := nextConfig()

	// verify starts an agent from the current state of conf, compares the
	// LAN and then the WAN reconnect timeout against the expected values,
	// and tears the agent down again before returning.
	verify := func(wantLan, wantWan time.Duration) {
		dir, agent := makeAgent(t, conf)
		defer os.RemoveAll(dir)
		defer agent.Shutdown()

		if got := agent.consulConfig().SerfLANConfig.ReconnectTimeout; got != wantLan {
			t.Fatalf("bad: %s", got.String())
		}
		if got := agent.consulConfig().SerfWANConfig.ReconnectTimeout; got != wantWan {
			t.Fatalf("bad: %s", got.String())
		}
	}

	// First pass: nothing has been overridden on the config yet.
	verify(3*24*time.Hour, 3*24*time.Hour)

	// Second pass: distinct LAN and WAN values, so a mix-up between the two
	// settings would be caught.
	conf.ReconnectTimeoutLan = 2 * time.Hour
	conf.ReconnectTimeoutWan = 3 * time.Hour
	verify(2*time.Hour, 3*time.Hour)
}
func TestAgent_AddService(t *testing.T) { func TestAgent_AddService(t *testing.T) {
dir, agent := makeAgent(t, nextConfig()) dir, agent := makeAgent(t, nextConfig())
defer os.RemoveAll(dir) defer os.RemoveAll(dir)

View File

@ -312,6 +312,14 @@ type Config struct {
RetryIntervalWan time.Duration `mapstructure:"-" json:"-"` RetryIntervalWan time.Duration `mapstructure:"-" json:"-"`
RetryIntervalWanRaw string `mapstructure:"retry_interval_wan"` RetryIntervalWanRaw string `mapstructure:"retry_interval_wan"`
// ReconnectTimeout* specify the amount of time to wait to reconnect with
// another agent before deciding it's permanently gone. This can be used to
// control the time it takes to reap failed nodes from the cluster.
ReconnectTimeoutLan time.Duration `mapstructure:"-"`
ReconnectTimeoutLanRaw string `mapstructure:"reconnect_timeout"`
ReconnectTimeoutWan time.Duration `mapstructure:"-"`
ReconnectTimeoutWanRaw string `mapstructure:"reconnect_timeout_wan"`
// EnableUi enables the statically-compiled assets for the Consul web UI and // EnableUi enables the statically-compiled assets for the Consul web UI and
// serves them at the default /ui/ endpoint automatically. // serves them at the default /ui/ endpoint automatically.
EnableUi bool `mapstructure:"ui"` EnableUi bool `mapstructure:"ui"`
@ -778,6 +786,22 @@ func DecodeConfig(r io.Reader) (*Config, error) {
result.RetryIntervalWan = dur result.RetryIntervalWan = dur
} }
if raw := result.ReconnectTimeoutLanRaw; raw != "" {
dur, err := time.ParseDuration(raw)
if err != nil {
return nil, fmt.Errorf("ReconnectTimeoutLan invalid: %v", err)
}
result.ReconnectTimeoutLan = dur
}
if raw := result.ReconnectTimeoutWanRaw; raw != "" {
dur, err := time.ParseDuration(raw)
if err != nil {
return nil, fmt.Errorf("ReconnectTimeoutWan invalid: %v", err)
}
result.ReconnectTimeoutWan = dur
}
// Merge the single recursor // Merge the single recursor
if result.DNSRecursor != "" { if result.DNSRecursor != "" {
result.DNSRecursors = append(result.DNSRecursors, result.DNSRecursor) result.DNSRecursors = append(result.DNSRecursors, result.DNSRecursor)
@ -1131,6 +1155,14 @@ func MergeConfig(a, b *Config) *Config {
if b.RetryIntervalWan != 0 { if b.RetryIntervalWan != 0 {
result.RetryIntervalWan = b.RetryIntervalWan result.RetryIntervalWan = b.RetryIntervalWan
} }
if b.ReconnectTimeoutLan != 0 {
result.ReconnectTimeoutLan = b.ReconnectTimeoutLan
result.ReconnectTimeoutLanRaw = b.ReconnectTimeoutLanRaw
}
if b.ReconnectTimeoutWan != 0 {
result.ReconnectTimeoutWan = b.ReconnectTimeoutWan
result.ReconnectTimeoutWanRaw = b.ReconnectTimeoutWanRaw
}
if b.DNSConfig.NodeTTL != 0 { if b.DNSConfig.NodeTTL != 0 {
result.DNSConfig.NodeTTL = b.DNSConfig.NodeTTL result.DNSConfig.NodeTTL = b.DNSConfig.NodeTTL
} }

View File

@ -462,6 +462,19 @@ func TestDecodeConfig(t *testing.T) {
t.Fatalf("bad: %#v", config) t.Fatalf("bad: %#v", config)
} }
// Reconnect timeout LAN and WAN
input = `{"reconnect_timeout": "1m", "reconnect_timeout_wan": "2m"}`
config, err = DecodeConfig(bytes.NewReader([]byte(input)))
if err != nil {
t.Fatalf("err: %s", err)
}
if config.ReconnectTimeoutLanRaw != "1m" ||
config.ReconnectTimeoutLan.String() != "1m0s" ||
config.ReconnectTimeoutWanRaw != "2m" ||
config.ReconnectTimeoutWan.String() != "2m0s" {
t.Fatalf("bad: %#v", config)
}
// Static UI server // Static UI server
input = `{"ui": true}` input = `{"ui": true}`
config, err = DecodeConfig(bytes.NewReader([]byte(input))) config, err = DecodeConfig(bytes.NewReader([]byte(input)))
@ -1351,6 +1364,10 @@ func TestMergeConfig(t *testing.T) {
RetryJoinWan: []string{"1.1.1.1"}, RetryJoinWan: []string{"1.1.1.1"},
RetryIntervalWanRaw: "10s", RetryIntervalWanRaw: "10s",
RetryIntervalWan: 10 * time.Second, RetryIntervalWan: 10 * time.Second,
ReconnectTimeoutLanRaw: "1s",
ReconnectTimeoutLan: 1 * time.Second,
ReconnectTimeoutWanRaw: "2s",
ReconnectTimeoutWan: 2 * time.Second,
CheckUpdateInterval: 8 * time.Minute, CheckUpdateInterval: 8 * time.Minute,
CheckUpdateIntervalRaw: "8m", CheckUpdateIntervalRaw: "8m",
ACLToken: "1234", ACLToken: "1234",

View File

@ -580,6 +580,17 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass
automatically reap child processes if it detects it is running as PID 1. If this is set to true or false, then automatically reap child processes if it detects it is running as PID 1. If this is set to true or false, then
it controls reaping regardless of Consul's PID (forces reaping on or off, respectively). it controls reaping regardless of Consul's PID (forces reaping on or off, respectively).
* <a name="reconnect_timeout"></a><a href="#reconnect_timeout">`reconnect_timeout`</a> This controls
how long it takes for a failed node to be completely removed from the cluster. This defaults to
72 hours, and it is recommended that this be set to at least double the maximum expected recoverable
outage time for a node or network partition. The value is a time with a unit suffix, which can be
"s", "m", or "h" for seconds, minutes, or hours (for example, "72h").
* <a name="reconnect_timeout_wan"></a><a href="#reconnect_timeout_wan">`reconnect_timeout_wan`</a> This
is the WAN equivalent of the <a href="#reconnect_timeout">`reconnect_timeout`</a> parameter: it
controls how long it takes for a failed server to be completely removed from the WAN pool. This also
defaults to 72 hours, and the value is a time with the same unit suffixes.
* <a name="recursor"></a><a href="#recursor">`recursor`</a> Provides a single recursor address. * <a name="recursor"></a><a href="#recursor">`recursor`</a> Provides a single recursor address.
This has been deprecated, and the value is appended to the [`recursors`](#recursors) list for This has been deprecated, and the value is appended to the [`recursors`](#recursors) list for
backwards compatibility. backwards compatibility.