From 66f31cd25a160aacbd453d311c05d5f80d946408 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Thu, 10 May 2018 10:16:38 -0500 Subject: [PATCH] Make raft snapshot commit threshold configurable --- agent/agent.go | 3 +++ agent/config/builder.go | 1 + agent/config/config.go | 2 ++ agent/config/runtime.go | 6 ++++++ agent/config/runtime_test.go | 4 ++++ agent/consul/config.go | 3 +++ website/source/docs/agent/options.html.md | 6 ++++++ 7 files changed, 25 insertions(+) diff --git a/agent/agent.go b/agent/agent.go index 5da86230f2..3b77878b1b 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -763,6 +763,9 @@ func (a *Agent) consulConfig() (*consul.Config, error) { if a.config.RaftProtocol != 0 { base.RaftConfig.ProtocolVersion = raft.ProtocolVersion(a.config.RaftProtocol) } + if a.config.RaftSnapshotThreshold != 0 { + base.RaftConfig.SnapshotThreshold = uint64(a.config.RaftSnapshotThreshold) + } if a.config.ACLMasterToken != "" { base.ACLMasterToken = a.config.ACLMasterToken } diff --git a/agent/config/builder.go b/agent/config/builder.go index b3902fc208..9a2bdc1d40 100644 --- a/agent/config/builder.go +++ b/agent/config/builder.go @@ -673,6 +673,7 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) { RPCProtocol: b.intVal(c.RPCProtocol), RPCRateLimit: rate.Limit(b.float64Val(c.Limits.RPCRate)), RaftProtocol: b.intVal(c.RaftProtocol), + RaftSnapshotThreshold: b.intVal(c.RaftSnapshotThreshold), ReconnectTimeoutLAN: b.durationVal("reconnect_timeout", c.ReconnectTimeoutLAN), ReconnectTimeoutWAN: b.durationVal("reconnect_timeout_wan", c.ReconnectTimeoutWAN), RejoinAfterLeave: b.boolVal(c.RejoinAfterLeave), diff --git a/agent/config/config.go b/agent/config/config.go index 3468eb5755..58d585acee 100644 --- a/agent/config/config.go +++ b/agent/config/config.go @@ -194,6 +194,7 @@ type Config struct { Ports Ports `json:"ports,omitempty" hcl:"ports" mapstructure:"ports"` RPCProtocol *int `json:"protocol,omitempty" hcl:"protocol" mapstructure:"protocol"` 
RaftProtocol *int `json:"raft_protocol,omitempty" hcl:"raft_protocol" mapstructure:"raft_protocol"` + RaftSnapshotThreshold *int `json:"raft_snapshot_threshold,omitempty" hcl:"raft_snapshot_threshold" mapstructure:"raft_snapshot_threshold"` ReconnectTimeoutLAN *string `json:"reconnect_timeout,omitempty" hcl:"reconnect_timeout" mapstructure:"reconnect_timeout"` ReconnectTimeoutWAN *string `json:"reconnect_timeout_wan,omitempty" hcl:"reconnect_timeout_wan" mapstructure:"reconnect_timeout_wan"` RejoinAfterLeave *bool `json:"rejoin_after_leave,omitempty" hcl:"rejoin_after_leave" mapstructure:"rejoin_after_leave"` @@ -264,6 +265,7 @@ type Consul struct { ElectionTimeout *string `json:"election_timeout,omitempty" hcl:"election_timeout" mapstructure:"election_timeout"` HeartbeatTimeout *string `json:"heartbeat_timeout,omitempty" hcl:"heartbeat_timeout" mapstructure:"heartbeat_timeout"` LeaderLeaseTimeout *string `json:"leader_lease_timeout,omitempty" hcl:"leader_lease_timeout" mapstructure:"leader_lease_timeout"` + SnapshotThreshold *int `json:"snapshot_threshold,omitempty" hcl:"snapshot_threshold" mapstructure:"snapshot_threshold"` } `json:"raft,omitempty" hcl:"raft" mapstructure:"raft"` SerfLAN struct { diff --git a/agent/config/runtime.go b/agent/config/runtime.go index 4481a667a1..fe1e315f20 100644 --- a/agent/config/runtime.go +++ b/agent/config/runtime.go @@ -899,6 +899,12 @@ type RuntimeConfig struct { // hcl: raft_protocol = int RaftProtocol int + // RaftSnapshotThreshold sets the minimum threshold of raft commits after which + // a snapshot is created. Defaults to 8192 + // + // hcl: raft_snapshot_threshold = int + RaftSnapshotThreshold int + // ReconnectTimeoutLAN specifies the amount of time to wait to reconnect with // another agent before deciding it's permanently gone. This can be used to // control the time it takes to reap failed nodes from the cluster. 
diff --git a/agent/config/runtime_test.go b/agent/config/runtime_test.go index f1376e6d4f..d146fd52c1 100644 --- a/agent/config/runtime_test.go +++ b/agent/config/runtime_test.go @@ -2421,6 +2421,7 @@ func TestFullConfig(t *testing.T) { }, "protocol": 30793, "raft_protocol": 19016, + "raft_snapshot_threshold": 16384, "reconnect_timeout": "23739s", "reconnect_timeout_wan": "26694s", "recursors": [ "63.38.39.58", "92.49.18.18" ], @@ -2852,6 +2853,7 @@ func TestFullConfig(t *testing.T) { } protocol = 30793 raft_protocol = 19016 + raft_snapshot_threshold = 16384 reconnect_timeout = "23739s" reconnect_timeout_wan = "26694s" recursors = [ "63.38.39.58", "92.49.18.18" ] @@ -3409,6 +3411,7 @@ func TestFullConfig(t *testing.T) { RPCRateLimit: 12029.43, RPCMaxBurst: 44848, RaftProtocol: 19016, + RaftSnapshotThreshold: 16384, ReconnectTimeoutLAN: 23739 * time.Second, ReconnectTimeoutWAN: 26694 * time.Second, RejoinAfterLeave: true, @@ -4089,6 +4092,7 @@ func TestSanitize(t *testing.T) { "RPCProtocol": 0, "RPCRateLimit": 0, "RaftProtocol": 0, + "RaftSnapshotThreshold": 0, "ReconnectTimeoutLAN": "0s", "ReconnectTimeoutWAN": "0s", "RejoinAfterLeave": false, diff --git a/agent/consul/config.go b/agent/consul/config.go index a8a7f249be..ef7b153438 100644 --- a/agent/consul/config.go +++ b/agent/consul/config.go @@ -451,6 +451,9 @@ func DefaultConfig() *Config { // Check every 5 seconds to see if there are enough new entries for a snapshot conf.RaftConfig.SnapshotInterval = 5 * time.Second + // Snapshots are created every 8192 entries by default, can be overridden + conf.RaftConfig.SnapshotThreshold = 8192 + return conf } diff --git a/website/source/docs/agent/options.html.md b/website/source/docs/agent/options.html.md index e5f8e464f3..1d99892389 100644 --- a/website/source/docs/agent/options.html.md +++ b/website/source/docs/agent/options.html.md @@ -359,6 +359,9 @@ will exit with an error at startup. 
 [Raft Protocol Version Compatibility](/docs/upgrade-specific.html#raft-protocol-version-compatibility) for more details. +* `-raft-snapshot-threshold` - This + controls the minimum number of raft commit entries between snapshots that are saved to disk. + * `-recursor` - Specifies the address of an upstream DNS server. This option may be provided multiple times, and is functionally equivalent to the [`recursors` configuration option](#recursors). @@ -935,6 +938,9 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass * `raft_protocol` Equivalent to the [`-raft-protocol` command-line flag](#_raft_protocol). +* `raft_snapshot_threshold` Equivalent to the + [`-raft-snapshot-threshold` command-line flag](#_raft_snapshot_threshold). + * `reap` This controls Consul's automatic reaping of child processes, which is useful if Consul is running as PID 1 in a Docker container. If this isn't specified, then Consul will automatically reap child processes if it detects it is running as PID 1. If this is set to true or false, then