Adds performance tuning capability for Raft, detuned defaults, and supplemental docs.

This commit is contained in:
James Phillips 2016-08-24 17:33:53 -07:00
parent 36dc9201f2
commit 57db4bcce6
No known key found for this signature in database
GPG Key ID: 77183E682AC5FC11
13 changed files with 263 additions and 27 deletions

View File

@ -261,6 +261,11 @@ func (a *Agent) consulConfig() *consul.Config {
// Apply dev mode
base.DevMode = a.config.DevMode
// Apply performance factors
if a.config.Performance.RaftMultiplier > 0 {
base.ScaleRaft(a.config.Performance.RaftMultiplier)
}
// Override with our config
if a.config.Datacenter != "" {
base.Datacenter = a.config.Datacenter

View File

@ -17,6 +17,7 @@ import (
"github.com/hashicorp/consul/consul"
"github.com/hashicorp/consul/consul/structs"
"github.com/hashicorp/consul/testutil"
"github.com/hashicorp/raft"
)
const (
@ -191,6 +192,46 @@ func TestAgent_CheckAdvertiseAddrsSettings(t *testing.T) {
}
}
// TestAgent_CheckPerformanceSettings verifies that the Raft timing parameters
// on the agent's Consul config are the Raft library defaults scaled by the
// effective multiplier, both when no multiplier is configured and when an
// explicit one is supplied.
func TestAgent_CheckPerformanceSettings(t *testing.T) {
	// Each case runs inside its own function literal so that its deferred
	// cleanup (agent shutdown, data dir removal) fires as soon as the case
	// finishes. Defers placed in a bare block would not run until the whole
	// test returned, leaving the first agent alive while the second starts.

	// Try a default config.
	func() {
		c := nextConfig()
		c.ConsulConfig = nil
		dir, agent := makeAgent(t, c)
		defer os.RemoveAll(dir)
		defer agent.Shutdown()

		// With no override the timings should be scaled by the built-in
		// default multiplier.
		raftMult := time.Duration(consul.DefaultRaftMultiplier)
		r := agent.consulConfig().RaftConfig
		def := raft.DefaultConfig()
		if r.HeartbeatTimeout != raftMult*def.HeartbeatTimeout ||
			r.ElectionTimeout != raftMult*def.ElectionTimeout ||
			r.CommitTimeout != raftMult*def.CommitTimeout ||
			r.LeaderLeaseTimeout != raftMult*def.LeaderLeaseTimeout {
			t.Fatalf("bad: %#v", *r)
		}
	}()

	// Try a multiplier.
	func() {
		c := nextConfig()
		c.Performance.RaftMultiplier = 99
		dir, agent := makeAgent(t, c)
		defer os.RemoveAll(dir)
		defer agent.Shutdown()

		// The explicit multiplier must win over the default scaling.
		const raftMult time.Duration = 99
		r := agent.consulConfig().RaftConfig
		def := raft.DefaultConfig()
		if r.HeartbeatTimeout != raftMult*def.HeartbeatTimeout ||
			r.ElectionTimeout != raftMult*def.ElectionTimeout ||
			r.CommitTimeout != raftMult*def.CommitTimeout ||
			r.LeaderLeaseTimeout != raftMult*def.LeaderLeaseTimeout {
			t.Fatalf("bad: %#v", *r)
		}
	}()
}
func TestAgent_ReconnectConfigSettings(t *testing.T) {
c := nextConfig()
func() {

View File

@ -111,6 +111,14 @@ type DNSConfig struct {
DisableCompression bool `mapstructure:"disable_compression"`
}
// Performance is used to tune the performance of Consul's subsystems. It is
// decoded from the "performance" object of the agent configuration.
type Performance struct {
// RaftMultiplier is an integer multiplier used to scale Raft timing
// parameters: HeartbeatTimeout, ElectionTimeout, CommitTimeout, and
// LeaderLeaseTimeout. A value of zero is treated as "not set": the
// agent only rescales Raft, and MergeConfig only propagates the value,
// when it is greater than zero.
RaftMultiplier uint `mapstructure:"raft_multiplier"`
}
// Telemetry is the telemetry configuration for the server
type Telemetry struct {
// StatsiteAddr is the address of a statsite instance. If provided,
@ -205,10 +213,13 @@ type Telemetry struct {
// Some of this is configurable as CLI flags, but most must
// be set using a configuration file.
type Config struct {
// DevMode enables a fast-path mode of opertaion to bring up an in-memory
// DevMode enables a fast-path mode of operation to bring up an in-memory
// server with minimal configuration. Useful for developing Consul.
DevMode bool `mapstructure:"-"`
// Performance is used to tune the performance of Consul's subsystems.
Performance Performance `mapstructure:"performance"`
// Bootstrap is used to bring up the first Consul server, and
// permits that node to elect itself leader
Bootstrap bool `mapstructure:"bootstrap"`
@ -1085,6 +1096,11 @@ func DecodeCheckDefinition(raw interface{}) (*CheckDefinition, error) {
func MergeConfig(a, b *Config) *Config {
var result Config = *a
// Propagate non-default performance settings
if b.Performance.RaftMultiplier > 0 {
result.Performance.RaftMultiplier = b.Performance.RaftMultiplier
}
// Copy the strings if they're set
if b.Bootstrap {
result.Bootstrap = true

View File

@ -957,6 +957,17 @@ func TestDecodeConfig_invalidKeys(t *testing.T) {
}
}
// TestDecodeConfig_Performance checks that a "performance" object in the JSON
// configuration is decoded into Config.Performance.
func TestDecodeConfig_Performance(t *testing.T) {
	const want = 3

	raw := []byte(`{"performance": { "raft_multiplier": 3 }}`)
	decoded, decodeErr := DecodeConfig(bytes.NewReader(raw))
	if decodeErr != nil {
		t.Fatalf("err: %s", decodeErr)
	}

	// The nested raft_multiplier key must land on the Performance struct.
	if got := decoded.Performance.RaftMultiplier; got != want {
		t.Fatalf("bad: multiplier isn't set: %#v", decoded)
	}
}
func TestDecodeConfig_Services(t *testing.T) {
input := `{
"services": [
@ -1382,6 +1393,9 @@ func TestMergeConfig(t *testing.T) {
}
b := &Config{
Performance: Performance{
RaftMultiplier: 99,
},
Bootstrap: true,
BootstrapExpect: 3,
Datacenter: "dc2",

View File

@ -17,6 +17,10 @@ const (
DefaultDC = "dc1"
DefaultLANSerfPort = 8301
DefaultWANSerfPort = 8302
// See docs/guides/performance.html for information on how this value
// was obtained.
DefaultRaftMultiplier uint = 5
)
var (
@ -333,6 +337,7 @@ func DefaultConfig() *Config {
// Enable interoperability with unversioned Raft library, and don't
// start using new ID-based features yet.
conf.RaftConfig.ProtocolVersion = 1
conf.ScaleRaft(DefaultRaftMultiplier)
// Disable shutdown on removal
conf.RaftConfig.ShutdownOnRemove = false
@ -340,6 +345,20 @@ func DefaultConfig() *Config {
return conf
}
// ScaleRaft rewrites the Raft timing parameters (heartbeat, election, commit,
// and leader lease timeouts) as the Raft library defaults multiplied by the
// given performance multiplier. Because every value is derived from the library
// defaults rather than from the current settings, the scaling does not compound:
// calling this repeatedly while composing configurations is safe, and the most
// recent multiplier wins.
func (c *Config) ScaleRaft(raftMultRaw uint) {
	// Convert once so the multiplication below stays in time.Duration.
	scale := time.Duration(raftMultRaw)
	base := raft.DefaultConfig()

	timing := c.RaftConfig
	timing.HeartbeatTimeout = scale * base.HeartbeatTimeout
	timing.ElectionTimeout = scale * base.ElectionTimeout
	timing.CommitTimeout = scale * base.CommitTimeout
	timing.LeaderLeaseTimeout = scale * base.LeaderLeaseTimeout
}
func (c *Config) tlsConfig() *tlsutil.Config {
tlsConf := &tlsutil.Config{
VerifyIncoming: c.VerifyIncoming,

View File

@ -820,11 +820,12 @@ func (s *Server) Stats() map[string]map[string]string {
s.remoteLock.RUnlock()
stats := map[string]map[string]string{
"consul": map[string]string{
"server": "true",
"leader": fmt.Sprintf("%v", s.IsLeader()),
"leader_addr": string(s.raft.Leader()),
"bootstrap": fmt.Sprintf("%v", s.config.Bootstrap),
"known_datacenters": toString(uint64(numKnownDCs)),
"server": "true",
"leader": fmt.Sprintf("%v", s.IsLeader()),
"leader_addr": string(s.raft.Leader()),
"bootstrap": fmt.Sprintf("%v", s.config.Bootstrap),
"known_datacenters": toString(uint64(numKnownDCs)),
"leader_lease_timeout": fmt.Sprintf("%v", s.config.RaftConfig.LeaderLeaseTimeout),
},
"raft": s.raft.Stats(),
"serf_lan": s.serfLAN.Stats(),

View File

@ -32,6 +32,11 @@ import (
// offset is used to atomically increment the port numbers.
var offset uint64
// TestPerformanceConfig configures the performance parameters. It is
// serialized as the "performance" object of TestServerConfig.
type TestPerformanceConfig struct {
// RaftMultiplier is written as "raft_multiplier"; a zero value is
// omitted from the generated JSON.
RaftMultiplier uint `json:"raft_multiplier,omitempty"`
}
// TestPortConfig configures the various ports used for services
// provided by the Consul server.
type TestPortConfig struct {
@ -51,20 +56,21 @@ type TestAddressConfig struct {
// TestServerConfig is the main server configuration struct.
type TestServerConfig struct {
NodeName string `json:"node_name"`
Bootstrap bool `json:"bootstrap,omitempty"`
Server bool `json:"server,omitempty"`
DataDir string `json:"data_dir,omitempty"`
Datacenter string `json:"datacenter,omitempty"`
DisableCheckpoint bool `json:"disable_update_check"`
LogLevel string `json:"log_level,omitempty"`
Bind string `json:"bind_addr,omitempty"`
Addresses *TestAddressConfig `json:"addresses,omitempty"`
Ports *TestPortConfig `json:"ports,omitempty"`
ACLMasterToken string `json:"acl_master_token,omitempty"`
ACLDatacenter string `json:"acl_datacenter,omitempty"`
ACLDefaultPolicy string `json:"acl_default_policy,omitempty"`
Stdout, Stderr io.Writer `json:"-"`
NodeName string `json:"node_name"`
Performance *TestPerformanceConfig `json:"performance,omitempty"`
Bootstrap bool `json:"bootstrap,omitempty"`
Server bool `json:"server,omitempty"`
DataDir string `json:"data_dir,omitempty"`
Datacenter string `json:"datacenter,omitempty"`
DisableCheckpoint bool `json:"disable_update_check"`
LogLevel string `json:"log_level,omitempty"`
Bind string `json:"bind_addr,omitempty"`
Addresses *TestAddressConfig `json:"addresses,omitempty"`
Ports *TestPortConfig `json:"ports,omitempty"`
ACLMasterToken string `json:"acl_master_token,omitempty"`
ACLDatacenter string `json:"acl_datacenter,omitempty"`
ACLDefaultPolicy string `json:"acl_default_policy,omitempty"`
Stdout, Stderr io.Writer `json:"-"`
}
// ServerConfigCallback is a function interface which can be
@ -79,11 +85,14 @@ func defaultServerConfig() *TestServerConfig {
return &TestServerConfig{
NodeName: fmt.Sprintf("node%d", idx),
DisableCheckpoint: true,
Bootstrap: true,
Server: true,
LogLevel: "debug",
Bind: "127.0.0.1",
Addresses: &TestAddressConfig{},
Performance: &TestPerformanceConfig{
RaftMultiplier: 1,
},
Bootstrap: true,
Server: true,
LogLevel: "debug",
Bind: "127.0.0.1",
Addresses: &TestAddressConfig{},
Ports: &TestPortConfig{
DNS: 20000 + idx,
HTTP: 21000 + idx,

View File

@ -576,6 +576,24 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass
* <a name="node_name"></a><a href="#node_name">`node_name`</a> Equivalent to the
[`-node` command-line flag](#_node).
* <a name="performance"></a><a href="#performance">`performance`</a> Available in Consul 0.7 and
later, this is a nested object that allows tuning the performance of different subsystems in
Consul. See the [Server Performance](/docs/guides/performance.html) guide for more details. The
following parameters are available:
* <a name="raft_multiplier"></a><a href="#raft_multiplier">`raft_multiplier`</a> - An integer
multiplier used by Consul servers to scale key Raft timing parameters. Tuning this affects
the time it takes Consul to detect leader failures and to perform leader elections, at the
expense of requiring more network and CPU resources for better performance.<br><br>A value
of 0, the default, means that Consul will use a lower-performance timing that's suitable for
[minimal Consul servers](/docs/guides/performance.html#minimum), currently equivalent to
setting this to a value of 5 (this default may be changed in future versions of Consul,
depending on whether the target minimum server profile changes). Above 0, higher values imply lower
levels of performance. Setting this to a value of 1 will configure Raft to its
highest-performance mode, equivalent to the default timing of Consul prior to 0.7, and is
recommended for [production Consul servers](/docs/guides/performance.html#production). See
the note on [last contact](/docs/guides/performance.html#last-contact) timing for more
details on tuning this parameter.
* <a name="ports"></a><a href="#ports">`ports`</a> This is a nested object that allows setting
the bind ports for the following keys:
* <a name="dns_port"></a><a href="#dns_port">`dns`</a> - The DNS server, -1 to disable. Default 8600.

View File

@ -129,8 +129,8 @@ These metrics are used to monitor the health of the Consul servers.
<td>timer</td>
</tr>
<tr>
<td>`consul.raft.leader.lastContact`</td>
<td>This measures the time that a Consul server was last contacted by the leader (will be zero on the leader itself). This is a general indicator of latency in the Raft subsystem, and gives a general indicator of how far behind [stale](/docs/agent/http.html#consistency) queries will be.</td>
<td><a name="last-contact"></a>`consul.raft.leader.lastContact`</td>
<td>This will only be emitted by the Raft leader and measures the time since the leader was last able to contact the follower nodes when checking its leader lease. It can be used as a measure for how stable the Raft timing is and how close the leader is to timing out its lease.<br><br>The lease timeout is 500 ms times the [`raft_multiplier` configuration](/docs/agent/options.html#raft_multiplier), so this telemetry value should not be getting close to that configured value, otherwise the Raft timing is marginal and might need to be tuned, or more powerful servers might be needed. See the [Server Performance](/docs/guides/performance.html) guide for more details.</td>
<td>ms</td>
<td>timer</td>
</tr>

View File

@ -20,6 +20,7 @@ for each lookup and can potentially exhaust the query throughput of a cluster.
For this reason, Consul provides a number of tuning parameters that can
customize how DNS queries are handled.
<a name="stale"></a>
## Stale Reads
Stale reads can be used to reduce latency and increase the throughput
@ -60,6 +61,7 @@ client and Consul and set the cache values appropriately. In many cases
"appropriately" simply is turning negative response caching off to get the best
recovery time when a service becomes available again.
<a name="ttl"></a>
## TTL Values
TTL values can be set to allow DNS results to be cached downstream of Consul. Higher

View File

@ -0,0 +1,86 @@
---
layout: "docs"
page_title: "Server Performance"
sidebar_current: "docs-guides-performance"
description: |-
Consul requires different amounts of compute resources, depending on cluster size and expected workload. This guide provides guidance on choosing compute resources.
---
# Server Performance
Since Consul servers run a [consensus protocol](/docs/internals/consensus.html) to
process all write operations and are contacted on nearly all read operations, server
performance is critical for overall throughput and health of a Consul cluster. Servers
are generally I/O bound for writes because the underlying Raft log store performs a sync
to disk every time an entry is appended. Servers are generally CPU bound for reads since
reads work from a fully in-memory data store that is optimized for concurrent access.
<a name="minimum"></a>
## Minimum Server Requirements
In Consul 0.7, the default server [performance parameters](/docs/agent/options.html#performance)
were tuned to allow Consul to run reliably (but relatively slowly) on a server cluster of three
[AWS t2.micro](https://aws.amazon.com/ec2/instance-types/) instances. These thresholds
were determined empirically using a leader instance that was under sufficient read, write,
and network load to cause it to permanently be at zero CPU credits, forcing it to the baseline
performance mode for that instance type. Real-world workloads typically have more bursts of
activity, so this is a conservative and pessimistic tuning strategy.
This default was chosen based on feedback from users, many of whom wanted a low cost way
to run small production or development clusters with low cost compute resources, at the
expense of some performance in leader failure detection and leader election times.
The default performance configuration is equivalent to this:
```javascript
{
"performance": {
"raft_multiplier": 5
}
}
```
<a name="production"></a>
## Production Server Requirements
When running Consul 0.7 and later in production, it is recommended to configure the server
[performance parameters](/docs/agent/options.html#performance) back to Consul's original
high-performance settings. This will let Consul servers detect a failed leader and complete
leader elections much more quickly than the default configuration which extends key Raft
timeouts by a factor of 5, so it is quite slow during these events.
The high performance configuration is simple and looks like this:
```javascript
{
"performance": {
"raft_multiplier": 1
}
}
```
It's best to benchmark with a realistic workload when choosing a production server for Consul.
Here are some general recommendations:
* For write-heavy workloads, disk speed on the servers is key for performance. Use SSDs or
another fast disk technology for the best write throughput.
* <a name="last-contact"></a>Spurious leader elections can be caused by networking issues between
the servers or lack of CPU. Users in cloud environments often bump their servers up to the next
instance class with improved networking and CPU until leader elections stabilize, and in Consul
0.7 or later the [performance parameters](/docs/agent/options.html#performance) configuration
now gives you the option to trade off performance instead of upsizing servers. You can use the
[`consul.raft.leader.lastContact` telemetry](/docs/agent/telemetry.html#last-contact) to help
observe how the Raft timing is performing and decide if de-tuning Raft performance or adding
more powerful servers might be needed.
* For DNS-heavy workloads, configuring all Consul agents in a cluster with the
[`allow_stale`](/docs/agent/options.html#allow_stale) configuration option will allow reads to
scale across all Consul servers, not just the leader. See [Stale Reads](/docs/guides/dns-cache.html#stale)
in the [DNS Caching](/docs/guides/dns-cache.html) guide for more details. It's also good to set
reasonable, non-zero [DNS TTL values](/docs/guides/dns-cache.html#ttl) if your clients will
respect them.
* In other applications that perform high volumes of reads against Consul, consider using the
[stale consistency mode](/docs/agent/http.html#consistency) available to allow reads to scale
across all the servers and not just be forwarded to the leader.

View File

@ -19,6 +19,27 @@ standard upgrade flow.
Consul version 0.7 is a very large release with many important changes. Changes
to be aware of during an upgrade are categorized below.
#### Performance Tuning and New Defaults
Consul 0.7 introduced support for tuning Raft performance using a new
[performance configuration block](/docs/agent/options.html#performance). Also,
the default Raft timing is set to a lower-performance mode suitable for
[minimal Consul servers](/docs/guides/performance.html#minimum).
To continue to use the high-performance settings that were the default prior to
Consul 0.7 (recommended for production servers), add the following configuration
to all Consul servers when upgrading:
```javascript
{
"performance": {
"raft_multiplier": 1
}
}
```
See the [Server Performance](/docs/guides/performance.html) guide for more details.
#### Default Configuration Changes
The default behavior of [`skip_leave_on_interrupt`](/docs/agent/options.html#skip_leave_on_interrupt)

View File

@ -228,6 +228,10 @@
<a href="/docs/guides/atlas.html">Atlas Integration</a>
</li>
<li<%= sidebar_current("docs-guides-performance") %>>
<a href="/docs/guides/performance.html">Server Performance</a>
</li>
<li<%= sidebar_current("docs-guides-servers") %>>
<a href="/docs/guides/servers.html">Adding/Removing Servers</a>
</li>