Refactor to call non-voting servers read replicas (#9191)

Co-authored-by: Kit Patella <kit@jepsen.io>
This commit is contained in:
Matt Keeler 2020-11-17 10:53:57 -05:00 committed by hashicorp-ci
parent d3e379b712
commit dfaaa0b73a
21 changed files with 141 additions and 38 deletions

18
.changelog/9191.txt Normal file
View File

@ -0,0 +1,18 @@
```release-note:deprecation
cli: **(Enterprise only)** The `-non-voting-server` flag is deprecated in favor of the new `-read-replica` flag. The `-non-voting-server` flag is still present along side the new flag but it will be removed in a future release.
```
```release-note:improvement
cli: **(Enterprise only)** A new `-read-replica` flag can now be used to enable running a server as a read only replica. Previously this was enabled with the now deprecated `-non-voting-server` flag.
```
```release-note:deprecation
config: **(Enterprise only)** The `non_voting_server` configuration setting is deprecated in favor of the new `read_replica` setting. The `non_voting_server` configuration setting is still present but will be removed in a future release.
```
```release-note:improvement
config: **(Enterprise only)** A new `read_replica` configuration setting can now be used to enable running a server as a read only replica. Previously this was enabled with the now deprecated `non_voting_server` setting.
```
```release-note:deprecation
server: **(Enterprise only)** Addition of the `nonvoter` tag to the service registration made for read replicas is deprecated in favor of the new tag name of `read_replica`. Both are present in the registration but the `nonvoter` tag will be completely removed in a future release.
```
```release-note:deprecation
gossip: **(Enterprise only)** Read replicas now advertise themselves by setting the `read_replica` tag. The old `nonvoter` tag is still present but is deprecated and will be removed in a future release.
```

View File

@ -1110,8 +1110,8 @@ func newConsulConfig(runtimeCfg *config.RuntimeConfig, logger hclog.Logger) (*co
if runtimeCfg.SessionTTLMin != 0 {
cfg.SessionTTLMin = runtimeCfg.SessionTTLMin
}
if runtimeCfg.NonVotingServer {
cfg.NonVoter = runtimeCfg.NonVotingServer
if runtimeCfg.ReadReplica {
cfg.ReadReplica = runtimeCfg.ReadReplica
}
// These are fully specified in the agent defaults, so we can simply

View File

@ -1034,7 +1034,7 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
NodeID: types.NodeID(b.stringVal(c.NodeID)),
NodeMeta: c.NodeMeta,
NodeName: b.nodeName(c.NodeName),
NonVotingServer: b.boolVal(c.NonVotingServer),
ReadReplica: b.boolVal(c.ReadReplica),
PidFile: b.stringVal(c.PidFile),
PrimaryDatacenter: primaryDatacenter,
PrimaryGateways: b.expandAllOptionalAddrs("primary_gateways", c.PrimaryGateways),

View File

@ -13,6 +13,9 @@ var (
"non_voting_server": func(c *Config) {
// to maintain existing compatibility we don't nullify the value
},
"read_replica": func(c *Config) {
// to maintain existing compatibility we don't nullify the value
},
"segment": func(c *Config) {
// to maintain existing compatibility we don't nullify the value
},

View File

@ -24,11 +24,18 @@ func TestBuilder_validateEnterpriseConfigKeys(t *testing.T) {
cases := map[string]testCase{
"non_voting_server": {
config: Config{
NonVotingServer: &boolVal,
ReadReplica: &boolVal,
},
keys: []string{"non_voting_server"},
badKeys: []string{"non_voting_server"},
},
"read_replica": {
config: Config{
ReadReplica: &boolVal,
},
keys: []string{"read_replica"},
badKeys: []string{"read_replica"},
},
"segment": {
config: Config{
SegmentName: &stringVal,
@ -118,11 +125,11 @@ func TestBuilder_validateEnterpriseConfigKeys(t *testing.T) {
},
"multi": {
config: Config{
NonVotingServer: &boolVal,
SegmentName: &stringVal,
ReadReplica: &boolVal,
SegmentName: &stringVal,
},
keys: []string{"non_voting_server", "segment", "acl.tokens.agent_master"},
badKeys: []string{"non_voting_server", "segment"},
keys: []string{"non_voting_server", "read_replica", "segment", "acl.tokens.agent_master"},
badKeys: []string{"non_voting_server", "read_replica", "segment"},
},
}

View File

@ -289,7 +289,7 @@ type Config struct {
// Enterprise Only
Audit *Audit `json:"audit,omitempty" hcl:"audit" mapstructure:"audit"`
// Enterprise Only
NonVotingServer *bool `json:"non_voting_server,omitempty" hcl:"non_voting_server" mapstructure:"non_voting_server"`
ReadReplica *bool `json:"read_replica,omitempty" hcl:"read_replica" mapstructure:"read_replica" alias:"non_voting_server"`
// Enterprise Only
SegmentName *string `json:"segment,omitempty" hcl:"segment" mapstructure:"segment"`
// Enterprise Only

View File

@ -89,7 +89,8 @@ func AddFlags(fs *flag.FlagSet, f *BuilderOpts) {
add(&f.Config.NodeName, "node", "Name of this node. Must be unique in the cluster.")
add(&f.Config.NodeID, "node-id", "A unique ID for this node across space and time. Defaults to a randomly-generated ID that persists in the data-dir.")
add(&f.Config.NodeMeta, "node-meta", "An arbitrary metadata key/value pair for this node, of the format `key:value`. Can be specified multiple times.")
add(&f.Config.NonVotingServer, "non-voting-server", "(Enterprise-only) This flag is used to make the server not participate in the Raft quorum, and have it only receive the data replication stream. This can be used to add read scalability to a cluster in cases where a high volume of reads to servers are needed.")
add(&f.Config.ReadReplica, "non-voting-server", "(Enterprise-only) DEPRECATED: -read-replica should be used instead")
add(&f.Config.ReadReplica, "read-replica", "(Enterprise-only) This flag is used to make the server not participate in the Raft quorum, and have it only receive the data replication stream. This can be used to add read scalability to a cluster in cases where a high volume of reads to servers are needed.")
add(&f.Config.PidFile, "pid-file", "Path to file to store agent PID.")
add(&f.Config.RPCProtocol, "protocol", "Sets the protocol version. Defaults to latest.")
add(&f.Config.RaftProtocol, "raft-protocol", "Sets the Raft protocol version. Defaults to latest.")

View File

@ -845,12 +845,12 @@ type RuntimeConfig struct {
// flag: -node-meta "key:value" -node-meta "key:value" ...
NodeMeta map[string]string
// NonVotingServer is whether this server will act as a non-voting member
// ReadReplica is whether this server will act as a non-voting member
// of the cluster to help provide read scalability. (Enterprise-only)
//
// hcl: non_voting_server = (true|false)
// flag: -non-voting-server
NonVotingServer bool
ReadReplica bool
// PidFile is the file to store our PID in.
//

View File

@ -12,12 +12,16 @@ var entTokenConfigSanitize = `"EnterpriseConfig": {},`
func entFullRuntimeConfig(rt *RuntimeConfig) {}
var enterpriseNonVotingServerWarnings []string = []string{enterpriseConfigKeyError{key: "non_voting_server"}.Error()}
var enterpriseReadReplicaWarnings []string = []string{enterpriseConfigKeyError{key: "read_replica"}.Error()}
var enterpriseConfigKeyWarnings []string
func init() {
for k := range enterpriseConfigMap {
if k == "non_voting_server" {
// this is an alias for "read_replica" so we shouldn't see it in warnings
continue
}
enterpriseConfigKeyWarnings = append(enterpriseConfigKeyWarnings, enterpriseConfigKeyError{key: k}.Error())
}
}

View File

@ -610,10 +610,10 @@ func TestBuilder_BuildAndValidate_ConfigFlagsAndEdgecases(t *testing.T) {
`-data-dir=` + dataDir,
},
patch: func(rt *RuntimeConfig) {
rt.NonVotingServer = true
rt.ReadReplica = true
rt.DataDir = dataDir
},
warns: enterpriseNonVotingServerWarnings,
warns: enterpriseReadReplicaWarnings,
},
{
desc: "-pid-file",
@ -5315,6 +5315,7 @@ func TestFullConfig(t *testing.T) {
"raft_snapshot_threshold": 16384,
"raft_snapshot_interval": "30s",
"raft_trailing_logs": 83749,
"read_replica": true,
"reconnect_timeout": "23739s",
"reconnect_timeout_wan": "26694s",
"recursors": [ "63.38.39.58", "92.49.18.18" ],
@ -6004,6 +6005,7 @@ func TestFullConfig(t *testing.T) {
raft_snapshot_threshold = 16384
raft_snapshot_interval = "30s"
raft_trailing_logs = 83749
read_replica = true
reconnect_timeout = "23739s"
reconnect_timeout_wan = "26694s"
recursors = [ "63.38.39.58", "92.49.18.18" ]
@ -6749,7 +6751,7 @@ func TestFullConfig(t *testing.T) {
NodeID: types.NodeID("AsUIlw99"),
NodeMeta: map[string]string{"5mgGQMBk": "mJLtVMSG", "A7ynFMJB": "0Nx6RGab"},
NodeName: "otlLxGaI",
NonVotingServer: true,
ReadReplica: true,
PidFile: "43xN80Km",
PrimaryDatacenter: "ejtmd43d",
PrimaryGateways: []string{"aej8eeZo", "roh2KahS"},
@ -7684,13 +7686,13 @@ func TestSanitize(t *testing.T) {
"NodeID": "",
"NodeMeta": {},
"NodeName": "",
"NonVotingServer": false,
"PidFile": "",
"PrimaryDatacenter": "",
"PrimaryGateways": [
"pmgw_foo=bar pmgw_key=baz pmgw_secret=boom pmgw_bang=bar"
],
"PrimaryGatewaysInterval": "0s",
"ReadReplica": false,
"RPCAdvertiseAddr": "",
"RPCBindAddr": "",
"RPCHandshakeTimeout": "0s",

View File

@ -110,9 +110,9 @@ type Config struct {
// RaftConfig is the configuration used for Raft in the local DC
RaftConfig *raft.Config
// (Enterprise-only) NonVoter is used to prevent this server from being added
// (Enterprise-only) ReadReplica is used to prevent this server from being added
// as a voting member of the Raft cluster.
NonVoter bool
ReadReplica bool
// NotifyListen is called after the RPC listener has been configured.
// RPCAdvertise will be set to the listener address if it hasn't been

View File

@ -1230,7 +1230,9 @@ func (s *Server) handleAliveMember(member serf.Member) error {
Warning: 1,
},
Meta: map[string]string{
// DEPRECATED - remove nonvoter in favor of read_replica in a future version of consul
"non_voter": strconv.FormatBool(member.Tags["nonvoter"] == "1"),
"read_replica": strconv.FormatBool(member.Tags["read_replica"] == "1"),
"raft_version": strconv.Itoa(parts.RaftVersion),
"serf_protocol_current": strconv.FormatUint(uint64(member.ProtocolCur), 10),
"serf_protocol_min": strconv.FormatUint(uint64(member.ProtocolMin), 10),

View File

@ -334,7 +334,9 @@ func TestLeader_CheckServersMeta(t *testing.T) {
versionToExpect := "19.7.9"
retry.Run(t, func(r *retry.R) {
// DEPRECATED - remove nonvoter tag in favor of read_replica in a future version of consul
member.Tags["nonvoter"] = "1"
member.Tags["read_replica"] = "1"
member.Tags["build"] = versionToExpect
err := s1.handleAliveMember(member)
if err != nil {
@ -347,9 +349,13 @@ func TestLeader_CheckServersMeta(t *testing.T) {
if service == nil {
r.Fatal("client not registered")
}
// DEPRECATED - remove non_voter in favor of read_replica in a future version of consul
if service.Meta["non_voter"] != "true" {
r.Fatalf("Expected to be non_voter == true, was: %s", service.Meta["non_voter"])
}
if service.Meta["read_replica"] != "true" {
r.Fatalf("Expected to be read_replica == true, was: %s", service.Meta["non_voter"])
}
newVersion := service.Meta["version"]
if newVersion != versionToExpect {
r.Fatalf("Expected version to be updated to %s, was %s", versionToExpect, newVersion)

View File

@ -61,8 +61,11 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w
if s.config.BootstrapExpect != 0 {
conf.Tags["expect"] = fmt.Sprintf("%d", s.config.BootstrapExpect)
}
if s.config.NonVoter {
if s.config.ReadReplica {
// DEPRECATED - This tag should be removed when we no longer want to support
// upgrades from 1.8.x and below
conf.Tags["nonvoter"] = "1"
conf.Tags["read_replica"] = "1"
}
if s.config.UseTLS {
conf.Tags["use_tls"] = "1"
@ -351,7 +354,7 @@ func (s *Server) maybeBootstrap() {
s.logger.Error("Member has bootstrap mode. Expect disabled.", "member", member)
return
}
if !p.NonVoter {
if !p.ReadReplica {
voters++
}
servers = append(servers, *p)
@ -410,7 +413,7 @@ func (s *Server) maybeBootstrap() {
id := raft.ServerID(server.ID)
suffrage := raft.Voter
if server.NonVoter {
if server.ReadReplica {
suffrage = raft.Nonvoter
}
peer := raft.Server{

View File

@ -240,7 +240,7 @@ func testServerDCExpectNonVoter(t *testing.T, dc string, expect int) (string, *S
c.Datacenter = dc
c.Bootstrap = false
c.BootstrapExpect = expect
c.NonVoter = true
c.ReadReplica = true
})
}

View File

@ -40,7 +40,7 @@ type Server struct {
RaftVersion int
Addr net.Addr
Status serf.MemberStatus
NonVoter bool
ReadReplica bool
ACLs structs.ACLMode
FeatureFlags map[string]int
@ -160,7 +160,10 @@ func IsConsulServer(m serf.Member) (bool, *Server) {
}
// Check if the server is a non voter
// DEPRECATED - remove looking for the nonvoter tag eventually once we don't have to support
// read replicas running v1.8.x and below.
_, nonVoter := m.Tags["nonvoter"]
_, readReplica := m.Tags["read_replica"]
addr := &net.TCPAddr{IP: m.Addr, Port: port}
@ -182,7 +185,8 @@ func IsConsulServer(m serf.Member) (bool, *Server) {
RaftVersion: raftVsn,
Status: m.Status,
UseTLS: useTLS,
NonVoter: nonVoter,
// DEPRECATED - remove nonVoter check once support for that tag is removed
ReadReplica: nonVoter || readReplica,
ACLs: acls,
FeatureFlags: featureFlags,
}

View File

@ -66,7 +66,7 @@ func TestIsConsulServer(t *testing.T) {
"expect": "3",
"raft_vsn": "3",
"use_tls": "1",
"nonvoter": "1",
"read_replica": "1",
},
Status: serf.StatusLeft,
}
@ -101,7 +101,7 @@ func TestIsConsulServer(t *testing.T) {
if !parts.UseTLS {
t.Fatalf("bad: %v", parts.UseTLS)
}
if !parts.NonVoter {
if !parts.ReadReplica {
t.Fatalf("unexpected voter")
}
m.Tags["bootstrap"] = "1"
@ -130,10 +130,16 @@ func TestIsConsulServer(t *testing.T) {
t.Fatalf("unexpected bootstrap")
}
delete(m.Tags, "nonvoter")
delete(m.Tags, "read_replica")
ok, parts = metadata.IsConsulServer(m)
if !ok || parts.NonVoter {
t.Fatalf("unexpected nonvoter")
if !ok || parts.ReadReplica {
t.Fatalf("unexpected read replica")
}
m.Tags["nonvoter"] = "1"
ok, parts = metadata.IsConsulServer(m)
if !ok || !parts.ReadReplica {
t.Fatalf("expected read replica")
}
delete(m.Tags, "role")

View File

@ -11,12 +11,47 @@ import (
"github.com/hashicorp/consul/api"
bexpr "github.com/hashicorp/go-bexpr"
"github.com/mitchellh/pointerstructure"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
var dumpFieldConfig = flag.Bool("dump-field-config", false, "generate field config dump file")
func TestPointerStructure(t *testing.T) {
csn := CheckServiceNode{
Node: &Node{
ID: "f18f3a10-2153-40ae-af7d-68db0e856498",
Node: "node1",
Address: "198.18.0.1",
},
Service: &NodeService{
ID: "test",
Service: "test",
Port: 1234,
TaggedAddresses: map[string]ServiceAddress{
"wan": {
Address: "1.1.1.1",
Port: 443,
},
},
},
}
ptr := pointerstructure.Pointer{
Parts: []string{
"Service",
"TaggedAddresses",
"wan",
"Address",
},
}
val, err := ptr.Get(csn)
require.NoError(t, err)
require.Equal(t, "1.1.1.1", val)
}
///////////////////////////////////////////////////////////////////////////////
//
// NOTE: The tests within this file are designed to validate that the fields

View File

@ -158,6 +158,15 @@ const (
// configured to use TLS. Any other value indicates that it was not setup in
// that manner.
MemberTagValueUseTLS = "1"
// MemberTagKeyReadReplica is the key used to indicate that the member is a read
// replica server (will remain a Raft non-voter).
// Read Replicas are a Consul Enterprise feature.
MemberTagKeyReadReplica = "read_replica"
// MemberTagValueReadReplica is the value of the MemberTagKeyReadReplica key when
// the member is in fact a read-replica. Any other value indicates that it is not.
// Read Replicas are a Consul Enterprise feature.
MemberTagValueReadReplica = "1"
)
type MemberACLMode string

View File

@ -479,7 +479,10 @@ The options below are all specified on the command-line.
This overrides the default server RPC port 8300. This is available in Consul 1.2.2
and later.
- `-non-voting-server` ((#\_non_voting_server)) <EnterpriseAlert inline /> - This
- `-non-voting-server` ((#\_non_voting_server)) <EnterpriseAlert inline /> - **This field
is deprecated in Consul 1.9.1. See the [`-read-replica`](#_read_replica) flag instead.**
- `-read-replica` ((#\_read_replica)) <EnterpriseAlert inline /> - This
flag is used to make the server not participate in the Raft quorum, and have it
only receive the data replication stream. This can be used to add read scalability
to a cluster in cases where a high volume of reads to servers are needed.
@ -1852,7 +1855,9 @@ Valid time units are 'ns', 'us' (or 'µs'), 'ms', 's', 'm', 'h'."
- `server` Equivalent to the [`-server` command-line flag](#_server).
- `non_voting_server` - Equivalent to the [`-non-voting-server` command-line flag](#_non_voting_server).
- `non_voting_server` - **This field is deprecated in Consul 1.9.1. See the [`read_replica`](#read_replica) field instead.**
- `read_replica` - Equivalent to the [`-read-replica` command-line flag](#_read_replica).
- `server_name` When provided, this overrides the [`node_name`](#_node)
for the TLS certificate. It can be used to ensure that the certificate name matches

View File

@ -4,12 +4,10 @@ page_title: Consul Enterprise Enhanced Read Scalability
sidebar_title: Enhanced Read Scalability
description: >-
Consul Enterprise supports increased read scalability without impacting write
latency by introducing
non-voting servers.
latency by introducing read replicas.
---
# Enhanced Read Scalability with Non-Voting Servers
# Enhanced Read Scalability with Read Replicas
<EnterpriseAlert>
This feature requires{' '}
@ -18,10 +16,10 @@ description: >-
</EnterpriseAlert>
Consul Enterprise provides the ability to scale clustered Consul servers
to include voting and non-voting servers. Non-voting servers still receive data from the cluster replication,
to include voting servers and read replicas. Read replicas still receive data from the cluster replication,
however, they do not take part in quorum election operations. Expanding your Consul cluster in this way can scale
reads without impacting write latency.
For more details, review the [Consul server configuration](/docs/agent/options)
documentation and the [-non-voting-server](/docs/agent/options#_non_voting_server)
documentation and the [-read-replica](/docs/agent/options#_read_replica)
configuration flag.