From 66fd23d67f41780c62a5e7314d5a21f78488ea20 Mon Sep 17 00:00:00 2001 From: Matt Keeler Date: Tue, 17 Nov 2020 10:53:57 -0500 Subject: [PATCH] Refactor to call non-voting servers read replicas (#9191) Co-authored-by: Kit Patella --- .changelog/9191.txt | 18 ++++++++++ agent/agent.go | 4 +-- agent/config/builder.go | 2 +- agent/config/builder_oss.go | 3 ++ agent/config/builder_oss_test.go | 17 +++++++--- agent/config/config.go | 2 +- agent/config/flags.go | 3 +- agent/config/runtime.go | 4 +-- agent/config/runtime_oss_test.go | 6 +++- agent/config/runtime_test.go | 10 +++--- agent/consul/config.go | 4 +-- agent/consul/leader.go | 2 ++ agent/consul/leader_test.go | 6 ++++ agent/consul/server_serf.go | 9 +++-- agent/consul/server_test.go | 2 +- agent/metadata/server.go | 8 +++-- agent/metadata/server_test.go | 16 ++++++--- agent/structs/structs_filtering_test.go | 35 ++++++++++++++++++++ api/agent.go | 9 +++++ website/pages/docs/agent/options.mdx | 9 +++-- website/pages/docs/enterprise/read-scale.mdx | 10 +++--- 21 files changed, 141 insertions(+), 38 deletions(-) create mode 100644 .changelog/9191.txt diff --git a/.changelog/9191.txt b/.changelog/9191.txt new file mode 100644 index 0000000000..25f7da43db --- /dev/null +++ b/.changelog/9191.txt @@ -0,0 +1,18 @@ +```release-note:deprecation +cli: **(Enterprise only)** The `-non-voting-server` flag is deprecated in favor of the new `-read-replica` flag. The `-non-voting-server` flag is still present along side the new flag but it will be removed in a future release. +``` +```release-note:improvement +cli: **(Enterprise only)** A new `-read-replica` flag can now be used to enable running a server as a read only replica. Previously this was enabled with the now deprecated `-non-voting-server` flag. +``` +```release-note:deprecation +config: **(Enterprise only)** The `non_voting_server` configuration setting is deprecated in favor of the new `read_replica` setting. The `non_voting_server` configuration setting is still present but will be removed in a future release. +``` +```release-note:improvement +config: **(Enterprise only)** A new `read_replica` configuration setting can now be used to enable running a server as a read only replica. Previously this was enabled with the now deprecated `non_voting_server` setting. +``` +```release-note:deprecation +server: **(Enterprise only)** Addition of the `nonvoter` tag to the service registration made for read replicas is deprecated in favor of the new tag name of `read_replica`. Both are present in the registration but the `nonvoter` tag will be completely removed in a future release. +``` +```release-note:deprecation +gossip: **(Enterprise only)** Read replicas now advertise themselves by setting the `read_replica` tag. The old `nonvoter` tag is still present but is deprecated and will be removed in a future release. +``` diff --git a/agent/agent.go b/agent/agent.go index 76fd923d4a..07f21a2306 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -1110,8 +1110,8 @@ func newConsulConfig(runtimeCfg *config.RuntimeConfig, logger hclog.Logger) (*co if runtimeCfg.SessionTTLMin != 0 { cfg.SessionTTLMin = runtimeCfg.SessionTTLMin } - if runtimeCfg.NonVotingServer { - cfg.NonVoter = runtimeCfg.NonVotingServer + if runtimeCfg.ReadReplica { + cfg.ReadReplica = runtimeCfg.ReadReplica } // These are fully specified in the agent defaults, so we can simply diff --git a/agent/config/builder.go b/agent/config/builder.go index 062fb440f7..48a2c55bec 100644 --- a/agent/config/builder.go +++ b/agent/config/builder.go @@ -1034,7 +1034,7 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) { NodeID: types.NodeID(b.stringVal(c.NodeID)), NodeMeta: c.NodeMeta, NodeName: b.nodeName(c.NodeName), - NonVotingServer: b.boolVal(c.NonVotingServer), + ReadReplica: b.boolVal(c.ReadReplica), PidFile: b.stringVal(c.PidFile), PrimaryDatacenter: primaryDatacenter, PrimaryGateways: b.expandAllOptionalAddrs("primary_gateways", c.PrimaryGateways), diff --git a/agent/config/builder_oss.go b/agent/config/builder_oss.go index 85cf081375..82a037994b 100644 --- a/agent/config/builder_oss.go +++ b/agent/config/builder_oss.go @@ -13,6 +13,9 @@ var ( "non_voting_server": func(c *Config) { // to maintain existing compatibility we don't nullify the value }, + "read_replica": func(c *Config) { + // to maintain existing compatibility we don't nullify the value + }, "segment": func(c *Config) { // to maintain existing compatibility we don't nullify the value }, diff --git a/agent/config/builder_oss_test.go b/agent/config/builder_oss_test.go index d7a94a9821..e94fec68a2 100644 --- a/agent/config/builder_oss_test.go +++ b/agent/config/builder_oss_test.go @@ -24,11 +24,18 @@ func TestBuilder_validateEnterpriseConfigKeys(t *testing.T) { cases := map[string]testCase{ "non_voting_server": { config: Config{ - NonVotingServer: &boolVal, + ReadReplica: &boolVal, }, keys: []string{"non_voting_server"}, badKeys: []string{"non_voting_server"}, }, + "read_replica": { + config: Config{ + ReadReplica: &boolVal, + }, + keys: []string{"read_replica"}, + badKeys: []string{"read_replica"}, + }, "segment": { config: Config{ SegmentName: &stringVal, @@ -118,11 +125,11 @@ func TestBuilder_validateEnterpriseConfigKeys(t *testing.T) { }, "multi": { config: Config{ - NonVotingServer: &boolVal, - SegmentName: &stringVal, + ReadReplica: &boolVal, + SegmentName: &stringVal, }, - keys: []string{"non_voting_server", "segment", "acl.tokens.agent_master"}, - badKeys: []string{"non_voting_server", "segment"}, + keys: []string{"non_voting_server", "read_replica", "segment", "acl.tokens.agent_master"}, + badKeys: []string{"non_voting_server", "read_replica", "segment"}, }, } diff --git a/agent/config/config.go b/agent/config/config.go index f966297baf..a6d4df8434 100644 --- a/agent/config/config.go +++ b/agent/config/config.go @@ -289,7 +289,7 @@ type Config struct { // Enterprise Only Audit *Audit `json:"audit,omitempty" hcl:"audit" mapstructure:"audit"` // Enterprise Only - NonVotingServer *bool `json:"non_voting_server,omitempty" hcl:"non_voting_server" mapstructure:"non_voting_server"` + ReadReplica *bool `json:"read_replica,omitempty" hcl:"read_replica" mapstructure:"read_replica" alias:"non_voting_server"` // Enterprise Only SegmentName *string `json:"segment,omitempty" hcl:"segment" mapstructure:"segment"` // Enterprise Only diff --git a/agent/config/flags.go b/agent/config/flags.go index a032944d3e..77dc2abb38 100644 --- a/agent/config/flags.go +++ b/agent/config/flags.go @@ -89,7 +89,8 @@ func AddFlags(fs *flag.FlagSet, f *BuilderOpts) { add(&f.Config.NodeName, "node", "Name of this node. Must be unique in the cluster.") add(&f.Config.NodeID, "node-id", "A unique ID for this node across space and time. Defaults to a randomly-generated ID that persists in the data-dir.") add(&f.Config.NodeMeta, "node-meta", "An arbitrary metadata key/value pair for this node, of the format `key:value`. Can be specified multiple times.") - add(&f.Config.NonVotingServer, "non-voting-server", "(Enterprise-only) This flag is used to make the server not participate in the Raft quorum, and have it only receive the data replication stream. This can be used to add read scalability to a cluster in cases where a high volume of reads to servers are needed.") + add(&f.Config.ReadReplica, "non-voting-server", "(Enterprise-only) DEPRECATED: -read-replica should be used instead") + add(&f.Config.ReadReplica, "read-replica", "(Enterprise-only) This flag is used to make the server not participate in the Raft quorum, and have it only receive the data replication stream. This can be used to add read scalability to a cluster in cases where a high volume of reads to servers are needed.") add(&f.Config.PidFile, "pid-file", "Path to file to store agent PID.") add(&f.Config.RPCProtocol, "protocol", "Sets the protocol version. Defaults to latest.") add(&f.Config.RaftProtocol, "raft-protocol", "Sets the Raft protocol version. Defaults to latest.") diff --git a/agent/config/runtime.go b/agent/config/runtime.go index 4acc823faa..ffaf1f7f09 100644 --- a/agent/config/runtime.go +++ b/agent/config/runtime.go @@ -845,12 +845,12 @@ type RuntimeConfig struct { // flag: -node-meta "key:value" -node-meta "key:value" ... NodeMeta map[string]string - // NonVotingServer is whether this server will act as a non-voting member + // ReadReplica is whether this server will act as a non-voting member // of the cluster to help provide read scalability. (Enterprise-only) // // hcl: non_voting_server = (true|false) // flag: -non-voting-server - NonVotingServer bool + ReadReplica bool // PidFile is the file to store our PID in. // diff --git a/agent/config/runtime_oss_test.go b/agent/config/runtime_oss_test.go index c7c21eb0a0..670008ce93 100644 --- a/agent/config/runtime_oss_test.go +++ b/agent/config/runtime_oss_test.go @@ -12,12 +12,16 @@ var entTokenConfigSanitize = `"EnterpriseConfig": {},` func entFullRuntimeConfig(rt *RuntimeConfig) {} -var enterpriseNonVotingServerWarnings []string = []string{enterpriseConfigKeyError{key: "non_voting_server"}.Error()} +var enterpriseReadReplicaWarnings []string = []string{enterpriseConfigKeyError{key: "read_replica"}.Error()} var enterpriseConfigKeyWarnings []string func init() { for k := range enterpriseConfigMap { + if k == "non_voting_server" { + // this is an alias for "read_replica" so we shouldn't see it in warnings + continue + } enterpriseConfigKeyWarnings = append(enterpriseConfigKeyWarnings, enterpriseConfigKeyError{key: k}.Error()) } } diff --git a/agent/config/runtime_test.go b/agent/config/runtime_test.go index 2dff70d1bf..a989add2f7 100644 --- a/agent/config/runtime_test.go +++ b/agent/config/runtime_test.go @@ -610,10 +610,10 @@ func TestBuilder_BuildAndValidate_ConfigFlagsAndEdgecases(t *testing.T) { `-data-dir=` + dataDir, }, patch: func(rt *RuntimeConfig) { - rt.NonVotingServer = true + rt.ReadReplica = true rt.DataDir = dataDir }, - warns: enterpriseNonVotingServerWarnings, + warns: enterpriseReadReplicaWarnings, }, { desc: "-pid-file", @@ -5315,6 +5315,7 @@ func TestFullConfig(t *testing.T) { "raft_snapshot_threshold": 16384, "raft_snapshot_interval": "30s", "raft_trailing_logs": 83749, + "read_replica": true, "reconnect_timeout": "23739s", "reconnect_timeout_wan": "26694s", "recursors": [ "63.38.39.58", "92.49.18.18" ], @@ -6004,6 +6005,7 @@ func TestFullConfig(t *testing.T) { raft_snapshot_threshold = 16384 raft_snapshot_interval = "30s" raft_trailing_logs = 83749 + read_replica = true reconnect_timeout = "23739s" reconnect_timeout_wan = "26694s" recursors = [ "63.38.39.58", "92.49.18.18" ] @@ -6749,7 +6751,7 @@ func TestFullConfig(t *testing.T) { NodeID: types.NodeID("AsUIlw99"), NodeMeta: map[string]string{"5mgGQMBk": "mJLtVMSG", "A7ynFMJB": "0Nx6RGab"}, NodeName: "otlLxGaI", - NonVotingServer: true, + ReadReplica: true, PidFile: "43xN80Km", PrimaryDatacenter: "ejtmd43d", PrimaryGateways: []string{"aej8eeZo", "roh2KahS"}, @@ -7684,13 +7686,13 @@ func TestSanitize(t *testing.T) { "NodeID": "", "NodeMeta": {}, "NodeName": "", - "NonVotingServer": false, "PidFile": "", "PrimaryDatacenter": "", "PrimaryGateways": [ "pmgw_foo=bar pmgw_key=baz pmgw_secret=boom pmgw_bang=bar" ], "PrimaryGatewaysInterval": "0s", + "ReadReplica": false, "RPCAdvertiseAddr": "", "RPCBindAddr": "", "RPCHandshakeTimeout": "0s", diff --git a/agent/consul/config.go b/agent/consul/config.go index 73db2d2967..7b4cbb507e 100644 --- a/agent/consul/config.go +++ b/agent/consul/config.go @@ -110,9 +110,9 @@ type Config struct { // RaftConfig is the configuration used for Raft in the local DC RaftConfig *raft.Config - // (Enterprise-only) NonVoter is used to prevent this server from being added + // (Enterprise-only) ReadReplica is used to prevent this server from being added // as a voting member of the Raft cluster. - NonVoter bool + ReadReplica bool // NotifyListen is called after the RPC listener has been configured. // RPCAdvertise will be set to the listener address if it hasn't been diff --git a/agent/consul/leader.go b/agent/consul/leader.go index d050e297b4..1fcef0624c 100644 --- a/agent/consul/leader.go +++ b/agent/consul/leader.go @@ -1230,7 +1230,9 @@ func (s *Server) handleAliveMember(member serf.Member) error { Warning: 1, }, Meta: map[string]string{ + // DEPRECATED - remove nonvoter in favor of read_replica in a future version of consul "non_voter": strconv.FormatBool(member.Tags["nonvoter"] == "1"), + "read_replica": strconv.FormatBool(member.Tags["read_replica"] == "1"), "raft_version": strconv.Itoa(parts.RaftVersion), "serf_protocol_current": strconv.FormatUint(uint64(member.ProtocolCur), 10), "serf_protocol_min": strconv.FormatUint(uint64(member.ProtocolMin), 10), diff --git a/agent/consul/leader_test.go b/agent/consul/leader_test.go index 60f817a8d3..0d02e522bd 100644 --- a/agent/consul/leader_test.go +++ b/agent/consul/leader_test.go @@ -334,7 +334,9 @@ func TestLeader_CheckServersMeta(t *testing.T) { versionToExpect := "19.7.9" retry.Run(t, func(r *retry.R) { + // DEPRECATED - remove nonvoter tag in favor of read_replica in a future version of consul member.Tags["nonvoter"] = "1" + member.Tags["read_replica"] = "1" member.Tags["build"] = versionToExpect err := s1.handleAliveMember(member) if err != nil { @@ -347,9 +349,13 @@ func TestLeader_CheckServersMeta(t *testing.T) { if service == nil { r.Fatal("client not registered") } + // DEPRECATED - remove non_voter in favor of read_replica in a future version of consul if service.Meta["non_voter"] != "true" { r.Fatalf("Expected to be non_voter == true, was: %s", service.Meta["non_voter"]) } + if service.Meta["read_replica"] != "true" { + r.Fatalf("Expected to be read_replica == true, was: %s", service.Meta["non_voter"]) + } newVersion := service.Meta["version"] if newVersion != versionToExpect { r.Fatalf("Expected version to be updated to %s, was %s", versionToExpect, newVersion) diff --git a/agent/consul/server_serf.go b/agent/consul/server_serf.go index ce57d9d4b8..d6b79f0a79 100644 --- a/agent/consul/server_serf.go +++ b/agent/consul/server_serf.go @@ -61,8 +61,11 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w if s.config.BootstrapExpect != 0 { conf.Tags["expect"] = fmt.Sprintf("%d", s.config.BootstrapExpect) } - if s.config.NonVoter { + if s.config.ReadReplica { + // DEPRECATED - This tag should be removed when we no longer want to support + // upgrades from 1.8.x and below conf.Tags["nonvoter"] = "1" + conf.Tags["read_replica"] = "1" } if s.config.UseTLS { conf.Tags["use_tls"] = "1" @@ -351,7 +354,7 @@ func (s *Server) maybeBootstrap() { s.logger.Error("Member has bootstrap mode. Expect disabled.", "member", member) return } - if !p.NonVoter { + if !p.ReadReplica { voters++ } servers = append(servers, *p) @@ -410,7 +413,7 @@ func (s *Server) maybeBootstrap() { id := raft.ServerID(server.ID) suffrage := raft.Voter - if server.NonVoter { + if server.ReadReplica { suffrage = raft.Nonvoter } peer := raft.Server{ diff --git a/agent/consul/server_test.go b/agent/consul/server_test.go index 928e59257b..c645386388 100644 --- a/agent/consul/server_test.go +++ b/agent/consul/server_test.go @@ -240,7 +240,7 @@ func testServerDCExpectNonVoter(t *testing.T, dc string, expect int) (string, *S c.Datacenter = dc c.Bootstrap = false c.BootstrapExpect = expect - c.NonVoter = true + c.ReadReplica = true }) } diff --git a/agent/metadata/server.go b/agent/metadata/server.go index d145a9d266..5b35ec78ec 100644 --- a/agent/metadata/server.go +++ b/agent/metadata/server.go @@ -40,7 +40,7 @@ type Server struct { RaftVersion int Addr net.Addr Status serf.MemberStatus - NonVoter bool + ReadReplica bool ACLs structs.ACLMode FeatureFlags map[string]int @@ -160,7 +160,10 @@ func IsConsulServer(m serf.Member) (bool, *Server) { } // Check if the server is a non voter + // DEPRECATED - remove looking for the nonvoter tag eventually once we don't have to support + // read replicas running v1.8.x and below. _, nonVoter := m.Tags["nonvoter"] + _, readReplica := m.Tags["read_replica"] addr := &net.TCPAddr{IP: m.Addr, Port: port} @@ -182,7 +185,8 @@ func IsConsulServer(m serf.Member) (bool, *Server) { RaftVersion: raftVsn, Status: m.Status, UseTLS: useTLS, - NonVoter: nonVoter, + // DEPRECATED - remove nonVoter check once support for that tag is removed + ReadReplica: nonVoter || readReplica, ACLs: acls, FeatureFlags: featureFlags, } diff --git a/agent/metadata/server_test.go b/agent/metadata/server_test.go index 6b96785ccf..4f479472c0 100644 --- a/agent/metadata/server_test.go +++ b/agent/metadata/server_test.go @@ -66,7 +66,7 @@ func TestIsConsulServer(t *testing.T) { "expect": "3", "raft_vsn": "3", "use_tls": "1", - "nonvoter": "1", + "read_replica": "1", }, Status: serf.StatusLeft, } @@ -101,7 +101,7 @@ func TestIsConsulServer(t *testing.T) { if !parts.UseTLS { t.Fatalf("bad: %v", parts.UseTLS) } - if !parts.NonVoter { + if !parts.ReadReplica { t.Fatalf("unexpected voter") } m.Tags["bootstrap"] = "1" @@ -130,10 +130,16 @@ func TestIsConsulServer(t *testing.T) { t.Fatalf("unexpected bootstrap") } - delete(m.Tags, "nonvoter") + delete(m.Tags, "read_replica") ok, parts = metadata.IsConsulServer(m) - if !ok || parts.NonVoter { - t.Fatalf("unexpected nonvoter") + if !ok || parts.ReadReplica { + t.Fatalf("unexpected read replica") + } + + m.Tags["nonvoter"] = "1" + ok, parts = metadata.IsConsulServer(m) + if !ok || !parts.ReadReplica { + t.Fatalf("expected read replica") } delete(m.Tags, "role") diff --git a/agent/structs/structs_filtering_test.go b/agent/structs/structs_filtering_test.go index e13c98043c..319f47e9e2 100644 --- a/agent/structs/structs_filtering_test.go +++ b/agent/structs/structs_filtering_test.go @@ -11,12 +11,47 @@ import ( "github.com/hashicorp/consul/api" bexpr "github.com/hashicorp/go-bexpr" + "github.com/mitchellh/pointerstructure" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) var dumpFieldConfig = flag.Bool("dump-field-config", false, "generate field config dump file") +func TestPointerStructure(t *testing.T) { + csn := CheckServiceNode{ + Node: &Node{ + ID: "f18f3a10-2153-40ae-af7d-68db0e856498", + Node: "node1", + Address: "198.18.0.1", + }, + Service: &NodeService{ + ID: "test", + Service: "test", + Port: 1234, + TaggedAddresses: map[string]ServiceAddress{ + "wan": { + Address: "1.1.1.1", + Port: 443, + }, + }, + }, + } + + ptr := pointerstructure.Pointer{ + Parts: []string{ + "Service", + "TaggedAddresses", + "wan", + "Address", + }, + } + + val, err := ptr.Get(csn) + require.NoError(t, err) + require.Equal(t, "1.1.1.1", val) +} + /////////////////////////////////////////////////////////////////////////////// // // NOTE: The tests within this file are designed to validate that the fields diff --git a/api/agent.go b/api/agent.go index 7b7fbdfc93..d392d08973 100644 --- a/api/agent.go +++ b/api/agent.go @@ -158,6 +158,15 @@ const ( // configured to use TLS. Any other value indicates that it was not setup in // that manner. MemberTagValueUseTLS = "1" + + // MemberTagKeyReadReplica is the key used to indicate that the member is a read + // replica server (will remain a Raft non-voter). + // Read Replicas are a Consul Enterprise feature. + MemberTagKeyReadReplica = "read_replica" + // MemberTagValueReadReplica is the value of the MemberTagKeyReadReplica key when + // the member is in fact a read-replica. Any other value indicates that it is not. + // Read Replicas are a Consul Enterprise feature. + MemberTagValueReadReplica = "1" ) type MemberACLMode string diff --git a/website/pages/docs/agent/options.mdx b/website/pages/docs/agent/options.mdx index da8dba391b..df7f41e1b9 100644 --- a/website/pages/docs/agent/options.mdx +++ b/website/pages/docs/agent/options.mdx @@ -479,7 +479,10 @@ The options below are all specified on the command-line. This overrides the default server RPC port 8300. This is available in Consul 1.2.2 and later. -- `-non-voting-server` ((#\_non_voting_server)) - This +- `-non-voting-server` ((#\_non_voting_server)) - **This field + is deprecated in Consul 1.9.1. See the [`-read-replica`](#_read_replica) flag instead.** + +- `-read-replica` ((#\_read_replica)) - This flag is used to make the server not participate in the Raft quorum, and have it only receive the data replication stream. This can be used to add read scalability to a cluster in cases where a high volume of reads to servers are needed. @@ -1852,7 +1855,9 @@ Valid time units are 'ns', 'us' (or 'µs'), 'ms', 's', 'm', 'h'." - `server` Equivalent to the [`-server` command-line flag](#_server). -- `non_voting_server` - Equivalent to the [`-non-voting-server` command-line flag](#_non_voting_server). +- `non_voting_server` - **This field is deprecated in Consul 1.9.1. See the [`read_replica`](#read_replica) field instead.** + +- `read_replica` - Equivalent to the [`-read-replica` command-line flag](#_read_replica). - `server_name` When provided, this overrides the [`node_name`](#_node) for the TLS certificate. It can be used to ensure that the certificate name matches diff --git a/website/pages/docs/enterprise/read-scale.mdx b/website/pages/docs/enterprise/read-scale.mdx index a5bb4102f7..cf2dd536fa 100644 --- a/website/pages/docs/enterprise/read-scale.mdx +++ b/website/pages/docs/enterprise/read-scale.mdx @@ -4,12 +4,10 @@ page_title: Consul Enterprise Enhanced Read Scalability sidebar_title: Enhanced Read Scalability description: >- Consul Enterprise supports increased read scalability without impacting write - latency by introducing - - non-voting servers. + latency by introducing read replicas. --- -# Enhanced Read Scalability with Non-Voting Servers +# Enhanced Read Scalability with Read Replicas This feature requires{' '} @@ -18,10 +16,10 @@ description: >- Consul Enterprise provides the ability to scale clustered Consul servers -to include voting and non-voting servers. Non-voting servers still receive data from the cluster replication, +to include voting servers and read replicas. Read replicas still receive data from the cluster replication, however, they do not take part in quorum election operations. Expanding your Consul cluster in this way can scale reads without impacting write latency. For more details, review the [Consul server configuration](/docs/agent/options) -documentation and the [-non-voting-server](/docs/agent/options#_non_voting_server) +documentation and the [-read-replica](/docs/agent/options#_read_replica) configuration flag.