From 7093e48f97ff7cdd7e054726267e6269258323be Mon Sep 17 00:00:00 2001 From: Matt Keeler Date: Fri, 23 Apr 2021 16:17:09 -0400 Subject: [PATCH] [Backport/1.9.x] Backport #10073 (#10104) * Merge pull request #10094 from hashicorp/update-fingerprint updating fingerprint * Add replication metrics (#10073) Co-authored-by: Daniel Nephin --- .circleci/config.yml | 8 ++++---- .circleci/scripts/cherry-picker.sh | 4 ++-- agent/consul/leader.go | 25 ++++++++++++++++++++++--- agent/consul/replication.go | 20 ++++++++++++++++++++ agent/consul/replication_test.go | 1 + agent/consul/server.go | 2 +- 6 files changed, 50 insertions(+), 10 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index e34d3ebd9b..ebfebfcbe3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -655,7 +655,7 @@ jobs: - checkout - add_ssh_keys: # needs a key to push updated static asset commit back to github fingerprints: - - "3d:6b:98:55:78:4e:52:17:4e:17:ba:f3:bf:0b:96:2a" + - "fc:55:84:15:0a:1d:c8:e9:06:d0:e8:9c:7b:a9:b7:31" - attach_workspace: at: . - run: @@ -665,8 +665,8 @@ jobs: # if there are, we commit the ui static asset file # HEAD^! is shorthand for HEAD^..HEAD (parent of HEAD and HEAD) if ! git diff --quiet --exit-code HEAD^! ui/; then - git config --local user.email "hashicorp-ci@users.noreply.github.com" - git config --local user.name "hashicorp-ci" + git config --local user.email "github-team-consul-core@hashicorp.com" + git config --local user.name "hc-github-team-consul-core" short_sha=$(git rev-parse --short HEAD) git add agent/uiserver/bindata_assetfs.go @@ -844,7 +844,7 @@ jobs: - checkout - add_ssh_keys: # needs a key to push cherry-picked commits back to github fingerprints: - - "3d:6b:98:55:78:4e:52:17:4e:17:ba:f3:bf:0b:96:2a" + - "fc:55:84:15:0a:1d:c8:e9:06:d0:e8:9c:7b:a9:b7:31" - run: .circleci/scripts/cherry-picker.sh - run: *notify-slack-failure diff --git a/.circleci/scripts/cherry-picker.sh b/.circleci/scripts/cherry-picker.sh index f6ce0bc431..ac08a2acc2 100755 --- a/.circleci/scripts/cherry-picker.sh +++ b/.circleci/scripts/cherry-picker.sh @@ -160,8 +160,8 @@ fi backport_failures=0 # loop through all labels on the PR for label in $labels; do - git config --local user.email "hashicorp-ci@users.noreply.github.com" - git config --local user.name "hashicorp-ci" + git config --local user.email "github-team-consul-core@hashicorp.com" + git config --local user.name "hc-github-team-consul-core" status "checking label: $label" # if the label matches docs-cherrypick, it will attempt to cherry-pick to stable-website if [[ $label =~ docs-cherrypick ]]; then diff --git a/agent/consul/leader.go b/agent/consul/leader.go index e4470ac1f7..b45cf6d46a 100644 --- a/agent/consul/leader.go +++ b/agent/consul/leader.go @@ -812,10 +812,19 @@ func (s *Server) runLegacyACLReplication(ctx context.Context) error { } if err != nil { + metrics.SetGauge([]string{"leader", "replication", "acl-legacy", "status"}, + 0, + ) lastRemoteIndex = 0 s.updateACLReplicationStatusError() legacyACLLogger.Warn("Legacy ACL replication error (will retry if still leader)", "error", err) } else { + metrics.SetGauge([]string{"leader", "replication", "acl-legacy", "status"}, + 1, + ) + metrics.SetGauge([]string{"leader", "replication", "acl-legacy", "index"}, + float32(index), + ) lastRemoteIndex = index s.updateACLReplicationStatusIndex(structs.ACLReplicateLegacy, index) legacyACLLogger.Debug("Legacy ACL replication completed through remote index", "index", index) @@ -873,7 +882,7 @@ type replicateFunc func(ctx context.Context, logger hclog.Logger, lastRemoteInde func (s *Server) runACLPolicyReplicator(ctx context.Context) error { policyLogger := s.aclReplicationLogger(structs.ACLReplicatePolicies.SingularNoun()) policyLogger.Info("started ACL Policy replication") - return s.runACLReplicator(ctx, policyLogger, structs.ACLReplicatePolicies, s.replicateACLPolicies) + return s.runACLReplicator(ctx, policyLogger, structs.ACLReplicatePolicies, s.replicateACLPolicies, "acl-policies") } // This function is only intended to be run as a managed go routine, it will block until @@ -881,7 +890,7 @@ func (s *Server) runACLPolicyReplicator(ctx context.Context) error { func (s *Server) runACLRoleReplicator(ctx context.Context) error { roleLogger := s.aclReplicationLogger(structs.ACLReplicateRoles.SingularNoun()) roleLogger.Info("started ACL Role replication") - return s.runACLReplicator(ctx, roleLogger, structs.ACLReplicateRoles, s.replicateACLRoles) + return s.runACLReplicator(ctx, roleLogger, structs.ACLReplicateRoles, s.replicateACLRoles, "acl-roles") } // This function is only intended to be run as a managed go routine, it will block until @@ -889,7 +898,7 @@ func (s *Server) runACLRoleReplicator(ctx context.Context) error { func (s *Server) runACLTokenReplicator(ctx context.Context) error { tokenLogger := s.aclReplicationLogger(structs.ACLReplicateTokens.SingularNoun()) tokenLogger.Info("started ACL Token replication") - return s.runACLReplicator(ctx, tokenLogger, structs.ACLReplicateTokens, s.replicateACLTokens) + return s.runACLReplicator(ctx, tokenLogger, structs.ACLReplicateTokens, s.replicateACLTokens, "acl-tokens") } // This function is only intended to be run as a managed go routine, it will block until @@ -899,6 +908,7 @@ func (s *Server) runACLReplicator( logger hclog.Logger, replicationType structs.ACLReplicationType, replicateFunc replicateFunc, + metricName string, ) error { var failedAttempts uint limiter := rate.NewLimiter(rate.Limit(s.config.ACLReplicationRate), s.config.ACLReplicationBurst) @@ -919,6 +929,9 @@ func (s *Server) runACLReplicator( } if err != nil { + metrics.SetGauge([]string{"leader", "replication", metricName, "status"}, + 0, + ) lastRemoteIndex = 0 s.updateACLReplicationStatusError() logger.Warn("ACL replication error (will retry if still leader)", @@ -935,6 +948,12 @@ func (s *Server) runACLReplicator( // do nothing } } else { + metrics.SetGauge([]string{"leader", "replication", metricName, "status"}, + 1, + ) + metrics.SetGauge([]string{"leader", "replication", metricName, "index"}, + float32(index), + ) lastRemoteIndex = index s.updateACLReplicationStatusIndex(replicationType, index) logger.Debug("ACL replication completed through remote index", diff --git a/agent/consul/replication.go b/agent/consul/replication.go index eb292db59f..9ad3065cf0 100644 --- a/agent/consul/replication.go +++ b/agent/consul/replication.go @@ -22,6 +22,7 @@ const ( type ReplicatorDelegate interface { Replicate(ctx context.Context, lastRemoteIndex uint64, logger hclog.Logger) (index uint64, exit bool, err error) + MetricName() string } type ReplicatorConfig struct { @@ -100,6 +101,9 @@ func (r *Replicator) Run(ctx context.Context) error { return nil } if err != nil { + metrics.SetGauge([]string{"leader", "replication", r.delegate.MetricName(), "status"}, + 0, + ) // reset the lastRemoteIndex when there is an RPC failure. This should cause a full sync to be done during // the next round of replication atomic.StoreUint64(&r.lastRemoteIndex, 0) @@ -114,6 +118,13 @@ func (r *Replicator) Run(ctx context.Context) error { continue } + metrics.SetGauge([]string{"leader", "replication", r.delegate.MetricName(), "status"}, + 1, + ) + metrics.SetGauge([]string{"leader", "replication", r.delegate.MetricName(), "index"}, + float32(index), + ) + atomic.StoreUint64(&r.lastRemoteIndex, index) r.logger.Debug("replication completed through remote index", "index", index) r.waiter.Reset() @@ -128,6 +139,11 @@ type ReplicatorFunc func(ctx context.Context, lastRemoteIndex uint64, logger hcl type FunctionReplicator struct { ReplicateFn ReplicatorFunc + Name string +} + +func (r *FunctionReplicator) MetricName() string { + return r.Name } func (r *FunctionReplicator) Replicate(ctx context.Context, lastRemoteIndex uint64, logger hclog.Logger) (uint64, bool, error) { @@ -171,6 +187,10 @@ type IndexReplicator struct { Logger hclog.Logger } +func (r *IndexReplicator) MetricName() string { + return r.Delegate.MetricName() +} + func (r *IndexReplicator) Replicate(ctx context.Context, lastRemoteIndex uint64, _ hclog.Logger) (uint64, bool, error) { fetchStart := time.Now() lenRemote, remote, remoteIndex, err := r.Delegate.FetchRemote(lastRemoteIndex) diff --git a/agent/consul/replication_test.go b/agent/consul/replication_test.go index b70583c23d..468f0d617c 100644 --- a/agent/consul/replication_test.go +++ b/agent/consul/replication_test.go @@ -20,6 +20,7 @@ func TestReplicationRestart(t *testing.T) { ReplicateFn: func(ctx context.Context, lastRemoteIndex uint64, logger hclog.Logger) (uint64, bool, error) { return 1, false, nil }, + Name: "foo", }, Rate: 1, diff --git a/agent/consul/server.go b/agent/consul/server.go index 548328ad6a..9382b2e9f5 100644 --- a/agent/consul/server.go +++ b/agent/consul/server.go @@ -400,7 +400,7 @@ func NewServer(config *Config, flat Deps) (*Server, error) { configReplicatorConfig := ReplicatorConfig{ Name: logging.ConfigEntry, - Delegate: &FunctionReplicator{ReplicateFn: s.replicateConfig}, + Delegate: &FunctionReplicator{ReplicateFn: s.replicateConfig, Name: "config-entries"}, Rate: s.config.ConfigReplicationRate, Burst: s.config.ConfigReplicationBurst, Logger: s.logger,