From f4cc4577ca441dcd0df60abd45d10690de8dae1a Mon Sep 17 00:00:00 2001 From: freddygv Date: Mon, 10 Oct 2022 16:42:29 -0600 Subject: [PATCH 1/2] Fix alias check leak Preivously when alias check was removed it would not be stopped nor cleaned up from the associated aliasChecks map. This means that any time an alias check was deregistered we would leak a goroutine for CheckAlias.run() because the stopCh would never be closed. This issue mostly affects service mesh deployments on platforms where the client agent is mostly static but proxy services come and go regularly, since by default sidecars are registered with an alias check. --- agent/agent.go | 5 ++++- agent/agent_test.go | 52 +++++++++++++++++++++++++++------------------ 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index 3de22c6ae5..391cb388ea 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -3263,7 +3263,10 @@ func (a *Agent) cancelCheckMonitors(checkID structs.CheckID) { check.Stop() delete(a.checkH2PINGs, checkID) } - + if check, ok := a.checkAliases[checkID]; ok { + check.Stop() + delete(a.checkAliases, checkID) + } } // updateTTLCheck is used to update the status of a TTL check via the Agent API. diff --git a/agent/agent_test.go b/agent/agent_test.go index 1c7671f767..028df62764 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -1912,7 +1912,7 @@ node_name = "` + a.Config.NodeName + `" } } -func TestAgent_AddCheck_Alias(t *testing.T) { +func TestAgent_Alias_AddRemove(t *testing.T) { if testing.Short() { t.Skip("too slow for testing.Short") } @@ -1922,29 +1922,39 @@ func TestAgent_AddCheck_Alias(t *testing.T) { a := NewTestAgent(t, "") defer a.Shutdown() - health := &structs.HealthCheck{ - Node: "foo", - CheckID: "aliashealth", - Name: "Alias health check", - Status: api.HealthCritical, - } - chk := &structs.CheckType{ - AliasService: "foo", - } - err := a.AddCheck(health, chk, false, "", ConfigSourceLocal) - require.NoError(t, err) + cid := structs.NewCheckID("aliashealth", nil) - // Ensure we have a check mapping - sChk := requireCheckExists(t, a, "aliashealth") - require.Equal(t, api.HealthCritical, sChk.Status) + testutil.RunStep(t, "add check", func(t *testing.T) { + health := &structs.HealthCheck{ + Node: "foo", + CheckID: cid.ID, + Name: "Alias health check", + Status: api.HealthCritical, + } + chk := &structs.CheckType{ + AliasService: "foo", + } + err := a.AddCheck(health, chk, false, "", ConfigSourceLocal) + require.NoError(t, err) - chkImpl, ok := a.checkAliases[structs.NewCheckID("aliashealth", nil)] - require.True(t, ok, "missing aliashealth check") - require.Equal(t, "", chkImpl.RPCReq.Token) + sChk := requireCheckExists(t, a, cid.ID) + require.Equal(t, api.HealthCritical, sChk.Status) - cs := a.State.CheckState(structs.NewCheckID("aliashealth", nil)) - require.NotNil(t, cs) - require.Equal(t, "", cs.Token) + chkImpl, ok := a.checkAliases[cid] + require.True(t, ok, "missing aliashealth check") + require.Equal(t, "", chkImpl.RPCReq.Token) + + cs := a.State.CheckState(cid) + require.NotNil(t, cs) + require.Equal(t, "", cs.Token) + }) + + testutil.RunStep(t, "remove check", func(t *testing.T) { + require.NoError(t, a.RemoveCheck(cid, false)) + + requireCheckMissing(t, a, cid.ID) + requireCheckMissingMap(t, a.checkAliases, cid.ID) + }) } func TestAgent_AddCheck_Alias_setToken(t *testing.T) { From da68ed70c111d038bf420d4a787199984161824e Mon Sep 17 00:00:00 2001 From: freddygv Date: Thu, 13 Oct 2022 16:09:32 -0600 Subject: [PATCH 2/2] Add changelog entry --- .changelog/14935.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .changelog/14935.txt diff --git a/.changelog/14935.txt b/.changelog/14935.txt new file mode 100644 index 0000000000..ebacf89ab5 --- /dev/null +++ b/.changelog/14935.txt @@ -0,0 +1,3 @@ +```release-note:bug +agent: avoid leaking the alias check runner goroutine when the check is de-registered +``` \ No newline at end of file