Prevent status flap when re-registering a check (#4904)

Fixes point `#2` of: https://github.com/hashicorp/consul/issues/4903

When registering a service each healthcheck status is saved and restored (https://github.com/hashicorp/consul/blob/master/agent/agent.go#L1914) to avoid unnecessary flaps in health state.
This change extends this feature to single check registration by moving this protection in `AddCheck()` so that both `PUT /v1/agent/service/register` and `PUT /v1/agent/check/register` behave in the same idempotent way.

#### Steps to reproduce
1. Register a check :
```
curl -X PUT \
  http://127.0.0.1:8500/v1/agent/check/register \
  -H 'Content-Type: application/json' \
  -d '{
  "Name": "my_check",
  "ServiceID": "srv",
  "Interval": "10s",
  "Args": ["true"]
}'
```
2. The check will initialize and change to `passing`
3. Run the same request again
4. The check status will quickly go from `critical` to `passing` (the delay for this transission is determined by https://github.com/hashicorp/consul/blob/master/agent/checks/check.go#L95)
This commit is contained in:
Aestek 2019-01-07 19:53:03 +01:00 committed by Matt Keeler
parent 516ba47609
commit 8709213d6e
2 changed files with 52 additions and 6 deletions

View File

@ -1908,11 +1908,6 @@ func (a *Agent) AddService(service *structs.NodeService, chkTypes []*structs.Che
a.PauseSync()
defer a.ResumeSync()
// Take a snapshot of the current state of checks (if any), and
// restore them before resuming anti-entropy.
snap := a.snapshotCheckState()
defer a.restoreCheckState(snap)
// Add the service
a.State.AddService(service, token)
@ -2052,6 +2047,14 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType,
a.checkLock.Lock()
defer a.checkLock.Unlock()
// snapshot the current state of the health check to avoid potential flapping
existing := a.State.Check(check.CheckID)
defer func() {
if existing != nil {
a.State.UpdateCheck(check.CheckID, existing.Status, existing.Output)
}
}()
// Check if already registered
if chkType != nil {
switch {

View File

@ -2467,7 +2467,7 @@ func TestAgent_Service_NoReap(t *testing.T) {
}
}
func TestAgent_addCheck_restoresSnapshot(t *testing.T) {
func TestAgent_AddService_restoresSnapshot(t *testing.T) {
t.Parallel()
a := NewTestAgent(t.Name(), "")
defer a.Shutdown()
@ -2510,6 +2510,49 @@ func TestAgent_addCheck_restoresSnapshot(t *testing.T) {
}
}
func TestAgent_AddCheck_restoresSnapshot(t *testing.T) {
t.Parallel()
a := NewTestAgent(t.Name(), "")
defer a.Shutdown()
// First register a service
svc := &structs.NodeService{
ID: "redis",
Service: "redis",
Tags: []string{"foo"},
Port: 8000,
}
if err := a.AddService(svc, nil, false, "", ConfigSourceLocal); err != nil {
t.Fatalf("err: %v", err)
}
// Register a check
check1 := &structs.HealthCheck{
Node: a.Config.NodeName,
CheckID: "service:redis",
Name: "redischeck",
Status: api.HealthPassing,
ServiceID: "redis",
ServiceName: "redis",
}
if err := a.AddCheck(check1, nil, false, "", ConfigSourceLocal); err != nil {
t.Fatalf("err: %s", err)
}
// Re-registering the check preserves its state
check1.Status = ""
if err := a.AddCheck(check1, &structs.CheckType{TTL: 30 * time.Second}, false, "", ConfigSourceLocal); err != nil {
t.Fatalf("err: %s", err)
}
check, ok := a.State.Checks()["service:redis"]
if !ok {
t.Fatalf("missing check")
}
if check.Status != api.HealthPassing {
t.Fatalf("bad: %s", check.Status)
}
}
func TestAgent_NodeMaintenanceMode(t *testing.T) {
t.Parallel()
a := NewTestAgent(t.Name(), "")