mirror of https://github.com/status-im/consul.git
Prevent status flap when re-registering a check (#4904)
Fixes point `#2` of: https://github.com/hashicorp/consul/issues/4903 When registering a service each healthcheck status is saved and restored (https://github.com/hashicorp/consul/blob/master/agent/agent.go#L1914) to avoid unnecessary flaps in health state. This change extends this feature to single check registration by moving this protection in `AddCheck()` so that both `PUT /v1/agent/service/register` and `PUT /v1/agent/check/register` behave in the same idempotent way. #### Steps to reproduce 1. Register a check : ``` curl -X PUT \ http://127.0.0.1:8500/v1/agent/check/register \ -H 'Content-Type: application/json' \ -d '{ "Name": "my_check", "ServiceID": "srv", "Interval": "10s", "Args": ["true"] }' ``` 2. The check will initialize and change to `passing` 3. Run the same request again 4. The check status will quickly go from `critical` to `passing` (the delay for this transission is determined by https://github.com/hashicorp/consul/blob/master/agent/checks/check.go#L95)
This commit is contained in:
parent
516ba47609
commit
8709213d6e
|
@ -1908,11 +1908,6 @@ func (a *Agent) AddService(service *structs.NodeService, chkTypes []*structs.Che
|
||||||
a.PauseSync()
|
a.PauseSync()
|
||||||
defer a.ResumeSync()
|
defer a.ResumeSync()
|
||||||
|
|
||||||
// Take a snapshot of the current state of checks (if any), and
|
|
||||||
// restore them before resuming anti-entropy.
|
|
||||||
snap := a.snapshotCheckState()
|
|
||||||
defer a.restoreCheckState(snap)
|
|
||||||
|
|
||||||
// Add the service
|
// Add the service
|
||||||
a.State.AddService(service, token)
|
a.State.AddService(service, token)
|
||||||
|
|
||||||
|
@ -2052,6 +2047,14 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType,
|
||||||
a.checkLock.Lock()
|
a.checkLock.Lock()
|
||||||
defer a.checkLock.Unlock()
|
defer a.checkLock.Unlock()
|
||||||
|
|
||||||
|
// snapshot the current state of the health check to avoid potential flapping
|
||||||
|
existing := a.State.Check(check.CheckID)
|
||||||
|
defer func() {
|
||||||
|
if existing != nil {
|
||||||
|
a.State.UpdateCheck(check.CheckID, existing.Status, existing.Output)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
// Check if already registered
|
// Check if already registered
|
||||||
if chkType != nil {
|
if chkType != nil {
|
||||||
switch {
|
switch {
|
||||||
|
|
|
@ -2467,7 +2467,7 @@ func TestAgent_Service_NoReap(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAgent_addCheck_restoresSnapshot(t *testing.T) {
|
func TestAgent_AddService_restoresSnapshot(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
a := NewTestAgent(t.Name(), "")
|
a := NewTestAgent(t.Name(), "")
|
||||||
defer a.Shutdown()
|
defer a.Shutdown()
|
||||||
|
@ -2510,6 +2510,49 @@ func TestAgent_addCheck_restoresSnapshot(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestAgent_AddCheck_restoresSnapshot(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
a := NewTestAgent(t.Name(), "")
|
||||||
|
defer a.Shutdown()
|
||||||
|
|
||||||
|
// First register a service
|
||||||
|
svc := &structs.NodeService{
|
||||||
|
ID: "redis",
|
||||||
|
Service: "redis",
|
||||||
|
Tags: []string{"foo"},
|
||||||
|
Port: 8000,
|
||||||
|
}
|
||||||
|
if err := a.AddService(svc, nil, false, "", ConfigSourceLocal); err != nil {
|
||||||
|
t.Fatalf("err: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register a check
|
||||||
|
check1 := &structs.HealthCheck{
|
||||||
|
Node: a.Config.NodeName,
|
||||||
|
CheckID: "service:redis",
|
||||||
|
Name: "redischeck",
|
||||||
|
Status: api.HealthPassing,
|
||||||
|
ServiceID: "redis",
|
||||||
|
ServiceName: "redis",
|
||||||
|
}
|
||||||
|
if err := a.AddCheck(check1, nil, false, "", ConfigSourceLocal); err != nil {
|
||||||
|
t.Fatalf("err: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Re-registering the check preserves its state
|
||||||
|
check1.Status = ""
|
||||||
|
if err := a.AddCheck(check1, &structs.CheckType{TTL: 30 * time.Second}, false, "", ConfigSourceLocal); err != nil {
|
||||||
|
t.Fatalf("err: %s", err)
|
||||||
|
}
|
||||||
|
check, ok := a.State.Checks()["service:redis"]
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("missing check")
|
||||||
|
}
|
||||||
|
if check.Status != api.HealthPassing {
|
||||||
|
t.Fatalf("bad: %s", check.Status)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestAgent_NodeMaintenanceMode(t *testing.T) {
|
func TestAgent_NodeMaintenanceMode(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
a := NewTestAgent(t.Name(), "")
|
a := NewTestAgent(t.Name(), "")
|
||||||
|
|
Loading…
Reference in New Issue