mirror of
https://github.com/status-im/consul.git
synced 2025-02-18 00:27:04 +00:00
Merge pull request #3683 from hashicorp/check-logging-redux
Cleans up check logging.
This commit is contained in:
commit
aab5d26649
@ -103,16 +103,13 @@ func (c *CheckMonitor) check() {
|
|||||||
// Create the command
|
// Create the command
|
||||||
var cmd *osexec.Cmd
|
var cmd *osexec.Cmd
|
||||||
var err error
|
var err error
|
||||||
var cmdDisplay string
|
|
||||||
if len(c.ScriptArgs) > 0 {
|
if len(c.ScriptArgs) > 0 {
|
||||||
cmdDisplay = fmt.Sprintf("%v", c.ScriptArgs)
|
|
||||||
cmd, err = exec.Subprocess(c.ScriptArgs)
|
cmd, err = exec.Subprocess(c.ScriptArgs)
|
||||||
} else {
|
} else {
|
||||||
cmdDisplay = c.Script
|
|
||||||
cmd, err = exec.Script(c.Script)
|
cmd, err = exec.Script(c.Script)
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.Logger.Printf("[ERR] agent: failed to setup invoke '%s': %s", cmdDisplay, err)
|
c.Logger.Printf("[ERR] agent: Check %q failed to setup: %s", c.CheckID, err)
|
||||||
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
|
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -129,14 +126,13 @@ func (c *CheckMonitor) check() {
|
|||||||
outputStr = fmt.Sprintf("Captured %d of %d bytes\n...\n%s",
|
outputStr = fmt.Sprintf("Captured %d of %d bytes\n...\n%s",
|
||||||
output.Size(), output.TotalWritten(), outputStr)
|
output.Size(), output.TotalWritten(), outputStr)
|
||||||
}
|
}
|
||||||
c.Logger.Printf("[DEBUG] agent: Check '%s' script '%s' output: %s",
|
c.Logger.Printf("[TRACE] agent: Check %q output: %s", c.CheckID, outputStr)
|
||||||
c.CheckID, cmdDisplay, outputStr)
|
|
||||||
return outputStr
|
return outputStr
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start the check
|
// Start the check
|
||||||
if err := cmd.Start(); err != nil {
|
if err := cmd.Start(); err != nil {
|
||||||
c.Logger.Printf("[ERR] agent: failed to invoke '%s': %s", cmdDisplay, err)
|
c.Logger.Printf("[ERR] agent: Check %q failed to invoke: %s", c.CheckID, err)
|
||||||
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
|
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -154,11 +150,11 @@ func (c *CheckMonitor) check() {
|
|||||||
select {
|
select {
|
||||||
case <-time.After(timeout):
|
case <-time.After(timeout):
|
||||||
if err := exec.KillCommandSubtree(cmd); err != nil {
|
if err := exec.KillCommandSubtree(cmd); err != nil {
|
||||||
c.Logger.Printf("[WARN] Failed to kill check '%s' after timeout: %v", cmdDisplay, err)
|
c.Logger.Printf("[WARN] Check %q failed to kill after timeout: %s", c.CheckID, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
msg := fmt.Sprintf("Timed out (%s) running check", timeout.String())
|
msg := fmt.Sprintf("Timed out (%s) running check", timeout.String())
|
||||||
c.Logger.Printf("[WARN] %s '%s'", msg, cmdDisplay)
|
c.Logger.Printf("[WARN] Check %q: %s", c.CheckID, msg)
|
||||||
|
|
||||||
outputStr := truncateAndLogOutput()
|
outputStr := truncateAndLogOutput()
|
||||||
if len(outputStr) > 0 {
|
if len(outputStr) > 0 {
|
||||||
@ -178,7 +174,7 @@ func (c *CheckMonitor) check() {
|
|||||||
// Check if the check passed
|
// Check if the check passed
|
||||||
outputStr := truncateAndLogOutput()
|
outputStr := truncateAndLogOutput()
|
||||||
if err == nil {
|
if err == nil {
|
||||||
c.Logger.Printf("[DEBUG] agent: Check '%v' is passing", c.CheckID)
|
c.Logger.Printf("[DEBUG] agent: Check %q is passing", c.CheckID)
|
||||||
c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, outputStr)
|
c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, outputStr)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -189,7 +185,7 @@ func (c *CheckMonitor) check() {
|
|||||||
if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
|
if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
|
||||||
code := status.ExitStatus()
|
code := status.ExitStatus()
|
||||||
if code == 1 {
|
if code == 1 {
|
||||||
c.Logger.Printf("[WARN] agent: Check '%v' is now warning", c.CheckID)
|
c.Logger.Printf("[WARN] agent: Check %q is now warning", c.CheckID)
|
||||||
c.Notify.UpdateCheck(c.CheckID, api.HealthWarning, outputStr)
|
c.Notify.UpdateCheck(c.CheckID, api.HealthWarning, outputStr)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -197,7 +193,7 @@ func (c *CheckMonitor) check() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Set the health as critical
|
// Set the health as critical
|
||||||
c.Logger.Printf("[WARN] agent: Check '%v' is now critical", c.CheckID)
|
c.Logger.Printf("[WARN] agent: Check %q is now critical", c.CheckID)
|
||||||
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, outputStr)
|
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, outputStr)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -247,7 +243,7 @@ func (c *CheckTTL) run() {
|
|||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-c.timer.C:
|
case <-c.timer.C:
|
||||||
c.Logger.Printf("[WARN] agent: Check '%v' missed TTL, is now critical",
|
c.Logger.Printf("[WARN] agent: Check %q missed TTL, is now critical",
|
||||||
c.CheckID)
|
c.CheckID)
|
||||||
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, c.getExpiredOutput())
|
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, c.getExpiredOutput())
|
||||||
|
|
||||||
@ -273,8 +269,7 @@ func (c *CheckTTL) getExpiredOutput() string {
|
|||||||
// SetStatus is used to update the status of the check,
|
// SetStatus is used to update the status of the check,
|
||||||
// and to renew the TTL. If expired, TTL is restarted.
|
// and to renew the TTL. If expired, TTL is restarted.
|
||||||
func (c *CheckTTL) SetStatus(status, output string) {
|
func (c *CheckTTL) SetStatus(status, output string) {
|
||||||
c.Logger.Printf("[DEBUG] agent: Check '%v' status is now %v",
|
c.Logger.Printf("[DEBUG] agent: Check %q status is now %s", c.CheckID, status)
|
||||||
c.CheckID, status)
|
|
||||||
c.Notify.UpdateCheck(c.CheckID, status, output)
|
c.Notify.UpdateCheck(c.CheckID, status, output)
|
||||||
|
|
||||||
// Store the last output so we can retain it if the TTL expires.
|
// Store the last output so we can retain it if the TTL expires.
|
||||||
@ -358,7 +353,6 @@ func (c *CheckHTTP) Stop() {
|
|||||||
func (c *CheckHTTP) run() {
|
func (c *CheckHTTP) run() {
|
||||||
// Get the randomized initial pause time
|
// Get the randomized initial pause time
|
||||||
initialPauseTime := lib.RandomStagger(c.Interval)
|
initialPauseTime := lib.RandomStagger(c.Interval)
|
||||||
c.Logger.Printf("[DEBUG] agent: pausing %v before first HTTP request of %s", initialPauseTime, c.HTTP)
|
|
||||||
next := time.After(initialPauseTime)
|
next := time.After(initialPauseTime)
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
@ -380,7 +374,7 @@ func (c *CheckHTTP) check() {
|
|||||||
|
|
||||||
req, err := http.NewRequest(method, c.HTTP, nil)
|
req, err := http.NewRequest(method, c.HTTP, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.Logger.Printf("[WARN] agent: http request failed '%s': %s", c.HTTP, err)
|
c.Logger.Printf("[WARN] agent: Check %q HTTP request failed: %s", c.CheckID, err)
|
||||||
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
|
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -405,7 +399,7 @@ func (c *CheckHTTP) check() {
|
|||||||
|
|
||||||
resp, err := c.httpClient.Do(req)
|
resp, err := c.httpClient.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.Logger.Printf("[WARN] agent: http request failed '%s': %s", c.HTTP, err)
|
c.Logger.Printf("[WARN] agent: Check %q HTTP request failed: %s", c.CheckID, err)
|
||||||
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
|
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -414,7 +408,7 @@ func (c *CheckHTTP) check() {
|
|||||||
// Read the response into a circular buffer to limit the size
|
// Read the response into a circular buffer to limit the size
|
||||||
output, _ := circbuf.NewBuffer(BufSize)
|
output, _ := circbuf.NewBuffer(BufSize)
|
||||||
if _, err := io.Copy(output, resp.Body); err != nil {
|
if _, err := io.Copy(output, resp.Body); err != nil {
|
||||||
c.Logger.Printf("[WARN] agent: Check '%v': Get error while reading body: %s", c.CheckID, err)
|
c.Logger.Printf("[WARN] agent: Check %q error while reading body: %s", c.CheckID, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Format the response body
|
// Format the response body
|
||||||
@ -422,19 +416,19 @@ func (c *CheckHTTP) check() {
|
|||||||
|
|
||||||
if resp.StatusCode >= 200 && resp.StatusCode <= 299 {
|
if resp.StatusCode >= 200 && resp.StatusCode <= 299 {
|
||||||
// PASSING (2xx)
|
// PASSING (2xx)
|
||||||
c.Logger.Printf("[DEBUG] agent: Check '%v' is passing", c.CheckID)
|
c.Logger.Printf("[DEBUG] agent: Check %q is passing", c.CheckID)
|
||||||
c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, result)
|
c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, result)
|
||||||
|
|
||||||
} else if resp.StatusCode == 429 {
|
} else if resp.StatusCode == 429 {
|
||||||
// WARNING
|
// WARNING
|
||||||
// 429 Too Many Requests (RFC 6585)
|
// 429 Too Many Requests (RFC 6585)
|
||||||
// The user has sent too many requests in a given amount of time.
|
// The user has sent too many requests in a given amount of time.
|
||||||
c.Logger.Printf("[WARN] agent: Check '%v' is now warning", c.CheckID)
|
c.Logger.Printf("[WARN] agent: Check %q is now warning", c.CheckID)
|
||||||
c.Notify.UpdateCheck(c.CheckID, api.HealthWarning, result)
|
c.Notify.UpdateCheck(c.CheckID, api.HealthWarning, result)
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// CRITICAL
|
// CRITICAL
|
||||||
c.Logger.Printf("[WARN] agent: Check '%v' is now critical", c.CheckID)
|
c.Logger.Printf("[WARN] agent: Check %q is now critical", c.CheckID)
|
||||||
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, result)
|
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, result)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -496,7 +490,6 @@ func (c *CheckTCP) Stop() {
|
|||||||
func (c *CheckTCP) run() {
|
func (c *CheckTCP) run() {
|
||||||
// Get the randomized initial pause time
|
// Get the randomized initial pause time
|
||||||
initialPauseTime := lib.RandomStagger(c.Interval)
|
initialPauseTime := lib.RandomStagger(c.Interval)
|
||||||
c.Logger.Printf("[DEBUG] agent: pausing %v before first socket connection of %s", initialPauseTime, c.TCP)
|
|
||||||
next := time.After(initialPauseTime)
|
next := time.After(initialPauseTime)
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
@ -513,12 +506,12 @@ func (c *CheckTCP) run() {
|
|||||||
func (c *CheckTCP) check() {
|
func (c *CheckTCP) check() {
|
||||||
conn, err := c.dialer.Dial(`tcp`, c.TCP)
|
conn, err := c.dialer.Dial(`tcp`, c.TCP)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.Logger.Printf("[WARN] agent: socket connection failed '%s': %s", c.TCP, err)
|
c.Logger.Printf("[WARN] agent: Check %q socket connection failed: %s", c.CheckID, err)
|
||||||
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
|
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
conn.Close()
|
conn.Close()
|
||||||
c.Logger.Printf("[DEBUG] agent: Check '%v' is passing", c.CheckID)
|
c.Logger.Printf("[DEBUG] agent: Check %q is passing", c.CheckID)
|
||||||
c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, fmt.Sprintf("TCP connect %s: Success", c.TCP))
|
c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, fmt.Sprintf("TCP connect %s: Success", c.TCP))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -585,7 +578,7 @@ func (c *CheckDocker) check() {
|
|||||||
var out string
|
var out string
|
||||||
status, b, err := c.doCheck()
|
status, b, err := c.doCheck()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.Logger.Printf("[DEBUG] agent: Check '%s': %s", c.CheckID, err)
|
c.Logger.Printf("[DEBUG] agent: Check %q: %s", c.CheckID, err)
|
||||||
out = err.Error()
|
out = err.Error()
|
||||||
} else {
|
} else {
|
||||||
// out is already limited to CheckBufSize since we're getting a
|
// out is already limited to CheckBufSize since we're getting a
|
||||||
@ -595,11 +588,11 @@ func (c *CheckDocker) check() {
|
|||||||
if int(b.TotalWritten()) > len(out) {
|
if int(b.TotalWritten()) > len(out) {
|
||||||
out = fmt.Sprintf("Captured %d of %d bytes\n...\n%s", len(out), b.TotalWritten(), out)
|
out = fmt.Sprintf("Captured %d of %d bytes\n...\n%s", len(out), b.TotalWritten(), out)
|
||||||
}
|
}
|
||||||
c.Logger.Printf("[DEBUG] agent: Check '%s' script '%s' output: %s", c.CheckID, c.Script, out)
|
c.Logger.Printf("[TRACE] agent: Check %q output: %s", c.CheckID, out)
|
||||||
}
|
}
|
||||||
|
|
||||||
if status == api.HealthCritical {
|
if status == api.HealthCritical {
|
||||||
c.Logger.Printf("[WARN] agent: Check '%v' is now critical", c.CheckID)
|
c.Logger.Printf("[WARN] agent: Check %q is now critical", c.CheckID)
|
||||||
}
|
}
|
||||||
|
|
||||||
c.Notify.UpdateCheck(c.CheckID, status, out)
|
c.Notify.UpdateCheck(c.CheckID, status, out)
|
||||||
@ -632,10 +625,10 @@ func (c *CheckDocker) doCheck() (string, *circbuf.Buffer, error) {
|
|||||||
case 0:
|
case 0:
|
||||||
return api.HealthPassing, buf, nil
|
return api.HealthPassing, buf, nil
|
||||||
case 1:
|
case 1:
|
||||||
c.Logger.Printf("[DEBUG] Check failed with exit code: %d", exitCode)
|
c.Logger.Printf("[DEBUG] Check %q failed with exit code: %d", c.CheckID, exitCode)
|
||||||
return api.HealthWarning, buf, nil
|
return api.HealthWarning, buf, nil
|
||||||
default:
|
default:
|
||||||
c.Logger.Printf("[DEBUG] Check failed with exit code: %d", exitCode)
|
c.Logger.Printf("[DEBUG] Check %q failed with exit code: %d", c.CheckID, exitCode)
|
||||||
return api.HealthCritical, buf, nil
|
return api.HealthCritical, buf, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user