Merge pull request #3683 from hashicorp/check-logging-redux

Cleans up check logging.
This commit is contained in:
James Phillips 2017-11-10 12:53:18 -08:00 committed by GitHub
commit aab5d26649
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 23 additions and 30 deletions

View File

@ -103,16 +103,13 @@ func (c *CheckMonitor) check() {
// Create the command
var cmd *osexec.Cmd
var err error
var cmdDisplay string
if len(c.ScriptArgs) > 0 {
cmdDisplay = fmt.Sprintf("%v", c.ScriptArgs)
cmd, err = exec.Subprocess(c.ScriptArgs)
} else {
cmdDisplay = c.Script
cmd, err = exec.Script(c.Script)
}
if err != nil {
c.Logger.Printf("[ERR] agent: failed to setup invoke '%s': %s", cmdDisplay, err)
c.Logger.Printf("[ERR] agent: Check %q failed to setup: %s", c.CheckID, err)
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
return
}
@ -129,14 +126,13 @@ func (c *CheckMonitor) check() {
outputStr = fmt.Sprintf("Captured %d of %d bytes\n...\n%s",
output.Size(), output.TotalWritten(), outputStr)
}
c.Logger.Printf("[DEBUG] agent: Check '%s' script '%s' output: %s",
c.CheckID, cmdDisplay, outputStr)
c.Logger.Printf("[TRACE] agent: Check %q output: %s", c.CheckID, outputStr)
return outputStr
}
// Start the check
if err := cmd.Start(); err != nil {
c.Logger.Printf("[ERR] agent: failed to invoke '%s': %s", cmdDisplay, err)
c.Logger.Printf("[ERR] agent: Check %q failed to invoke: %s", c.CheckID, err)
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
return
}
@ -154,11 +150,11 @@ func (c *CheckMonitor) check() {
select {
case <-time.After(timeout):
if err := exec.KillCommandSubtree(cmd); err != nil {
c.Logger.Printf("[WARN] Failed to kill check '%s' after timeout: %v", cmdDisplay, err)
c.Logger.Printf("[WARN] Check %q failed to kill after timeout: %s", c.CheckID, err)
}
msg := fmt.Sprintf("Timed out (%s) running check", timeout.String())
c.Logger.Printf("[WARN] %s '%s'", msg, cmdDisplay)
c.Logger.Printf("[WARN] Check %q: %s", c.CheckID, msg)
outputStr := truncateAndLogOutput()
if len(outputStr) > 0 {
@ -178,7 +174,7 @@ func (c *CheckMonitor) check() {
// Check if the check passed
outputStr := truncateAndLogOutput()
if err == nil {
c.Logger.Printf("[DEBUG] agent: Check '%v' is passing", c.CheckID)
c.Logger.Printf("[DEBUG] agent: Check %q is passing", c.CheckID)
c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, outputStr)
return
}
@ -189,7 +185,7 @@ func (c *CheckMonitor) check() {
if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
code := status.ExitStatus()
if code == 1 {
c.Logger.Printf("[WARN] agent: Check '%v' is now warning", c.CheckID)
c.Logger.Printf("[WARN] agent: Check %q is now warning", c.CheckID)
c.Notify.UpdateCheck(c.CheckID, api.HealthWarning, outputStr)
return
}
@ -197,7 +193,7 @@ func (c *CheckMonitor) check() {
}
// Set the health as critical
c.Logger.Printf("[WARN] agent: Check '%v' is now critical", c.CheckID)
c.Logger.Printf("[WARN] agent: Check %q is now critical", c.CheckID)
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, outputStr)
}
@ -247,7 +243,7 @@ func (c *CheckTTL) run() {
for {
select {
case <-c.timer.C:
c.Logger.Printf("[WARN] agent: Check '%v' missed TTL, is now critical",
c.Logger.Printf("[WARN] agent: Check %q missed TTL, is now critical",
c.CheckID)
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, c.getExpiredOutput())
@ -273,8 +269,7 @@ func (c *CheckTTL) getExpiredOutput() string {
// SetStatus is used to update the status of the check,
// and to renew the TTL. If expired, TTL is restarted.
func (c *CheckTTL) SetStatus(status, output string) {
c.Logger.Printf("[DEBUG] agent: Check '%v' status is now %v",
c.CheckID, status)
c.Logger.Printf("[DEBUG] agent: Check %q status is now %s", c.CheckID, status)
c.Notify.UpdateCheck(c.CheckID, status, output)
// Store the last output so we can retain it if the TTL expires.
@ -358,7 +353,6 @@ func (c *CheckHTTP) Stop() {
func (c *CheckHTTP) run() {
// Get the randomized initial pause time
initialPauseTime := lib.RandomStagger(c.Interval)
c.Logger.Printf("[DEBUG] agent: pausing %v before first HTTP request of %s", initialPauseTime, c.HTTP)
next := time.After(initialPauseTime)
for {
select {
@ -380,7 +374,7 @@ func (c *CheckHTTP) check() {
req, err := http.NewRequest(method, c.HTTP, nil)
if err != nil {
c.Logger.Printf("[WARN] agent: http request failed '%s': %s", c.HTTP, err)
c.Logger.Printf("[WARN] agent: Check %q HTTP request failed: %s", c.CheckID, err)
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
return
}
@ -405,7 +399,7 @@ func (c *CheckHTTP) check() {
resp, err := c.httpClient.Do(req)
if err != nil {
c.Logger.Printf("[WARN] agent: http request failed '%s': %s", c.HTTP, err)
c.Logger.Printf("[WARN] agent: Check %q HTTP request failed: %s", c.CheckID, err)
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
return
}
@ -414,7 +408,7 @@ func (c *CheckHTTP) check() {
// Read the response into a circular buffer to limit the size
output, _ := circbuf.NewBuffer(BufSize)
if _, err := io.Copy(output, resp.Body); err != nil {
c.Logger.Printf("[WARN] agent: Check '%v': Get error while reading body: %s", c.CheckID, err)
c.Logger.Printf("[WARN] agent: Check %q error while reading body: %s", c.CheckID, err)
}
// Format the response body
@ -422,19 +416,19 @@ func (c *CheckHTTP) check() {
if resp.StatusCode >= 200 && resp.StatusCode <= 299 {
// PASSING (2xx)
c.Logger.Printf("[DEBUG] agent: Check '%v' is passing", c.CheckID)
c.Logger.Printf("[DEBUG] agent: Check %q is passing", c.CheckID)
c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, result)
} else if resp.StatusCode == 429 {
// WARNING
// 429 Too Many Requests (RFC 6585)
// The user has sent too many requests in a given amount of time.
c.Logger.Printf("[WARN] agent: Check '%v' is now warning", c.CheckID)
c.Logger.Printf("[WARN] agent: Check %q is now warning", c.CheckID)
c.Notify.UpdateCheck(c.CheckID, api.HealthWarning, result)
} else {
// CRITICAL
c.Logger.Printf("[WARN] agent: Check '%v' is now critical", c.CheckID)
c.Logger.Printf("[WARN] agent: Check %q is now critical", c.CheckID)
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, result)
}
}
@ -496,7 +490,6 @@ func (c *CheckTCP) Stop() {
func (c *CheckTCP) run() {
// Get the randomized initial pause time
initialPauseTime := lib.RandomStagger(c.Interval)
c.Logger.Printf("[DEBUG] agent: pausing %v before first socket connection of %s", initialPauseTime, c.TCP)
next := time.After(initialPauseTime)
for {
select {
@ -513,12 +506,12 @@ func (c *CheckTCP) run() {
func (c *CheckTCP) check() {
conn, err := c.dialer.Dial(`tcp`, c.TCP)
if err != nil {
c.Logger.Printf("[WARN] agent: socket connection failed '%s': %s", c.TCP, err)
c.Logger.Printf("[WARN] agent: Check %q socket connection failed: %s", c.CheckID, err)
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
return
}
conn.Close()
c.Logger.Printf("[DEBUG] agent: Check '%v' is passing", c.CheckID)
c.Logger.Printf("[DEBUG] agent: Check %q is passing", c.CheckID)
c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, fmt.Sprintf("TCP connect %s: Success", c.TCP))
}
@ -585,7 +578,7 @@ func (c *CheckDocker) check() {
var out string
status, b, err := c.doCheck()
if err != nil {
c.Logger.Printf("[DEBUG] agent: Check '%s': %s", c.CheckID, err)
c.Logger.Printf("[DEBUG] agent: Check %q: %s", c.CheckID, err)
out = err.Error()
} else {
// out is already limited to CheckBufSize since we're getting a
@ -595,11 +588,11 @@ func (c *CheckDocker) check() {
if int(b.TotalWritten()) > len(out) {
out = fmt.Sprintf("Captured %d of %d bytes\n...\n%s", len(out), b.TotalWritten(), out)
}
c.Logger.Printf("[DEBUG] agent: Check '%s' script '%s' output: %s", c.CheckID, c.Script, out)
c.Logger.Printf("[TRACE] agent: Check %q output: %s", c.CheckID, out)
}
if status == api.HealthCritical {
c.Logger.Printf("[WARN] agent: Check '%v' is now critical", c.CheckID)
c.Logger.Printf("[WARN] agent: Check %q is now critical", c.CheckID)
}
c.Notify.UpdateCheck(c.CheckID, status, out)
@ -632,10 +625,10 @@ func (c *CheckDocker) doCheck() (string, *circbuf.Buffer, error) {
case 0:
return api.HealthPassing, buf, nil
case 1:
c.Logger.Printf("[DEBUG] Check failed with exit code: %d", exitCode)
c.Logger.Printf("[DEBUG] Check %q failed with exit code: %d", c.CheckID, exitCode)
return api.HealthWarning, buf, nil
default:
c.Logger.Printf("[DEBUG] Check failed with exit code: %d", exitCode)
c.Logger.Printf("[DEBUG] Check %q failed with exit code: %d", c.CheckID, exitCode)
return api.HealthCritical, buf, nil
}
}