diff --git a/agent/agent.go b/agent/agent.go index 1c421587be..7455ee6b77 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -915,6 +915,7 @@ func (a *Agent) consulConfig() (*consul.Config, error) { base.CoordinateUpdateBatchSize = a.config.ConsulCoordinateUpdateBatchSize base.CoordinateUpdateMaxBatches = a.config.ConsulCoordinateUpdateMaxBatches base.CoordinateUpdatePeriod = a.config.ConsulCoordinateUpdatePeriod + base.CheckOutputMaxSize = a.config.CheckOutputMaxSize base.RaftConfig.HeartbeatTimeout = a.config.ConsulRaftHeartbeatTimeout base.RaftConfig.LeaderLeaseTimeout = a.config.ConsulRaftLeaderLeaseTimeout @@ -971,6 +972,9 @@ func (a *Agent) consulConfig() (*consul.Config, error) { if a.config.Bootstrap { base.Bootstrap = true } + if a.config.CheckOutputMaxSize > 0 { + base.CheckOutputMaxSize = a.config.CheckOutputMaxSize + } if a.config.RejoinAfterLeave { base.RejoinAfterLeave = true } @@ -2248,6 +2252,13 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType, // Check if already registered if chkType != nil { + maxOutputSize := a.config.CheckOutputMaxSize + if maxOutputSize == 0 { + maxOutputSize = checks.DefaultBufSize + } + if chkType.OutputMaxSize > 0 && maxOutputSize > chkType.OutputMaxSize { + maxOutputSize = chkType.OutputMaxSize + } switch { case chkType.IsTTL(): @@ -2257,10 +2268,11 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType, } ttl := &checks.CheckTTL{ - Notify: a.State, - CheckID: check.CheckID, - TTL: chkType.TTL, - Logger: a.logger, + Notify: a.State, + CheckID: check.CheckID, + TTL: chkType.TTL, + Logger: a.logger, + OutputMaxSize: maxOutputSize, } // Restore persisted state, if any @@ -2294,6 +2306,7 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType, Interval: chkType.Interval, Timeout: chkType.Timeout, Logger: a.logger, + OutputMaxSize: maxOutputSize, TLSClientConfig: tlsClientConfig, } http.Start() @@ -2361,7 +2374,7 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType, } if a.dockerClient == nil { - dc, err := checks.NewDockerClient(os.Getenv("DOCKER_HOST"), checks.BufSize) + dc, err := checks.NewDockerClient(os.Getenv("DOCKER_HOST"), int64(maxOutputSize)) if err != nil { a.logger.Printf("[ERR] agent: error creating docker client: %s", err) return err @@ -2396,14 +2409,14 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType, check.CheckID, checks.MinInterval) chkType.Interval = checks.MinInterval } - monitor := &checks.CheckMonitor{ - Notify: a.State, - CheckID: check.CheckID, - ScriptArgs: chkType.ScriptArgs, - Interval: chkType.Interval, - Timeout: chkType.Timeout, - Logger: a.logger, + Notify: a.State, + CheckID: check.CheckID, + ScriptArgs: chkType.ScriptArgs, + Interval: chkType.Interval, + Timeout: chkType.Timeout, + Logger: a.logger, + OutputMaxSize: maxOutputSize, } monitor.Start() a.checkMonitors[check.CheckID] = monitor @@ -2878,7 +2891,7 @@ func (a *Agent) updateTTLCheck(checkID types.CheckID, status, output string) err } // Set the status through CheckTTL to reset the TTL. - check.SetStatus(status, output) + outputTruncated := check.SetStatus(status, output) // We don't write any files in dev mode so bail here. if a.config.DataDir == "" { @@ -2887,7 +2900,7 @@ func (a *Agent) updateTTLCheck(checkID types.CheckID, status, output string) err // Persist the state so the TTL check can come up in a good state after // an agent restart, especially with long TTL values. - if err := a.persistCheckState(check, status, output); err != nil { + if err := a.persistCheckState(check, status, outputTruncated); err != nil { return fmt.Errorf("failed persisting state for check %q: %s", checkID, err) } diff --git a/agent/agent_endpoint.go b/agent/agent_endpoint.go index 229969df04..e25f1ea6b1 100644 --- a/agent/agent_endpoint.go +++ b/agent/agent_endpoint.go @@ -19,7 +19,6 @@ import ( "github.com/hashicorp/consul/acl" cachetype "github.com/hashicorp/consul/agent/cache-types" - "github.com/hashicorp/consul/agent/checks" "github.com/hashicorp/consul/agent/debug" "github.com/hashicorp/consul/agent/local" "github.com/hashicorp/consul/agent/structs" @@ -715,12 +714,6 @@ func (s *HTTPServer) AgentCheckUpdate(resp http.ResponseWriter, req *http.Reques return nil, nil } - total := len(update.Output) - if total > checks.BufSize { - update.Output = fmt.Sprintf("%s ... (captured %d of %d bytes)", - update.Output[:checks.BufSize], checks.BufSize, total) - } - checkID := types.CheckID(strings.TrimPrefix(req.URL.Path, "/v1/agent/check/update/")) // Get the provided token, if any, and vet against any ACL policies. diff --git a/agent/agent_endpoint_test.go b/agent/agent_endpoint_test.go index 1aa8db7084..b0fc8eb8ba 100644 --- a/agent/agent_endpoint_test.go +++ b/agent/agent_endpoint_test.go @@ -18,7 +18,6 @@ import ( "time" "github.com/hashicorp/consul/acl" - "github.com/hashicorp/consul/agent/checks" "github.com/hashicorp/consul/agent/config" "github.com/hashicorp/consul/agent/connect" "github.com/hashicorp/consul/agent/debug" @@ -2299,7 +2298,8 @@ func TestAgent_FailCheck_ACLDeny(t *testing.T) { func TestAgent_UpdateCheck(t *testing.T) { t.Parallel() - a := NewTestAgent(t, t.Name(), "") + maxChecksSize := 256 + a := NewTestAgent(t, t.Name(), fmt.Sprintf("check_output_max_size=%d", maxChecksSize)) defer a.Shutdown() testrpc.WaitForTestAgent(t, a.RPC, "dc1") @@ -2340,7 +2340,7 @@ func TestAgent_UpdateCheck(t *testing.T) { t.Run("log output limit", func(t *testing.T) { args := checkUpdate{ Status: api.HealthPassing, - Output: strings.Repeat("-= bad -=", 5*checks.BufSize), + Output: strings.Repeat("-= bad -=", 5*maxChecksSize), } req, _ := http.NewRequest("PUT", "/v1/agent/check/update/test", jsonReader(args)) resp := httptest.NewRecorder() @@ -2359,8 +2359,8 @@ func TestAgent_UpdateCheck(t *testing.T) { // rough check that the output buffer was cut down so this test // isn't super brittle. state := a.State.Checks()["test"] - if state.Status != api.HealthPassing || len(state.Output) > 2*checks.BufSize { - t.Fatalf("bad: %v", state) + if state.Status != api.HealthPassing || len(state.Output) > 2*maxChecksSize { + t.Fatalf("bad: %v, (len:=%d)", state, len(state.Output)) } }) diff --git a/agent/agent_test.go b/agent/agent_test.go index a4f227150e..3cedfa0cf7 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -1586,7 +1586,7 @@ func TestAgent_updateTTLCheck(t *testing.T) { t.Parallel() a := NewTestAgent(t, t.Name(), "") defer a.Shutdown() - + checkBufSize := 100 health := &structs.HealthCheck{ Node: "foo", CheckID: "mem", @@ -1594,7 +1594,8 @@ func TestAgent_updateTTLCheck(t *testing.T) { Status: api.HealthCritical, } chk := &structs.CheckType{ - TTL: 15 * time.Second, + TTL: 15 * time.Second, + OutputMaxSize: checkBufSize, } // Add check and update it. @@ -1614,6 +1615,19 @@ func TestAgent_updateTTLCheck(t *testing.T) { if status.Output != "foo" { t.Fatalf("bad: %v", status) } + + if err := a.updateTTLCheck("mem", api.HealthCritical, strings.Repeat("--bad-- ", 5*checkBufSize)); err != nil { + t.Fatalf("err: %v", err) + } + + // Ensure we have a check mapping. + status = a.State.Checks()["mem"] + if status.Status != api.HealthCritical { + t.Fatalf("bad: %v", status) + } + if len(status.Output) > checkBufSize*2 { + t.Fatalf("bad: %v", len(status.Output)) + } } func TestAgent_PersistService(t *testing.T) { diff --git a/agent/checks/check.go b/agent/checks/check.go index b09bdc8be1..f61a68e519 100644 --- a/agent/checks/check.go +++ b/agent/checks/check.go @@ -28,10 +28,10 @@ const ( // Otherwise we risk fork bombing a system. MinInterval = time.Second - // BufSize is the maximum size of the captured - // check output. Prevents an enormous buffer + // DefaultBufSize is the maximum size of the captured + // check output by defaut. Prevents an enormous buffer // from being captured - BufSize = 4 * 1024 // 4KB + DefaultBufSize = 4 * 1024 // 4KB // UserAgent is the value of the User-Agent header // for HTTP health checks. @@ -56,13 +56,14 @@ type CheckNotifier interface { // determine the health of a given check. It is compatible with // nagios plugins and expects the output in the same format. type CheckMonitor struct { - Notify CheckNotifier - CheckID types.CheckID - Script string - ScriptArgs []string - Interval time.Duration - Timeout time.Duration - Logger *log.Logger + Notify CheckNotifier + CheckID types.CheckID + Script string + ScriptArgs []string + Interval time.Duration + Timeout time.Duration + Logger *log.Logger + OutputMaxSize int stop bool stopCh chan struct{} @@ -122,7 +123,7 @@ func (c *CheckMonitor) check() { } // Collect the output - output, _ := circbuf.NewBuffer(BufSize) + output, _ := circbuf.NewBuffer(int64(c.OutputMaxSize)) cmd.Stdout = output cmd.Stderr = output exec.SetSysProcAttr(cmd) @@ -222,12 +223,17 @@ type CheckTTL struct { stop bool stopCh chan struct{} stopLock sync.Mutex + + OutputMaxSize int } // Start is used to start a check ttl, runs until Stop() func (c *CheckTTL) Start() { c.stopLock.Lock() defer c.stopLock.Unlock() + if c.OutputMaxSize < 1 { + c.OutputMaxSize = DefaultBufSize + } c.stop = false c.stopCh = make(chan struct{}) c.timer = time.NewTimer(c.TTL) @@ -275,16 +281,22 @@ func (c *CheckTTL) getExpiredOutput() string { // SetStatus is used to update the status of the check, // and to renew the TTL. If expired, TTL is restarted. -func (c *CheckTTL) SetStatus(status, output string) { +// output is returned (might be truncated) +func (c *CheckTTL) SetStatus(status, output string) string { c.Logger.Printf("[DEBUG] agent: Check %q status is now %s", c.CheckID, status) + total := len(output) + if total > c.OutputMaxSize { + output = fmt.Sprintf("%s ... (captured %d of %d bytes)", + output[:c.OutputMaxSize], c.OutputMaxSize, total) + } c.Notify.UpdateCheck(c.CheckID, status, output) - // Store the last output so we can retain it if the TTL expires. c.lastOutputLock.Lock() c.lastOutput = output c.lastOutputLock.Unlock() c.timer.Reset(c.TTL) + return output } // CheckHTTP is used to periodically make an HTTP request to @@ -303,6 +315,7 @@ type CheckHTTP struct { Timeout time.Duration Logger *log.Logger TLSClientConfig *tls.Config + OutputMaxSize int httpClient *http.Client stop bool @@ -339,6 +352,9 @@ func (c *CheckHTTP) Start() { } else if c.Interval < 10*time.Second { c.httpClient.Timeout = c.Interval } + if c.OutputMaxSize < 1 { + c.OutputMaxSize = DefaultBufSize + } } c.stop = false @@ -413,7 +429,7 @@ func (c *CheckHTTP) check() { defer resp.Body.Close() // Read the response into a circular buffer to limit the size - output, _ := circbuf.NewBuffer(BufSize) + output, _ := circbuf.NewBuffer(int64(c.OutputMaxSize)) if _, err := io.Copy(output, resp.Body); err != nil { c.Logger.Printf("[WARN] agent: Check %q error while reading body: %s", c.CheckID, err) } diff --git a/agent/checks/check_test.go b/agent/checks/check_test.go index 014d58302b..b61bbc19c8 100644 --- a/agent/checks/check_test.go +++ b/agent/checks/check_test.go @@ -44,11 +44,12 @@ func TestCheckMonitor_Script(t *testing.T) { t.Run(tt.status, func(t *testing.T) { notif := mock.NewNotify() check := &CheckMonitor{ - Notify: notif, - CheckID: types.CheckID("foo"), - Script: tt.script, - Interval: 25 * time.Millisecond, - Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + Notify: notif, + CheckID: types.CheckID("foo"), + Script: tt.script, + Interval: 25 * time.Millisecond, + Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + OutputMaxSize: DefaultBufSize, } check.Start() defer check.Stop() @@ -79,11 +80,12 @@ func TestCheckMonitor_Args(t *testing.T) { t.Run(tt.status, func(t *testing.T) { notif := mock.NewNotify() check := &CheckMonitor{ - Notify: notif, - CheckID: types.CheckID("foo"), - ScriptArgs: tt.args, - Interval: 25 * time.Millisecond, - Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + Notify: notif, + CheckID: types.CheckID("foo"), + ScriptArgs: tt.args, + Interval: 25 * time.Millisecond, + Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + OutputMaxSize: DefaultBufSize, } check.Start() defer check.Stop() @@ -103,12 +105,13 @@ func TestCheckMonitor_Timeout(t *testing.T) { // t.Parallel() // timing test. no parallel notif := mock.NewNotify() check := &CheckMonitor{ - Notify: notif, - CheckID: types.CheckID("foo"), - ScriptArgs: []string{"sh", "-c", "sleep 1 && exit 0"}, - Interval: 50 * time.Millisecond, - Timeout: 25 * time.Millisecond, - Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + Notify: notif, + CheckID: types.CheckID("foo"), + ScriptArgs: []string{"sh", "-c", "sleep 1 && exit 0"}, + Interval: 50 * time.Millisecond, + Timeout: 25 * time.Millisecond, + Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + OutputMaxSize: DefaultBufSize, } check.Start() defer check.Stop() @@ -128,11 +131,12 @@ func TestCheckMonitor_RandomStagger(t *testing.T) { // t.Parallel() // timing test. no parallel notif := mock.NewNotify() check := &CheckMonitor{ - Notify: notif, - CheckID: types.CheckID("foo"), - ScriptArgs: []string{"sh", "-c", "exit 0"}, - Interval: 25 * time.Millisecond, - Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + Notify: notif, + CheckID: types.CheckID("foo"), + ScriptArgs: []string{"sh", "-c", "exit 0"}, + Interval: 25 * time.Millisecond, + Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + OutputMaxSize: DefaultBufSize, } check.Start() defer check.Stop() @@ -153,11 +157,12 @@ func TestCheckMonitor_LimitOutput(t *testing.T) { t.Parallel() notif := mock.NewNotify() check := &CheckMonitor{ - Notify: notif, - CheckID: types.CheckID("foo"), - ScriptArgs: []string{"od", "-N", "81920", "/dev/urandom"}, - Interval: 25 * time.Millisecond, - Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + Notify: notif, + CheckID: types.CheckID("foo"), + ScriptArgs: []string{"od", "-N", "81920", "/dev/urandom"}, + Interval: 25 * time.Millisecond, + Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + OutputMaxSize: DefaultBufSize, } check.Start() defer check.Stop() @@ -165,7 +170,7 @@ func TestCheckMonitor_LimitOutput(t *testing.T) { time.Sleep(50 * time.Millisecond) // Allow for extra bytes for the truncation message - if len(notif.Output("foo")) > BufSize+100 { + if len(notif.Output("foo")) > DefaultBufSize+100 { t.Fatalf("output size is too long") } } @@ -287,7 +292,7 @@ func TestCheckHTTP(t *testing.T) { } // Body larger than 4k limit - body := bytes.Repeat([]byte{'a'}, 2*BufSize) + body := bytes.Repeat([]byte{'a'}, 2*DefaultBufSize) w.WriteHeader(tt.code) w.Write(body) })) @@ -295,13 +300,14 @@ func TestCheckHTTP(t *testing.T) { notif := mock.NewNotify() check := &CheckHTTP{ - Notify: notif, - CheckID: types.CheckID("foo"), - HTTP: server.URL, - Method: tt.method, - Header: tt.header, - Interval: 10 * time.Millisecond, - Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + Notify: notif, + CheckID: types.CheckID("foo"), + HTTP: server.URL, + Method: tt.method, + OutputMaxSize: DefaultBufSize, + Header: tt.header, + Interval: 10 * time.Millisecond, + Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), } check.Start() defer check.Stop() @@ -313,15 +319,52 @@ func TestCheckHTTP(t *testing.T) { if got, want := notif.State("foo"), tt.status; got != want { r.Fatalf("got state %q want %q", got, want) } - // Allow slightly more data than BufSize, for the header - if n := len(notif.Output("foo")); n > (BufSize + 256) { - r.Fatalf("output too long: %d (%d-byte limit)", n, BufSize) + // Allow slightly more data than DefaultBufSize, for the header + if n := len(notif.Output("foo")); n > (DefaultBufSize + 256) { + r.Fatalf("output too long: %d (%d-byte limit)", n, DefaultBufSize) } }) }) } } +func TestCheckMaxOutputSize(t *testing.T) { + t.Parallel() + timeout := 5 * time.Millisecond + server := httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, req *http.Request) { + body := bytes.Repeat([]byte{'x'}, 2*DefaultBufSize) + writer.WriteHeader(200) + writer.Write(body) + })) + defer server.Close() + + notif := mock.NewNotify() + maxOutputSize := 32 + check := &CheckHTTP{ + Notify: notif, + CheckID: types.CheckID("bar"), + HTTP: server.URL + "/v1/agent/self", + Timeout: timeout, + Interval: 2 * time.Millisecond, + Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags), + OutputMaxSize: maxOutputSize, + } + + check.Start() + defer check.Stop() + retry.Run(t, func(r *retry.R) { + if got, want := notif.Updates("bar"), 2; got < want { + r.Fatalf("got %d updates want at least %d", got, want) + } + if got, want := notif.State("bar"), api.HealthPassing; got != want { + r.Fatalf("got state %q want %q", got, want) + } + if got, want := notif.Output("bar"), "HTTP GET "+server.URL+"/v1/agent/self: 200 OK Output: "+strings.Repeat("x", maxOutputSize); got != want { + r.Fatalf("got state %q want %q", got, want) + } + }) +} + func TestCheckHTTPTimeout(t *testing.T) { t.Parallel() timeout := 5 * time.Millisecond @@ -372,7 +415,7 @@ func TestCheckHTTP_disablesKeepAlives(t *testing.T) { func largeBodyHandler(code int) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { // Body larger than 4k limit - body := bytes.Repeat([]byte{'a'}, 2*BufSize) + body := bytes.Repeat([]byte{'a'}, 2*DefaultBufSize) w.WriteHeader(code) w.Write(body) }) diff --git a/agent/config/builder.go b/agent/config/builder.go index dafc25a9f5..7a7a63e516 100644 --- a/agent/config/builder.go +++ b/agent/config/builder.go @@ -14,6 +14,7 @@ import ( "strings" "time" + "github.com/hashicorp/consul/agent/checks" "github.com/hashicorp/consul/agent/connect/ca" "github.com/hashicorp/consul/agent/consul" "github.com/hashicorp/consul/agent/structs" @@ -786,6 +787,7 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) { CAPath: b.stringVal(c.CAPath), CertFile: b.stringVal(c.CertFile), CheckUpdateInterval: b.durationVal("check_update_interval", c.CheckUpdateInterval), + CheckOutputMaxSize: b.intValWithDefault(c.CheckOutputMaxSize, 4096), Checks: checks, ClientAddrs: clientAddrs, ConfigEntryBootstrap: configEntries, @@ -964,6 +966,9 @@ func (b *Builder) Validate(rt RuntimeConfig) error { if rt.BootstrapExpect > 0 && rt.Bootstrap { return fmt.Errorf("'bootstrap_expect > 0' and 'bootstrap = true' are mutually exclusive") } + if rt.CheckOutputMaxSize < 1 { + return fmt.Errorf("check_output_max_size must be positive, to discard check output use the discard_check_output flag") + } if rt.AEInterval <= 0 { return fmt.Errorf("ae_interval cannot be %s. Must be positive", rt.AEInterval) } @@ -1174,6 +1179,7 @@ func (b *Builder) checkVal(v *CheckDefinition) *structs.CheckDefinition { Timeout: b.durationVal(fmt.Sprintf("check[%s].timeout", id), v.Timeout), TTL: b.durationVal(fmt.Sprintf("check[%s].ttl", id), v.TTL), DeregisterCriticalServiceAfter: b.durationVal(fmt.Sprintf("check[%s].deregister_critical_service_after", id), v.DeregisterCriticalServiceAfter), + OutputMaxSize: b.intValWithDefault(v.OutputMaxSize, checks.DefaultBufSize), } } @@ -1347,13 +1353,17 @@ func (b *Builder) durationVal(name string, v *string) (d time.Duration) { return b.durationValWithDefault(name, v, 0) } -func (b *Builder) intVal(v *int) int { +func (b *Builder) intValWithDefault(v *int, defaultVal int) int { if v == nil { - return 0 + return defaultVal } return *v } +func (b *Builder) intVal(v *int) int { + return b.intValWithDefault(v, 0) +} + func (b *Builder) portVal(name string, v *int) int { if v == nil || *v <= 0 { return -1 diff --git a/agent/config/config.go b/agent/config/config.go index fdafdb1636..d899f634c0 100644 --- a/agent/config/config.go +++ b/agent/config/config.go @@ -184,6 +184,7 @@ type Config struct { CAPath *string `json:"ca_path,omitempty" hcl:"ca_path" mapstructure:"ca_path"` CertFile *string `json:"cert_file,omitempty" hcl:"cert_file" mapstructure:"cert_file"` Check *CheckDefinition `json:"check,omitempty" hcl:"check" mapstructure:"check"` // needs to be a pointer to avoid partial merges + CheckOutputMaxSize *int `json:"check_output_max_size,omitempty" hcl:"check_output_max_size" mapstructure:"check_output_max_size"` CheckUpdateInterval *string `json:"check_update_interval,omitempty" hcl:"check_update_interval" mapstructure:"check_update_interval"` Checks []CheckDefinition `json:"checks,omitempty" hcl:"checks" mapstructure:"checks"` ClientAddr *string `json:"client_addr,omitempty" hcl:"client_addr" mapstructure:"client_addr"` @@ -390,6 +391,7 @@ type CheckDefinition struct { HTTP *string `json:"http,omitempty" hcl:"http" mapstructure:"http"` Header map[string][]string `json:"header,omitempty" hcl:"header" mapstructure:"header"` Method *string `json:"method,omitempty" hcl:"method" mapstructure:"method"` + OutputMaxSize *int `json:"output_max_size,omitempty" hcl:"output_max_size" mapstructure:"output_max_size"` TCP *string `json:"tcp,omitempty" hcl:"tcp" mapstructure:"tcp"` Interval *string `json:"interval,omitempty" hcl:"interval" mapstructure:"interval"` DockerContainerID *string `json:"docker_container_id,omitempty" hcl:"docker_container_id" mapstructure:"docker_container_id"` diff --git a/agent/config/default.go b/agent/config/default.go index 0d6fe3ff69..477c144355 100644 --- a/agent/config/default.go +++ b/agent/config/default.go @@ -4,6 +4,7 @@ import ( "fmt" "strconv" + "github.com/hashicorp/consul/agent/checks" "github.com/hashicorp/consul/agent/consul" "github.com/hashicorp/consul/version" ) @@ -49,6 +50,7 @@ func DefaultSource() Source { bind_addr = "0.0.0.0" bootstrap = false bootstrap_expect = 0 + check_output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + ` check_update_interval = "5m" client_addr = "127.0.0.1" datacenter = "` + consul.DefaultDC + `" diff --git a/agent/config/flags.go b/agent/config/flags.go index 7dc4f5a1f8..a1ea99bbab 100644 --- a/agent/config/flags.go +++ b/agent/config/flags.go @@ -60,6 +60,7 @@ func AddFlags(fs *flag.FlagSet, f *Flags) { add(&f.Config.Bootstrap, "bootstrap", "Sets server to bootstrap mode.") add(&f.Config.BootstrapExpect, "bootstrap-expect", "Sets server to expect bootstrap mode.") add(&f.Config.ClientAddr, "client", "Sets the address to bind for client access. This includes RPC, DNS, HTTP, HTTPS and gRPC (if configured).") + add(&f.Config.CheckOutputMaxSize, "check_output_max_size", "Sets the maximum output size for checks on this agent") add(&f.ConfigFiles, "config-dir", "Path to a directory to read configuration files from. This will read every file ending in '.json' as configuration in this directory in alphabetical order. Can be specified multiple times.") add(&f.ConfigFiles, "config-file", "Path to a file in JSON or HCL format with a matching file extension. Can be specified multiple times.") add(&f.ConfigFormat, "config-format", "Config files are in this format irrespective of their extension. Must be 'hcl' or 'json'") diff --git a/agent/config/runtime.go b/agent/config/runtime.go index dc9d567f81..5286392f50 100644 --- a/agent/config/runtime.go +++ b/agent/config/runtime.go @@ -459,6 +459,11 @@ type RuntimeConfig struct { // hcl: check_update_interval = "duration" CheckUpdateInterval time.Duration + // Maximum size for the output of a healtcheck + // hcl check_output_max_size int + // flag: -check_output_max_size int + CheckOutputMaxSize int + // Checks contains the provided check definitions. // // hcl: checks = [ diff --git a/agent/config/runtime_test.go b/agent/config/runtime_test.go index 70e6eb3d96..405d8a1406 100644 --- a/agent/config/runtime_test.go +++ b/agent/config/runtime_test.go @@ -18,6 +18,7 @@ import ( "testing" "time" + "github.com/hashicorp/consul/agent/checks" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/lib" "github.com/hashicorp/consul/sdk/testutil" @@ -2076,8 +2077,8 @@ func TestConfigFlagsAndEdgecases(t *testing.T) { }, patch: func(rt *RuntimeConfig) { rt.Checks = []*structs.CheckDefinition{ - &structs.CheckDefinition{Name: "a", ScriptArgs: []string{"/bin/true"}}, - &structs.CheckDefinition{Name: "b", ScriptArgs: []string{"/bin/false"}}, + &structs.CheckDefinition{Name: "a", ScriptArgs: []string{"/bin/true"}, OutputMaxSize: checks.DefaultBufSize}, + &structs.CheckDefinition{Name: "b", ScriptArgs: []string{"/bin/false"}, OutputMaxSize: checks.DefaultBufSize}, } rt.DataDir = dataDir }, @@ -2095,7 +2096,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) { }, patch: func(rt *RuntimeConfig) { rt.Checks = []*structs.CheckDefinition{ - &structs.CheckDefinition{Name: "a", GRPC: "localhost:12345/foo", GRPCUseTLS: true}, + &structs.CheckDefinition{Name: "a", GRPC: "localhost:12345/foo", GRPCUseTLS: true, OutputMaxSize: checks.DefaultBufSize}, } rt.DataDir = dataDir }, @@ -2113,7 +2114,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) { }, patch: func(rt *RuntimeConfig) { rt.Checks = []*structs.CheckDefinition{ - &structs.CheckDefinition{Name: "a", AliasService: "foo"}, + &structs.CheckDefinition{Name: "a", AliasService: "foo", OutputMaxSize: checks.DefaultBufSize}, } rt.DataDir = dataDir }, @@ -2232,6 +2233,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) { DockerContainerID: "z", DeregisterCriticalServiceAfter: 10 * time.Second, ScriptArgs: []string{"a", "b"}, + OutputMaxSize: checks.DefaultBufSize, }, }, Weights: &structs.Weights{ @@ -2497,8 +2499,9 @@ func TestConfigFlagsAndEdgecases(t *testing.T) { Port: 2345, Checks: structs.CheckTypes{ { - TCP: "127.0.0.1:2345", - Interval: 10 * time.Second, + TCP: "127.0.0.1:2345", + Interval: 10 * time.Second, + OutputMaxSize: checks.DefaultBufSize, }, }, Proxy: &structs.ConnectProxyConfig{ @@ -2592,8 +2595,9 @@ func TestConfigFlagsAndEdgecases(t *testing.T) { Port: 2345, Checks: structs.CheckTypes{ { - TCP: "127.0.0.1:2345", - Interval: 10 * time.Second, + TCP: "127.0.0.1:2345", + Interval: 10 * time.Second, + OutputMaxSize: checks.DefaultBufSize, }, }, Proxy: &structs.ConnectProxyConfig{ @@ -3025,6 +3029,7 @@ func TestFullConfig(t *testing.T) { "f3r6xFtM": [ "RyuIdDWv", "QbxEcIUM" ] }, "method": "Dou0nGT5", + "output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `, "tcp": "JY6fTTcw", "interval": "18714s", "docker_container_id": "qF66POS9", @@ -3051,6 +3056,7 @@ func TestFullConfig(t *testing.T) { "method": "aldrIQ4l", "tcp": "RJQND605", "interval": "22164s", + "output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `, "docker_container_id": "ipgdFtjd", "shell": "qAeOYy0M", "tls_skip_verify": true, @@ -3074,6 +3080,7 @@ func TestFullConfig(t *testing.T) { "method": "gLrztrNw", "tcp": "4jG5casb", "interval": "28767s", + "output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `, "docker_container_id": "THW6u7rL", "shell": "C1Zt3Zwh", "tls_skip_verify": true, @@ -3274,6 +3281,7 @@ func TestFullConfig(t *testing.T) { "method": "9afLm3Mj", "tcp": "fjiLFqVd", "interval": "23926s", + "output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `, "docker_container_id": "dO5TtRHk", "shell": "e6q2ttES", "tls_skip_verify": true, @@ -3296,6 +3304,7 @@ func TestFullConfig(t *testing.T) { "method": "T66MFBfR", "tcp": "bNnNfx2A", "interval": "22224s", + "output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `, "docker_container_id": "ipgdFtjd", "shell": "omVZq7Sz", "tls_skip_verify": true, @@ -3317,6 +3326,7 @@ func TestFullConfig(t *testing.T) { "method": "ciYHWors", "tcp": "FfvCwlqH", "interval": "12356s", + "output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `, "docker_container_id": "HBndBU6R", "shell": "hVI33JjA", "tls_skip_verify": true, @@ -3352,6 +3362,7 @@ func TestFullConfig(t *testing.T) { "method": "X5DrovFc", "tcp": "ICbxkpSF", "interval": "24392s", + "output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `, "docker_container_id": "ZKXr68Yb", "shell": "CEfzx0Fo", "tls_skip_verify": true, @@ -3390,6 +3401,7 @@ func TestFullConfig(t *testing.T) { "method": "5wkAxCUE", "tcp": "MN3oA9D2", "interval": "32718s", + "output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `, "docker_container_id": "cU15LMet", "shell": "nEz9qz2l", "tls_skip_verify": true, @@ -3411,6 +3423,7 @@ func TestFullConfig(t *testing.T) { "method": "wzByP903", "tcp": "2exjZIGE", "interval": "5656s", + "output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `, "docker_container_id": "5tDBWpfA", "shell": "rlTpLM8s", "tls_skip_verify": true, @@ -3593,6 +3606,7 @@ func TestFullConfig(t *testing.T) { method = "Dou0nGT5" tcp = "JY6fTTcw" interval = "18714s" + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + ` docker_container_id = "qF66POS9" shell = "sOnDy228" tls_skip_verify = true @@ -3617,6 +3631,7 @@ func TestFullConfig(t *testing.T) { method = "aldrIQ4l" tcp = "RJQND605" interval = "22164s" + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + ` docker_container_id = "ipgdFtjd" shell = "qAeOYy0M" tls_skip_verify = true @@ -3640,6 +3655,7 @@ func TestFullConfig(t *testing.T) { method = "gLrztrNw" tcp = "4jG5casb" interval = "28767s" + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + ` docker_container_id = "THW6u7rL" shell = "C1Zt3Zwh" tls_skip_verify = true @@ -3865,6 +3881,7 @@ func TestFullConfig(t *testing.T) { method = "T66MFBfR" tcp = "bNnNfx2A" interval = "22224s" + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + ` docker_container_id = "ipgdFtjd" shell = "omVZq7Sz" tls_skip_verify = true @@ -3886,6 +3903,7 @@ func TestFullConfig(t *testing.T) { method = "ciYHWors" tcp = "FfvCwlqH" interval = "12356s" + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + ` docker_container_id = "HBndBU6R" shell = "hVI33JjA" tls_skip_verify = true @@ -3921,6 +3939,7 @@ func TestFullConfig(t *testing.T) { method = "X5DrovFc" tcp = "ICbxkpSF" interval = "24392s" + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + ` docker_container_id = "ZKXr68Yb" shell = "CEfzx0Fo" tls_skip_verify = true @@ -3959,6 +3978,7 @@ func TestFullConfig(t *testing.T) { method = "5wkAxCUE" tcp = "MN3oA9D2" interval = "32718s" + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + ` docker_container_id = "cU15LMet" shell = "nEz9qz2l" tls_skip_verify = true @@ -3980,6 +4000,7 @@ func TestFullConfig(t *testing.T) { method = "wzByP903" tcp = "2exjZIGE" interval = "5656s" + output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + ` docker_container_id = "5tDBWpfA" shell = "rlTpLM8s" tls_skip_verify = true @@ -4248,6 +4269,7 @@ func TestFullConfig(t *testing.T) { CAFile: "erA7T0PM", CAPath: "mQEN1Mfp", CertFile: "7s4QAzDk", + CheckOutputMaxSize: checks.DefaultBufSize, Checks: []*structs.CheckDefinition{ &structs.CheckDefinition{ ID: "uAjE6m9Z", @@ -4265,6 +4287,7 @@ func TestFullConfig(t *testing.T) { Method: "aldrIQ4l", TCP: "RJQND605", Interval: 22164 * time.Second, + OutputMaxSize: checks.DefaultBufSize, DockerContainerID: "ipgdFtjd", Shell: "qAeOYy0M", TLSSkipVerify: true, @@ -4286,6 +4309,7 @@ func TestFullConfig(t *testing.T) { "qxvdnSE9": []string{"6wBPUYdF", "YYh8wtSZ"}, }, Method: "gLrztrNw", + OutputMaxSize: checks.DefaultBufSize, TCP: "4jG5casb", Interval: 28767 * time.Second, DockerContainerID: "THW6u7rL", @@ -4309,6 +4333,7 @@ func TestFullConfig(t *testing.T) { "f3r6xFtM": {"RyuIdDWv", "QbxEcIUM"}, }, Method: "Dou0nGT5", + OutputMaxSize: checks.DefaultBufSize, TCP: "JY6fTTcw", Interval: 18714 * time.Second, DockerContainerID: "qF66POS9", @@ -4477,6 +4502,7 @@ func TestFullConfig(t *testing.T) { "cVFpko4u": {"gGqdEB6k", "9LsRo22u"}, }, Method: "X5DrovFc", + OutputMaxSize: checks.DefaultBufSize, TCP: "ICbxkpSF", Interval: 24392 * time.Second, DockerContainerID: "ZKXr68Yb", @@ -4525,6 +4551,7 @@ func TestFullConfig(t *testing.T) { "1UJXjVrT": {"OJgxzTfk", "xZZrFsq7"}, }, Method: "5wkAxCUE", + OutputMaxSize: checks.DefaultBufSize, TCP: "MN3oA9D2", Interval: 32718 * time.Second, DockerContainerID: "cU15LMet", @@ -4546,6 +4573,7 @@ func TestFullConfig(t *testing.T) { "vr7wY7CS": {"EtCoNPPL", "9vAarJ5s"}, }, Method: "wzByP903", + OutputMaxSize: checks.DefaultBufSize, TCP: "2exjZIGE", Interval: 5656 * time.Second, DockerContainerID: "5tDBWpfA", @@ -4631,6 +4659,7 @@ func TestFullConfig(t *testing.T) { "SHOVq1Vv": {"jntFhyym", "GYJh32pp"}, }, Method: "T66MFBfR", + OutputMaxSize: checks.DefaultBufSize, TCP: "bNnNfx2A", Interval: 22224 * time.Second, DockerContainerID: "ipgdFtjd", @@ -4652,6 +4681,7 @@ func TestFullConfig(t *testing.T) { "p2UI34Qz": {"UsG1D0Qh", "NHhRiB6s"}, }, Method: "ciYHWors", + OutputMaxSize: checks.DefaultBufSize, TCP: "FfvCwlqH", Interval: 12356 * time.Second, DockerContainerID: "HBndBU6R", @@ -4673,6 +4703,7 @@ func TestFullConfig(t *testing.T) { "l4HwQ112": {"fk56MNlo", "dhLK56aZ"}, }, Method: "9afLm3Mj", + OutputMaxSize: checks.DefaultBufSize, TCP: "fjiLFqVd", Interval: 23926 * time.Second, DockerContainerID: "dO5TtRHk", @@ -5010,6 +5041,7 @@ func TestConfigDecodeBytes(t *testing.T) { func TestSanitize(t *testing.T) { rt := RuntimeConfig{ BindAddr: &net.IPAddr{IP: net.ParseIP("127.0.0.1")}, + CheckOutputMaxSize: checks.DefaultBufSize, SerfAdvertiseAddrLAN: &net.TCPAddr{IP: net.ParseIP("1.2.3.4"), Port: 5678}, DNSAddrs: []net.Addr{ &net.TCPAddr{IP: net.ParseIP("1.2.3.4"), Port: 5678}, @@ -5032,7 +5064,8 @@ func TestSanitize(t *testing.T) { Name: "foo", Token: "bar", Check: structs.CheckType{ - Name: "blurb", + Name: "blurb", + OutputMaxSize: checks.DefaultBufSize, }, Weights: &structs.Weights{ Passing: 67, @@ -5042,8 +5075,9 @@ func TestSanitize(t *testing.T) { }, Checks: []*structs.CheckDefinition{ &structs.CheckDefinition{ - Name: "zoo", - Token: "zope", + Name: "zoo", + Token: "zope", + OutputMaxSize: checks.DefaultBufSize, }, }, } @@ -5083,6 +5117,7 @@ func TestSanitize(t *testing.T) { "CAPath": "", "CertFile": "", "CheckDeregisterIntervalMin": "0s", + "CheckOutputMaxSize": ` + strconv.Itoa(checks.DefaultBufSize) + `, "CheckReapInterval": "0s", "CheckUpdateInterval": "0s", "Checks": [{ @@ -5099,6 +5134,7 @@ func TestSanitize(t *testing.T) { "Method": "", "Name": "zoo", "Notes": "", + "OutputMaxSize": ` + strconv.Itoa(checks.DefaultBufSize) + `, "ScriptArgs": [], "ServiceID": "", "Shell": "", @@ -5130,6 +5166,7 @@ func TestSanitize(t *testing.T) { "ConsulCoordinateUpdateMaxBatches": 0, "ConsulCoordinateUpdatePeriod": "15s", "ConsulRaftElectionTimeout": "0s", + "CheckOutputMaxSize": ` + strconv.Itoa(checks.DefaultBufSize) + `, "ConsulRaftHeartbeatTimeout": "0s", "ConsulRaftLeaderLeaseTimeout": "0s", "GossipLANGossipInterval": "0s", @@ -5269,6 +5306,7 @@ func TestSanitize(t *testing.T) { "Method": "", "Name": "blurb", "Notes": "", + "OutputMaxSize": ` + strconv.Itoa(checks.DefaultBufSize) + `, "ScriptArgs": [], "Shell": "", "Status": "", diff --git a/agent/consul/config.go b/agent/consul/config.go index 491bdeb4f8..cd0320d302 100644 --- a/agent/consul/config.go +++ b/agent/consul/config.go @@ -7,6 +7,7 @@ import ( "os" "time" + "github.com/hashicorp/consul/agent/checks" "github.com/hashicorp/consul/agent/consul/autopilot" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/lib" @@ -368,6 +369,9 @@ type Config struct { // warning and discard the remaining updates. CoordinateUpdateMaxBatches int + // CheckOutputMaxSize control the max size of output of checks + CheckOutputMaxSize int + // RPCHoldTimeout is how long an RPC can be "held" before it is errored. // This is used to paper over a loss of leadership by instead holding RPCs, // so that the caller experiences a slow response rather than an error. @@ -502,6 +506,8 @@ func DefaultConfig() *Config { CoordinateUpdateBatchSize: 128, CoordinateUpdateMaxBatches: 5, + CheckOutputMaxSize: checks.DefaultBufSize, + RPCRate: rate.Inf, RPCMaxBurst: 1000, diff --git a/agent/structs/check_definition.go b/agent/structs/check_definition.go index 4252b4449d..aaadda7c7f 100644 --- a/agent/structs/check_definition.go +++ b/agent/structs/check_definition.go @@ -37,6 +37,7 @@ type CheckDefinition struct { Timeout time.Duration TTL time.Duration DeregisterCriticalServiceAfter time.Duration + OutputMaxSize int } func (c *CheckDefinition) HealthCheck(node string) *HealthCheck { @@ -72,6 +73,7 @@ func (c *CheckDefinition) CheckType() *CheckType { GRPCUseTLS: c.GRPCUseTLS, Header: c.Header, Method: c.Method, + OutputMaxSize: c.OutputMaxSize, TCP: c.TCP, Interval: c.Interval, DockerContainerID: c.DockerContainerID, diff --git a/agent/structs/check_type.go b/agent/structs/check_type.go index 43b76057c6..9b1b055dae 100644 --- a/agent/structs/check_type.go +++ b/agent/structs/check_type.go @@ -45,6 +45,7 @@ type CheckType struct { // service, if any, to be deregistered if this check is critical for // longer than this duration. DeregisterCriticalServiceAfter time.Duration + OutputMaxSize int } type CheckTypes []*CheckType @@ -67,6 +68,9 @@ func (c *CheckType) Validate() error { if !intervalCheck && !c.IsAlias() && c.TTL <= 0 { return fmt.Errorf("TTL must be > 0 for TTL checks") } + if c.OutputMaxSize < 0 { + return fmt.Errorf("MaxOutputMaxSize must be positive") + } return nil } diff --git a/agent/structs/structs.go b/agent/structs/structs.go index 720e614ace..e79ab747fd 100644 --- a/agent/structs/structs.go +++ b/agent/structs/structs.go @@ -958,6 +958,7 @@ type HealthCheckDefinition struct { Method string `json:",omitempty"` TCP string `json:",omitempty"` Interval time.Duration `json:",omitempty"` + OutputMaxSize uint `json:",omitempty"` Timeout time.Duration `json:",omitempty"` DeregisterCriticalServiceAfter time.Duration `json:",omitempty"` } @@ -966,11 +967,13 @@ func (d *HealthCheckDefinition) MarshalJSON() ([]byte, error) { type Alias HealthCheckDefinition exported := &struct { Interval string `json:",omitempty"` + OutputMaxSize uint `json:",omitempty"` Timeout string `json:",omitempty"` DeregisterCriticalServiceAfter string `json:",omitempty"` *Alias }{ Interval: d.Interval.String(), + OutputMaxSize: d.OutputMaxSize, Timeout: d.Timeout.String(), DeregisterCriticalServiceAfter: d.DeregisterCriticalServiceAfter.String(), Alias: (*Alias)(d), diff --git a/website/source/api/agent/check.html.md b/website/source/api/agent/check.html.md index dfb04d853d..a5ad23a5e4 100644 --- a/website/source/api/agent/check.html.md +++ b/website/source/api/agent/check.html.md @@ -183,6 +183,12 @@ The table below shows this endpoint's support for case of a Script, HTTP, TCP, or gRPC check. Can be specified in the form of "10s" or "5m" (i.e., 10 seconds or 5 minutes, respectively). +- `OutputMaxSize` `(positive int: 4096)` - Allow to put a maximum size of text + for the given check. This value must be greater than 0, by default, the value + is 4k. + The value can be further limited for all checks of a given agent using the + `check_output_max_size` flag in the agent. + - `TLSSkipVerify` `(bool: false)` - Specifies if the certificate for an HTTPS check should not be verified. diff --git a/website/source/docs/agent/options.html.md b/website/source/docs/agent/options.html.md index 10905bd95f..e2160cf2b9 100644 --- a/website/source/docs/agent/options.html.md +++ b/website/source/docs/agent/options.html.md @@ -132,6 +132,14 @@ will exit with an error at startup. [go-sockaddr](https://godoc.org/github.com/hashicorp/go-sockaddr/template) template +* `-check_output_max_size` - + Override the default limit of 4k for maximum size of checks, this is a positive + value. + By limiting this size, it allows to put less pressure on Consul servers when + many checks are having a very large output in their checks. + In order to completely disable check output capture, it is possible to + use `discard_check_output`. + * `-client` - The address to which Consul will bind client interfaces, including the HTTP and DNS servers. By default, this is "127.0.0.1", allowing only loopback connections. In Consul