Support for maximum size for Output of checks (#5233)

* Support for maximum size for Output of checks

This PR allows users to limit the size of output produced by checks at the agent 
and check level.

When set at the agent level, it will limit the output for all checks monitored
by the agent.

When set at the check level, it can override the agent max for a specific check but
only if it is lower than the agent max.

Default value is 4k, and input must be at least 1.
This commit is contained in:
Pierre Souchay 2019-06-26 17:43:25 +02:00 committed by Freddy
parent 4c37598ad8
commit 0e907f5aa8
18 changed files with 260 additions and 94 deletions

View File

@ -915,6 +915,7 @@ func (a *Agent) consulConfig() (*consul.Config, error) {
base.CoordinateUpdateBatchSize = a.config.ConsulCoordinateUpdateBatchSize
base.CoordinateUpdateMaxBatches = a.config.ConsulCoordinateUpdateMaxBatches
base.CoordinateUpdatePeriod = a.config.ConsulCoordinateUpdatePeriod
base.CheckOutputMaxSize = a.config.CheckOutputMaxSize
base.RaftConfig.HeartbeatTimeout = a.config.ConsulRaftHeartbeatTimeout
base.RaftConfig.LeaderLeaseTimeout = a.config.ConsulRaftLeaderLeaseTimeout
@ -971,6 +972,9 @@ func (a *Agent) consulConfig() (*consul.Config, error) {
if a.config.Bootstrap {
base.Bootstrap = true
}
if a.config.CheckOutputMaxSize > 0 {
base.CheckOutputMaxSize = a.config.CheckOutputMaxSize
}
if a.config.RejoinAfterLeave {
base.RejoinAfterLeave = true
}
@ -2248,6 +2252,13 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType,
// Check if already registered
if chkType != nil {
maxOutputSize := a.config.CheckOutputMaxSize
if maxOutputSize == 0 {
maxOutputSize = checks.DefaultBufSize
}
if chkType.OutputMaxSize > 0 && maxOutputSize > chkType.OutputMaxSize {
maxOutputSize = chkType.OutputMaxSize
}
switch {
case chkType.IsTTL():
@ -2257,10 +2268,11 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType,
}
ttl := &checks.CheckTTL{
Notify: a.State,
CheckID: check.CheckID,
TTL: chkType.TTL,
Logger: a.logger,
Notify: a.State,
CheckID: check.CheckID,
TTL: chkType.TTL,
Logger: a.logger,
OutputMaxSize: maxOutputSize,
}
// Restore persisted state, if any
@ -2294,6 +2306,7 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType,
Interval: chkType.Interval,
Timeout: chkType.Timeout,
Logger: a.logger,
OutputMaxSize: maxOutputSize,
TLSClientConfig: tlsClientConfig,
}
http.Start()
@ -2361,7 +2374,7 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType,
}
if a.dockerClient == nil {
dc, err := checks.NewDockerClient(os.Getenv("DOCKER_HOST"), checks.BufSize)
dc, err := checks.NewDockerClient(os.Getenv("DOCKER_HOST"), int64(maxOutputSize))
if err != nil {
a.logger.Printf("[ERR] agent: error creating docker client: %s", err)
return err
@ -2396,14 +2409,14 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType,
check.CheckID, checks.MinInterval)
chkType.Interval = checks.MinInterval
}
monitor := &checks.CheckMonitor{
Notify: a.State,
CheckID: check.CheckID,
ScriptArgs: chkType.ScriptArgs,
Interval: chkType.Interval,
Timeout: chkType.Timeout,
Logger: a.logger,
Notify: a.State,
CheckID: check.CheckID,
ScriptArgs: chkType.ScriptArgs,
Interval: chkType.Interval,
Timeout: chkType.Timeout,
Logger: a.logger,
OutputMaxSize: maxOutputSize,
}
monitor.Start()
a.checkMonitors[check.CheckID] = monitor
@ -2878,7 +2891,7 @@ func (a *Agent) updateTTLCheck(checkID types.CheckID, status, output string) err
}
// Set the status through CheckTTL to reset the TTL.
check.SetStatus(status, output)
outputTruncated := check.SetStatus(status, output)
// We don't write any files in dev mode so bail here.
if a.config.DataDir == "" {
@ -2887,7 +2900,7 @@ func (a *Agent) updateTTLCheck(checkID types.CheckID, status, output string) err
// Persist the state so the TTL check can come up in a good state after
// an agent restart, especially with long TTL values.
if err := a.persistCheckState(check, status, output); err != nil {
if err := a.persistCheckState(check, status, outputTruncated); err != nil {
return fmt.Errorf("failed persisting state for check %q: %s", checkID, err)
}

View File

@ -19,7 +19,6 @@ import (
"github.com/hashicorp/consul/acl"
cachetype "github.com/hashicorp/consul/agent/cache-types"
"github.com/hashicorp/consul/agent/checks"
"github.com/hashicorp/consul/agent/debug"
"github.com/hashicorp/consul/agent/local"
"github.com/hashicorp/consul/agent/structs"
@ -715,12 +714,6 @@ func (s *HTTPServer) AgentCheckUpdate(resp http.ResponseWriter, req *http.Reques
return nil, nil
}
total := len(update.Output)
if total > checks.BufSize {
update.Output = fmt.Sprintf("%s ... (captured %d of %d bytes)",
update.Output[:checks.BufSize], checks.BufSize, total)
}
checkID := types.CheckID(strings.TrimPrefix(req.URL.Path, "/v1/agent/check/update/"))
// Get the provided token, if any, and vet against any ACL policies.

View File

@ -18,7 +18,6 @@ import (
"time"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/checks"
"github.com/hashicorp/consul/agent/config"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/debug"
@ -2299,7 +2298,8 @@ func TestAgent_FailCheck_ACLDeny(t *testing.T) {
func TestAgent_UpdateCheck(t *testing.T) {
t.Parallel()
a := NewTestAgent(t, t.Name(), "")
maxChecksSize := 256
a := NewTestAgent(t, t.Name(), fmt.Sprintf("check_output_max_size=%d", maxChecksSize))
defer a.Shutdown()
testrpc.WaitForTestAgent(t, a.RPC, "dc1")
@ -2340,7 +2340,7 @@ func TestAgent_UpdateCheck(t *testing.T) {
t.Run("log output limit", func(t *testing.T) {
args := checkUpdate{
Status: api.HealthPassing,
Output: strings.Repeat("-= bad -=", 5*checks.BufSize),
Output: strings.Repeat("-= bad -=", 5*maxChecksSize),
}
req, _ := http.NewRequest("PUT", "/v1/agent/check/update/test", jsonReader(args))
resp := httptest.NewRecorder()
@ -2359,8 +2359,8 @@ func TestAgent_UpdateCheck(t *testing.T) {
// rough check that the output buffer was cut down so this test
// isn't super brittle.
state := a.State.Checks()["test"]
if state.Status != api.HealthPassing || len(state.Output) > 2*checks.BufSize {
t.Fatalf("bad: %v", state)
if state.Status != api.HealthPassing || len(state.Output) > 2*maxChecksSize {
t.Fatalf("bad: %v, (len:=%d)", state, len(state.Output))
}
})

View File

@ -1586,7 +1586,7 @@ func TestAgent_updateTTLCheck(t *testing.T) {
t.Parallel()
a := NewTestAgent(t, t.Name(), "")
defer a.Shutdown()
checkBufSize := 100
health := &structs.HealthCheck{
Node: "foo",
CheckID: "mem",
@ -1594,7 +1594,8 @@ func TestAgent_updateTTLCheck(t *testing.T) {
Status: api.HealthCritical,
}
chk := &structs.CheckType{
TTL: 15 * time.Second,
TTL: 15 * time.Second,
OutputMaxSize: checkBufSize,
}
// Add check and update it.
@ -1614,6 +1615,19 @@ func TestAgent_updateTTLCheck(t *testing.T) {
if status.Output != "foo" {
t.Fatalf("bad: %v", status)
}
if err := a.updateTTLCheck("mem", api.HealthCritical, strings.Repeat("--bad-- ", 5*checkBufSize)); err != nil {
t.Fatalf("err: %v", err)
}
// Ensure we have a check mapping.
status = a.State.Checks()["mem"]
if status.Status != api.HealthCritical {
t.Fatalf("bad: %v", status)
}
if len(status.Output) > checkBufSize*2 {
t.Fatalf("bad: %v", len(status.Output))
}
}
func TestAgent_PersistService(t *testing.T) {

View File

@ -28,10 +28,10 @@ const (
// Otherwise we risk fork bombing a system.
MinInterval = time.Second
// BufSize is the maximum size of the captured
// check output. Prevents an enormous buffer
// DefaultBufSize is the maximum size of the captured
// check output by defaut. Prevents an enormous buffer
// from being captured
BufSize = 4 * 1024 // 4KB
DefaultBufSize = 4 * 1024 // 4KB
// UserAgent is the value of the User-Agent header
// for HTTP health checks.
@ -56,13 +56,14 @@ type CheckNotifier interface {
// determine the health of a given check. It is compatible with
// nagios plugins and expects the output in the same format.
type CheckMonitor struct {
Notify CheckNotifier
CheckID types.CheckID
Script string
ScriptArgs []string
Interval time.Duration
Timeout time.Duration
Logger *log.Logger
Notify CheckNotifier
CheckID types.CheckID
Script string
ScriptArgs []string
Interval time.Duration
Timeout time.Duration
Logger *log.Logger
OutputMaxSize int
stop bool
stopCh chan struct{}
@ -122,7 +123,7 @@ func (c *CheckMonitor) check() {
}
// Collect the output
output, _ := circbuf.NewBuffer(BufSize)
output, _ := circbuf.NewBuffer(int64(c.OutputMaxSize))
cmd.Stdout = output
cmd.Stderr = output
exec.SetSysProcAttr(cmd)
@ -222,12 +223,17 @@ type CheckTTL struct {
stop bool
stopCh chan struct{}
stopLock sync.Mutex
OutputMaxSize int
}
// Start is used to start a check ttl, runs until Stop()
func (c *CheckTTL) Start() {
c.stopLock.Lock()
defer c.stopLock.Unlock()
if c.OutputMaxSize < 1 {
c.OutputMaxSize = DefaultBufSize
}
c.stop = false
c.stopCh = make(chan struct{})
c.timer = time.NewTimer(c.TTL)
@ -275,16 +281,22 @@ func (c *CheckTTL) getExpiredOutput() string {
// SetStatus is used to update the status of the check,
// and to renew the TTL. If expired, TTL is restarted.
func (c *CheckTTL) SetStatus(status, output string) {
// output is returned (might be truncated)
func (c *CheckTTL) SetStatus(status, output string) string {
c.Logger.Printf("[DEBUG] agent: Check %q status is now %s", c.CheckID, status)
total := len(output)
if total > c.OutputMaxSize {
output = fmt.Sprintf("%s ... (captured %d of %d bytes)",
output[:c.OutputMaxSize], c.OutputMaxSize, total)
}
c.Notify.UpdateCheck(c.CheckID, status, output)
// Store the last output so we can retain it if the TTL expires.
c.lastOutputLock.Lock()
c.lastOutput = output
c.lastOutputLock.Unlock()
c.timer.Reset(c.TTL)
return output
}
// CheckHTTP is used to periodically make an HTTP request to
@ -303,6 +315,7 @@ type CheckHTTP struct {
Timeout time.Duration
Logger *log.Logger
TLSClientConfig *tls.Config
OutputMaxSize int
httpClient *http.Client
stop bool
@ -339,6 +352,9 @@ func (c *CheckHTTP) Start() {
} else if c.Interval < 10*time.Second {
c.httpClient.Timeout = c.Interval
}
if c.OutputMaxSize < 1 {
c.OutputMaxSize = DefaultBufSize
}
}
c.stop = false
@ -413,7 +429,7 @@ func (c *CheckHTTP) check() {
defer resp.Body.Close()
// Read the response into a circular buffer to limit the size
output, _ := circbuf.NewBuffer(BufSize)
output, _ := circbuf.NewBuffer(int64(c.OutputMaxSize))
if _, err := io.Copy(output, resp.Body); err != nil {
c.Logger.Printf("[WARN] agent: Check %q error while reading body: %s", c.CheckID, err)
}

View File

@ -44,11 +44,12 @@ func TestCheckMonitor_Script(t *testing.T) {
t.Run(tt.status, func(t *testing.T) {
notif := mock.NewNotify()
check := &CheckMonitor{
Notify: notif,
CheckID: types.CheckID("foo"),
Script: tt.script,
Interval: 25 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
Notify: notif,
CheckID: types.CheckID("foo"),
Script: tt.script,
Interval: 25 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
OutputMaxSize: DefaultBufSize,
}
check.Start()
defer check.Stop()
@ -79,11 +80,12 @@ func TestCheckMonitor_Args(t *testing.T) {
t.Run(tt.status, func(t *testing.T) {
notif := mock.NewNotify()
check := &CheckMonitor{
Notify: notif,
CheckID: types.CheckID("foo"),
ScriptArgs: tt.args,
Interval: 25 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
Notify: notif,
CheckID: types.CheckID("foo"),
ScriptArgs: tt.args,
Interval: 25 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
OutputMaxSize: DefaultBufSize,
}
check.Start()
defer check.Stop()
@ -103,12 +105,13 @@ func TestCheckMonitor_Timeout(t *testing.T) {
// t.Parallel() // timing test. no parallel
notif := mock.NewNotify()
check := &CheckMonitor{
Notify: notif,
CheckID: types.CheckID("foo"),
ScriptArgs: []string{"sh", "-c", "sleep 1 && exit 0"},
Interval: 50 * time.Millisecond,
Timeout: 25 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
Notify: notif,
CheckID: types.CheckID("foo"),
ScriptArgs: []string{"sh", "-c", "sleep 1 && exit 0"},
Interval: 50 * time.Millisecond,
Timeout: 25 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
OutputMaxSize: DefaultBufSize,
}
check.Start()
defer check.Stop()
@ -128,11 +131,12 @@ func TestCheckMonitor_RandomStagger(t *testing.T) {
// t.Parallel() // timing test. no parallel
notif := mock.NewNotify()
check := &CheckMonitor{
Notify: notif,
CheckID: types.CheckID("foo"),
ScriptArgs: []string{"sh", "-c", "exit 0"},
Interval: 25 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
Notify: notif,
CheckID: types.CheckID("foo"),
ScriptArgs: []string{"sh", "-c", "exit 0"},
Interval: 25 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
OutputMaxSize: DefaultBufSize,
}
check.Start()
defer check.Stop()
@ -153,11 +157,12 @@ func TestCheckMonitor_LimitOutput(t *testing.T) {
t.Parallel()
notif := mock.NewNotify()
check := &CheckMonitor{
Notify: notif,
CheckID: types.CheckID("foo"),
ScriptArgs: []string{"od", "-N", "81920", "/dev/urandom"},
Interval: 25 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
Notify: notif,
CheckID: types.CheckID("foo"),
ScriptArgs: []string{"od", "-N", "81920", "/dev/urandom"},
Interval: 25 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
OutputMaxSize: DefaultBufSize,
}
check.Start()
defer check.Stop()
@ -165,7 +170,7 @@ func TestCheckMonitor_LimitOutput(t *testing.T) {
time.Sleep(50 * time.Millisecond)
// Allow for extra bytes for the truncation message
if len(notif.Output("foo")) > BufSize+100 {
if len(notif.Output("foo")) > DefaultBufSize+100 {
t.Fatalf("output size is too long")
}
}
@ -287,7 +292,7 @@ func TestCheckHTTP(t *testing.T) {
}
// Body larger than 4k limit
body := bytes.Repeat([]byte{'a'}, 2*BufSize)
body := bytes.Repeat([]byte{'a'}, 2*DefaultBufSize)
w.WriteHeader(tt.code)
w.Write(body)
}))
@ -295,13 +300,14 @@ func TestCheckHTTP(t *testing.T) {
notif := mock.NewNotify()
check := &CheckHTTP{
Notify: notif,
CheckID: types.CheckID("foo"),
HTTP: server.URL,
Method: tt.method,
Header: tt.header,
Interval: 10 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
Notify: notif,
CheckID: types.CheckID("foo"),
HTTP: server.URL,
Method: tt.method,
OutputMaxSize: DefaultBufSize,
Header: tt.header,
Interval: 10 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
}
check.Start()
defer check.Stop()
@ -313,15 +319,52 @@ func TestCheckHTTP(t *testing.T) {
if got, want := notif.State("foo"), tt.status; got != want {
r.Fatalf("got state %q want %q", got, want)
}
// Allow slightly more data than BufSize, for the header
if n := len(notif.Output("foo")); n > (BufSize + 256) {
r.Fatalf("output too long: %d (%d-byte limit)", n, BufSize)
// Allow slightly more data than DefaultBufSize, for the header
if n := len(notif.Output("foo")); n > (DefaultBufSize + 256) {
r.Fatalf("output too long: %d (%d-byte limit)", n, DefaultBufSize)
}
})
})
}
}
func TestCheckMaxOutputSize(t *testing.T) {
t.Parallel()
timeout := 5 * time.Millisecond
server := httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, req *http.Request) {
body := bytes.Repeat([]byte{'x'}, 2*DefaultBufSize)
writer.WriteHeader(200)
writer.Write(body)
}))
defer server.Close()
notif := mock.NewNotify()
maxOutputSize := 32
check := &CheckHTTP{
Notify: notif,
CheckID: types.CheckID("bar"),
HTTP: server.URL + "/v1/agent/self",
Timeout: timeout,
Interval: 2 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
OutputMaxSize: maxOutputSize,
}
check.Start()
defer check.Stop()
retry.Run(t, func(r *retry.R) {
if got, want := notif.Updates("bar"), 2; got < want {
r.Fatalf("got %d updates want at least %d", got, want)
}
if got, want := notif.State("bar"), api.HealthPassing; got != want {
r.Fatalf("got state %q want %q", got, want)
}
if got, want := notif.Output("bar"), "HTTP GET "+server.URL+"/v1/agent/self: 200 OK Output: "+strings.Repeat("x", maxOutputSize); got != want {
r.Fatalf("got state %q want %q", got, want)
}
})
}
func TestCheckHTTPTimeout(t *testing.T) {
t.Parallel()
timeout := 5 * time.Millisecond
@ -372,7 +415,7 @@ func TestCheckHTTP_disablesKeepAlives(t *testing.T) {
func largeBodyHandler(code int) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Body larger than 4k limit
body := bytes.Repeat([]byte{'a'}, 2*BufSize)
body := bytes.Repeat([]byte{'a'}, 2*DefaultBufSize)
w.WriteHeader(code)
w.Write(body)
})

View File

@ -14,6 +14,7 @@ import (
"strings"
"time"
"github.com/hashicorp/consul/agent/checks"
"github.com/hashicorp/consul/agent/connect/ca"
"github.com/hashicorp/consul/agent/consul"
"github.com/hashicorp/consul/agent/structs"
@ -786,6 +787,7 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
CAPath: b.stringVal(c.CAPath),
CertFile: b.stringVal(c.CertFile),
CheckUpdateInterval: b.durationVal("check_update_interval", c.CheckUpdateInterval),
CheckOutputMaxSize: b.intValWithDefault(c.CheckOutputMaxSize, 4096),
Checks: checks,
ClientAddrs: clientAddrs,
ConfigEntryBootstrap: configEntries,
@ -964,6 +966,9 @@ func (b *Builder) Validate(rt RuntimeConfig) error {
if rt.BootstrapExpect > 0 && rt.Bootstrap {
return fmt.Errorf("'bootstrap_expect > 0' and 'bootstrap = true' are mutually exclusive")
}
if rt.CheckOutputMaxSize < 1 {
return fmt.Errorf("check_output_max_size must be positive, to discard check output use the discard_check_output flag")
}
if rt.AEInterval <= 0 {
return fmt.Errorf("ae_interval cannot be %s. Must be positive", rt.AEInterval)
}
@ -1174,6 +1179,7 @@ func (b *Builder) checkVal(v *CheckDefinition) *structs.CheckDefinition {
Timeout: b.durationVal(fmt.Sprintf("check[%s].timeout", id), v.Timeout),
TTL: b.durationVal(fmt.Sprintf("check[%s].ttl", id), v.TTL),
DeregisterCriticalServiceAfter: b.durationVal(fmt.Sprintf("check[%s].deregister_critical_service_after", id), v.DeregisterCriticalServiceAfter),
OutputMaxSize: b.intValWithDefault(v.OutputMaxSize, checks.DefaultBufSize),
}
}
@ -1347,13 +1353,17 @@ func (b *Builder) durationVal(name string, v *string) (d time.Duration) {
return b.durationValWithDefault(name, v, 0)
}
func (b *Builder) intVal(v *int) int {
func (b *Builder) intValWithDefault(v *int, defaultVal int) int {
if v == nil {
return 0
return defaultVal
}
return *v
}
func (b *Builder) intVal(v *int) int {
return b.intValWithDefault(v, 0)
}
func (b *Builder) portVal(name string, v *int) int {
if v == nil || *v <= 0 {
return -1

View File

@ -184,6 +184,7 @@ type Config struct {
CAPath *string `json:"ca_path,omitempty" hcl:"ca_path" mapstructure:"ca_path"`
CertFile *string `json:"cert_file,omitempty" hcl:"cert_file" mapstructure:"cert_file"`
Check *CheckDefinition `json:"check,omitempty" hcl:"check" mapstructure:"check"` // needs to be a pointer to avoid partial merges
CheckOutputMaxSize *int `json:"check_output_max_size,omitempty" hcl:"check_output_max_size" mapstructure:"check_output_max_size"`
CheckUpdateInterval *string `json:"check_update_interval,omitempty" hcl:"check_update_interval" mapstructure:"check_update_interval"`
Checks []CheckDefinition `json:"checks,omitempty" hcl:"checks" mapstructure:"checks"`
ClientAddr *string `json:"client_addr,omitempty" hcl:"client_addr" mapstructure:"client_addr"`
@ -390,6 +391,7 @@ type CheckDefinition struct {
HTTP *string `json:"http,omitempty" hcl:"http" mapstructure:"http"`
Header map[string][]string `json:"header,omitempty" hcl:"header" mapstructure:"header"`
Method *string `json:"method,omitempty" hcl:"method" mapstructure:"method"`
OutputMaxSize *int `json:"output_max_size,omitempty" hcl:"output_max_size" mapstructure:"output_max_size"`
TCP *string `json:"tcp,omitempty" hcl:"tcp" mapstructure:"tcp"`
Interval *string `json:"interval,omitempty" hcl:"interval" mapstructure:"interval"`
DockerContainerID *string `json:"docker_container_id,omitempty" hcl:"docker_container_id" mapstructure:"docker_container_id"`

View File

@ -4,6 +4,7 @@ import (
"fmt"
"strconv"
"github.com/hashicorp/consul/agent/checks"
"github.com/hashicorp/consul/agent/consul"
"github.com/hashicorp/consul/version"
)
@ -49,6 +50,7 @@ func DefaultSource() Source {
bind_addr = "0.0.0.0"
bootstrap = false
bootstrap_expect = 0
check_output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `
check_update_interval = "5m"
client_addr = "127.0.0.1"
datacenter = "` + consul.DefaultDC + `"

View File

@ -60,6 +60,7 @@ func AddFlags(fs *flag.FlagSet, f *Flags) {
add(&f.Config.Bootstrap, "bootstrap", "Sets server to bootstrap mode.")
add(&f.Config.BootstrapExpect, "bootstrap-expect", "Sets server to expect bootstrap mode.")
add(&f.Config.ClientAddr, "client", "Sets the address to bind for client access. This includes RPC, DNS, HTTP, HTTPS and gRPC (if configured).")
add(&f.Config.CheckOutputMaxSize, "check_output_max_size", "Sets the maximum output size for checks on this agent")
add(&f.ConfigFiles, "config-dir", "Path to a directory to read configuration files from. This will read every file ending in '.json' as configuration in this directory in alphabetical order. Can be specified multiple times.")
add(&f.ConfigFiles, "config-file", "Path to a file in JSON or HCL format with a matching file extension. Can be specified multiple times.")
add(&f.ConfigFormat, "config-format", "Config files are in this format irrespective of their extension. Must be 'hcl' or 'json'")

View File

@ -459,6 +459,11 @@ type RuntimeConfig struct {
// hcl: check_update_interval = "duration"
CheckUpdateInterval time.Duration
// Maximum size for the output of a healtcheck
// hcl check_output_max_size int
// flag: -check_output_max_size int
CheckOutputMaxSize int
// Checks contains the provided check definitions.
//
// hcl: checks = [

View File

@ -18,6 +18,7 @@ import (
"testing"
"time"
"github.com/hashicorp/consul/agent/checks"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/consul/sdk/testutil"
@ -2076,8 +2077,8 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
},
patch: func(rt *RuntimeConfig) {
rt.Checks = []*structs.CheckDefinition{
&structs.CheckDefinition{Name: "a", ScriptArgs: []string{"/bin/true"}},
&structs.CheckDefinition{Name: "b", ScriptArgs: []string{"/bin/false"}},
&structs.CheckDefinition{Name: "a", ScriptArgs: []string{"/bin/true"}, OutputMaxSize: checks.DefaultBufSize},
&structs.CheckDefinition{Name: "b", ScriptArgs: []string{"/bin/false"}, OutputMaxSize: checks.DefaultBufSize},
}
rt.DataDir = dataDir
},
@ -2095,7 +2096,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
},
patch: func(rt *RuntimeConfig) {
rt.Checks = []*structs.CheckDefinition{
&structs.CheckDefinition{Name: "a", GRPC: "localhost:12345/foo", GRPCUseTLS: true},
&structs.CheckDefinition{Name: "a", GRPC: "localhost:12345/foo", GRPCUseTLS: true, OutputMaxSize: checks.DefaultBufSize},
}
rt.DataDir = dataDir
},
@ -2113,7 +2114,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
},
patch: func(rt *RuntimeConfig) {
rt.Checks = []*structs.CheckDefinition{
&structs.CheckDefinition{Name: "a", AliasService: "foo"},
&structs.CheckDefinition{Name: "a", AliasService: "foo", OutputMaxSize: checks.DefaultBufSize},
}
rt.DataDir = dataDir
},
@ -2232,6 +2233,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
DockerContainerID: "z",
DeregisterCriticalServiceAfter: 10 * time.Second,
ScriptArgs: []string{"a", "b"},
OutputMaxSize: checks.DefaultBufSize,
},
},
Weights: &structs.Weights{
@ -2497,8 +2499,9 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
Port: 2345,
Checks: structs.CheckTypes{
{
TCP: "127.0.0.1:2345",
Interval: 10 * time.Second,
TCP: "127.0.0.1:2345",
Interval: 10 * time.Second,
OutputMaxSize: checks.DefaultBufSize,
},
},
Proxy: &structs.ConnectProxyConfig{
@ -2592,8 +2595,9 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
Port: 2345,
Checks: structs.CheckTypes{
{
TCP: "127.0.0.1:2345",
Interval: 10 * time.Second,
TCP: "127.0.0.1:2345",
Interval: 10 * time.Second,
OutputMaxSize: checks.DefaultBufSize,
},
},
Proxy: &structs.ConnectProxyConfig{
@ -3025,6 +3029,7 @@ func TestFullConfig(t *testing.T) {
"f3r6xFtM": [ "RyuIdDWv", "QbxEcIUM" ]
},
"method": "Dou0nGT5",
"output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `,
"tcp": "JY6fTTcw",
"interval": "18714s",
"docker_container_id": "qF66POS9",
@ -3051,6 +3056,7 @@ func TestFullConfig(t *testing.T) {
"method": "aldrIQ4l",
"tcp": "RJQND605",
"interval": "22164s",
"output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `,
"docker_container_id": "ipgdFtjd",
"shell": "qAeOYy0M",
"tls_skip_verify": true,
@ -3074,6 +3080,7 @@ func TestFullConfig(t *testing.T) {
"method": "gLrztrNw",
"tcp": "4jG5casb",
"interval": "28767s",
"output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `,
"docker_container_id": "THW6u7rL",
"shell": "C1Zt3Zwh",
"tls_skip_verify": true,
@ -3274,6 +3281,7 @@ func TestFullConfig(t *testing.T) {
"method": "9afLm3Mj",
"tcp": "fjiLFqVd",
"interval": "23926s",
"output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `,
"docker_container_id": "dO5TtRHk",
"shell": "e6q2ttES",
"tls_skip_verify": true,
@ -3296,6 +3304,7 @@ func TestFullConfig(t *testing.T) {
"method": "T66MFBfR",
"tcp": "bNnNfx2A",
"interval": "22224s",
"output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `,
"docker_container_id": "ipgdFtjd",
"shell": "omVZq7Sz",
"tls_skip_verify": true,
@ -3317,6 +3326,7 @@ func TestFullConfig(t *testing.T) {
"method": "ciYHWors",
"tcp": "FfvCwlqH",
"interval": "12356s",
"output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `,
"docker_container_id": "HBndBU6R",
"shell": "hVI33JjA",
"tls_skip_verify": true,
@ -3352,6 +3362,7 @@ func TestFullConfig(t *testing.T) {
"method": "X5DrovFc",
"tcp": "ICbxkpSF",
"interval": "24392s",
"output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `,
"docker_container_id": "ZKXr68Yb",
"shell": "CEfzx0Fo",
"tls_skip_verify": true,
@ -3390,6 +3401,7 @@ func TestFullConfig(t *testing.T) {
"method": "5wkAxCUE",
"tcp": "MN3oA9D2",
"interval": "32718s",
"output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `,
"docker_container_id": "cU15LMet",
"shell": "nEz9qz2l",
"tls_skip_verify": true,
@ -3411,6 +3423,7 @@ func TestFullConfig(t *testing.T) {
"method": "wzByP903",
"tcp": "2exjZIGE",
"interval": "5656s",
"output_max_size": ` + strconv.Itoa(checks.DefaultBufSize) + `,
"docker_container_id": "5tDBWpfA",
"shell": "rlTpLM8s",
"tls_skip_verify": true,
@ -3593,6 +3606,7 @@ func TestFullConfig(t *testing.T) {
method = "Dou0nGT5"
tcp = "JY6fTTcw"
interval = "18714s"
output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `
docker_container_id = "qF66POS9"
shell = "sOnDy228"
tls_skip_verify = true
@ -3617,6 +3631,7 @@ func TestFullConfig(t *testing.T) {
method = "aldrIQ4l"
tcp = "RJQND605"
interval = "22164s"
output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `
docker_container_id = "ipgdFtjd"
shell = "qAeOYy0M"
tls_skip_verify = true
@ -3640,6 +3655,7 @@ func TestFullConfig(t *testing.T) {
method = "gLrztrNw"
tcp = "4jG5casb"
interval = "28767s"
output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `
docker_container_id = "THW6u7rL"
shell = "C1Zt3Zwh"
tls_skip_verify = true
@ -3865,6 +3881,7 @@ func TestFullConfig(t *testing.T) {
method = "T66MFBfR"
tcp = "bNnNfx2A"
interval = "22224s"
output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `
docker_container_id = "ipgdFtjd"
shell = "omVZq7Sz"
tls_skip_verify = true
@ -3886,6 +3903,7 @@ func TestFullConfig(t *testing.T) {
method = "ciYHWors"
tcp = "FfvCwlqH"
interval = "12356s"
output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `
docker_container_id = "HBndBU6R"
shell = "hVI33JjA"
tls_skip_verify = true
@ -3921,6 +3939,7 @@ func TestFullConfig(t *testing.T) {
method = "X5DrovFc"
tcp = "ICbxkpSF"
interval = "24392s"
output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `
docker_container_id = "ZKXr68Yb"
shell = "CEfzx0Fo"
tls_skip_verify = true
@ -3959,6 +3978,7 @@ func TestFullConfig(t *testing.T) {
method = "5wkAxCUE"
tcp = "MN3oA9D2"
interval = "32718s"
output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `
docker_container_id = "cU15LMet"
shell = "nEz9qz2l"
tls_skip_verify = true
@ -3980,6 +4000,7 @@ func TestFullConfig(t *testing.T) {
method = "wzByP903"
tcp = "2exjZIGE"
interval = "5656s"
output_max_size = ` + strconv.Itoa(checks.DefaultBufSize) + `
docker_container_id = "5tDBWpfA"
shell = "rlTpLM8s"
tls_skip_verify = true
@ -4248,6 +4269,7 @@ func TestFullConfig(t *testing.T) {
CAFile: "erA7T0PM",
CAPath: "mQEN1Mfp",
CertFile: "7s4QAzDk",
CheckOutputMaxSize: checks.DefaultBufSize,
Checks: []*structs.CheckDefinition{
&structs.CheckDefinition{
ID: "uAjE6m9Z",
@ -4265,6 +4287,7 @@ func TestFullConfig(t *testing.T) {
Method: "aldrIQ4l",
TCP: "RJQND605",
Interval: 22164 * time.Second,
OutputMaxSize: checks.DefaultBufSize,
DockerContainerID: "ipgdFtjd",
Shell: "qAeOYy0M",
TLSSkipVerify: true,
@ -4286,6 +4309,7 @@ func TestFullConfig(t *testing.T) {
"qxvdnSE9": []string{"6wBPUYdF", "YYh8wtSZ"},
},
Method: "gLrztrNw",
OutputMaxSize: checks.DefaultBufSize,
TCP: "4jG5casb",
Interval: 28767 * time.Second,
DockerContainerID: "THW6u7rL",
@ -4309,6 +4333,7 @@ func TestFullConfig(t *testing.T) {
"f3r6xFtM": {"RyuIdDWv", "QbxEcIUM"},
},
Method: "Dou0nGT5",
OutputMaxSize: checks.DefaultBufSize,
TCP: "JY6fTTcw",
Interval: 18714 * time.Second,
DockerContainerID: "qF66POS9",
@ -4477,6 +4502,7 @@ func TestFullConfig(t *testing.T) {
"cVFpko4u": {"gGqdEB6k", "9LsRo22u"},
},
Method: "X5DrovFc",
OutputMaxSize: checks.DefaultBufSize,
TCP: "ICbxkpSF",
Interval: 24392 * time.Second,
DockerContainerID: "ZKXr68Yb",
@ -4525,6 +4551,7 @@ func TestFullConfig(t *testing.T) {
"1UJXjVrT": {"OJgxzTfk", "xZZrFsq7"},
},
Method: "5wkAxCUE",
OutputMaxSize: checks.DefaultBufSize,
TCP: "MN3oA9D2",
Interval: 32718 * time.Second,
DockerContainerID: "cU15LMet",
@ -4546,6 +4573,7 @@ func TestFullConfig(t *testing.T) {
"vr7wY7CS": {"EtCoNPPL", "9vAarJ5s"},
},
Method: "wzByP903",
OutputMaxSize: checks.DefaultBufSize,
TCP: "2exjZIGE",
Interval: 5656 * time.Second,
DockerContainerID: "5tDBWpfA",
@ -4631,6 +4659,7 @@ func TestFullConfig(t *testing.T) {
"SHOVq1Vv": {"jntFhyym", "GYJh32pp"},
},
Method: "T66MFBfR",
OutputMaxSize: checks.DefaultBufSize,
TCP: "bNnNfx2A",
Interval: 22224 * time.Second,
DockerContainerID: "ipgdFtjd",
@ -4652,6 +4681,7 @@ func TestFullConfig(t *testing.T) {
"p2UI34Qz": {"UsG1D0Qh", "NHhRiB6s"},
},
Method: "ciYHWors",
OutputMaxSize: checks.DefaultBufSize,
TCP: "FfvCwlqH",
Interval: 12356 * time.Second,
DockerContainerID: "HBndBU6R",
@ -4673,6 +4703,7 @@ func TestFullConfig(t *testing.T) {
"l4HwQ112": {"fk56MNlo", "dhLK56aZ"},
},
Method: "9afLm3Mj",
OutputMaxSize: checks.DefaultBufSize,
TCP: "fjiLFqVd",
Interval: 23926 * time.Second,
DockerContainerID: "dO5TtRHk",
@ -5010,6 +5041,7 @@ func TestConfigDecodeBytes(t *testing.T) {
func TestSanitize(t *testing.T) {
rt := RuntimeConfig{
BindAddr: &net.IPAddr{IP: net.ParseIP("127.0.0.1")},
CheckOutputMaxSize: checks.DefaultBufSize,
SerfAdvertiseAddrLAN: &net.TCPAddr{IP: net.ParseIP("1.2.3.4"), Port: 5678},
DNSAddrs: []net.Addr{
&net.TCPAddr{IP: net.ParseIP("1.2.3.4"), Port: 5678},
@ -5032,7 +5064,8 @@ func TestSanitize(t *testing.T) {
Name: "foo",
Token: "bar",
Check: structs.CheckType{
Name: "blurb",
Name: "blurb",
OutputMaxSize: checks.DefaultBufSize,
},
Weights: &structs.Weights{
Passing: 67,
@ -5042,8 +5075,9 @@ func TestSanitize(t *testing.T) {
},
Checks: []*structs.CheckDefinition{
&structs.CheckDefinition{
Name: "zoo",
Token: "zope",
Name: "zoo",
Token: "zope",
OutputMaxSize: checks.DefaultBufSize,
},
},
}
@ -5083,6 +5117,7 @@ func TestSanitize(t *testing.T) {
"CAPath": "",
"CertFile": "",
"CheckDeregisterIntervalMin": "0s",
"CheckOutputMaxSize": ` + strconv.Itoa(checks.DefaultBufSize) + `,
"CheckReapInterval": "0s",
"CheckUpdateInterval": "0s",
"Checks": [{
@ -5099,6 +5134,7 @@ func TestSanitize(t *testing.T) {
"Method": "",
"Name": "zoo",
"Notes": "",
"OutputMaxSize": ` + strconv.Itoa(checks.DefaultBufSize) + `,
"ScriptArgs": [],
"ServiceID": "",
"Shell": "",
@ -5130,6 +5166,7 @@ func TestSanitize(t *testing.T) {
"ConsulCoordinateUpdateMaxBatches": 0,
"ConsulCoordinateUpdatePeriod": "15s",
"ConsulRaftElectionTimeout": "0s",
"CheckOutputMaxSize": ` + strconv.Itoa(checks.DefaultBufSize) + `,
"ConsulRaftHeartbeatTimeout": "0s",
"ConsulRaftLeaderLeaseTimeout": "0s",
"GossipLANGossipInterval": "0s",
@ -5269,6 +5306,7 @@ func TestSanitize(t *testing.T) {
"Method": "",
"Name": "blurb",
"Notes": "",
"OutputMaxSize": ` + strconv.Itoa(checks.DefaultBufSize) + `,
"ScriptArgs": [],
"Shell": "",
"Status": "",

View File

@ -7,6 +7,7 @@ import (
"os"
"time"
"github.com/hashicorp/consul/agent/checks"
"github.com/hashicorp/consul/agent/consul/autopilot"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/lib"
@ -368,6 +369,9 @@ type Config struct {
// warning and discard the remaining updates.
CoordinateUpdateMaxBatches int
// CheckOutputMaxSize control the max size of output of checks
CheckOutputMaxSize int
// RPCHoldTimeout is how long an RPC can be "held" before it is errored.
// This is used to paper over a loss of leadership by instead holding RPCs,
// so that the caller experiences a slow response rather than an error.
@ -502,6 +506,8 @@ func DefaultConfig() *Config {
CoordinateUpdateBatchSize: 128,
CoordinateUpdateMaxBatches: 5,
CheckOutputMaxSize: checks.DefaultBufSize,
RPCRate: rate.Inf,
RPCMaxBurst: 1000,

View File

@ -37,6 +37,7 @@ type CheckDefinition struct {
Timeout time.Duration
TTL time.Duration
DeregisterCriticalServiceAfter time.Duration
OutputMaxSize int
}
func (c *CheckDefinition) HealthCheck(node string) *HealthCheck {
@ -72,6 +73,7 @@ func (c *CheckDefinition) CheckType() *CheckType {
GRPCUseTLS: c.GRPCUseTLS,
Header: c.Header,
Method: c.Method,
OutputMaxSize: c.OutputMaxSize,
TCP: c.TCP,
Interval: c.Interval,
DockerContainerID: c.DockerContainerID,

View File

@ -45,6 +45,7 @@ type CheckType struct {
// service, if any, to be deregistered if this check is critical for
// longer than this duration.
DeregisterCriticalServiceAfter time.Duration
OutputMaxSize int
}
type CheckTypes []*CheckType
@ -67,6 +68,9 @@ func (c *CheckType) Validate() error {
if !intervalCheck && !c.IsAlias() && c.TTL <= 0 {
return fmt.Errorf("TTL must be > 0 for TTL checks")
}
if c.OutputMaxSize < 0 {
return fmt.Errorf("MaxOutputMaxSize must be positive")
}
return nil
}

View File

@ -958,6 +958,7 @@ type HealthCheckDefinition struct {
Method string `json:",omitempty"`
TCP string `json:",omitempty"`
Interval time.Duration `json:",omitempty"`
OutputMaxSize uint `json:",omitempty"`
Timeout time.Duration `json:",omitempty"`
DeregisterCriticalServiceAfter time.Duration `json:",omitempty"`
}
@ -966,11 +967,13 @@ func (d *HealthCheckDefinition) MarshalJSON() ([]byte, error) {
type Alias HealthCheckDefinition
exported := &struct {
Interval string `json:",omitempty"`
OutputMaxSize uint `json:",omitempty"`
Timeout string `json:",omitempty"`
DeregisterCriticalServiceAfter string `json:",omitempty"`
*Alias
}{
Interval: d.Interval.String(),
OutputMaxSize: d.OutputMaxSize,
Timeout: d.Timeout.String(),
DeregisterCriticalServiceAfter: d.DeregisterCriticalServiceAfter.String(),
Alias: (*Alias)(d),

View File

@ -183,6 +183,12 @@ The table below shows this endpoint's support for
case of a Script, HTTP, TCP, or gRPC check. Can be specified in the form of "10s"
or "5m" (i.e., 10 seconds or 5 minutes, respectively).
- `OutputMaxSize` `(positive int: 4096)` - Allow to put a maximum size of text
for the given check. This value must be greater than 0, by default, the value
is 4k.
The value can be further limited for all checks of a given agent using the
`check_output_max_size` flag in the agent.
- `TLSSkipVerify` `(bool: false)` - Specifies if the certificate for an HTTPS
check should not be verified.

View File

@ -132,6 +132,14 @@ will exit with an error at startup.
[go-sockaddr](https://godoc.org/github.com/hashicorp/go-sockaddr/template)
template
* <a name="check_output_max_size"></a><a href="#check_output_max_size">`-check_output_max_size`</a> -
Override the default limit of 4k for maximum size of checks, this is a positive
value.
By limiting this size, it allows to put less pressure on Consul servers when
many checks are having a very large output in their checks.
In order to completely disable check output capture, it is possible to
use <a href="#discard_check_output">`discard_check_output`</a>.
* <a name="_client"></a><a href="#_client">`-client`</a> - The address to which
Consul will bind client interfaces, including the HTTP and DNS servers. By
default, this is "127.0.0.1", allowing only loopback connections. In Consul