mirror of https://github.com/status-im/consul.git
Adds new config to make script checks opt-in, updates documentation. (#3284)
This commit is contained in:
parent
74946ddde9
commit
1791d99a10
|
@ -1595,8 +1595,15 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType,
|
|||
if check.CheckID == "" {
|
||||
return fmt.Errorf("CheckID missing")
|
||||
}
|
||||
if chkType != nil && !chkType.Valid() {
|
||||
return fmt.Errorf("Check type is not valid")
|
||||
|
||||
if chkType != nil {
|
||||
if !chkType.Valid() {
|
||||
return fmt.Errorf("Check type is not valid")
|
||||
}
|
||||
|
||||
if chkType.IsScript() && !a.config.EnableScriptChecks {
|
||||
return fmt.Errorf("Check types that exec scripts are disabled on this agent")
|
||||
}
|
||||
}
|
||||
|
||||
if check.ServiceID != "" {
|
||||
|
|
|
@ -628,7 +628,9 @@ func TestAgent_RemoveServiceRemovesAllChecks(t *testing.T) {
|
|||
|
||||
func TestAgent_AddCheck(t *testing.T) {
|
||||
t.Parallel()
|
||||
a := NewTestAgent(t.Name(), nil)
|
||||
cfg := TestConfig()
|
||||
cfg.EnableScriptChecks = true
|
||||
a := NewTestAgent(t.Name(), cfg)
|
||||
defer a.Shutdown()
|
||||
|
||||
health := &structs.HealthCheck{
|
||||
|
@ -665,7 +667,9 @@ func TestAgent_AddCheck(t *testing.T) {
|
|||
|
||||
func TestAgent_AddCheck_StartPassing(t *testing.T) {
|
||||
t.Parallel()
|
||||
a := NewTestAgent(t.Name(), nil)
|
||||
cfg := TestConfig()
|
||||
cfg.EnableScriptChecks = true
|
||||
a := NewTestAgent(t.Name(), cfg)
|
||||
defer a.Shutdown()
|
||||
|
||||
health := &structs.HealthCheck{
|
||||
|
@ -702,7 +706,9 @@ func TestAgent_AddCheck_StartPassing(t *testing.T) {
|
|||
|
||||
func TestAgent_AddCheck_MinInterval(t *testing.T) {
|
||||
t.Parallel()
|
||||
a := NewTestAgent(t.Name(), nil)
|
||||
cfg := TestConfig()
|
||||
cfg.EnableScriptChecks = true
|
||||
a := NewTestAgent(t.Name(), cfg)
|
||||
defer a.Shutdown()
|
||||
|
||||
health := &structs.HealthCheck{
|
||||
|
@ -735,7 +741,9 @@ func TestAgent_AddCheck_MinInterval(t *testing.T) {
|
|||
|
||||
func TestAgent_AddCheck_MissingService(t *testing.T) {
|
||||
t.Parallel()
|
||||
a := NewTestAgent(t.Name(), nil)
|
||||
cfg := TestConfig()
|
||||
cfg.EnableScriptChecks = true
|
||||
a := NewTestAgent(t.Name(), cfg)
|
||||
defer a.Shutdown()
|
||||
|
||||
health := &structs.HealthCheck{
|
||||
|
@ -797,9 +805,38 @@ func TestAgent_AddCheck_RestoreState(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
// TestAgent_AddCheck_ExecDisable verifies that AddCheck refuses to register a
// script check on an agent created with a nil config (i.e. without
// EnableScriptChecks being set by the test), and that the rejected check is
// not left behind in the agent's check state.
func TestAgent_AddCheck_ExecDisable(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
// A nil config is passed here, unlike the sibling tests that explicitly
// set cfg.EnableScriptChecks = true before creating the agent.
a := NewTestAgent(t.Name(), nil)
|
||||
defer a.Shutdown()
|
||||
|
||||
health := &structs.HealthCheck{
|
||||
Node: "foo",
|
||||
CheckID: "mem",
|
||||
Name: "memory util",
|
||||
Status: api.HealthCritical,
|
||||
}
|
||||
// A check type with a non-empty Script, which is what the agent is
// expected to reject.
chk := &structs.CheckType{
|
||||
Script: "exit 0",
|
||||
Interval: 15 * time.Second,
|
||||
}
|
||||
// Registration must fail, and the error must carry the
// "script checks disabled" message rather than some unrelated failure.
err := a.AddCheck(health, chk, false, "")
|
||||
if err == nil || !strings.Contains(err.Error(), "exec scripts are disabled on this agent") {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Ensure we don't have a check mapping
|
||||
if memChk := a.state.Checks()["mem"]; memChk != nil {
|
||||
t.Fatalf("should be missing mem check")
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgent_RemoveCheck(t *testing.T) {
|
||||
t.Parallel()
|
||||
a := NewTestAgent(t.Name(), nil)
|
||||
cfg := TestConfig()
|
||||
cfg.EnableScriptChecks = true
|
||||
a := NewTestAgent(t.Name(), cfg)
|
||||
defer a.Shutdown()
|
||||
|
||||
// Remove check that doesn't exist
|
||||
|
@ -1097,6 +1134,7 @@ func TestAgent_PersistCheck(t *testing.T) {
|
|||
cfg := TestConfig()
|
||||
cfg.Server = false
|
||||
cfg.DataDir = testutil.TempDir(t, "agent") // we manage the data dir
|
||||
cfg.EnableScriptChecks = true
|
||||
a := NewTestAgent(t.Name(), cfg)
|
||||
defer os.RemoveAll(cfg.DataDir)
|
||||
defer a.Shutdown()
|
||||
|
@ -1230,6 +1268,7 @@ func TestAgent_PurgeCheckOnDuplicate(t *testing.T) {
|
|||
cfg := TestConfig()
|
||||
cfg.Server = false
|
||||
cfg.DataDir = testutil.TempDir(t, "agent") // we manage the data dir
|
||||
cfg.EnableScriptChecks = true
|
||||
a := NewTestAgent(t.Name(), cfg)
|
||||
defer os.RemoveAll(cfg.DataDir)
|
||||
defer a.Shutdown()
|
||||
|
|
|
@ -625,6 +625,11 @@ type Config struct {
|
|||
// true, we ignore the leave, and rejoin the cluster on start.
|
||||
RejoinAfterLeave bool `mapstructure:"rejoin_after_leave"`
|
||||
|
||||
// EnableScriptChecks controls whether health checks which execute
|
||||
// scripts are enabled. This includes regular script checks and Docker
|
||||
// checks.
|
||||
EnableScriptChecks bool `mapstructure:"enable_script_checks"`
|
||||
|
||||
// CheckUpdateInterval controls the interval on which the output of a health check
|
||||
// is updated if there is no change to the state. For example, a check in a steady
|
||||
// state may run every 5 second generating a unique output (timestamp, etc), forcing
|
||||
|
@ -1932,6 +1937,9 @@ func MergeConfig(a, b *Config) *Config {
|
|||
if b.DNSConfig.RecursorTimeout != 0 {
|
||||
result.DNSConfig.RecursorTimeout = b.DNSConfig.RecursorTimeout
|
||||
}
|
||||
if b.EnableScriptChecks {
|
||||
result.EnableScriptChecks = true
|
||||
}
|
||||
if b.CheckUpdateIntervalRaw != "" || b.CheckUpdateInterval != 0 {
|
||||
result.CheckUpdateInterval = b.CheckUpdateInterval
|
||||
}
|
||||
|
|
|
@ -322,6 +322,10 @@ func TestDecodeConfig(t *testing.T) {
|
|||
in: `{"disable_keyring_file":true}`,
|
||||
c: &Config{DisableKeyringFile: true},
|
||||
},
|
||||
{
|
||||
in: `{"enable_script_checks":true}`,
|
||||
c: &Config{EnableScriptChecks: true},
|
||||
},
|
||||
{
|
||||
in: `{"encrypt_verify_incoming":true}`,
|
||||
c: &Config{EncryptVerifyIncoming: Bool(true)},
|
||||
|
@ -1363,6 +1367,7 @@ func TestMergeConfig(t *testing.T) {
|
|||
ReconnectTimeoutLan: 24 * time.Hour,
|
||||
ReconnectTimeoutWanRaw: "36h",
|
||||
ReconnectTimeoutWan: 36 * time.Hour,
|
||||
EnableScriptChecks: true,
|
||||
CheckUpdateInterval: 8 * time.Minute,
|
||||
CheckUpdateIntervalRaw: "8m",
|
||||
ACLToken: "1111",
|
||||
|
|
|
@ -47,6 +47,11 @@ func (c *CheckType) Valid() bool {
|
|||
return c.IsTTL() || c.IsMonitor() || c.IsHTTP() || c.IsTCP() || c.IsDocker()
|
||||
}
|
||||
|
||||
// IsScript checks if this is a check that execs some kind of script. It
// reports true whenever the Script field is non-empty; no other field is
// consulted.
|
||||
func (c *CheckType) IsScript() bool {
|
||||
return c.Script != ""
|
||||
}
|
||||
|
||||
// IsTTL checks if this is a TTL type
|
||||
func (c *CheckType) IsTTL() bool {
|
||||
return c.TTL != 0
|
||||
|
|
|
@ -529,7 +529,9 @@ func TestAPI_AgentChecks_serviceBound(t *testing.T) {
|
|||
|
||||
func TestAPI_AgentChecks_Docker(t *testing.T) {
|
||||
t.Parallel()
|
||||
c, s := makeClient(t)
|
||||
c, s := makeClientWithConfig(t, nil, func(c *testutil.TestServerConfig) {
|
||||
c.EnableScriptChecks = true
|
||||
})
|
||||
defer s.Stop()
|
||||
|
||||
agent := c.Agent()
|
||||
|
|
|
@ -80,6 +80,7 @@ func (cmd *AgentCommand) readConfig() *agent.Config {
|
|||
"A unique ID for this node across space and time. Defaults to a randomly-generated ID"+
|
||||
" that persists in the data-dir.")
|
||||
|
||||
f.BoolVar(&cmdCfg.EnableScriptChecks, "enable-script-checks", false, "Enables health check scripts.")
|
||||
var disableHostNodeID configutil.BoolValue
|
||||
f.Var(&disableHostNodeID, "disable-host-node-id",
|
||||
"Setting this to true will prevent Consul from using information from the"+
|
||||
|
|
|
@ -86,6 +86,7 @@ type TestServerConfig struct {
|
|||
VerifyIncomingRPC bool `json:"verify_incoming_rpc,omitempty"`
|
||||
VerifyIncomingHTTPS bool `json:"verify_incoming_https,omitempty"`
|
||||
VerifyOutgoing bool `json:"verify_outgoing,omitempty"`
|
||||
EnableScriptChecks bool `json:"enable_script_checks,omitempty"`
|
||||
ReadyTimeout time.Duration `json:"-"`
|
||||
Stdout, Stderr io.Writer `json:"-"`
|
||||
Args []string `json:"-"`
|
||||
|
|
|
@ -21,10 +21,12 @@ There are five different kinds of checks:
|
|||
that performs the health check, exits with an appropriate exit code, and potentially
|
||||
generates some output. A script is paired with an invocation interval (e.g.
|
||||
every 30 seconds). This is similar to the Nagios plugin system. The output of
|
||||
a script check is limited to 4K. Output larger than this will be truncated.
|
||||
a script check is limited to 4KB. Output larger than this will be truncated.
|
||||
By default, Script checks will be configured with a timeout equal to 30 seconds.
|
||||
It is possible to configure a custom Script check timeout value by specifying the
|
||||
`timeout` field in the check definition.
|
||||
`timeout` field in the check definition. In Consul 0.9.0 and later, the agent
|
||||
must be configured with [`enable_script_checks`](/docs/agent/options.html#_enable_script_checks)
|
||||
set to `true` in order to enable script checks.
|
||||
|
||||
* HTTP + Interval - These checks make an HTTP `GET` request every Interval (e.g.
|
||||
every 30 seconds) to the specified URL. The status of the service depends on
|
||||
|
@ -38,7 +40,7 @@ There are five different kinds of checks:
|
|||
configured with a request timeout equal to the check interval, with a max of
|
||||
10 seconds. It is possible to configure a custom HTTP check timeout value by
|
||||
specifying the `timeout` field in the check definition. The output of the
|
||||
check is limited to roughly 4K. Responses larger than this will be truncated.
|
||||
check is limited to roughly 4KB. Responses larger than this will be truncated.
|
||||
HTTP checks also support SSL. By default, a valid SSL certificate is expected.
|
||||
Certificate verification can be turned off by setting the `tls_skip_verify`
|
||||
field to `true` in the check definition.
|
||||
|
@ -74,15 +76,17 @@ There are five different kinds of checks:
|
|||
valid through the end of the TTL from the time of the last check.
|
||||
|
||||
* Docker + Interval - These checks depend on invoking an external application which
|
||||
is packaged within a Docker Container. The application is triggered within the running
|
||||
container via the Docker Exec API. We expect that the Consul agent user has access
|
||||
to either the Docker HTTP API or the unix socket. Consul uses ```$DOCKER_HOST``` to
|
||||
determine the Docker API endpoint. The application is expected to run, perform a health
|
||||
check of the service running inside the container, and exit with an appropriate exit code.
|
||||
The check should be paired with an invocation interval. The shell on which the check
|
||||
has to be performed is configurable which makes it possible to run containers which
|
||||
have different shells on the same host. Check output for Docker is limited to
|
||||
4K. Any output larger than this will be truncated.
|
||||
is packaged within a Docker Container. The application is triggered within the running
|
||||
container via the Docker Exec API. We expect that the Consul agent user has access
|
||||
to either the Docker HTTP API or the unix socket. Consul uses ```$DOCKER_HOST``` to
|
||||
determine the Docker API endpoint. The application is expected to run, perform a health
|
||||
check of the service running inside the container, and exit with an appropriate exit code.
|
||||
The check should be paired with an invocation interval. The shell on which the check
|
||||
has to be performed is configurable which makes it possible to run containers which
|
||||
have different shells on the same host. Check output for Docker is limited to
|
||||
4KB. Any output larger than this will be truncated. In Consul 0.9.0 and later, the agent
|
||||
must be configured with [`enable_script_checks`](/docs/agent/options.html#_enable_script_checks)
|
||||
set to `true` in order to enable Docker health checks.
|
||||
|
||||
## Check Definition
|
||||
|
||||
|
@ -210,6 +214,10 @@ This is the only convention that Consul depends on. Any output of the script
|
|||
will be captured and stored in the `notes` field so that it can be viewed
|
||||
by human operators.
|
||||
|
||||
In Consul 0.9.0 and later, the agent must be configured with
|
||||
[`enable_script_checks`](/docs/agent/options.html#_enable_script_checks) set to `true`
|
||||
in order to enable script checks.
|
||||
|
||||
## Initial Health Check Status
|
||||
|
||||
By default, when checks are registered against a Consul agent, the state is set
|
||||
|
|
|
@ -147,6 +147,10 @@ will exit with an error at startup.
|
|||
[Nomad](https://www.nomadproject.io/), so if you opt-in to host-based IDs then Consul and Nomad will use
|
||||
information on the host to automatically assign the same ID in both systems.
|
||||
|
||||
* <a name="_disable_keyring_file"></a><a href="#_disable_keyring_file">`-disable-keyring-file`</a> - If set,
|
||||
the keyring will not be persisted to a file. Any installed keys will be lost on shutdown, and only the given
|
||||
`-encrypt` key will be available on startup. This defaults to false.
|
||||
|
||||
* <a name="_dns_port"></a><a href="#_dns_port">`-dns-port`</a> - the DNS port to listen on.
|
||||
This overrides the default port 8600. This is available in Consul 0.7 and later.
|
||||
|
||||
|
@ -154,6 +158,12 @@ will exit with an error at startup.
|
|||
in the "consul." domain. This flag can be used to change that domain. All queries in this domain
|
||||
are assumed to be handled by Consul and will not be recursively resolved.
|
||||
|
||||
* <a name="_enable_script_checks"></a><a href="#_enable_script_checks">`-enable-script-checks`</a> - This
|
||||
controls whether [health checks that execute scripts](/docs/agent/checks.html) are enabled on
|
||||
this agent, and defaults to `false` so operators must opt in to allowing these. If enabled,
|
||||
it is recommended to [enable ACLs](/docs/guides/acl.html) as well to control which users are
|
||||
allowed to register new checks to execute scripts. This was added in Consul 0.9.0.
|
||||
|
||||
* <a name="_encrypt"></a><a href="#_encrypt">`-encrypt`</a> - Specifies the secret key to
|
||||
use for encryption of Consul
|
||||
network traffic. This key must be 16 bytes, Base64-encoded. The
|
||||
|
@ -167,10 +177,6 @@ will exit with an error at startup.
|
|||
initialized with an encryption key, then the provided key is ignored and
|
||||
a warning will be displayed.
|
||||
|
||||
* <a name="_disable_keyring_file"></a><a href="#_disable_keyring_file">`-disable-keyring-file`</a> - If set,
|
||||
the keyring will not be persisted to a file. Any installed keys will be lost on shutdown, and only the given
|
||||
`-encrypt` key will be available on startup. This defaults to false.
|
||||
|
||||
* <a name="_http_port"></a><a href="#_http_port">`-http-port`</a> - the HTTP API port to listen on.
|
||||
This overrides the default port 8500. This option is very useful when deploying Consul
|
||||
to an environment which communicates the HTTP port through the environment e.g. PaaS like CloudFoundry, allowing
|
||||
|
@ -712,6 +718,9 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass
|
|||
* <a name="enable_debug"></a><a href="#enable_debug">`enable_debug`</a> When set, enables some
|
||||
additional debugging features. Currently, this is only used to set the runtime profiling HTTP endpoints.
|
||||
|
||||
* <a name="enable_script_checks"></a><a href="#enable_script_checks">`enable_script_checks`</a> Equivalent to the
|
||||
[`-enable-script-checks` command-line flag](#_enable_script_checks).
|
||||
|
||||
* <a name="enable_syslog"></a><a href="#enable_syslog">`enable_syslog`</a> Equivalent to
|
||||
the [`-syslog` command-line flag](#_syslog).
|
||||
|
||||
|
|
|
@ -684,6 +684,10 @@ to use for registration events:
|
|||
[checks](/docs/agent/checks.html). Tokens may also be passed to the
|
||||
[HTTP API](/api/index.html) for operations that require them.
|
||||
|
||||
In addition to ACLs, in Consul 0.9.0 and later, the agent must be configured with
|
||||
[`enable_script_checks`](/docs/agent/options.html#_enable_script_checks) set to `true` in order to enable
|
||||
script checks.
|
||||
|
||||
#### Operator Rules
|
||||
|
||||
The `operator` policy controls access to cluster-level operations in the
|
||||
|
@ -866,6 +870,10 @@ to use for registration events:
|
|||
[checks](/docs/agent/checks.html). Tokens may also be passed to the
|
||||
[HTTP API](/api/index.html) for operations that require them.
|
||||
|
||||
In addition to ACLs, in Consul 0.9.0 and later, the agent must be configured with
|
||||
[`enable_script_checks`](/docs/agent/options.html#_enable_script_checks) set to `true` in order to enable
|
||||
script checks.
|
||||
|
||||
#### Session Rules
|
||||
|
||||
The `session` policy controls access to [Session API](/api/session.html) operations.
|
||||
|
|
|
@ -72,6 +72,12 @@ the replicated log until the expected number of servers has successfully joined.
|
|||
You can read more about this in the [bootstrapping
|
||||
guide](/docs/guides/bootstrapping.html).
|
||||
|
||||
We've included the [`-enable-script-checks`](/docs/agent/options.html#_enable_script_checks)
|
||||
flag set to `true` in order to enable health checks that can execute external scripts.
|
||||
This will be used in examples later. For production use, you'd want to configure
|
||||
[ACLs](/docs/guides/acl.html) in conjunction with this to control the ability to
|
||||
register arbitrary scripts.
|
||||
|
||||
Finally, we add the [`config-dir` flag](/docs/agent/options.html#_config_dir),
|
||||
marking where service and check definitions can be found.
|
||||
|
||||
|
@ -81,7 +87,7 @@ All together, these settings yield a
|
|||
```text
|
||||
vagrant@n1:~$ consul agent -server -bootstrap-expect=1 \
|
||||
-data-dir=/tmp/consul -node=agent-one -bind=172.20.20.10 \
|
||||
-config-dir=/etc/consul.d
|
||||
-enable-script-checks=true -config-dir=/etc/consul.d
|
||||
...
|
||||
```
|
||||
|
||||
|
@ -102,7 +108,7 @@ All together, these settings yield a
|
|||
|
||||
```text
|
||||
vagrant@n2:~$ consul agent -data-dir=/tmp/consul -node=agent-two \
|
||||
-bind=172.20.20.11 -config-dir=/etc/consul.d
|
||||
-bind=172.20.20.11 -enable-script-checks=true -config-dir=/etc/consul.d
|
||||
...
|
||||
```
|
||||
|
||||
|
|
Loading…
Reference in New Issue