diff --git a/api/agent.go b/api/agent.go
index e56a18dcd2..2b950d0a3e 100644
--- a/api/agent.go
+++ b/api/agent.go
@@ -68,6 +68,7 @@ type AgentServiceCheck struct {
Timeout string `json:",omitempty"`
TTL string `json:",omitempty"`
HTTP string `json:",omitempty"`
+ TCP string `json:",omitempty"`
Status string `json:",omitempty"`
}
type AgentServiceChecks []*AgentServiceCheck
diff --git a/command/agent/agent.go b/command/agent/agent.go
index 449b03c476..397f616a43 100644
--- a/command/agent/agent.go
+++ b/command/agent/agent.go
@@ -75,6 +75,9 @@ type Agent struct {
// checkHTTPs maps the check ID to an associated HTTP check
checkHTTPs map[string]*CheckHTTP
+ // checkTCPs maps the check ID to an associated TCP check
+ checkTCPs map[string]*CheckTCP
+
// checkTTLs maps the check ID to an associated check TTL
checkTTLs map[string]*CheckTTL
@@ -145,6 +148,7 @@ func Create(config *Config, logOutput io.Writer) (*Agent, error) {
checkMonitors: make(map[string]*CheckMonitor),
checkTTLs: make(map[string]*CheckTTL),
checkHTTPs: make(map[string]*CheckHTTP),
+ checkTCPs: make(map[string]*CheckTCP),
eventCh: make(chan serf.UserEvent, 1024),
eventBuf: make([]*UserEvent, 256),
shutdownCh: make(chan struct{}),
@@ -440,6 +444,10 @@ func (a *Agent) Shutdown() error {
chk.Stop()
}
+ for _, chk := range a.checkTCPs {
+ chk.Stop()
+ }
+
a.logger.Println("[INFO] agent: requesting shutdown")
var err error
if a.server != nil {
@@ -801,6 +809,27 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *CheckType, persist
http.Start()
a.checkHTTPs[check.CheckID] = http
+ } else if chkType.IsTCP() {
+ if existing, ok := a.checkTCPs[check.CheckID]; ok {
+ existing.Stop()
+ }
+ if chkType.Interval < MinInterval {
+ a.logger.Println(fmt.Sprintf("[WARN] agent: check '%s' has interval below minimum of %v",
+ check.CheckID, MinInterval))
+ chkType.Interval = MinInterval
+ }
+
+ tcp := &CheckTCP{
+ Notify: &a.state,
+ CheckID: check.CheckID,
+ TCP: chkType.TCP,
+ Interval: chkType.Interval,
+ Timeout: chkType.Timeout,
+ Logger: a.logger,
+ }
+ tcp.Start()
+ a.checkTCPs[check.CheckID] = tcp
+
} else {
if existing, ok := a.checkMonitors[check.CheckID]; ok {
existing.Stop()
@@ -857,6 +886,10 @@ func (a *Agent) RemoveCheck(checkID string, persist bool) error {
check.Stop()
delete(a.checkHTTPs, checkID)
}
+ if check, ok := a.checkTCPs[checkID]; ok {
+ check.Stop()
+ delete(a.checkTCPs, checkID)
+ }
if check, ok := a.checkTTLs[checkID]; ok {
check.Stop()
delete(a.checkTTLs, checkID)
diff --git a/command/agent/check.go b/command/agent/check.go
index 6677483886..97ac592eb4 100644
--- a/command/agent/check.go
+++ b/command/agent/check.go
@@ -4,6 +4,7 @@ import (
"fmt"
"io/ioutil"
"log"
+ "net"
"net/http"
"os/exec"
"sync"
@@ -31,13 +32,14 @@ const (
// CheckType is used to create either the CheckMonitor
// or the CheckTTL.
-// Three types are supported: Script, HTTP, and TTL
-// Script and HTTP both require Interval
+// Four types are supported: Script, HTTP, TCP and TTL
+// Script, HTTP and TCP all require Interval
// Only one of the types needs to be provided
-// TTL or Script/Interval or HTTP/Interval
+// TTL or Script/Interval or HTTP/Interval or TCP/Interval
type CheckType struct {
Script string
HTTP string
+ TCP string
Interval time.Duration
Timeout time.Duration
@@ -51,7 +53,7 @@ type CheckTypes []*CheckType
// Valid checks if the CheckType is valid
func (c *CheckType) Valid() bool {
- return c.IsTTL() || c.IsMonitor() || c.IsHTTP()
+ return c.IsTTL() || c.IsMonitor() || c.IsHTTP() || c.IsTCP()
}
// IsTTL checks if this is a TTL type
@@ -69,6 +71,11 @@ func (c *CheckType) IsHTTP() bool {
return c.HTTP != "" && c.Interval != 0
}
+// IsTCP checks if this is a TCP type: TCP must be set and Interval must be non-zero
+func (c *CheckType) IsTCP() bool {
+ return c.TCP != "" && c.Interval != 0
+}
+
// CheckNotifier interface is used by the CheckMonitor
// to notify when a check has a status update. The update
// should take care to be idempotent.
@@ -402,3 +409,86 @@ func (c *CheckHTTP) check() {
c.Notify.UpdateCheck(c.CheckID, structs.HealthCritical, result)
}
}
+
+// CheckTCP is used to periodically make a TCP connection to
+// determine the health of a given check.
+// The check is passing if the connection succeeds
+// The check is critical if the connection returns an error
+type CheckTCP struct {
+ Notify CheckNotifier // receives the status update after every check
+ CheckID string // ID reported to Notify
+ TCP string // address handed to the dialer
+ Interval time.Duration // pause between consecutive checks
+ Timeout time.Duration // dial timeout; only honored when > 0 and < Interval
+ Logger *log.Logger
+
+ dialer *net.Dialer // created on the first Start
+ stop bool // true once Stop has closed stopCh
+ stopCh chan struct{} // closed by Stop to end the run loop
+ stopLock sync.Mutex // held by Start and Stop
+}
+
+// Start is used to start a TCP check. The check runs until Stop is called.
+// The dial timeout is Timeout when 0 < Timeout < Interval, else min(Interval, 10s).
+func (c *CheckTCP) Start() {
+	c.stopLock.Lock()
+	defer c.stopLock.Unlock()
+
+	if c.dialer == nil {
+		// Create the socket dialer; 10s is the cap applied to long intervals.
+		c.dialer = &net.Dialer{DualStack: true, Timeout: 10 * time.Second}
+
+		// For long (>=10s) interval checks the socket timeout is 10s, otherwise
+		// the timeout is the interval. This means that a check *should* return
+		// before the next check begins. An explicit, shorter Timeout wins.
+		if c.Timeout > 0 && c.Timeout < c.Interval {
+			c.dialer.Timeout = c.Timeout
+		} else if c.Interval < 10*time.Second {
+			c.dialer.Timeout = c.Interval
+		}
+	}
+
+	c.stop = false
+	c.stopCh = make(chan struct{})
+	go c.run()
+}
+
+// Stop is used to stop a TCP check. Repeated calls without an intervening Start are no-ops.
+func (c *CheckTCP) Stop() {
+ c.stopLock.Lock()
+ defer c.stopLock.Unlock()
+ if !c.stop {
+ c.stop = true
+ close(c.stopCh) // the run loop returns once stopCh is closed
+ }
+}
+
+// run is invoked by a goroutine; it checks after an initial pause and then every Interval until Stop() is called
+func (c *CheckTCP) run() {
+ // Get the randomized initial pause time, so the first check does not fire immediately
+ initialPauseTime := randomStagger(c.Interval)
+ c.Logger.Printf("[DEBUG] agent: pausing %v before first socket connection of %s", initialPauseTime, c.TCP)
+ next := time.After(initialPauseTime)
+ for {
+ select {
+ case <-next:
+ c.check()
+ next = time.After(c.Interval) // the interval is measured from the end of the check
+ case <-c.stopCh:
+ return
+ }
+ }
+}
+
+// check is invoked periodically to perform the TCP check: a dial error reports critical, success reports passing
+func (c *CheckTCP) check() {
+ conn, err := c.dialer.Dial(`tcp`, c.TCP)
+ if err != nil {
+ c.Logger.Printf("[WARN] agent: socket connection failed '%s': %s", c.TCP, err)
+ c.Notify.UpdateCheck(c.CheckID, structs.HealthCritical, err.Error())
+ return
+ }
+ conn.Close() // nothing is sent or read; the successful dial is the whole check
+ c.Logger.Printf("[DEBUG] agent: check '%v' is passing", c.CheckID)
+ c.Notify.UpdateCheck(c.CheckID, structs.HealthPassing, fmt.Sprintf("TCP connect %s: Success", c.TCP))
+}
diff --git a/command/agent/check_test.go b/command/agent/check_test.go
index 3fbd0ff97a..6b9f59df7e 100644
--- a/command/agent/check_test.go
+++ b/command/agent/check_test.go
@@ -3,6 +3,7 @@ package agent
import (
"fmt"
"log"
+ "net"
"net/http"
"net/http/httptest"
"os"
@@ -321,3 +322,74 @@ func TestCheckHTTP_disablesKeepAlives(t *testing.T) {
t.Fatalf("should have disabled keepalives")
}
}
+
+// mockTCPServer opens a listener for tests on the loopback address of the
+// requested network: `tcp6` selects [::1], any other value 127.0.0.1.
+// Port 0 is requested, so the actual address has to be read back from the
+// returned listener.
+// It panics when the listener cannot be created.
+func mockTCPServer(network string) net.Listener {
+	addr := `127.0.0.1:0`
+	if network == `tcp6` {
+		addr = `[::1]:0`
+	}
+
+	listener, err := net.Listen(network, addr)
+	if err != nil {
+		panic(err)
+	}
+
+	return listener
+}
+
+// expectTCPStatus runs a CheckTCP against tcp for 50ms and fails t unless at least 2 updates arrived and the last state equals status
+func expectTCPStatus(t *testing.T, tcp string, status string) {
+ mock := &MockNotify{
+ state: make(map[string]string),
+ updates: make(map[string]int),
+ output: make(map[string]string),
+ }
+ check := &CheckTCP{
+ Notify: mock,
+ CheckID: "foo",
+ TCP: tcp,
+ Interval: 10 * time.Millisecond,
+ Logger: log.New(os.Stderr, "", log.LstdFlags),
+ }
+ check.Start()
+ defer check.Stop()
+ time.Sleep(50 * time.Millisecond) // room for several checks at the 10ms interval
+
+ // Should have at least 2 updates
+ if mock.updates["foo"] < 2 {
+ t.Fatalf("should have 2 updates %v", mock.updates)
+ }
+
+ if mock.state["foo"] != status {
+ t.Fatalf("should be %v %v", status, mock.state)
+ }
+}
+
+// TestCheckTCPCritical verifies that a failed dial is reported as critical.
+// The check dials 127.0.0.1:0, which is expected to be refused.
+func TestCheckTCPCritical(t *testing.T) {
+	tcpServer := mockTCPServer(`tcp`)
+	// Deferred so the listener is released even if expectTCPStatus ends the
+	// test early through t.Fatalf.
+	defer tcpServer.Close()
+	expectTCPStatus(t, `127.0.0.1:0`, "critical")
+}
+
+// TestCheckTCPPassing verifies that a live listener is reported as passing
+// over both IPv4 (`tcp`) and IPv6 (`tcp6`).
+func TestCheckTCPPassing(t *testing.T) {
+	for _, network := range []string{`tcp`, `tcp6`} {
+		// The closure scopes the deferred Close to one listener, so it is
+		// released even if expectTCPStatus ends the test through t.Fatalf.
+		func() {
+			tcpServer := mockTCPServer(network)
+			defer tcpServer.Close()
+			expectTCPStatus(t, tcpServer.Addr().String(), "passing")
+		}()
+	}
+}
diff --git a/website/source/docs/agent/checks.html.markdown b/website/source/docs/agent/checks.html.markdown
index 19789910e7..336b6e99fb 100644
--- a/website/source/docs/agent/checks.html.markdown
+++ b/website/source/docs/agent/checks.html.markdown
@@ -31,6 +31,20 @@ There are three different kinds of checks:
It is possible to configure a custom HTTP check timeout value by specifying
the `timeout` field in the check definition.
+* TCP + Interval - These checks make a TCP connection attempt every Interval
+ (e.g. every 30 seconds) to the specified IP/hostname and port. The status of
+ the service depends on whether the connection attempt is successful (i.e., the
+ port is currently accepting connections). If the connection is accepted, the
+ status is `passing`, otherwise the status is `critical`. In the case of a
+ hostname that resolves to both IPv4 and IPv6 addresses, an attempt will be
+ made to both addresses, and the first successful connection attempt will
+ result in a successful check. This type of check should be preferred over a
+ script that uses `netcat` or another external process to check a simple socket
+ operation. By default, TCP checks will be configured with a request timeout
+ equal to the check interval, with a max of 10 seconds. It is possible to
+ configure a custom TCP check timeout value by specifying the `timeout` field
+ in the check definition.
+
* Time to Live (TTL) - These checks retain their last known state for a given TTL.
The state of the check must be updated periodically over the HTTP interface. If an
external system fails to update the status within a given TTL, the check is
@@ -75,6 +89,20 @@ A HTTP check:
}
```
+A TCP check:
+
+```javascript
+{
+ "check": {
+ "id": "ssh",
+ "name": "SSH TCP on port 22",
+ "tcp": "localhost:22",
+ "interval": "10s",
+ "timeout": "1s"
+ }
+}
+```
+
A TTL check:
```javascript
@@ -102,7 +130,7 @@ Checks may also contain a `token` field to provide an ACL token. This token is
used for any interaction with the catalog for the check, including
[anti-entropy syncs](/docs/internals/anti-entropy.html) and deregistration.
-Both script and HTTP checks must include an `interval` field. This field is
+Script, TCP and HTTP checks must include an `interval` field. This field is
parsed by Go's `time` package, and has the following
[formatting specification](http://golang.org/pkg/time/#ParseDuration):
> A duration string is a possibly signed sequence of decimal numbers, each with
diff --git a/website/source/docs/agent/http/agent.html.markdown b/website/source/docs/agent/http/agent.html.markdown
index 0b16d5bc3f..4f3670b715 100644
--- a/website/source/docs/agent/http/agent.html.markdown
+++ b/website/source/docs/agent/http/agent.html.markdown
@@ -224,8 +224,8 @@ The endpoint always returns 200.
The register endpoint is used to add a new check to the local agent.
There is more documentation on checks [here](/docs/agent/checks.html).
-Checks may be of script, HTTP, or TTL type. The agent is responsible for managing
-the status of the check and keeping the Catalog in sync.
+Checks may be of script, HTTP, TCP, or TTL type. The agent is responsible for
+managing the status of the check and keeping the Catalog in sync.
The register endpoint expects a JSON request body to be PUT. The request
body must look like:
@@ -237,13 +237,14 @@ body must look like:
"Notes": "Ensure we don't oversubscribe memory",
"Script": "/usr/local/bin/check_mem.py",
"HTTP": "http://example.com",
+ "TCP": "example.com:22",
"Interval": "10s",
"TTL": "15s"
}
```
-The `Name` field is mandatory, as is one of `Script`, `HTTP` or `TTL`.
-`Script` and `HTTP` also require that `Interval` be set.
+The `Name` field is mandatory, as is one of `Script`, `HTTP`, `TCP` or `TTL`.
+`Script`, `TCP` and `HTTP` also require that `Interval` be set.
If an `ID` is not provided, it is set to `Name`. You cannot have duplicate
`ID` entries per agent, so it may be necessary to provide an `ID`.
@@ -258,6 +259,14 @@ be a URL) every `Interval`. If the response is any `2xx` code, the check is `pas
If the response is `429 Too Many Requests`, the check is `warning`. Otherwise, the check
is `critical`.
+A `TCP` check will perform a TCP connection attempt against the value of `TCP`
+(expected to be an IP/hostname and port combination) every `Interval`. If the
+connection attempt is successful, the check is `passing`. If the connection
+attempt is unsuccessful, the check is `critical`. In the case of a hostname
+that resolves to both IPv4 and IPv6 addresses, an attempt will be made to both
+addresses, and the first successful connection attempt will result in a
+successful check.
+
If a `TTL` type is used, then the TTL update endpoint must be used periodically to update
the state of the check.
diff --git a/website/source/docs/agent/services.html.markdown b/website/source/docs/agent/services.html.markdown
index 79327201c6..1b589b31b0 100644
--- a/website/source/docs/agent/services.html.markdown
+++ b/website/source/docs/agent/services.html.markdown
@@ -62,13 +62,14 @@ the DNS interface as well. If a service is failing its health check or a
node has any failing system-level check, the DNS interface will omit that
node from any service query.
-The check must be of the script, HTTP, or TTL type. If it is a script type, `script`
-and `interval` must be provided. If it is a HTTP type, `http` and
-`interval` must be provided. If it is a TTL type, then only `ttl` must be
-provided. The check name is automatically generated as
-`service:<service-id>`. If there are multiple service checks registered, the
-ID will be generated as `service:<service-id>:<num>` where `<num>` is an
-incrementing number starting from `1`.
+The check must be of the script, HTTP, TCP or TTL type. If it is a script type,
+`script` and `interval` must be provided. If it is a HTTP type, `http` and
+`interval` must be provided. If it is a TCP type, `tcp` and `interval` must be
+provided. If it is a TTL type, then only `ttl` must be provided. The check name
+is automatically generated as `service:<service-id>`. If there are multiple
+service checks registered, the ID will be generated as
+`service:<service-id>:<num>` where `<num>` is an incrementing number starting
+from `1`.
Note: there is more information about [checks here](/docs/agent/checks.html).