mirror of https://github.com/status-im/consul.git
Add TCP check type
Adds the ability to simply check whether a TCP socket accepts connections to determine if it is healthy. This is a light-weight - though less comprehensive than scripting - method of checking network service health. The check parameter `tcp` should be set to the `address:port` combination for the service to be tested. Supports both IPv6 and IPv4, in the case of a hostname that resolves to both, connections will be attempted via both protocol versions, with the first successful connection returning a successful check result. Example check: ```json { "check": { "id": "ssh", "name": "SSH (TCP)", "tcp": "example.com:22", "interval": "10s" } } ```
This commit is contained in:
parent
dcb5ae783b
commit
b023904298
|
@ -68,6 +68,7 @@ type AgentServiceCheck struct {
|
|||
Timeout string `json:",omitempty"`
|
||||
TTL string `json:",omitempty"`
|
||||
HTTP string `json:",omitempty"`
|
||||
TCP string `json:",omitempty"`
|
||||
Status string `json:",omitempty"`
|
||||
}
|
||||
type AgentServiceChecks []*AgentServiceCheck
|
||||
|
|
|
@ -75,6 +75,9 @@ type Agent struct {
|
|||
// checkHTTPs maps the check ID to an associated HTTP check
|
||||
checkHTTPs map[string]*CheckHTTP
|
||||
|
||||
// checkTCPs maps the check ID to an associated TCP check
|
||||
checkTCPs map[string]*CheckTCP
|
||||
|
||||
// checkTTLs maps the check ID to an associated check TTL
|
||||
checkTTLs map[string]*CheckTTL
|
||||
|
||||
|
@ -145,6 +148,7 @@ func Create(config *Config, logOutput io.Writer) (*Agent, error) {
|
|||
checkMonitors: make(map[string]*CheckMonitor),
|
||||
checkTTLs: make(map[string]*CheckTTL),
|
||||
checkHTTPs: make(map[string]*CheckHTTP),
|
||||
checkTCPs: make(map[string]*CheckTCP),
|
||||
eventCh: make(chan serf.UserEvent, 1024),
|
||||
eventBuf: make([]*UserEvent, 256),
|
||||
shutdownCh: make(chan struct{}),
|
||||
|
@ -440,6 +444,10 @@ func (a *Agent) Shutdown() error {
|
|||
chk.Stop()
|
||||
}
|
||||
|
||||
for _, chk := range a.checkTCPs {
|
||||
chk.Stop()
|
||||
}
|
||||
|
||||
a.logger.Println("[INFO] agent: requesting shutdown")
|
||||
var err error
|
||||
if a.server != nil {
|
||||
|
@ -801,6 +809,27 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *CheckType, persist
|
|||
http.Start()
|
||||
a.checkHTTPs[check.CheckID] = http
|
||||
|
||||
} else if chkType.IsTCP() {
|
||||
if existing, ok := a.checkTCPs[check.CheckID]; ok {
|
||||
existing.Stop()
|
||||
}
|
||||
if chkType.Interval < MinInterval {
|
||||
a.logger.Println(fmt.Sprintf("[WARN] agent: check '%s' has interval below minimum of %v",
|
||||
check.CheckID, MinInterval))
|
||||
chkType.Interval = MinInterval
|
||||
}
|
||||
|
||||
tcp := &CheckTCP{
|
||||
Notify: &a.state,
|
||||
CheckID: check.CheckID,
|
||||
TCP: chkType.TCP,
|
||||
Interval: chkType.Interval,
|
||||
Timeout: chkType.Timeout,
|
||||
Logger: a.logger,
|
||||
}
|
||||
tcp.Start()
|
||||
a.checkTCPs[check.CheckID] = tcp
|
||||
|
||||
} else {
|
||||
if existing, ok := a.checkMonitors[check.CheckID]; ok {
|
||||
existing.Stop()
|
||||
|
@ -857,6 +886,10 @@ func (a *Agent) RemoveCheck(checkID string, persist bool) error {
|
|||
check.Stop()
|
||||
delete(a.checkHTTPs, checkID)
|
||||
}
|
||||
if check, ok := a.checkTCPs[checkID]; ok {
|
||||
check.Stop()
|
||||
delete(a.checkTCPs, checkID)
|
||||
}
|
||||
if check, ok := a.checkTTLs[checkID]; ok {
|
||||
check.Stop()
|
||||
delete(a.checkTTLs, checkID)
|
||||
|
|
|
@ -4,6 +4,7 @@ import (
|
|||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"net"
|
||||
"net/http"
|
||||
"os/exec"
|
||||
"sync"
|
||||
|
@ -31,13 +32,14 @@ const (
|
|||
|
||||
// CheckType is used to create either the CheckMonitor
|
||||
// or the CheckTTL.
|
||||
// Three types are supported: Script, HTTP, and TTL
|
||||
// Script and HTTP both require Interval
|
||||
// Four types are supported: Script, HTTP, TCP and TTL
|
||||
// Script, HTTP and TCP all require Interval
|
||||
// Only one of the types needs to be provided
|
||||
// TTL or Script/Interval or HTTP/Interval
|
||||
// TTL or Script/Interval or HTTP/Interval or TCP/Interval
|
||||
type CheckType struct {
|
||||
Script string
|
||||
HTTP string
|
||||
TCP string
|
||||
Interval time.Duration
|
||||
|
||||
Timeout time.Duration
|
||||
|
@ -51,7 +53,7 @@ type CheckTypes []*CheckType
|
|||
|
||||
// Valid checks if the CheckType is valid
|
||||
func (c *CheckType) Valid() bool {
|
||||
return c.IsTTL() || c.IsMonitor() || c.IsHTTP()
|
||||
return c.IsTTL() || c.IsMonitor() || c.IsHTTP() || c.IsTCP()
|
||||
}
|
||||
|
||||
// IsTTL checks if this is a TTL type
|
||||
|
@ -69,6 +71,11 @@ func (c *CheckType) IsHTTP() bool {
|
|||
return c.HTTP != "" && c.Interval != 0
|
||||
}
|
||||
|
||||
// IsTCP checks if this is a TCP type
|
||||
func (c *CheckType) IsTCP() bool {
|
||||
return c.TCP != "" && c.Interval != 0
|
||||
}
|
||||
|
||||
// CheckNotifier interface is used by the CheckMonitor
|
||||
// to notify when a check has a status update. The update
|
||||
// should take care to be idempotent.
|
||||
|
@ -402,3 +409,86 @@ func (c *CheckHTTP) check() {
|
|||
c.Notify.UpdateCheck(c.CheckID, structs.HealthCritical, result)
|
||||
}
|
||||
}
|
||||
|
||||
// CheckTCP is used to periodically make an TCP/UDP connection to
|
||||
// determine the health of a given check.
|
||||
// The check is passing if the connection succeeds
|
||||
// The check is critical if the connection returns an error
|
||||
type CheckTCP struct {
|
||||
Notify CheckNotifier
|
||||
CheckID string
|
||||
TCP string
|
||||
Interval time.Duration
|
||||
Timeout time.Duration
|
||||
Logger *log.Logger
|
||||
|
||||
dialer *net.Dialer
|
||||
stop bool
|
||||
stopCh chan struct{}
|
||||
stopLock sync.Mutex
|
||||
}
|
||||
|
||||
// Start is used to start a TCP check.
|
||||
// The check runs until stop is called
|
||||
func (c *CheckTCP) Start() {
|
||||
c.stopLock.Lock()
|
||||
defer c.stopLock.Unlock()
|
||||
|
||||
if c.dialer == nil {
|
||||
// Create the socket dialer
|
||||
c.dialer = &net.Dialer{DualStack: true}
|
||||
|
||||
// For long (>10s) interval checks the socket timeout is 10s, otherwise
|
||||
// the timeout is the interval. This means that a check *should* return
|
||||
// before the next check begins.
|
||||
if c.Timeout > 0 && c.Timeout < c.Interval {
|
||||
c.dialer.Timeout = c.Timeout
|
||||
} else if c.Interval < 10*time.Second {
|
||||
c.dialer.Timeout = c.Interval
|
||||
}
|
||||
}
|
||||
|
||||
c.stop = false
|
||||
c.stopCh = make(chan struct{})
|
||||
go c.run()
|
||||
}
|
||||
|
||||
// Stop is used to stop a TCP check.
|
||||
func (c *CheckTCP) Stop() {
|
||||
c.stopLock.Lock()
|
||||
defer c.stopLock.Unlock()
|
||||
if !c.stop {
|
||||
c.stop = true
|
||||
close(c.stopCh)
|
||||
}
|
||||
}
|
||||
|
||||
// run is invoked by a goroutine to run until Stop() is called
|
||||
func (c *CheckTCP) run() {
|
||||
// Get the randomized initial pause time
|
||||
initialPauseTime := randomStagger(c.Interval)
|
||||
c.Logger.Printf("[DEBUG] agent: pausing %v before first socket connection of %s", initialPauseTime, c.TCP)
|
||||
next := time.After(initialPauseTime)
|
||||
for {
|
||||
select {
|
||||
case <-next:
|
||||
c.check()
|
||||
next = time.After(c.Interval)
|
||||
case <-c.stopCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check is invoked periodically to perform the TCP check
|
||||
func (c *CheckTCP) check() {
|
||||
conn, err := c.dialer.Dial(`tcp`, c.TCP)
|
||||
if err != nil {
|
||||
c.Logger.Printf("[WARN] agent: socket connection failed '%s': %s", c.TCP, err)
|
||||
c.Notify.UpdateCheck(c.CheckID, structs.HealthCritical, err.Error())
|
||||
return
|
||||
}
|
||||
conn.Close()
|
||||
c.Logger.Printf("[DEBUG] agent: check '%v' is passing", c.CheckID)
|
||||
c.Notify.UpdateCheck(c.CheckID, structs.HealthPassing, fmt.Sprintf("TCP connect %s: Success", c.TCP))
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ package agent
|
|||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
|
@ -321,3 +322,74 @@ func TestCheckHTTP_disablesKeepAlives(t *testing.T) {
|
|||
t.Fatalf("should have disabled keepalives")
|
||||
}
|
||||
}
|
||||
|
||||
func mockTCPServer(network string) net.Listener {
|
||||
var (
|
||||
addr string
|
||||
)
|
||||
|
||||
if network == `tcp6` {
|
||||
addr = `[::1]:0`
|
||||
} else {
|
||||
addr = `127.0.0.1:0`
|
||||
}
|
||||
|
||||
listener, err := net.Listen(network, addr)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return listener
|
||||
}
|
||||
|
||||
func expectTCPStatus(t *testing.T, tcp string, status string) {
|
||||
mock := &MockNotify{
|
||||
state: make(map[string]string),
|
||||
updates: make(map[string]int),
|
||||
output: make(map[string]string),
|
||||
}
|
||||
check := &CheckTCP{
|
||||
Notify: mock,
|
||||
CheckID: "foo",
|
||||
TCP: tcp,
|
||||
Interval: 10 * time.Millisecond,
|
||||
Logger: log.New(os.Stderr, "", log.LstdFlags),
|
||||
}
|
||||
check.Start()
|
||||
defer check.Stop()
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
// Should have at least 2 updates
|
||||
if mock.updates["foo"] < 2 {
|
||||
t.Fatalf("should have 2 updates %v", mock.updates)
|
||||
}
|
||||
|
||||
if mock.state["foo"] != status {
|
||||
t.Fatalf("should be %v %v", status, mock.state)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckTCPCritical(t *testing.T) {
|
||||
var (
|
||||
tcpServer net.Listener
|
||||
)
|
||||
|
||||
tcpServer = mockTCPServer(`tcp`)
|
||||
expectTCPStatus(t, `127.0.0.1:0`, "critical")
|
||||
tcpServer.Close()
|
||||
}
|
||||
|
||||
func TestCheckTCPPassing(t *testing.T) {
|
||||
var (
|
||||
tcpServer net.Listener
|
||||
)
|
||||
|
||||
tcpServer = mockTCPServer(`tcp`)
|
||||
expectTCPStatus(t, tcpServer.Addr().String(), "passing")
|
||||
tcpServer.Close()
|
||||
|
||||
tcpServer = mockTCPServer(`tcp6`)
|
||||
expectTCPStatus(t, tcpServer.Addr().String(), "passing")
|
||||
tcpServer.Close()
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue