agent/proxy: implement force kill of unresponsive proxy process

This commit is contained in:
Mitchell Hashimoto 2018-05-02 12:25:00 -07:00
parent 6539280f2a
commit b0f377b519
No known key found for this signature in database
GPG Key ID: 744E147AA52F5B0A
3 changed files with 98 additions and 10 deletions

View File

@ -38,11 +38,16 @@ type Daemon struct {
// a file. // a file.
Logger *log.Logger Logger *log.Logger
// For tests, they can set this to change the default duration to wait
// for a graceful quit.
gracefulWait time.Duration
// process is the started process // process is the started process
lock sync.Mutex lock sync.Mutex
stopped bool stopped bool
stopCh chan struct{} stopCh chan struct{}
process *os.Process exitedCh chan struct{}
process *os.Process
} }
// Start starts the daemon and keeps it running. // Start starts the daemon and keeps it running.
@ -64,17 +69,21 @@ func (p *Daemon) Start() error {
// Setup our stop channel // Setup our stop channel
stopCh := make(chan struct{}) stopCh := make(chan struct{})
exitedCh := make(chan struct{})
p.stopCh = stopCh p.stopCh = stopCh
p.exitedCh = exitedCh
// Start the loop. // Start the loop.
go p.keepAlive(stopCh) go p.keepAlive(stopCh, exitedCh)
return nil return nil
} }
// keepAlive starts and keeps the configured process alive until it // keepAlive starts and keeps the configured process alive until it
// is stopped via Stop. // is stopped via Stop.
func (p *Daemon) keepAlive(stopCh <-chan struct{}) { func (p *Daemon) keepAlive(stopCh <-chan struct{}, exitedCh chan<- struct{}) {
defer close(exitedCh)
p.lock.Lock() p.lock.Lock()
process := p.process process := p.process
p.lock.Unlock() p.lock.Unlock()
@ -196,24 +205,42 @@ func (p *Daemon) start() (*os.Process, error) {
// then this returns no error. // then this returns no error.
func (p *Daemon) Stop() error { func (p *Daemon) Stop() error {
p.lock.Lock() p.lock.Lock()
defer p.lock.Unlock()
// If we're already stopped or never started, then no problem. // If we're already stopped or never started, then no problem.
if p.stopped || p.process == nil { if p.stopped || p.process == nil {
// In the case we never even started, calling Stop makes it so // In the case we never even started, calling Stop makes it so
// that we can't ever start in the future, either, so mark this. // that we can't ever start in the future, either, so mark this.
p.stopped = true p.stopped = true
p.lock.Unlock()
return nil return nil
} }
// Note that we've stopped // Note that we've stopped
p.stopped = true p.stopped = true
close(p.stopCh) close(p.stopCh)
process := p.process
p.lock.Unlock()
err := p.process.Signal(os.Interrupt) gracefulWait := p.gracefulWait
if gracefulWait == 0 {
gracefulWait = 5 * time.Second
}
return err // First, try a graceful stop
//return p.Command.Process.Kill() err := process.Signal(os.Interrupt)
if err == nil {
select {
case <-p.exitedCh:
// Success!
return nil
case <-time.After(gracefulWait):
// Interrupt didn't work
}
}
// Graceful didn't work, forcibly kill
return process.Kill()
} }
// Equal implements Proxy to check for equality. // Equal implements Proxy to check for equality.

View File

@ -6,6 +6,7 @@ import (
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"testing" "testing"
"time"
"github.com/hashicorp/consul/testutil/retry" "github.com/hashicorp/consul/testutil/retry"
"github.com/hashicorp/go-uuid" "github.com/hashicorp/go-uuid"
@ -99,6 +100,48 @@ func TestDaemonRestart(t *testing.T) {
waitFile() waitFile()
} }
func TestDaemonStop_kill(t *testing.T) {
t.Parallel()
require := require.New(t)
td, closer := testTempDir(t)
defer closer()
path := filepath.Join(td, "file")
d := &Daemon{
Command: helperProcess("stop-kill", path),
ProxyToken: "hello",
Logger: testLogger,
gracefulWait: 200 * time.Millisecond,
}
require.NoError(d.Start())
// Wait for the file to exist
retry.Run(t, func(r *retry.R) {
_, err := os.Stat(path)
if err == nil {
return
}
r.Fatalf("error: %s", err)
})
// Stop the process
require.NoError(d.Stop())
// State the file so that we can get the mtime
fi, err := os.Stat(path)
require.NoError(err)
mtime := fi.ModTime()
// The mtime shouldn't change
time.Sleep(100 * time.Millisecond)
fi, err = os.Stat(path)
require.NoError(err)
require.Equal(mtime, fi.ModTime())
}
func TestDaemonEqual(t *testing.T) { func TestDaemonEqual(t *testing.T) {
cases := []struct { cases := []struct {
Name string Name string

View File

@ -120,6 +120,24 @@ func TestHelperProcess(t *testing.T) {
} }
} }
case "stop-kill":
// Setup listeners so it is ignored
ch := make(chan os.Signal, 1)
signal.Notify(ch, os.Interrupt)
defer signal.Stop(ch)
path := args[0]
data := []byte(os.Getenv(EnvProxyToken))
for {
if err := ioutil.WriteFile(path, data, 0644); err != nil {
t.Fatalf("err: %s", err)
}
time.Sleep(25 * time.Millisecond)
}
// Run forever
<-make(chan struct{})
default: default:
fmt.Fprintf(os.Stderr, "Unknown command: %q\n", cmd) fmt.Fprintf(os.Stderr, "Unknown command: %q\n", cmd)
os.Exit(2) os.Exit(2)