mirror of https://github.com/status-im/consul.git
command/lock: Add -child-exitcode, return 2 on child error (#3329)
* Exit 2 if -child-exit-code and the child returned with an error. * There is no platform independent way to check the exact return code of * the child, so on error always return 2. * Closes #947 * Closes #1503
This commit is contained in:
parent
9f10566314
commit
08a8d9f2a7
|
@ -41,8 +41,7 @@ type LockCommand struct {
|
|||
|
||||
child *os.Process
|
||||
childLock sync.Mutex
|
||||
|
||||
verbose bool
|
||||
verbose bool
|
||||
}
|
||||
|
||||
func (c *LockCommand) Help() string {
|
||||
|
@ -75,14 +74,18 @@ func (c *LockCommand) Run(args []string) int {
|
|||
}
|
||||
|
||||
func (c *LockCommand) run(args []string, lu **LockUnlock) int {
|
||||
var childDone chan struct{}
|
||||
var limit int
|
||||
var monitorRetry int
|
||||
var name string
|
||||
var passStdin bool
|
||||
var propagateChildCode bool
|
||||
var timeout time.Duration
|
||||
|
||||
f := c.BaseCommand.NewFlagSet(c)
|
||||
f.BoolVar(&propagateChildCode, "child-exit-code", false,
|
||||
"Exit 2 if the child process exited with an error if this is true, "+
|
||||
"otherwise this doesn't propagate an error from the child. The "+
|
||||
"default value is false.")
|
||||
f.IntVar(&limit, "n", 1,
|
||||
"Optional limit on the number of concurrent lock holders. The underlying "+
|
||||
"implementation switches from a lock to a semaphore when the value is "+
|
||||
|
@ -100,7 +103,7 @@ func (c *LockCommand) run(args []string, lu **LockUnlock) int {
|
|||
"Pass stdin to the child process.")
|
||||
f.DurationVar(&timeout, "timeout", 0,
|
||||
"Maximum amount of time to wait to acquire the lock, specified as a "+
|
||||
"timestamp like \"1s\" or \"3h\". The default value is 0.")
|
||||
"duration like \"1s\" or \"3h\". The default value is 0.")
|
||||
f.BoolVar(&c.verbose, "verbose", false,
|
||||
"Enable verbose (debugging) output.")
|
||||
|
||||
|
@ -185,6 +188,8 @@ func (c *LockCommand) run(args []string, lu **LockUnlock) int {
|
|||
}
|
||||
|
||||
// Check if we were shutdown but managed to still acquire the lock
|
||||
var childCode int
|
||||
var childErr chan error
|
||||
select {
|
||||
case <-c.ShutdownCh:
|
||||
c.UI.Error("Shutdown triggered during lock acquisition")
|
||||
|
@ -193,11 +198,9 @@ func (c *LockCommand) run(args []string, lu **LockUnlock) int {
|
|||
}
|
||||
|
||||
// Start the child process
|
||||
childDone = make(chan struct{})
|
||||
childErr = make(chan error, 1)
|
||||
go func() {
|
||||
if err := c.startChild(script, childDone, passStdin); err != nil {
|
||||
c.UI.Error(fmt.Sprintf("%s", err))
|
||||
}
|
||||
childErr <- c.startChild(script, passStdin)
|
||||
}()
|
||||
|
||||
// Monitor for shutdown, child termination, or lock loss
|
||||
|
@ -210,7 +213,10 @@ func (c *LockCommand) run(args []string, lu **LockUnlock) int {
|
|||
if c.verbose {
|
||||
c.UI.Info("Lock lost, killing child")
|
||||
}
|
||||
case <-childDone:
|
||||
case err := <-childErr:
|
||||
if err != nil {
|
||||
childCode = 2
|
||||
}
|
||||
if c.verbose {
|
||||
c.UI.Info("Child terminated, releasing lock")
|
||||
}
|
||||
|
@ -220,8 +226,9 @@ func (c *LockCommand) run(args []string, lu **LockUnlock) int {
|
|||
// Prevent starting a new child. The lock is never released
|
||||
// after this point.
|
||||
c.childLock.Lock()
|
||||
|
||||
// Kill any existing child
|
||||
if err := c.killChild(childDone); err != nil {
|
||||
if err := c.killChild(childErr); err != nil {
|
||||
c.UI.Error(fmt.Sprintf("%s", err))
|
||||
}
|
||||
|
||||
|
@ -243,6 +250,13 @@ RELEASE:
|
|||
} else if c.verbose {
|
||||
c.UI.Info("Cleanup succeeded")
|
||||
}
|
||||
|
||||
// If we detected an error from the child process then we propagate
|
||||
// that.
|
||||
if propagateChildCode {
|
||||
return childCode
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
|
@ -321,8 +335,7 @@ func (c *LockCommand) setupSemaphore(client *api.Client, limit int, prefix, name
|
|||
|
||||
// startChild is a long running routine used to start and
|
||||
// wait for the child process to exit.
|
||||
func (c *LockCommand) startChild(script string, doneCh chan struct{}, passStdin bool) error {
|
||||
defer close(doneCh)
|
||||
func (c *LockCommand) startChild(script string, passStdin bool) error {
|
||||
if c.verbose {
|
||||
c.UI.Info(fmt.Sprintf("Starting handler '%s'", script))
|
||||
}
|
||||
|
@ -373,7 +386,7 @@ func (c *LockCommand) startChild(script string, doneCh chan struct{}, passStdin
|
|||
// termination.
|
||||
// On Windows, the child is always hard terminated with a SIGKILL, even
|
||||
// on the first attempt.
|
||||
func (c *LockCommand) killChild(childDone chan struct{}) error {
|
||||
func (c *LockCommand) killChild(childErr chan error) error {
|
||||
// Get the child process
|
||||
child := c.child
|
||||
|
||||
|
@ -395,7 +408,7 @@ func (c *LockCommand) killChild(childDone chan struct{}) error {
|
|||
|
||||
// Wait for termination, or until a timeout
|
||||
select {
|
||||
case <-childDone:
|
||||
case <-childErr:
|
||||
if c.verbose {
|
||||
c.UI.Info("Child terminated")
|
||||
}
|
||||
|
|
|
@ -257,3 +257,33 @@ func TestLockCommand_MonitorRetry_Semaphore_Arg(t *testing.T) {
|
|||
t.Fatalf("bad: %#v", opts)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLockCommand_ChildExitCode(t *testing.T) {
|
||||
t.Parallel()
|
||||
a := agent.NewTestAgent(t.Name(), nil)
|
||||
defer a.Shutdown()
|
||||
|
||||
t.Run("clean exit", func(t *testing.T) {
|
||||
_, c := testLockCommand(t)
|
||||
args := []string{"-http-addr=" + a.HTTPAddr(), "-child-exit-code", "test/prefix", "exit 0"}
|
||||
if got, want := c.Run(args), 0; got != want {
|
||||
t.Fatalf("got %d want %d", got, want)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("error exit", func(t *testing.T) {
|
||||
_, c := testLockCommand(t)
|
||||
args := []string{"-http-addr=" + a.HTTPAddr(), "-child-exit-code", "test/prefix", "exit 1"}
|
||||
if got, want := c.Run(args), 2; got != want {
|
||||
t.Fatalf("got %d want %d", got, want)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("not propagated", func(t *testing.T) {
|
||||
_, c := testLockCommand(t)
|
||||
args := []string{"-http-addr=" + a.HTTPAddr(), "test/prefix", "exit 1"}
|
||||
if got, want := c.Run(args), 0; got != want {
|
||||
t.Fatalf("got %d want %d", got, want)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
|
@ -52,6 +52,10 @@ Windows has no POSIX compatible notion for `SIGTERM`.
|
|||
|
||||
#### Command Options
|
||||
|
||||
* `-child-exit-code` - Exit 2 if the child process exited with an error
|
||||
if this is true, otherwise this doesn't propagate an error from the
|
||||
child. The default value is false.
|
||||
|
||||
* `-monitor-retry` - Retry up to this number of times if Consul returns a 500 error
|
||||
while monitoring the lock. This allows riding out brief periods of unavailability
|
||||
without causing leader elections, but increases the amount of time required
|
||||
|
@ -67,6 +71,9 @@ Windows has no POSIX compatible notion for `SIGTERM`.
|
|||
|
||||
* `-pass-stdin` - Pass stdin to child process.
|
||||
|
||||
* `timeout` - Maximum amount of time to wait to acquire the lock, specified
|
||||
as a duration like `1s` or `3h`. The default value is 0.
|
||||
|
||||
* `-try` - Attempt to acquire the lock up to the given timeout. The timeout is a
|
||||
positive decimal number, with unit suffix, such as "500ms". Valid time units
|
||||
are "ns", "us" (or "µs"), "ms", "s", "m", "h".
|
||||
|
|
Loading…
Reference in New Issue